From 1b932ceddd19de1cdf2a86f1fca77c37dad758cc Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 10 Jan 2023 10:54:04 +0800 Subject: iommu: Remove detach_dev callbacks The iommu core calls the driver's detach_dev domain op callback only after a device has finished being assigned to user space and iommu_group_release_dma_owner() is called to return the device to the kernel, at which point the iommu core wants to attach the default domain to the device but the driver didn't provide one. In other words, if an iommu driver provides default domain support, its .detach_dev callback will never be called. Remove the detach_dev callbacks from those IOMMU drivers that support a default domain. Reviewed-by: Jason Gunthorpe Reviewed-by: Sven Peter # apple-dart Acked-by: Chunyan Zhang # sprd Reviewed-by: Vasant Hegde # amd Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230110025408.667767-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 26 -------------------------- drivers/iommu/apple-dart.c | 24 ------------------------ drivers/iommu/arm/arm-smmu/qcom_iommu.c | 23 ----------------------- drivers/iommu/exynos-iommu.c | 1 - drivers/iommu/ipmmu-vmsa.c | 16 ---------------- drivers/iommu/mtk_iommu.c | 9 --------- drivers/iommu/rockchip-iommu.c | 1 - drivers/iommu/sprd-iommu.c | 16 ---------------- drivers/iommu/sun50i-iommu.c | 1 - 9 files changed, 117 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index cbeaab55c0db..92319c9b877c 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2126,31 +2126,6 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) protection_domain_free(domain); } -static void amd_iommu_detach_device(struct iommu_domain *dom, - struct device *dev) -{ - struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); - struct amd_iommu *iommu; - - if (!check_device(dev)) - return; - - if (dev_data->domain != NULL) - detach_device(dev); - - iommu = rlookup_amd_iommu(dev); - if (!iommu) - return; - -#ifdef CONFIG_IRQ_REMAP - if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && - (dom->type == IOMMU_DOMAIN_UNMANAGED)) - dev_data->use_vapic = 0; -#endif - - iommu_completion_wait(iommu); -} - static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev) { @@ -2416,7 +2391,6 @@ const struct iommu_ops amd_iommu_ops = { .def_domain_type = amd_iommu_def_domain_type, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = amd_iommu_attach_device, - .detach_dev = amd_iommu_detach_device, .map_pages = amd_iommu_map_pages, .unmap_pages = amd_iommu_unmap_pages, .iotlb_sync_map = amd_iommu_iotlb_sync_map, diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 4f4a323be0d0..96843d468801 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -486,13 +486,6 @@ static int apple_dart_domain_add_streams(struct apple_dart_domain *domain, true); } -static int apple_dart_domain_remove_streams(struct apple_dart_domain *domain, - struct apple_dart_master_cfg *cfg) -{ - return apple_dart_mod_streams(domain->stream_maps, cfg->stream_maps, - false); -} - static int apple_dart_attach_dev(struct iommu_domain *domain, struct device *dev) { @@ -535,22 +528,6 @@ static int apple_dart_attach_dev(struct iommu_domain *domain, return ret; } -static void apple_dart_detach_dev(struct iommu_domain *domain, - struct device *dev) -{ - int i; - struct apple_dart_stream_map *stream_map; - struct apple_dart_master_cfg *cfg = 
dev_iommu_priv_get(dev); - struct apple_dart_domain *dart_domain = to_dart_domain(domain); - - for_each_stream_map(i, cfg, stream_map) - apple_dart_hw_disable_dma(stream_map); - - if (domain->type == IOMMU_DOMAIN_DMA || - domain->type == IOMMU_DOMAIN_UNMANAGED) - apple_dart_domain_remove_streams(dart_domain, cfg); -} - static struct iommu_device *apple_dart_probe_device(struct device *dev) { struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); @@ -780,7 +757,6 @@ static const struct iommu_ops apple_dart_iommu_ops = { .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = apple_dart_attach_dev, - .detach_dev = apple_dart_detach_dev, .map_pages = apple_dart_map_pages, .unmap_pages = apple_dart_unmap_pages, .flush_iotlb_all = apple_dart_flush_iotlb_all, diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 270c3d9128ba..d7be3adee426 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -387,28 +387,6 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev return 0; } -static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *dev) -{ - struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct qcom_iommu_dev *qcom_iommu = to_iommu(dev); - unsigned i; - - if (WARN_ON(!qcom_domain->iommu)) - return; - - pm_runtime_get_sync(qcom_iommu->dev); - for (i = 0; i < fwspec->num_ids; i++) { - struct qcom_iommu_ctx *ctx = to_ctx(qcom_domain, fwspec->ids[i]); - - /* Disable the context bank: */ - iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); - - ctx->domain = NULL; - } - pm_runtime_put_sync(qcom_iommu->dev); -} - static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -583,7 +561,6 @@ static const struct iommu_ops qcom_iommu_ops = { .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = qcom_iommu_attach_dev, - .detach_dev = qcom_iommu_detach_dev, .map_pages = qcom_iommu_map, .unmap_pages = qcom_iommu_unmap, .flush_iotlb_all = qcom_iommu_flush_iotlb_all, diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index b0cde2211987..29ec713e8a21 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1404,7 +1404,6 @@ static const struct iommu_ops exynos_iommu_ops = { .of_xlate = exynos_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = exynos_iommu_attach_device, - .detach_dev = exynos_iommu_detach_device, .map = exynos_iommu_map, .unmap = exynos_iommu_unmap, .iova_to_phys = exynos_iommu_iova_to_phys, diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index a003bd5fc65c..3112822ac7be 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -643,21 +643,6 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, return 0; } -static void ipmmu_detach_device(struct iommu_domain *io_domain, - struct device *dev) -{ - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); - unsigned int i; - - for (i = 0; i < fwspec->num_ids; ++i) - ipmmu_utlb_disable(domain, fwspec->ids[i]); - - /* - * TODO: Optimize by disabling the context when no device is attached. 
- */ -} - static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -876,7 +861,6 @@ static const struct iommu_ops ipmmu_ops = { .of_xlate = ipmmu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = ipmmu_attach_device, - .detach_dev = ipmmu_detach_device, .map_pages = ipmmu_map, .unmap_pages = ipmmu_unmap, .flush_iotlb_all = ipmmu_flush_iotlb_all, diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 2badd6acfb23..d5a4955910ff 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -710,14 +710,6 @@ err_unlock: return ret; } -static void mtk_iommu_detach_device(struct iommu_domain *domain, - struct device *dev) -{ - struct mtk_iommu_data *data = dev_iommu_priv_get(dev); - - mtk_iommu_config(data, dev, false, 0); -} - static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -946,7 +938,6 @@ static const struct iommu_ops mtk_iommu_ops = { .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = mtk_iommu_attach_device, - .detach_dev = mtk_iommu_detach_device, .map_pages = mtk_iommu_map, .unmap_pages = mtk_iommu_unmap, .flush_iotlb_all = mtk_iommu_flush_iotlb_all, diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index a68eadd64f38..f30db22ea5d7 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1192,7 +1192,6 @@ static const struct iommu_ops rk_iommu_ops = { .of_xlate = rk_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = rk_iommu_attach_device, - .detach_dev = rk_iommu_detach_device, .map = rk_iommu_map, .unmap = rk_iommu_unmap, .iova_to_phys = rk_iommu_iova_to_phys, diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 219bfa11f7f4..ae94d74b73f4 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -255,21 +255,6 @@ static int sprd_iommu_attach_device(struct iommu_domain *domain, return 0; } -static void sprd_iommu_detach_device(struct iommu_domain *domain, - struct device *dev) -{ - struct sprd_iommu_domain *dom = to_sprd_domain(domain); - struct sprd_iommu_device *sdev = dom->sdev; - size_t pgt_size = sprd_iommu_pgt_size(domain); - - if (!sdev) - return; - - dma_free_coherent(sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa); - sprd_iommu_hw_en(sdev, false); - dom->sdev = NULL; -} - static int sprd_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -414,7 +399,6 @@ static const struct iommu_ops sprd_iommu_ops = { .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = sprd_iommu_attach_device, - .detach_dev = sprd_iommu_detach_device, .map_pages = sprd_iommu_map, .unmap_pages = sprd_iommu_unmap, .iotlb_sync_map = sprd_iommu_sync_map, diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 5b585eace3d4..2d993d0cea7d 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -834,7 +834,6 @@ static const struct iommu_ops sun50i_iommu_ops = { .probe_device = sun50i_iommu_probe_device, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = sun50i_iommu_attach_device, - .detach_dev = sun50i_iommu_detach_device, .flush_iotlb_all = 
sun50i_iommu_flush_iotlb_all, .iotlb_sync_map = sun50i_iommu_iotlb_sync_map, .iotlb_sync = sun50i_iommu_iotlb_sync, -- cgit v1.2.3 From 6caeb33fa986151f745fc62190bc28a593b8a0d2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 10 Jan 2023 10:54:05 +0800 Subject: iommu: Add set_platform_dma_ops iommu ops When VFIO finishes assigning a device to user space and calls iommu_group_release_dma_owner() to return the device to the kernel, the IOMMU core will attach the default domain to the device. Unfortunately, some IOMMU drivers don't support a default domain, so in the end the core calls .detach_dev instead. Add a set_platform_dma_ops iommu op to make it clear that what this path does is return control back to the platform DMA ops. Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230110025408.667767-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 28 ++++++++++++++++++++++++---- include/linux/iommu.h | 4 ++++ 2 files changed, 28 insertions(+), 4 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index de91dd88705b..1c8b2c7678f7 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2163,6 +2163,16 @@ static int iommu_group_do_detach_device(struct device *dev, void *data) return 0; } +static int iommu_group_do_set_platform_dma(struct device *dev, void *data) +{ + const struct iommu_ops *ops = dev_iommu_ops(dev); + + if (!WARN_ON(!ops->set_platform_dma_ops)) + ops->set_platform_dma_ops(dev); + + return 0; +} + static int __iommu_group_set_domain(struct iommu_group *group, struct iommu_domain *new_domain) { @@ -2177,10 +2187,20 @@ static int __iommu_group_set_domain(struct iommu_group *group, * platform specific behavior. */ if (!new_domain) { - if (WARN_ON(!group->domain->ops->detach_dev)) - return -EINVAL; - __iommu_group_for_each_dev(group, group->domain, - iommu_group_do_detach_device); + struct group_device *grp_dev; + + grp_dev = list_first_entry(&group->devices, + struct group_device, list); + + if (dev_iommu_ops(grp_dev->dev)->set_platform_dma_ops) + __iommu_group_for_each_dev(group, NULL, + iommu_group_do_set_platform_dma); + else if (group->domain->ops->detach_dev) + __iommu_group_for_each_dev(group, group->domain, + iommu_group_do_detach_device); + else + WARN_ON_ONCE(1); + group->domain = NULL; return 0; } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 46e1347bfa22..7b3e3775b069 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -228,6 +228,9 @@ struct iommu_iotlb_gather { * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain + * @set_platform_dma_ops: Returning control back to the platform DMA ops. This op + * is to support old IOMMU drivers, new drivers should use + * default domains, and the common IOMMU DMA ops. 
* @device_group: find iommu group for a particular device * @get_resv_regions: Request list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping @@ -256,6 +259,7 @@ struct iommu_ops { struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); + void (*set_platform_dma_ops)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); /* Request/Free a list of reserved regions for a device */ -- cgit v1.2.3 From c1fe9119ee707459421d8f7387e803b0ea78c21b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 10 Jan 2023 10:54:06 +0800 Subject: iommu: Add set_platform_dma_ops callbacks For those IOMMU drivers that don't provide default domain support, add an implementation of the set_platform_dma_ops callback so that the IOMMU core can return DMA control to the platform DMA ops. With set_platform_dma_ops implemented, there is no longer any need for detach_dev; remove it to avoid dead code. Reviewed-by: Jason Gunthorpe Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230110025408.667767-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu_domain.c | 6 +++--- drivers/iommu/msm_iommu.c | 6 +++--- drivers/iommu/mtk_iommu_v1.c | 4 ++-- drivers/iommu/omap-iommu.c | 6 +++--- drivers/iommu/s390-iommu.c | 7 ++----- drivers/iommu/tegra-gart.c | 6 +++--- drivers/iommu/tegra-smmu.c | 5 +++-- 7 files changed, 19 insertions(+), 21 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 4408ac3c49b6..e123161c211a 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -283,9 +283,9 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, return ret; } -static void fsl_pamu_detach_device(struct iommu_domain *domain, - struct device *dev) +static void fsl_pamu_set_platform_dma(struct device *dev) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); const u32 *prop; int len; @@ -452,9 +452,9 @@ static const struct iommu_ops fsl_pamu_ops = { .domain_alloc = fsl_pamu_domain_alloc, .probe_device = fsl_pamu_probe_device, .device_group = fsl_pamu_device_group, + .set_platform_dma_ops = fsl_pamu_set_platform_dma; .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = fsl_pamu_attach_device, - .detach_dev = fsl_pamu_detach_device, .iova_to_phys = fsl_pamu_iova_to_phys, .free = fsl_pamu_domain_free, } diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index c60624910872..454f6331c889 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -443,9 +443,9 @@ fail: return ret; } -static void msm_iommu_detach_dev(struct iommu_domain *domain, - struct device *dev) +static void msm_iommu_set_platform_dma(struct device *dev) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct msm_priv *priv = to_msm_priv(domain); unsigned long flags; struct msm_iommu_dev *iommu; @@ -678,11 +678,11 @@ static struct iommu_ops msm_iommu_ops = { .domain_alloc = msm_iommu_domain_alloc, .probe_device = msm_iommu_probe_device, .device_group = generic_device_group, + .set_platform_dma_ops = msm_iommu_set_platform_dma, .pgsize_bitmap = MSM_IOMMU_PGSIZES, .of_xlate = qcom_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = msm_iommu_attach_dev, - .detach_dev = msm_iommu_detach_dev, .map_pages = 
msm_iommu_map, .unmap_pages = msm_iommu_unmap, /* diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 69682ee068d2..78d0a84c704f 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -319,7 +319,7 @@ static int mtk_iommu_v1_attach_device(struct iommu_domain *domain, struct device return 0; } -static void mtk_iommu_v1_detach_device(struct iommu_domain *domain, struct device *dev) +static void mtk_iommu_v1_set_platform_dma(struct device *dev) { struct mtk_iommu_v1_data *data = dev_iommu_priv_get(dev); @@ -585,10 +585,10 @@ static const struct iommu_ops mtk_iommu_v1_ops = { .def_domain_type = mtk_iommu_v1_def_domain_type, .device_group = generic_device_group, .pgsize_bitmap = MT2701_IOMMU_PAGE_SIZE, + .set_platform_dma_ops = mtk_iommu_v1_set_platform_dma, .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = mtk_iommu_v1_attach_device, - .detach_dev = mtk_iommu_v1_detach_device, .map_pages = mtk_iommu_v1_map, .unmap_pages = mtk_iommu_v1_unmap, .iova_to_phys = mtk_iommu_v1_iova_to_phys, diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 2fd7702c6709..3ab078112a7c 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1556,9 +1556,9 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, omap_domain->dev = NULL; } -static void omap_iommu_detach_dev(struct iommu_domain *domain, - struct device *dev) +static void omap_iommu_set_platform_dma(struct device *dev) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct omap_iommu_domain *omap_domain = to_omap_domain(domain); spin_lock(&omap_domain->lock); @@ -1737,10 +1737,10 @@ static const struct iommu_ops omap_iommu_ops = { .probe_device = omap_iommu_probe_device, .release_device = omap_iommu_release_device, .device_group = omap_iommu_device_group, + .set_platform_dma_ops = omap_iommu_set_platform_dma, .pgsize_bitmap = OMAP_IOMMU_PGSIZES, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = omap_iommu_attach_dev, - .detach_dev = omap_iommu_detach_dev, .map = omap_iommu_map, .unmap = omap_iommu_unmap, .iova_to_phys = omap_iommu_iova_to_phys, diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index ed33c6cce083..5591dab99446 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -144,13 +144,10 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, return 0; } -static void s390_iommu_detach_device(struct iommu_domain *domain, - struct device *dev) +static void s390_iommu_set_platform_dma(struct device *dev) { struct zpci_dev *zdev = to_zpci_dev(dev); - WARN_ON(zdev->s390_domain != to_s390_domain(domain)); - __s390_iommu_detach_device(zdev); zpci_dma_init_device(zdev); } @@ -435,11 +432,11 @@ static const struct iommu_ops s390_iommu_ops = { .probe_device = s390_iommu_probe_device, .release_device = s390_iommu_release_device, .device_group = generic_device_group, + .set_platform_dma_ops = s390_iommu_set_platform_dma, .pgsize_bitmap = SZ_4K, .get_resv_regions = s390_iommu_get_resv_regions, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = s390_iommu_attach_device, - .detach_dev = s390_iommu_detach_device, .map_pages = s390_iommu_map_pages, .unmap_pages = s390_iommu_unmap_pages, .flush_iotlb_all = s390_iommu_flush_iotlb_all, diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index ed53279d1106..a482ff838b53 100644 --- a/drivers/iommu/tegra-gart.c +++ 
b/drivers/iommu/tegra-gart.c @@ -124,9 +124,9 @@ static int gart_iommu_attach_dev(struct iommu_domain *domain, return ret; } -static void gart_iommu_detach_dev(struct iommu_domain *domain, - struct device *dev) +static void gart_iommu_set_platform_dma(struct device *dev) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct gart_device *gart = gart_handle; spin_lock(&gart->dom_lock); @@ -270,11 +270,11 @@ static const struct iommu_ops gart_iommu_ops = { .domain_alloc = gart_iommu_domain_alloc, .probe_device = gart_iommu_probe_device, .device_group = generic_device_group, + .set_platform_dma_ops = gart_iommu_set_platform_dma, .pgsize_bitmap = GART_IOMMU_PGSIZES, .of_xlate = gart_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = gart_iommu_attach_dev, - .detach_dev = gart_iommu_detach_dev, .map = gart_iommu_map, .unmap = gart_iommu_unmap, .iova_to_phys = gart_iommu_iova_to_phys, diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 5b1af40221ec..4c4ac22d5fb1 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -511,8 +511,9 @@ disable: return err; } -static void tegra_smmu_detach_dev(struct iommu_domain *domain, struct device *dev) +static void tegra_smmu_set_platform_dma(struct device *dev) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct tegra_smmu_as *as = to_smmu_as(domain); struct tegra_smmu *smmu = as->smmu; @@ -965,11 +966,11 @@ static const struct iommu_ops tegra_smmu_ops = { .domain_alloc = tegra_smmu_domain_alloc, .probe_device = tegra_smmu_probe_device, .device_group = tegra_smmu_device_group, + .set_platform_dma_ops = tegra_smmu_set_platform_dma, .of_xlate = tegra_smmu_of_xlate, .pgsize_bitmap = SZ_4K, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = tegra_smmu_attach_dev, - .detach_dev = tegra_smmu_detach_dev, .map = tegra_smmu_map, .unmap = tegra_smmu_unmap, .iova_to_phys = tegra_smmu_iova_to_phys, -- cgit v1.2.3 From dd8a25c557e109f868430bd2e3e8f394cb40eaa7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Jan 2023 10:54:07 +0800 Subject: iommu: Remove deferred attach check from __iommu_detach_device() At the moment, __iommu_detach_device() is only called via call chains that run after the device driver is attached - e.g. via explicit attach APIs called by the device driver. Commit bd421264ed30 ("iommu: Fix deferred domain attachment") removed the deferred domain attachment check from the __iommu_attach_device() path, so detach should likewise work unconditionally in the __iommu_detach_device() path. It actually looks like a bug that we were blocking detach on these paths, since the attach was unconditional and the caller is going to free the (probably) UNMANAGED domain once this returns. The only places we should be testing for deferred attach are the initial point where the DMA device is linked to the group, and then again during the DMA API calls. 
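As a condensed sketch (not part of the patch; both fragments appear in the diff below), the deferral is recorded once when the DMA device is first linked to its group, and consumed only by the DMA API path:

	/* First attach: record the deferral instead of re-testing it everywhere. */
	if (iommu_is_attach_deferred(dev)) {
		dev->iommu->attach_deferred = 1;	/* completed later by the DMA API */
		return 0;
	}
	return __iommu_attach_device(domain, dev);

	/* DMA API path: complete the deferred attach, if one was recorded. */
	int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
	{
		if (dev->iommu && dev->iommu->attach_deferred)
			return __iommu_attach_device(domain, dev);
		return 0;
	}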
Signed-off-by: Jason Gunthorpe Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230110025408.667767-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 70 ++++++++++++++++++++++++++------------------------- include/linux/iommu.h | 2 ++ 2 files changed, 38 insertions(+), 34 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 1c8b2c7678f7..85ae20c8ff5e 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -371,6 +371,30 @@ err_unlock: return ret; } +static bool iommu_is_attach_deferred(struct device *dev) +{ + const struct iommu_ops *ops = dev_iommu_ops(dev); + + if (ops->is_attach_deferred) + return ops->is_attach_deferred(dev); + + return false; +} + +static int iommu_group_do_dma_first_attach(struct device *dev, void *data) +{ + struct iommu_domain *domain = data; + + lockdep_assert_held(&dev->iommu_group->mutex); + + if (iommu_is_attach_deferred(dev)) { + dev->iommu->attach_deferred = 1; + return 0; + } + + return __iommu_attach_device(domain, dev); +} + int iommu_probe_device(struct device *dev) { const struct iommu_ops *ops; @@ -401,7 +425,7 @@ int iommu_probe_device(struct device *dev) * attach the default domain. */ if (group->default_domain && !group->owner) { - ret = __iommu_attach_device(group->default_domain, dev); + ret = iommu_group_do_dma_first_attach(dev, group->default_domain); if (ret) { mutex_unlock(&group->mutex); iommu_group_put(group); @@ -947,16 +971,6 @@ out: return ret; } -static bool iommu_is_attach_deferred(struct device *dev) -{ - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (ops->is_attach_deferred) - return ops->is_attach_deferred(dev); - - return false; -} - /** * iommu_group_add_device - add a device to an iommu group * @group: the group into which to add the device (reference should be held) @@ -1009,8 +1023,8 @@ rename: mutex_lock(&group->mutex); list_add_tail(&device->list, &group->devices); - if (group->domain && !iommu_is_attach_deferred(dev)) - ret = __iommu_attach_device(group->domain, dev); + if (group->domain) + ret = iommu_group_do_dma_first_attach(dev, group->domain); mutex_unlock(&group->mutex); if (ret) goto err_put_group; @@ -1776,21 +1790,10 @@ static void probe_alloc_default_domain(struct bus_type *bus, } -static int iommu_group_do_dma_attach(struct device *dev, void *data) -{ - struct iommu_domain *domain = data; - int ret = 0; - - if (!iommu_is_attach_deferred(dev)) - ret = __iommu_attach_device(domain, dev); - - return ret; -} - -static int __iommu_group_dma_attach(struct iommu_group *group) +static int __iommu_group_dma_first_attach(struct iommu_group *group) { return __iommu_group_for_each_dev(group, group->default_domain, - iommu_group_do_dma_attach); + iommu_group_do_dma_first_attach); } static int iommu_group_do_probe_finalize(struct device *dev, void *data) @@ -1855,7 +1858,7 @@ int bus_iommu_probe(struct bus_type *bus) iommu_group_create_direct_mappings(group); - ret = __iommu_group_dma_attach(group); + ret = __iommu_group_dma_first_attach(group); mutex_unlock(&group->mutex); @@ -1987,9 +1990,11 @@ static int __iommu_attach_device(struct iommu_domain *domain, return -ENODEV; ret = domain->ops->attach_dev(domain, dev); - if (!ret) - trace_attach_device_to_domain(dev); - return ret; + if (ret) + return ret; + dev->iommu->attach_deferred = 0; + trace_attach_device_to_domain(dev); + return 0; } /** @@ -2034,7 +2039,7 @@ EXPORT_SYMBOL_GPL(iommu_attach_device); int iommu_deferred_attach(struct device *dev, struct 
iommu_domain *domain) { - if (iommu_is_attach_deferred(dev)) + if (dev->iommu && dev->iommu->attach_deferred) return __iommu_attach_device(domain, dev); return 0; @@ -2043,9 +2048,6 @@ int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) static void __iommu_detach_device(struct iommu_domain *domain, struct device *dev) { - if (iommu_is_attach_deferred(dev)) - return; - domain->ops->detach_dev(domain, dev); trace_detach_device_from_domain(dev); } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 7b3e3775b069..0d10566b3cb2 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -405,6 +405,7 @@ struct iommu_fault_param { * @iommu_dev: IOMMU device this device is linked to * @priv: IOMMU Driver private data * @max_pasids: number of PASIDs this device can consume + * @attach_deferred: the dma domain attachment is deferred * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; @@ -417,6 +418,7 @@ struct dev_iommu { struct iommu_device *iommu_dev; void *priv; u32 max_pasids; + u32 attach_deferred:1; }; int iommu_device_register(struct iommu_device *iommu, -- cgit v1.2.3 From 8f9930fa016134ea07db4775ec596b16c3d03f05 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 10 Jan 2023 10:54:08 +0800 Subject: iommu: Remove detach_dev callback The detach_dev callback of domain ops is not called in the IOMMU core. Remove this callback to avoid dead code. The trace event for detaching domain from device is removed accordingly. Reviewed-by: Jason Gunthorpe Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230110025408.667767-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-traces.c | 1 - drivers/iommu/iommu.c | 36 ++++-------------------------------- include/linux/iommu.h | 2 -- include/trace/events/iommu.h | 7 ------- 4 files changed, 4 insertions(+), 42 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/iommu-traces.c b/drivers/iommu/iommu-traces.c index 1e9ca7789de1..23416bf76df9 100644 --- a/drivers/iommu/iommu-traces.c +++ b/drivers/iommu/iommu-traces.c @@ -18,7 +18,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(remove_device_from_group); /* iommu_device_event */ EXPORT_TRACEPOINT_SYMBOL_GPL(attach_device_to_domain); -EXPORT_TRACEPOINT_SYMBOL_GPL(detach_device_from_domain); /* iommu_map_unmap */ EXPORT_TRACEPOINT_SYMBOL_GPL(map); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 85ae20c8ff5e..9135540d7d59 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2045,13 +2045,6 @@ int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) return 0; } -static void __iommu_detach_device(struct iommu_domain *domain, - struct device *dev) -{ - domain->ops->detach_dev(domain, dev); - trace_detach_device_from_domain(dev); -} - void iommu_detach_device(struct iommu_domain *domain, struct device *dev) { struct iommu_group *group; @@ -2156,15 +2149,6 @@ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_attach_group); -static int iommu_group_do_detach_device(struct device *dev, void *data) -{ - struct iommu_domain *domain = data; - - __iommu_detach_device(domain, dev); - - return 0; -} - static int iommu_group_do_set_platform_dma(struct device *dev, void *data) { const struct iommu_ops *ops = dev_iommu_ops(dev); @@ -2184,25 +2168,13 @@ static int __iommu_group_set_domain(struct iommu_group *group, return 0; /* - * New drivers should support default domains and so the 
detach_dev() op - * will never be called. Otherwise the NULL domain represents some + * New drivers should support default domains, so set_platform_dma() + * op will never be called. Otherwise the NULL domain represents some * platform specific behavior. */ if (!new_domain) { - struct group_device *grp_dev; - - grp_dev = list_first_entry(&group->devices, - struct group_device, list); - - if (dev_iommu_ops(grp_dev->dev)->set_platform_dma_ops) - __iommu_group_for_each_dev(group, NULL, - iommu_group_do_set_platform_dma); - else if (group->domain->ops->detach_dev) - __iommu_group_for_each_dev(group, group->domain, - iommu_group_do_detach_device); - else - WARN_ON_ONCE(1); - + __iommu_group_for_each_dev(group, NULL, + iommu_group_do_set_platform_dma); group->domain = NULL; return 0; } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0d10566b3cb2..a8063f26ff69 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -299,7 +299,6 @@ struct iommu_ops { * * EBUSY - device is attached to a domain and cannot be changed * * ENODEV - device specific errors, not able to be attached * * - treated as ENODEV by the caller. Use is discouraged - * @detach_dev: detach an iommu domain from a device * @set_dev_pasid: set an iommu domain to a pasid of device * @map: map a physically contiguous memory region to an iommu domain * @map_pages: map a physically contiguous set of pages of the same size to @@ -320,7 +319,6 @@ struct iommu_ops { */ struct iommu_domain_ops { int (*attach_dev)(struct iommu_domain *domain, struct device *dev); - void (*detach_dev)(struct iommu_domain *domain, struct device *dev); int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); diff --git a/include/trace/events/iommu.h b/include/trace/events/iommu.h index 29096fe12623..70743db1fb75 100644 --- a/include/trace/events/iommu.h +++ b/include/trace/events/iommu.h @@ -76,13 +76,6 @@ DEFINE_EVENT(iommu_device_event, attach_device_to_domain, TP_ARGS(dev) ); -DEFINE_EVENT(iommu_device_event, detach_device_from_domain, - - TP_PROTO(struct device *dev), - - TP_ARGS(dev) -); - TRACE_EVENT(map, TP_PROTO(unsigned long iova, phys_addr_t paddr, size_t size), -- cgit v1.2.3 From d286a58bc8f4d5cf3246481b4e38e4c0c65550ac Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 12 Jan 2023 19:59:47 +0000 Subject: iommu: Tidy up io-pgtable dependencies Some io-pgtable implementations, and thus their users too, carry a slightly odd dependency to get around the GENERIC_ATOMIC64 version of cmpxchg64() often failing to compile. Since this is a functional dependency, it's a bit misleading and untidy to tie it explicitly to COMPILE_TEST while assuming that it's also implied by the other platform/architecture options. Make things clearer by separating these functional dependencies into distinct statements from those controlling visibility, and since they do look a bit non-obvious to the uninitiated, also commenting them for good measure. 
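Schematically (ARCH_FOO is a placeholder, not a real symbol), each affected entry changes from

	depends on ARCH_FOO || (COMPILE_TEST && !GENERIC_ATOMIC64)

to two distinct statements, with the functional one commented:

	depends on ARCH_FOO || COMPILE_TEST
	depends on !GENERIC_ATOMIC64	# for cmpxchg64()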
Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/51d8c78e2ecc6696ac5907526580209ea6da167f.1673553587.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 79707685d54a..889c7efd050b 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -32,7 +32,8 @@ config IOMMU_IO_PGTABLE config IOMMU_IO_PGTABLE_LPAE bool "ARMv7/v8 Long Descriptor Format" select IOMMU_IO_PGTABLE - depends on ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64) + depends on ARM || ARM64 || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for cmpxchg64() help Enable support for the ARM long descriptor pagetable format. This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page @@ -70,7 +71,8 @@ config IOMMU_IO_PGTABLE_ARMV7S_SELFTEST config IOMMU_IO_PGTABLE_DART bool "Apple DART Formats" select IOMMU_IO_PGTABLE - depends on ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64) + depends on ARM64 || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for cmpxchg64() help Enable support for the Apple DART pagetable formats. These include the t8020 and t6000/t8110 DART formats used in Apple M1/M2 family @@ -284,7 +286,8 @@ config EXYNOS_IOMMU_DEBUG config IPMMU_VMSA bool "Renesas VMSA-compatible IPMMU" - depends on ARCH_RENESAS || (COMPILE_TEST && !GENERIC_ATOMIC64) + depends on ARCH_RENESAS || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU @@ -304,7 +307,8 @@ config SPAPR_TCE_IOMMU config APPLE_DART tristate "Apple DART IOMMU Support" - depends on ARCH_APPLE || (COMPILE_TEST && !GENERIC_ATOMIC64) + depends on ARCH_APPLE || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_DART select IOMMU_API select IOMMU_IO_PGTABLE_DART default ARCH_APPLE @@ -319,7 +323,8 @@ config APPLE_DART # ARM IOMMU support config ARM_SMMU tristate "ARM Ltd. System MMU (SMMU) Support" - depends on ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64) + depends on ARM64 || ARM || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU if ARM @@ -466,7 +471,8 @@ config MTK_IOMMU_V1 config QCOM_IOMMU # Note: iommu drivers cannot (yet?) be built as modules bool "Qualcomm IOMMU Support" - depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64) + depends on ARCH_QCOM || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE select QCOM_SCM select IOMMU_API select IOMMU_IO_PGTABLE_LPAE -- cgit v1.2.3 From ba9bee7f59fd0687837222bc7b79dd9a5d127d0b Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 5 Jan 2023 09:17:28 +0000 Subject: iommu/amd: Do not allocate io_pgtable_ops for passthrough domain In passthrough mode we do not use an IOMMU page table. Hence we don't need to allocate io_pgtable_ops. 
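Schematically (simplified from the hunk below), protection_domain_alloc() now returns early before any page-table setup:

	/* No need to allocate io pgtable ops in passthrough mode */
	if (type == IOMMU_DOMAIN_IDENTITY)
		return domain;

	pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain);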
Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20230105091728.42469-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index cbeaab55c0db..b7c1eac1dfa5 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2072,6 +2072,10 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) if (ret) goto out_err; + /* No need to allocate io pgtable ops in passthrough mode */ + if (type == IOMMU_DOMAIN_IDENTITY) + return domain; + pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain); if (!pgtbl_ops) { domain_id_free(domain->id); -- cgit v1.2.3 From 080920e52148b4fbbf9360d5345fdcd7846e4841 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 11 Jan 2023 12:15:03 +0000 Subject: iommu/amd: Fix error handling for pdev_pri_ats_enable() Current code throws kernel warning if it fails to enable pasid/pri [1]. Do not call pci_disable_[pasid/pri] if pci_enable_[pasid/pri] failed. [1] https://lore.kernel.org/linux-iommu/15d0f9ff-2a56-b3e9-5b45-e6b23300ae3b@leemhuis.info/ Reported-by: Matt Fagnani Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20230111121503.5931-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b7c1eac1dfa5..7b4390a2f726 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1702,27 +1702,29 @@ static int pdev_pri_ats_enable(struct pci_dev *pdev) /* Only allow access to user-accessible pages */ ret = pci_enable_pasid(pdev, 0); if (ret) - goto out_err; + return ret; /* First reset the PRI state of the device */ ret = pci_reset_pri(pdev); if (ret) - goto out_err; + goto out_err_pasid; /* Enable PRI */ /* FIXME: Hardcode number of outstanding requests for now */ ret = pci_enable_pri(pdev, 32); if (ret) - goto out_err; + goto out_err_pasid; ret = pci_enable_ats(pdev, PAGE_SHIFT); if (ret) - goto out_err; + goto out_err_pri; return 0; -out_err: +out_err_pri: pci_disable_pri(pdev); + +out_err_pasid: pci_disable_pasid(pdev); return ret; -- cgit v1.2.3 From 584d334b139325aa7468fc2eb045864f260298d1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 13 Jan 2023 19:56:40 +0100 Subject: iommu/ipmmu-vmsa: Remove ipmmu_utlb_disable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function is unused after commit 1b932ceddd19 ("iommu: Remove detach_dev callbacks") and so compilation fails with drivers/iommu/ipmmu-vmsa.c:305:13: error: ‘ipmmu_utlb_disable’ defined but not used [-Werror=unused-function] 305 | static void ipmmu_utlb_disable(struct ipmmu_vmsa_domain *domain, | ^~~~~~~~~~~~~~~~~~ Remove the function to fix the compile error. 
Fixes: 1b932ceddd19 ("iommu: Remove detach_dev callbacks") Signed-off-by: Joerg Roedel Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230113185640.8050-1-joro@8bytes.org --- drivers/iommu/ipmmu-vmsa.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 3112822ac7be..bdf1a4e5eae0 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -299,18 +299,6 @@ static void ipmmu_utlb_enable(struct ipmmu_vmsa_domain *domain, mmu->utlb_ctx[utlb] = domain->context_id; } -/* - * Disable MMU translation for the microTLB. - */ -static void ipmmu_utlb_disable(struct ipmmu_vmsa_domain *domain, - unsigned int utlb) -{ - struct ipmmu_vmsa_device *mmu = domain->mmu; - - ipmmu_imuctr_write(mmu, utlb, 0); - mmu->utlb_ctx[utlb] = IPMMU_CTX_INVALID; -} - static void ipmmu_tlb_flush_all(void *cookie) { struct ipmmu_vmsa_domain *domain = cookie; -- cgit v1.2.3 From bb649412d39f18eb2a8b7956209a068a08f4bc1e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 13 Jan 2023 20:15:28 +0100 Subject: iommu/fsl_pamu: Fix compile error after adding set_platform_dma_ops The struct initializer for set_platform_dma_ops uses a semicolon as separator where a comma is required. Fix the compile error by using the correct separator. Fixes: c1fe9119ee70 ("iommu: Add set_platform_dma_ops callbacks") Signed-off-by: Joerg Roedel Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230113191528.23638-1-joro@8bytes.org --- drivers/iommu/fsl_pamu_domain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index e123161c211a..bce372297099 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -452,7 +452,7 @@ static const struct iommu_ops fsl_pamu_ops = { .domain_alloc = fsl_pamu_domain_alloc, .probe_device = fsl_pamu_probe_device, .device_group = fsl_pamu_device_group, - .set_platform_dma_ops = fsl_pamu_set_platform_dma; + .set_platform_dma_ops = fsl_pamu_set_platform_dma, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = fsl_pamu_attach_device, .iova_to_phys = fsl_pamu_iova_to_phys, -- cgit v1.2.3 From 3d68bbb81b1a64e279180eee1ed0e2c590b5029e Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Fri, 13 Jan 2023 19:50:25 +0900 Subject: iommu: dart: Add suspend/resume support We need to save/restore the TCR/TTBR registers, since they are lost on power gate. 
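The wiring follows the standard device PM pattern (sketch; apple_dart_suspend/apple_dart_resume are the handlers added in the diff below):

	DEFINE_SIMPLE_DEV_PM_OPS(apple_dart_pm_ops, apple_dart_suspend,
				 apple_dart_resume);

	static struct platform_driver apple_dart_driver = {
		.driver = {
			/* ... */
			.pm = pm_sleep_ptr(&apple_dart_pm_ops),
		},
	};

pm_sleep_ptr() discards the ops when CONFIG_PM_SLEEP is disabled, which is why the handlers below are marked __maybe_unused.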
Reviewed-by: Sven Peter Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20230113105029.26654-3-marcan@marcan.st Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 4f4a323be0d0..2458416122f8 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -121,6 +121,9 @@ struct apple_dart { struct iommu_group *sid2group[DART_MAX_STREAMS]; struct iommu_device iommu; + + u32 save_tcr[DART_MAX_STREAMS]; + u32 save_ttbr[DART_MAX_STREAMS][DART_MAX_TTBR]; }; /* @@ -932,6 +935,45 @@ static const struct apple_dart_hw apple_dart_hw_t6000 = { .fmt = APPLE_DART2, }; +static __maybe_unused int apple_dart_suspend(struct device *dev) +{ + struct apple_dart *dart = dev_get_drvdata(dev); + unsigned int sid, idx; + + for (sid = 0; sid < DART_MAX_STREAMS; sid++) { + dart->save_tcr[sid] = readl_relaxed(dart->regs + DART_TCR(sid)); + for (idx = 0; idx < DART_MAX_TTBR; idx++) + dart->save_ttbr[sid][idx] = + readl(dart->regs + DART_TTBR(sid, idx)); + } + + return 0; +} + +static __maybe_unused int apple_dart_resume(struct device *dev) +{ + struct apple_dart *dart = dev_get_drvdata(dev); + unsigned int sid, idx; + int ret; + + ret = apple_dart_hw_reset(dart); + if (ret) { + dev_err(dev, "Failed to reset DART on resume\n"); + return ret; + } + + for (sid = 0; sid < DART_MAX_STREAMS; sid++) { + for (idx = 0; idx < DART_MAX_TTBR; idx++) + writel(dart->save_ttbr[sid][idx], + dart->regs + DART_TTBR(sid, idx)); + writel(dart->save_tcr[sid], dart->regs + DART_TCR(sid)); + } + + return 0; +} + +DEFINE_SIMPLE_DEV_PM_OPS(apple_dart_pm_ops, apple_dart_suspend, apple_dart_resume); + static const struct of_device_id apple_dart_of_match[] = { { .compatible = "apple,t8103-dart", .data = &apple_dart_hw_t8103 }, { .compatible = "apple,t6000-dart", .data = &apple_dart_hw_t6000 }, @@ -944,6 +986,7 @@ static struct platform_driver apple_dart_driver = { .name = "apple-dart", .of_match_table = apple_dart_of_match, .suppress_bind_attrs = true, + .pm = pm_sleep_ptr(&apple_dart_pm_ops), }, .probe = apple_dart_probe, .remove = apple_dart_remove, -- cgit v1.2.3 From 510d4072df7fcf27dcd2dc1942d58b2cc02b03f2 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Fri, 13 Jan 2023 19:50:26 +0900 Subject: iommu: dart: Support >64 stream IDs T8110 DARTs have up to 256 SIDs, so we need to switch to a bitmap to handle them properly. 
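The conversion uses the standard kernel bitmap API; a minimal sketch of the pattern applied throughout the diff:

	DECLARE_BITMAP(sidmap, DART_MAX_STREAMS);	/* was: unsigned long sidmap */

	set_bit(sid, sidmap);				/* was: sidmap |= 1 << sid */
	for_each_set_bit(sid, sidmap, dart->num_streams)
		writel(0, dart->regs + DART_TCR(sid));	/* per-stream programming */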
Reviewed-by: Sven Peter Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20230113105029.26654-4-marcan@marcan.st Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 114 ++++++++++++++++++++++++++++----------------- 1 file changed, 71 insertions(+), 43 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 2458416122f8..80e4436ee4de 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -34,11 +34,10 @@ #include "dma-iommu.h" -#define DART_MAX_STREAMS 16 +#define DART_MAX_STREAMS 256 #define DART_MAX_TTBR 4 #define MAX_DARTS_PER_DEVICE 2 -#define DART_STREAM_ALL 0xffff #define DART_PARAMS1 0x00 #define DART_PARAMS_PAGE_SHIFT GENMASK(27, 24) @@ -85,6 +84,8 @@ struct apple_dart_hw { u32 oas; enum io_pgtable_fmt fmt; + + int max_sid_count; }; /* @@ -116,6 +117,7 @@ struct apple_dart { spinlock_t lock; u32 pgsize; + u32 num_streams; u32 supports_bypass : 1; u32 force_bypass : 1; @@ -143,11 +145,11 @@ struct apple_dart { */ struct apple_dart_stream_map { struct apple_dart *dart; - unsigned long sidmap; + DECLARE_BITMAP(sidmap, DART_MAX_STREAMS); }; struct apple_dart_atomic_stream_map { struct apple_dart *dart; - atomic64_t sidmap; + atomic_long_t sidmap[BITS_TO_LONGS(DART_MAX_STREAMS)]; }; /* @@ -205,50 +207,55 @@ static struct apple_dart_domain *to_dart_domain(struct iommu_domain *dom) static void apple_dart_hw_enable_translation(struct apple_dart_stream_map *stream_map) { + struct apple_dart *dart = stream_map->dart; int sid; - for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) + for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) writel(DART_TCR_TRANSLATE_ENABLE, - stream_map->dart->regs + DART_TCR(sid)); + dart->regs + DART_TCR(sid)); } static void apple_dart_hw_disable_dma(struct apple_dart_stream_map *stream_map) { + struct apple_dart *dart = stream_map->dart; int sid; - for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) - writel(0, stream_map->dart->regs + DART_TCR(sid)); + for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) + writel(0, dart->regs + DART_TCR(sid)); } static void apple_dart_hw_enable_bypass(struct apple_dart_stream_map *stream_map) { + struct apple_dart *dart = stream_map->dart; int sid; WARN_ON(!stream_map->dart->supports_bypass); - for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) + for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) writel(DART_TCR_BYPASS0_ENABLE | DART_TCR_BYPASS1_ENABLE, - stream_map->dart->regs + DART_TCR(sid)); + dart->regs + DART_TCR(sid)); } static void apple_dart_hw_set_ttbr(struct apple_dart_stream_map *stream_map, u8 idx, phys_addr_t paddr) { + struct apple_dart *dart = stream_map->dart; int sid; WARN_ON(paddr & ((1 << DART_TTBR_SHIFT) - 1)); - for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) + for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) writel(DART_TTBR_VALID | (paddr >> DART_TTBR_SHIFT), - stream_map->dart->regs + DART_TTBR(sid, idx)); + dart->regs + DART_TTBR(sid, idx)); } static void apple_dart_hw_clear_ttbr(struct apple_dart_stream_map *stream_map, u8 idx) { + struct apple_dart *dart = stream_map->dart; int sid; - for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) - writel(0, stream_map->dart->regs + DART_TTBR(sid, idx)); + for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) + writel(0, dart->regs + DART_TTBR(sid, idx)); } static void @@ -270,7 +277,7 @@ apple_dart_hw_stream_command(struct apple_dart_stream_map *stream_map, 
spin_lock_irqsave(&stream_map->dart->lock, flags); - writel(stream_map->sidmap, stream_map->dart->regs + DART_STREAM_SELECT); + writel(stream_map->sidmap[0], stream_map->dart->regs + DART_STREAM_SELECT); writel(command, stream_map->dart->regs + DART_STREAM_COMMAND); ret = readl_poll_timeout_atomic( @@ -283,7 +290,7 @@ apple_dart_hw_stream_command(struct apple_dart_stream_map *stream_map, if (ret) { dev_err(stream_map->dart->dev, "busy bit did not clear after command %x for streams %lx\n", - command, stream_map->sidmap); + command, stream_map->sidmap[0]); return ret; } @@ -301,6 +308,7 @@ static int apple_dart_hw_reset(struct apple_dart *dart) { u32 config; struct apple_dart_stream_map stream_map; + int i; config = readl(dart->regs + DART_CONFIG); if (config & DART_CONFIG_LOCK) { @@ -310,12 +318,14 @@ static int apple_dart_hw_reset(struct apple_dart *dart) } stream_map.dart = dart; - stream_map.sidmap = DART_STREAM_ALL; + bitmap_zero(stream_map.sidmap, DART_MAX_STREAMS); + bitmap_set(stream_map.sidmap, 0, dart->num_streams); apple_dart_hw_disable_dma(&stream_map); apple_dart_hw_clear_all_ttbrs(&stream_map); /* enable all streams globally since TCR is used to control isolation */ - writel(DART_STREAM_ALL, dart->regs + DART_STREAMS_ENABLE); + for (i = 0; i < BITS_TO_U32(dart->num_streams); i++) + writel(U32_MAX, dart->regs + DART_STREAMS_ENABLE + 4 * i); /* clear any pending errors before the interrupt is unmasked */ writel(readl(dart->regs + DART_ERROR), dart->regs + DART_ERROR); @@ -325,13 +335,16 @@ static int apple_dart_hw_reset(struct apple_dart *dart) static void apple_dart_domain_flush_tlb(struct apple_dart_domain *domain) { - int i; + int i, j; struct apple_dart_atomic_stream_map *domain_stream_map; struct apple_dart_stream_map stream_map; for_each_stream_map(i, domain, domain_stream_map) { stream_map.dart = domain_stream_map->dart; - stream_map.sidmap = atomic64_read(&domain_stream_map->sidmap); + + for (j = 0; j < BITS_TO_LONGS(stream_map.dart->num_streams); j++) + stream_map.sidmap[j] = atomic_long_read(&domain_stream_map->sidmap[j]); + apple_dart_hw_invalidate_tlb(&stream_map); } } @@ -416,7 +429,7 @@ static int apple_dart_finalize_domain(struct iommu_domain *domain, struct apple_dart *dart = cfg->stream_maps[0].dart; struct io_pgtable_cfg pgtbl_cfg; int ret = 0; - int i; + int i, j; mutex_lock(&dart_domain->init_lock); @@ -425,8 +438,9 @@ static int apple_dart_finalize_domain(struct iommu_domain *domain, for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) { dart_domain->stream_maps[i].dart = cfg->stream_maps[i].dart; - atomic64_set(&dart_domain->stream_maps[i].sidmap, - cfg->stream_maps[i].sidmap); + for (j = 0; j < BITS_TO_LONGS(dart->num_streams); j++) + atomic_long_set(&dart_domain->stream_maps[i].sidmap[j], + cfg->stream_maps[i].sidmap[j]); } pgtbl_cfg = (struct io_pgtable_cfg){ @@ -461,7 +475,7 @@ apple_dart_mod_streams(struct apple_dart_atomic_stream_map *domain_maps, struct apple_dart_stream_map *master_maps, bool add_streams) { - int i; + int i, j; for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) { if (domain_maps[i].dart != master_maps[i].dart) @@ -471,12 +485,14 @@ apple_dart_mod_streams(struct apple_dart_atomic_stream_map *domain_maps, for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) { if (!domain_maps[i].dart) break; - if (add_streams) - atomic64_or(master_maps[i].sidmap, - &domain_maps[i].sidmap); - else - atomic64_and(~master_maps[i].sidmap, - &domain_maps[i].sidmap); + for (j = 0; j < BITS_TO_LONGS(domain_maps[i].dart->num_streams); j++) { + if (add_streams) + 
atomic_long_or(master_maps[i].sidmap[j], + &domain_maps[i].sidmap[j]); + else + atomic_long_and(~master_maps[i].sidmap[j], + &domain_maps[i].sidmap[j]); + } } return 0; @@ -640,14 +656,14 @@ static int apple_dart_of_xlate(struct device *dev, struct of_phandle_args *args) for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) { if (cfg->stream_maps[i].dart == dart) { - cfg->stream_maps[i].sidmap |= 1 << sid; + set_bit(sid, cfg->stream_maps[i].sidmap); return 0; } } for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) { if (!cfg->stream_maps[i].dart) { cfg->stream_maps[i].dart = dart; - cfg->stream_maps[i].sidmap = 1 << sid; + set_bit(sid, cfg->stream_maps[i].sidmap); return 0; } } @@ -666,7 +682,7 @@ static void apple_dart_release_group(void *iommu_data) mutex_lock(&apple_dart_groups_lock); for_each_stream_map(i, group_master_cfg, stream_map) - for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) + for_each_set_bit(sid, stream_map->sidmap, stream_map->dart->num_streams) stream_map->dart->sid2group[sid] = NULL; kfree(iommu_data); @@ -685,7 +701,7 @@ static struct iommu_group *apple_dart_device_group(struct device *dev) mutex_lock(&apple_dart_groups_lock); for_each_stream_map(i, cfg, stream_map) { - for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) { + for_each_set_bit(sid, stream_map->sidmap, stream_map->dart->num_streams) { struct iommu_group *stream_group = stream_map->dart->sid2group[sid]; @@ -724,7 +740,7 @@ static struct iommu_group *apple_dart_device_group(struct device *dev) apple_dart_release_group); for_each_stream_map(i, cfg, stream_map) - for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) + for_each_set_bit(sid, stream_map->sidmap, stream_map->dart->num_streams) stream_map->dart->sid2group[sid] = group; res = group; @@ -869,16 +885,26 @@ static int apple_dart_probe(struct platform_device *pdev) if (ret) return ret; - ret = apple_dart_hw_reset(dart); - if (ret) - goto err_clk_disable; - dart_params[0] = readl(dart->regs + DART_PARAMS1); dart_params[1] = readl(dart->regs + DART_PARAMS2); dart->pgsize = 1 << FIELD_GET(DART_PARAMS_PAGE_SHIFT, dart_params[0]); dart->supports_bypass = dart_params[1] & DART_PARAMS_BYPASS_SUPPORT; + + dart->num_streams = dart->hw->max_sid_count; + + if (dart->num_streams > DART_MAX_STREAMS) { + dev_err(&pdev->dev, "Too many streams (%d > %d)\n", + dart->num_streams, DART_MAX_STREAMS); + ret = -EINVAL; + goto err_clk_disable; + } + dart->force_bypass = dart->pgsize > PAGE_SIZE; + ret = apple_dart_hw_reset(dart); + if (ret) + goto err_clk_disable; + ret = request_irq(dart->irq, apple_dart_irq, IRQF_SHARED, "apple-dart fault handler", dart); if (ret) @@ -897,8 +923,8 @@ static int apple_dart_probe(struct platform_device *pdev) dev_info( &pdev->dev, - "DART [pagesize %x, bypass support: %d, bypass forced: %d] initialized\n", - dart->pgsize, dart->supports_bypass, dart->force_bypass); + "DART [pagesize %x, %d streams, bypass support: %d, bypass forced: %d] initialized\n", + dart->pgsize, dart->num_streams, dart->supports_bypass, dart->force_bypass); return 0; err_sysfs_remove: @@ -929,10 +955,12 @@ static int apple_dart_remove(struct platform_device *pdev) static const struct apple_dart_hw apple_dart_hw_t8103 = { .oas = 36, .fmt = APPLE_DART, + .max_sid_count = 16, }; static const struct apple_dart_hw apple_dart_hw_t6000 = { .oas = 42, .fmt = APPLE_DART2, + .max_sid_count = 16, }; static __maybe_unused int apple_dart_suspend(struct device *dev) @@ -940,7 +968,7 @@ static __maybe_unused int apple_dart_suspend(struct device *dev) struct apple_dart *dart = 
dev_get_drvdata(dev); unsigned int sid, idx; - for (sid = 0; sid < DART_MAX_STREAMS; sid++) { + for (sid = 0; sid < dart->num_streams; sid++) { dart->save_tcr[sid] = readl_relaxed(dart->regs + DART_TCR(sid)); for (idx = 0; idx < DART_MAX_TTBR; idx++) dart->save_ttbr[sid][idx] = @@ -962,7 +990,7 @@ static __maybe_unused int apple_dart_resume(struct device *dev) return ret; } - for (sid = 0; sid < DART_MAX_STREAMS; sid++) { + for (sid = 0; sid < dart->num_streams; sid++) { for (idx = 0; idx < DART_MAX_TTBR; idx++) writel(dart->save_ttbr[sid][idx], dart->regs + DART_TTBR(sid, idx)); -- cgit v1.2.3 From 0b459bcdc5a8b8929ec0f568723c7c639f1673cf Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Fri, 13 Jan 2023 19:50:27 +0900 Subject: iommu: dart: Support a variable number of TTBRs per stream T8110 only has one TTBR per stream, so un-hardcode that. Reviewed-by: Sven Peter Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20230113105029.26654-5-marcan@marcan.st Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 80e4436ee4de..82b2581d909c 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -77,15 +77,21 @@ #define DART_TCR_BYPASS0_ENABLE BIT(8) #define DART_TCR_BYPASS1_ENABLE BIT(12) -#define DART_TTBR(sid, idx) (0x200 + 16 * (sid) + 4 * (idx)) #define DART_TTBR_VALID BIT(31) #define DART_TTBR_SHIFT 12 +#define DART_TTBR(dart, sid, idx) (0x200 + \ + (((dart)->hw->ttbr_count * (sid)) << 2) + \ + ((idx) << 2)) + + struct apple_dart_hw { u32 oas; enum io_pgtable_fmt fmt; int max_sid_count; + + int ttbr_count; }; /* @@ -245,7 +251,7 @@ static void apple_dart_hw_set_ttbr(struct apple_dart_stream_map *stream_map, WARN_ON(paddr & ((1 << DART_TTBR_SHIFT) - 1)); for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) writel(DART_TTBR_VALID | (paddr >> DART_TTBR_SHIFT), - dart->regs + DART_TTBR(sid, idx)); + dart->regs + DART_TTBR(dart, sid, idx)); } static void apple_dart_hw_clear_ttbr(struct apple_dart_stream_map *stream_map, @@ -255,7 +261,7 @@ static void apple_dart_hw_clear_ttbr(struct apple_dart_stream_map *stream_map, int sid; for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) - writel(0, dart->regs + DART_TTBR(sid, idx)); + writel(0, dart->regs + DART_TTBR(dart, sid, idx)); } static void @@ -263,7 +269,7 @@ apple_dart_hw_clear_all_ttbrs(struct apple_dart_stream_map *stream_map) { int i; - for (i = 0; i < DART_MAX_TTBR; ++i) + for (i = 0; i < stream_map->dart->hw->ttbr_count; ++i) apple_dart_hw_clear_ttbr(stream_map, i); } @@ -415,7 +421,7 @@ apple_dart_setup_translation(struct apple_dart_domain *domain, for (i = 0; i < pgtbl_cfg->apple_dart_cfg.n_ttbrs; ++i) apple_dart_hw_set_ttbr(stream_map, i, pgtbl_cfg->apple_dart_cfg.ttbr[i]); - for (; i < DART_MAX_TTBR; ++i) + for (; i < stream_map->dart->hw->ttbr_count; ++i) apple_dart_hw_clear_ttbr(stream_map, i); apple_dart_hw_enable_translation(stream_map); @@ -956,11 +962,15 @@ static const struct apple_dart_hw apple_dart_hw_t8103 = { .oas = 36, .fmt = APPLE_DART, .max_sid_count = 16, + + .ttbr_count = 4, }; static const struct apple_dart_hw apple_dart_hw_t6000 = { .oas = 42, .fmt = APPLE_DART2, .max_sid_count = 16, + + .ttbr_count = 4, }; static __maybe_unused int apple_dart_suspend(struct device *dev) @@ -970,9 +980,9 @@ static __maybe_unused int apple_dart_suspend(struct device *dev) for (sid = 0; sid < 
dart->num_streams; sid++) { dart->save_tcr[sid] = readl_relaxed(dart->regs + DART_TCR(sid)); - for (idx = 0; idx < DART_MAX_TTBR; idx++) + for (idx = 0; idx < dart->hw->ttbr_count; idx++) dart->save_ttbr[sid][idx] = - readl(dart->regs + DART_TTBR(sid, idx)); + readl(dart->regs + DART_TTBR(dart, sid, idx)); } return 0; @@ -991,9 +1001,9 @@ static __maybe_unused int apple_dart_resume(struct device *dev) } for (sid = 0; sid < dart->num_streams; sid++) { - for (idx = 0; idx < DART_MAX_TTBR; idx++) + for (idx = 0; idx < dart->hw->ttbr_count; idx++) writel(dart->save_ttbr[sid][idx], - dart->regs + DART_TTBR(sid, idx)); + dart->regs + DART_TTBR(dart, sid, idx)); writel(dart->save_tcr[sid], dart->regs + DART_TCR(sid)); } -- cgit v1.2.3 From a772a02c18342bcc14bdc499ca400c2139b4ddde Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Fri, 13 Jan 2023 19:50:28 +0900 Subject: iommu: dart: Fix DART_PARAMS1/2 bit define names They didn't have the PARAMS reg index in them, but they should. Reviewed-by: Sven Peter Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20230113105029.26654-6-marcan@marcan.st Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 82b2581d909c..89be055a44be 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -40,10 +40,10 @@ #define DART_PARAMS1 0x00 -#define DART_PARAMS_PAGE_SHIFT GENMASK(27, 24) +#define DART_PARAMS1_PAGE_SHIFT GENMASK(27, 24) #define DART_PARAMS2 0x04 -#define DART_PARAMS_BYPASS_SUPPORT BIT(0) +#define DART_PARAMS2_BYPASS_SUPPORT BIT(0) #define DART_STREAM_COMMAND 0x20 #define DART_STREAM_COMMAND_BUSY BIT(2) @@ -893,8 +893,8 @@ static int apple_dart_probe(struct platform_device *pdev) dart_params[0] = readl(dart->regs + DART_PARAMS1); dart_params[1] = readl(dart->regs + DART_PARAMS2); - dart->pgsize = 1 << FIELD_GET(DART_PARAMS_PAGE_SHIFT, dart_params[0]); - dart->supports_bypass = dart_params[1] & DART_PARAMS_BYPASS_SUPPORT; + dart->pgsize = 1 << FIELD_GET(DART_PARAMS1_PAGE_SHIFT, dart_params[0]); + dart->supports_bypass = dart_params[1] & DART_PARAMS2_BYPASS_SUPPORT; dart->num_streams = dart->hw->max_sid_count; -- cgit v1.2.3 From b76c68fcb44025c1b6ee329fa00738a4e9420d57 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Fri, 13 Jan 2023 19:50:29 +0900 Subject: iommu: dart: Support different variants with different registers T8110 has a new register layout. To accommodate this, first move all the register offsets to the hw structure, and rename all the existing registers to DART_T8020_*. 
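To make the new offset arithmetic concrete, here is a minimal stand-alone sketch of the same parameterization in user-space C. The bases and counts mirror the T8020 values used in this patch; it only illustrates the DART_TCR()/DART_TTBR() macro math and is not driver code:

  #include <stdint.h>
  #include <stdio.h>

  /* Illustrative model only: bases mirror the T8020 layout in this patch. */
  struct dart_hw {
          uint32_t tcr;       /* base of the per-stream TCR bank */
          uint32_t ttbr;      /* base of the per-stream TTBR bank */
          int ttbr_count;     /* TTBRs per stream ID */
  };

  static const struct dart_hw t8020 = { .tcr = 0x100, .ttbr = 0x200, .ttbr_count = 4 };

  /* Same math as the DART_TCR()/DART_TTBR() macros: registers are 4 bytes,
   * and each stream's TTBRs are laid out contiguously. */
  static uint32_t dart_tcr(const struct dart_hw *hw, int sid)
  {
          return hw->tcr + ((uint32_t)sid << 2);
  }

  static uint32_t dart_ttbr(const struct dart_hw *hw, int sid, int idx)
  {
          return hw->ttbr + ((uint32_t)(hw->ttbr_count * sid) << 2) + ((uint32_t)idx << 2);
  }

  int main(void)
  {
          /* stream 3, TTBR 1: 0x200 + 3 * 4 * 4 + 1 * 4 = 0x234 */
          printf("TCR(3)    = 0x%x\n", dart_tcr(&t8020, 3));
          printf("TTBR(3,1) = 0x%x\n", dart_ttbr(&t8020, 3, 1));
          return 0;
  }

Keeping the layout in a per-variant hw description means the accessors stay identical across DART generations; only the table entries change.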
Reviewed-by: Sven Peter Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20230113105029.26654-7-marcan@marcan.st Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 187 ++++++++++++++++++++++++++++++--------------- 1 file changed, 124 insertions(+), 63 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 89be055a44be..75647433df9d 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -38,6 +38,7 @@ #define DART_MAX_TTBR 4 #define MAX_DARTS_PER_DEVICE 2 +/* Common registers */ #define DART_PARAMS1 0x00 #define DART_PARAMS1_PAGE_SHIFT GENMASK(27, 24) @@ -45,52 +46,78 @@ #define DART_PARAMS2 0x04 #define DART_PARAMS2_BYPASS_SUPPORT BIT(0) -#define DART_STREAM_COMMAND 0x20 -#define DART_STREAM_COMMAND_BUSY BIT(2) -#define DART_STREAM_COMMAND_INVALIDATE BIT(20) +/* T8020/T6000 registers */ -#define DART_STREAM_SELECT 0x34 +#define DART_T8020_STREAM_COMMAND 0x20 +#define DART_T8020_STREAM_COMMAND_BUSY BIT(2) +#define DART_T8020_STREAM_COMMAND_INVALIDATE BIT(20) -#define DART_ERROR 0x40 -#define DART_ERROR_STREAM GENMASK(27, 24) -#define DART_ERROR_CODE GENMASK(11, 0) -#define DART_ERROR_FLAG BIT(31) +#define DART_T8020_STREAM_SELECT 0x34 -#define DART_ERROR_READ_FAULT BIT(4) -#define DART_ERROR_WRITE_FAULT BIT(3) -#define DART_ERROR_NO_PTE BIT(2) -#define DART_ERROR_NO_PMD BIT(1) -#define DART_ERROR_NO_TTBR BIT(0) +#define DART_T8020_ERROR 0x40 +#define DART_T8020_ERROR_STREAM GENMASK(27, 24) +#define DART_T8020_ERROR_CODE GENMASK(11, 0) +#define DART_T8020_ERROR_FLAG BIT(31) -#define DART_CONFIG 0x60 -#define DART_CONFIG_LOCK BIT(15) +#define DART_T8020_ERROR_READ_FAULT BIT(4) +#define DART_T8020_ERROR_WRITE_FAULT BIT(3) +#define DART_T8020_ERROR_NO_PTE BIT(2) +#define DART_T8020_ERROR_NO_PMD BIT(1) +#define DART_T8020_ERROR_NO_TTBR BIT(0) + +#define DART_T8020_CONFIG 0x60 +#define DART_T8020_CONFIG_LOCK BIT(15) #define DART_STREAM_COMMAND_BUSY_TIMEOUT 100 -#define DART_ERROR_ADDR_HI 0x54 -#define DART_ERROR_ADDR_LO 0x50 +#define DART_T8020_ERROR_ADDR_HI 0x54 +#define DART_T8020_ERROR_ADDR_LO 0x50 + +#define DART_T8020_STREAMS_ENABLE 0xfc -#define DART_STREAMS_ENABLE 0xfc +#define DART_T8020_TCR 0x100 +#define DART_T8020_TCR_TRANSLATE_ENABLE BIT(7) +#define DART_T8020_TCR_BYPASS_DART BIT(8) +#define DART_T8020_TCR_BYPASS_DAPF BIT(12) -#define DART_TCR(sid) (0x100 + 4 * (sid)) -#define DART_TCR_TRANSLATE_ENABLE BIT(7) -#define DART_TCR_BYPASS0_ENABLE BIT(8) -#define DART_TCR_BYPASS1_ENABLE BIT(12) +#define DART_T8020_TTBR 0x200 +#define DART_T8020_TTBR_VALID BIT(31) +#define DART_T8020_TTBR_ADDR_FIELD_SHIFT 0 +#define DART_T8020_TTBR_SHIFT 12 -#define DART_TTBR_VALID BIT(31) -#define DART_TTBR_SHIFT 12 +#define DART_TCR(dart, sid) ((dart)->hw->tcr + ((sid) << 2)) -#define DART_TTBR(dart, sid, idx) (0x200 + \ +#define DART_TTBR(dart, sid, idx) ((dart)->hw->ttbr + \ (((dart)->hw->ttbr_count * (sid)) << 2) + \ ((idx) << 2)) +struct apple_dart_stream_map; struct apple_dart_hw { + irqreturn_t (*irq_handler)(int irq, void *dev); + int (*invalidate_tlb)(struct apple_dart_stream_map *stream_map); + u32 oas; enum io_pgtable_fmt fmt; int max_sid_count; + u64 lock; + u64 lock_bit; + + u64 error; + + u64 enable_streams; + + u64 tcr; + u64 tcr_enabled; + u64 tcr_disabled; + u64 tcr_bypass; + + u64 ttbr; + u64 ttbr_valid; + u64 ttbr_addr_field_shift; + u64 ttbr_shift; int ttbr_count; }; @@ -217,8 +244,7 @@ apple_dart_hw_enable_translation(struct apple_dart_stream_map *stream_map) int sid; 
for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) - writel(DART_TCR_TRANSLATE_ENABLE, - dart->regs + DART_TCR(sid)); + writel(dart->hw->tcr_enabled, dart->regs + DART_TCR(dart, sid)); } static void apple_dart_hw_disable_dma(struct apple_dart_stream_map *stream_map) @@ -227,7 +253,7 @@ static void apple_dart_hw_disable_dma(struct apple_dart_stream_map *stream_map) int sid; for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) - writel(0, dart->regs + DART_TCR(sid)); + writel(dart->hw->tcr_disabled, dart->regs + DART_TCR(dart, sid)); } static void @@ -238,8 +264,8 @@ apple_dart_hw_enable_bypass(struct apple_dart_stream_map *stream_map) WARN_ON(!stream_map->dart->supports_bypass); for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) - writel(DART_TCR_BYPASS0_ENABLE | DART_TCR_BYPASS1_ENABLE, - dart->regs + DART_TCR(sid)); + writel(dart->hw->tcr_bypass, + dart->regs + DART_TCR(dart, sid)); } static void apple_dart_hw_set_ttbr(struct apple_dart_stream_map *stream_map, @@ -248,9 +274,10 @@ static void apple_dart_hw_set_ttbr(struct apple_dart_stream_map *stream_map, struct apple_dart *dart = stream_map->dart; int sid; - WARN_ON(paddr & ((1 << DART_TTBR_SHIFT) - 1)); + WARN_ON(paddr & ((1 << dart->hw->ttbr_shift) - 1)); for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) - writel(DART_TTBR_VALID | (paddr >> DART_TTBR_SHIFT), + writel(dart->hw->ttbr_valid | + (paddr >> dart->hw->ttbr_shift) << dart->hw->ttbr_addr_field_shift, dart->regs + DART_TTBR(dart, sid, idx)); } @@ -274,7 +301,7 @@ apple_dart_hw_clear_all_ttbrs(struct apple_dart_stream_map *stream_map) } static int -apple_dart_hw_stream_command(struct apple_dart_stream_map *stream_map, +apple_dart_t8020_hw_stream_command(struct apple_dart_stream_map *stream_map, u32 command) { unsigned long flags; @@ -283,12 +310,12 @@ apple_dart_hw_stream_command(struct apple_dart_stream_map *stream_map, spin_lock_irqsave(&stream_map->dart->lock, flags); - writel(stream_map->sidmap[0], stream_map->dart->regs + DART_STREAM_SELECT); - writel(command, stream_map->dart->regs + DART_STREAM_COMMAND); + writel(stream_map->sidmap[0], stream_map->dart->regs + DART_T8020_STREAM_SELECT); + writel(command, stream_map->dart->regs + DART_T8020_STREAM_COMMAND); ret = readl_poll_timeout_atomic( - stream_map->dart->regs + DART_STREAM_COMMAND, command_reg, - !(command_reg & DART_STREAM_COMMAND_BUSY), 1, + stream_map->dart->regs + DART_T8020_STREAM_COMMAND, command_reg, + !(command_reg & DART_T8020_STREAM_COMMAND_BUSY), 1, DART_STREAM_COMMAND_BUSY_TIMEOUT); spin_unlock_irqrestore(&stream_map->dart->lock, flags); @@ -304,10 +331,10 @@ apple_dart_hw_stream_command(struct apple_dart_stream_map *stream_map, } static int -apple_dart_hw_invalidate_tlb(struct apple_dart_stream_map *stream_map) +apple_dart_t8020_hw_invalidate_tlb(struct apple_dart_stream_map *stream_map) { - return apple_dart_hw_stream_command(stream_map, - DART_STREAM_COMMAND_INVALIDATE); + return apple_dart_t8020_hw_stream_command( + stream_map, DART_T8020_STREAM_COMMAND_INVALIDATE); } static int apple_dart_hw_reset(struct apple_dart *dart) @@ -316,8 +343,8 @@ static int apple_dart_hw_reset(struct apple_dart *dart) struct apple_dart_stream_map stream_map; int i; - config = readl(dart->regs + DART_CONFIG); - if (config & DART_CONFIG_LOCK) { + config = readl(dart->regs + dart->hw->lock); + if (config & dart->hw->lock_bit) { dev_err(dart->dev, "DART is locked down until reboot: %08x\n", config); return -EINVAL; @@ -331,12 +358,12 @@ static int apple_dart_hw_reset(struct apple_dart 
*dart) /* enable all streams globally since TCR is used to control isolation */ for (i = 0; i < BITS_TO_U32(dart->num_streams); i++) - writel(U32_MAX, dart->regs + DART_STREAMS_ENABLE + 4 * i); + writel(U32_MAX, dart->regs + dart->hw->enable_streams + 4 * i); /* clear any pending errors before the interrupt is unmasked */ - writel(readl(dart->regs + DART_ERROR), dart->regs + DART_ERROR); + writel(readl(dart->regs + dart->hw->error), dart->regs + dart->hw->error); - return apple_dart_hw_invalidate_tlb(&stream_map); + return dart->hw->invalidate_tlb(&stream_map); } static void apple_dart_domain_flush_tlb(struct apple_dart_domain *domain) @@ -351,7 +378,7 @@ static void apple_dart_domain_flush_tlb(struct apple_dart_domain *domain) for (j = 0; j < BITS_TO_LONGS(stream_map.dart->num_streams); j++) stream_map.sidmap[j] = atomic_long_read(&domain_stream_map->sidmap[j]); - apple_dart_hw_invalidate_tlb(&stream_map); + stream_map.dart->hw->invalidate_tlb(&stream_map); } } @@ -425,7 +452,7 @@ apple_dart_setup_translation(struct apple_dart_domain *domain, apple_dart_hw_clear_ttbr(stream_map, i); apple_dart_hw_enable_translation(stream_map); - apple_dart_hw_invalidate_tlb(stream_map); + stream_map->dart->hw->invalidate_tlb(stream_map); } static int apple_dart_finalize_domain(struct iommu_domain *domain, @@ -816,30 +843,30 @@ static const struct iommu_ops apple_dart_iommu_ops = { } }; -static irqreturn_t apple_dart_irq(int irq, void *dev) +static irqreturn_t apple_dart_t8020_irq(int irq, void *dev) { struct apple_dart *dart = dev; const char *fault_name = NULL; - u32 error = readl(dart->regs + DART_ERROR); - u32 error_code = FIELD_GET(DART_ERROR_CODE, error); - u32 addr_lo = readl(dart->regs + DART_ERROR_ADDR_LO); - u32 addr_hi = readl(dart->regs + DART_ERROR_ADDR_HI); + u32 error = readl(dart->regs + DART_T8020_ERROR); + u32 error_code = FIELD_GET(DART_T8020_ERROR_CODE, error); + u32 addr_lo = readl(dart->regs + DART_T8020_ERROR_ADDR_LO); + u32 addr_hi = readl(dart->regs + DART_T8020_ERROR_ADDR_HI); u64 addr = addr_lo | (((u64)addr_hi) << 32); - u8 stream_idx = FIELD_GET(DART_ERROR_STREAM, error); + u8 stream_idx = FIELD_GET(DART_T8020_ERROR_STREAM, error); - if (!(error & DART_ERROR_FLAG)) + if (!(error & DART_T8020_ERROR_FLAG)) return IRQ_NONE; /* there should only be a single bit set but let's use == to be sure */ - if (error_code == DART_ERROR_READ_FAULT) + if (error_code == DART_T8020_ERROR_READ_FAULT) fault_name = "READ FAULT"; - else if (error_code == DART_ERROR_WRITE_FAULT) + else if (error_code == DART_T8020_ERROR_WRITE_FAULT) fault_name = "WRITE FAULT"; - else if (error_code == DART_ERROR_NO_PTE) + else if (error_code == DART_T8020_ERROR_NO_PTE) fault_name = "NO PTE FOR IOVA"; - else if (error_code == DART_ERROR_NO_PMD) + else if (error_code == DART_T8020_ERROR_NO_PMD) fault_name = "NO PMD FOR IOVA"; - else if (error_code == DART_ERROR_NO_TTBR) + else if (error_code == DART_T8020_ERROR_NO_TTBR) fault_name = "NO TTBR FOR IOVA"; else fault_name = "unknown"; @@ -849,7 +876,7 @@ static irqreturn_t apple_dart_irq(int irq, void *dev) "translation fault: status:0x%x stream:%d code:0x%x (%s) at 0x%llx", error, stream_idx, error_code, fault_name, addr); - writel(error, dart->regs + DART_ERROR); + writel(error, dart->regs + DART_T8020_ERROR); return IRQ_HANDLED; } @@ -911,7 +938,7 @@ static int apple_dart_probe(struct platform_device *pdev) if (ret) goto err_clk_disable; - ret = request_irq(dart->irq, apple_dart_irq, IRQF_SHARED, + ret = request_irq(dart->irq, dart->hw->irq_handler, IRQF_SHARED, 
"apple-dart fault handler", dart); if (ret) goto err_clk_disable; @@ -959,17 +986,51 @@ static int apple_dart_remove(struct platform_device *pdev) } static const struct apple_dart_hw apple_dart_hw_t8103 = { + .irq_handler = apple_dart_t8020_irq, + .invalidate_tlb = apple_dart_t8020_hw_invalidate_tlb, .oas = 36, .fmt = APPLE_DART, .max_sid_count = 16, + .enable_streams = DART_T8020_STREAMS_ENABLE, + .lock = DART_T8020_CONFIG, + .lock_bit = DART_T8020_CONFIG_LOCK, + + .error = DART_T8020_ERROR, + + .tcr = DART_T8020_TCR, + .tcr_enabled = DART_T8020_TCR_TRANSLATE_ENABLE, + .tcr_disabled = 0, + .tcr_bypass = DART_T8020_TCR_BYPASS_DAPF | DART_T8020_TCR_BYPASS_DART, + + .ttbr = DART_T8020_TTBR, + .ttbr_valid = DART_T8020_TTBR_VALID, + .ttbr_addr_field_shift = DART_T8020_TTBR_ADDR_FIELD_SHIFT, + .ttbr_shift = DART_T8020_TTBR_SHIFT, .ttbr_count = 4, }; static const struct apple_dart_hw apple_dart_hw_t6000 = { + .irq_handler = apple_dart_t8020_irq, + .invalidate_tlb = apple_dart_t8020_hw_invalidate_tlb, .oas = 42, .fmt = APPLE_DART2, .max_sid_count = 16, + .enable_streams = DART_T8020_STREAMS_ENABLE, + .lock = DART_T8020_CONFIG, + .lock_bit = DART_T8020_CONFIG_LOCK, + + .error = DART_T8020_ERROR, + + .tcr = DART_T8020_TCR, + .tcr_enabled = DART_T8020_TCR_TRANSLATE_ENABLE, + .tcr_disabled = 0, + .tcr_bypass = DART_T8020_TCR_BYPASS_DAPF | DART_T8020_TCR_BYPASS_DART, + + .ttbr = DART_T8020_TTBR, + .ttbr_valid = DART_T8020_TTBR_VALID, + .ttbr_addr_field_shift = DART_T8020_TTBR_ADDR_FIELD_SHIFT, + .ttbr_shift = DART_T8020_TTBR_SHIFT, .ttbr_count = 4, }; @@ -979,7 +1040,7 @@ static __maybe_unused int apple_dart_suspend(struct device *dev) unsigned int sid, idx; for (sid = 0; sid < dart->num_streams; sid++) { - dart->save_tcr[sid] = readl_relaxed(dart->regs + DART_TCR(sid)); + dart->save_tcr[sid] = readl_relaxed(dart->regs + DART_TCR(dart, sid)); for (idx = 0; idx < dart->hw->ttbr_count; idx++) dart->save_ttbr[sid][idx] = readl(dart->regs + DART_TTBR(dart, sid, idx)); @@ -1004,7 +1065,7 @@ static __maybe_unused int apple_dart_resume(struct device *dev) for (idx = 0; idx < dart->hw->ttbr_count; idx++) writel(dart->save_ttbr[sid][idx], dart->regs + DART_TTBR(dart, sid, idx)); - writel(dart->save_tcr[sid], dart->regs + DART_TCR(sid)); + writel(dart->save_tcr[sid], dart->regs + DART_TCR(dart, sid)); } return 0; -- cgit v1.2.3 From d8bcc870d99d7e523df31ab60f1228a8d9061191 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Fri, 13 Jan 2023 19:50:30 +0900 Subject: iommu: dart: Add t8110 DART support Now that we have the driver properly parameterized, we can add support for T8110 DARTs. These DARTs drop the multiple TTBRs (which only make sense with legacy 4K page platforms) and instead add support for new features and more stream IDs. The register layout is different, but the pagetable format is the same as T6000. 
Reviewed-by: Sven Peter Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20230113105029.26654-8-marcan@marcan.st Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 205 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 200 insertions(+), 5 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 75647433df9d..42666617803d 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -85,6 +85,62 @@ #define DART_T8020_TTBR_ADDR_FIELD_SHIFT 0 #define DART_T8020_TTBR_SHIFT 12 +/* T8110 registers */ + +#define DART_T8110_PARAMS3 0x08 +#define DART_T8110_PARAMS3_PA_WIDTH GENMASK(29, 24) +#define DART_T8110_PARAMS3_VA_WIDTH GENMASK(21, 16) +#define DART_T8110_PARAMS3_VER_MAJ GENMASK(15, 8) +#define DART_T8110_PARAMS3_VER_MIN GENMASK(7, 0) + +#define DART_T8110_PARAMS4 0x0c +#define DART_T8110_PARAMS4_NUM_CLIENTS GENMASK(24, 16) +#define DART_T8110_PARAMS4_NUM_SIDS GENMASK(8, 0) + +#define DART_T8110_TLB_CMD 0x80 +#define DART_T8110_TLB_CMD_BUSY BIT(31) +#define DART_T8110_TLB_CMD_OP GENMASK(10, 8) +#define DART_T8110_TLB_CMD_OP_FLUSH_ALL 0 +#define DART_T8110_TLB_CMD_OP_FLUSH_SID 1 +#define DART_T8110_TLB_CMD_STREAM GENMASK(7, 0) + +#define DART_T8110_ERROR 0x100 +#define DART_T8110_ERROR_STREAM GENMASK(27, 20) +#define DART_T8110_ERROR_CODE GENMASK(14, 0) +#define DART_T8110_ERROR_FLAG BIT(31) + +#define DART_T8110_ERROR_MASK 0x104 + +#define DART_T8110_ERROR_READ_FAULT BIT(5) +#define DART_T8110_ERROR_WRITE_FAULT BIT(4) +#define DART_T8110_ERROR_NO_PTE BIT(3) +#define DART_T8110_ERROR_NO_PMD BIT(2) +#define DART_T8110_ERROR_NO_PGD BIT(1) +#define DART_T8110_ERROR_NO_TTBR BIT(0) + +#define DART_T8110_ERROR_ADDR_LO 0x170 +#define DART_T8110_ERROR_ADDR_HI 0x174 + +#define DART_T8110_PROTECT 0x200 +#define DART_T8110_UNPROTECT 0x204 +#define DART_T8110_PROTECT_LOCK 0x208 +#define DART_T8110_PROTECT_TTBR_TCR BIT(0) + +#define DART_T8110_ENABLE_STREAMS 0xc00 +#define DART_T8110_DISABLE_STREAMS 0xc20 + +#define DART_T8110_TCR 0x1000 +#define DART_T8110_TCR_REMAP GENMASK(11, 8) +#define DART_T8110_TCR_REMAP_EN BIT(7) +#define DART_T8110_TCR_BYPASS_DAPF BIT(2) +#define DART_T8110_TCR_BYPASS_DART BIT(1) +#define DART_T8110_TCR_TRANSLATE_ENABLE BIT(0) + +#define DART_T8110_TTBR 0x1400 +#define DART_T8110_TTBR_VALID BIT(0) +#define DART_T8110_TTBR_ADDR_FIELD_SHIFT 2 +#define DART_T8110_TTBR_SHIFT 14 + #define DART_TCR(dart, sid) ((dart)->hw->tcr + ((sid) << 2)) #define DART_TTBR(dart, sid, idx) ((dart)->hw->ttbr + \ @@ -93,7 +149,14 @@ struct apple_dart_stream_map; +enum dart_type { + DART_T8020, + DART_T6000, + DART_T8110, +}; + struct apple_dart_hw { + enum dart_type type; irqreturn_t (*irq_handler)(int irq, void *dev); int (*invalidate_tlb)(struct apple_dart_stream_map *stream_map); @@ -149,6 +212,8 @@ struct apple_dart { spinlock_t lock; + u32 ias; + u32 oas; u32 pgsize; u32 num_streams; u32 supports_bypass : 1; @@ -330,6 +395,44 @@ apple_dart_t8020_hw_stream_command(struct apple_dart_stream_map *stream_map, return 0; } +static int +apple_dart_t8110_hw_tlb_command(struct apple_dart_stream_map *stream_map, + u32 command) +{ + struct apple_dart *dart = stream_map->dart; + unsigned long flags; + int ret = 0; + int sid; + + spin_lock_irqsave(&dart->lock, flags); + + for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) { + u32 val = FIELD_PREP(DART_T8110_TLB_CMD_OP, command) | + FIELD_PREP(DART_T8110_TLB_CMD_STREAM, sid); + writel(val, dart->regs + DART_T8110_TLB_CMD); + + ret =
readl_poll_timeout_atomic( + dart->regs + DART_T8110_TLB_CMD, val, + !(val & DART_T8110_TLB_CMD_BUSY), 1, + DART_STREAM_COMMAND_BUSY_TIMEOUT); + + if (ret) + break; + + } + + spin_unlock_irqrestore(&dart->lock, flags); + + if (ret) { + dev_err(stream_map->dart->dev, + "busy bit did not clear after command %x for stream %d\n", + command, sid); + return ret; + } + + return 0; +} + static int apple_dart_t8020_hw_invalidate_tlb(struct apple_dart_stream_map *stream_map) { @@ -337,6 +440,13 @@ apple_dart_t8020_hw_invalidate_tlb(struct apple_dart_stream_map *stream_map) stream_map, DART_T8020_STREAM_COMMAND_INVALIDATE); } +static int +apple_dart_t8110_hw_invalidate_tlb(struct apple_dart_stream_map *stream_map) +{ + return apple_dart_t8110_hw_tlb_command( + stream_map, DART_T8110_TLB_CMD_OP_FLUSH_SID); +} + static int apple_dart_hw_reset(struct apple_dart *dart) { u32 config; @@ -363,6 +473,9 @@ static int apple_dart_hw_reset(struct apple_dart *dart) /* clear any pending errors before the interrupt is unmasked */ writel(readl(dart->regs + dart->hw->error), dart->regs + dart->hw->error); + if (dart->hw->type == DART_T8110) + writel(0, dart->regs + DART_T8110_ERROR_MASK); + return dart->hw->invalidate_tlb(&stream_map); } @@ -478,8 +591,8 @@ static int apple_dart_finalize_domain(struct iommu_domain *domain, pgtbl_cfg = (struct io_pgtable_cfg){ .pgsize_bitmap = dart->pgsize, - .ias = 32, - .oas = dart->hw->oas, + .ias = dart->ias, + .oas = dart->oas, .coherent_walk = 1, .iommu_dev = dart->dev, }; @@ -493,7 +606,7 @@ static int apple_dart_finalize_domain(struct iommu_domain *domain, domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; domain->geometry.aperture_start = 0; - domain->geometry.aperture_end = DMA_BIT_MASK(32); + domain->geometry.aperture_end = (dma_addr_t)DMA_BIT_MASK(dart->ias); domain->geometry.force_aperture = true; dart_domain->finalized = true; @@ -880,10 +993,49 @@ static irqreturn_t apple_dart_t8020_irq(int irq, void *dev) return IRQ_HANDLED; } +static irqreturn_t apple_dart_t8110_irq(int irq, void *dev) +{ + struct apple_dart *dart = dev; + const char *fault_name = NULL; + u32 error = readl(dart->regs + DART_T8110_ERROR); + u32 error_code = FIELD_GET(DART_T8110_ERROR_CODE, error); + u32 addr_lo = readl(dart->regs + DART_T8110_ERROR_ADDR_LO); + u32 addr_hi = readl(dart->regs + DART_T8110_ERROR_ADDR_HI); + u64 addr = addr_lo | (((u64)addr_hi) << 32); + u8 stream_idx = FIELD_GET(DART_T8110_ERROR_STREAM, error); + + if (!(error & DART_T8110_ERROR_FLAG)) + return IRQ_NONE; + + /* there should only be a single bit set but let's use == to be sure */ + if (error_code == DART_T8110_ERROR_READ_FAULT) + fault_name = "READ FAULT"; + else if (error_code == DART_T8110_ERROR_WRITE_FAULT) + fault_name = "WRITE FAULT"; + else if (error_code == DART_T8110_ERROR_NO_PTE) + fault_name = "NO PTE FOR IOVA"; + else if (error_code == DART_T8110_ERROR_NO_PMD) + fault_name = "NO PMD FOR IOVA"; + else if (error_code == DART_T8110_ERROR_NO_PGD) + fault_name = "NO PGD FOR IOVA"; + else if (error_code == DART_T8110_ERROR_NO_TTBR) + fault_name = "NO TTBR FOR IOVA"; + else + fault_name = "unknown"; + + dev_err_ratelimited( + dart->dev, + "translation fault: status:0x%x stream:%d code:0x%x (%s) at 0x%llx", + error, stream_idx, error_code, fault_name, addr); + + writel(error, dart->regs + DART_T8110_ERROR); + return IRQ_HANDLED; +} + static int apple_dart_probe(struct platform_device *pdev) { int ret; - u32 dart_params[2]; + u32 dart_params[4]; struct resource *res; struct apple_dart *dart; struct device *dev = 
&pdev->dev; @@ -923,7 +1075,22 @@ static int apple_dart_probe(struct platform_device *pdev) dart->pgsize = 1 << FIELD_GET(DART_PARAMS1_PAGE_SHIFT, dart_params[0]); dart->supports_bypass = dart_params[1] & DART_PARAMS2_BYPASS_SUPPORT; - dart->num_streams = dart->hw->max_sid_count; + switch (dart->hw->type) { + case DART_T8020: + case DART_T6000: + dart->ias = 32; + dart->oas = dart->hw->oas; + dart->num_streams = dart->hw->max_sid_count; + break; + + case DART_T8110: + dart_params[2] = readl(dart->regs + DART_T8110_PARAMS3); + dart_params[3] = readl(dart->regs + DART_T8110_PARAMS4); + dart->ias = FIELD_GET(DART_T8110_PARAMS3_VA_WIDTH, dart_params[2]); + dart->oas = FIELD_GET(DART_T8110_PARAMS3_PA_WIDTH, dart_params[2]); + dart->num_streams = FIELD_GET(DART_T8110_PARAMS4_NUM_SIDS, dart_params[3]); + break; + } if (dart->num_streams > DART_MAX_STREAMS) { dev_err(&pdev->dev, "Too many streams (%d > %d)\n", @@ -986,6 +1153,7 @@ static int apple_dart_remove(struct platform_device *pdev) } static const struct apple_dart_hw apple_dart_hw_t8103 = { + .type = DART_T8020, .irq_handler = apple_dart_t8020_irq, .invalidate_tlb = apple_dart_t8020_hw_invalidate_tlb, .oas = 36, @@ -1010,6 +1178,7 @@ static const struct apple_dart_hw apple_dart_hw_t8103 = { .ttbr_count = 4, }; static const struct apple_dart_hw apple_dart_hw_t6000 = { + .type = DART_T6000, .irq_handler = apple_dart_t8020_irq, .invalidate_tlb = apple_dart_t8020_hw_invalidate_tlb, .oas = 42, @@ -1034,6 +1203,31 @@ static const struct apple_dart_hw apple_dart_hw_t6000 = { .ttbr_count = 4, }; +static const struct apple_dart_hw apple_dart_hw_t8110 = { + .type = DART_T8110, + .irq_handler = apple_dart_t8110_irq, + .invalidate_tlb = apple_dart_t8110_hw_invalidate_tlb, + .fmt = APPLE_DART2, + .max_sid_count = 256, + + .enable_streams = DART_T8110_ENABLE_STREAMS, + .lock = DART_T8110_PROTECT, + .lock_bit = DART_T8110_PROTECT_TTBR_TCR, + + .error = DART_T8110_ERROR, + + .tcr = DART_T8110_TCR, + .tcr_enabled = DART_T8110_TCR_TRANSLATE_ENABLE, + .tcr_disabled = 0, + .tcr_bypass = DART_T8110_TCR_BYPASS_DAPF | DART_T8110_TCR_BYPASS_DART, + + .ttbr = DART_T8110_TTBR, + .ttbr_valid = DART_T8110_TTBR_VALID, + .ttbr_addr_field_shift = DART_T8110_TTBR_ADDR_FIELD_SHIFT, + .ttbr_shift = DART_T8110_TTBR_SHIFT, + .ttbr_count = 1, +}; + static __maybe_unused int apple_dart_suspend(struct device *dev) { struct apple_dart *dart = dev_get_drvdata(dev); @@ -1075,6 +1269,7 @@ DEFINE_SIMPLE_DEV_PM_OPS(apple_dart_pm_ops, apple_dart_suspend, apple_dart_resume) static const struct of_device_id apple_dart_of_match[] = { { .compatible = "apple,t8103-dart", .data = &apple_dart_hw_t8103 }, + { .compatible = "apple,t8110-dart", .data = &apple_dart_hw_t8110 }, { .compatible = "apple,t6000-dart", .data = &apple_dart_hw_t6000 }, {}, }; -- cgit v1.2.3 From 05d227efbd8d3ce28c3a87b66b5794235be197f4 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 17 Jan 2023 04:40:38 +0000 Subject: iommu/amd: Do not clear event/ppr log buffer when snp is enabled The current code clears each event log and PPR log entry after processing it, to work around hardware errata ([1], errata #732 and #733). This hardware issue does not exist on SNP-enabled systems. When SNP is enabled, the event log, PPR log and completion wait buffer are read-only to the host (see the SNP FW ABI spec [2]), and clearing those entries would result in a kernel #PF for an RMP violation. Hence, do not clear event and PPR log entries after processing them when SNP is enabled.
[1] http://developer.amd.com/wordpress/media/2012/10/48931_15h_Mod_10h-1Fh_Rev_Guide.pdf [2] https://www.amd.com/system/files/TechDocs/56860.pdf Signed-off-by: Tom Lendacky Signed-off-by: Vasant Hegde Reviewed-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20230117044038.5728-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 7b4390a2f726..33c134a2b581 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -667,7 +667,14 @@ retry: event[0], event[1], event[2], event[3]); } - memset(__evt, 0, 4 * sizeof(u32)); + /* + * To detect the hardware errata 732 we need to clear the + * entry back to zero. This issue does not exist on SNP + * enabled system. Also this buffer is not writeable on + * SNP enabled system. + */ + if (!amd_iommu_snp_en) + memset(__evt, 0, 4 * sizeof(u32)); } static void iommu_poll_events(struct amd_iommu *iommu) @@ -736,10 +743,13 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu) entry[1] = raw[1]; /* - * To detect the hardware bug we need to clear the entry - * back to zero. + * To detect the hardware errata 733 we need to clear the + * entry back to zero. This issue does not exist on SNP + * enabled system. Also this buffer is not writeable on + * SNP enabled system. */ - raw[0] = raw[1] = 0UL; + if (!amd_iommu_snp_en) + raw[0] = raw[1] = 0UL; /* Update head pointer of hardware ring-buffer */ head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; -- cgit v1.2.3 From 53719876e064643a6e58b5e6067a149a0fd191ec Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 4 Jan 2023 17:57:02 +0800 Subject: iommu/exynos: Fix error handling in exynos_iommu_init() If platform_driver_register() fails, the driver was never registered, so there is nothing to unregister; instead, call kmem_cache_free() to free the memory that was allocated before the registration attempt. Fixes: bbc4d205d93f ("iommu/exynos: Fix driver initialization sequence") Signed-off-by: Yang Yingliang Reviewed-by: Sam Protsenko Link: https://lore.kernel.org/r/20230104095702.2591122-1-yangyingliang@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index b0cde2211987..c1d579c24740 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1446,7 +1446,7 @@ static int __init exynos_iommu_init(void) return 0; err_reg_driver: - platform_driver_unregister(&exynos_sysmmu_driver); + kmem_cache_free(lv2table_kmem_cache, zero_lv2_table); err_zero_lv2: kmem_cache_destroy(lv2table_kmem_cache); return ret; -- cgit v1.2.3 From c2b83395e558ada255fead5b9c9c724d9d487cdb Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Mon, 12 Dec 2022 13:10:53 +0100 Subject: iommu/arm-smmu-qcom: Add SM8150 DPU compatible Add the SM8150 DPU compatible to the clients compatible list, as it also needs the workarounds.
Signed-off-by: Marijn Suijten Signed-off-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20221212121054.193059-1-konrad.dybcio@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 91d404deb115..ed884c85e262 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -250,6 +250,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,sc7280-mdss" }, { .compatible = "qcom,sc7280-mss-pil" }, { .compatible = "qcom,sc8180x-mdss" }, + { .compatible = "qcom,sm8150-mdss" }, { .compatible = "qcom,sm8250-mdss" }, { .compatible = "qcom,sdm845-mdss" }, { .compatible = "qcom,sdm845-mss-pil" }, -- cgit v1.2.3 From 5fba66d42746c9b8063de7ce4565d0173da657a2 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 12 Jan 2023 20:11:04 -0800 Subject: iommu/arm-smmu-qcom: Select identity domain for sc8280xp MDSS The Qualcomm display driver installs a translation domain once it has mapped a framebuffer. Use the identity domain for this device on SC8280XP as well, to avoid faults from EFI FB accessing the framebuffer while this is being set up. Signed-off-by: Bjorn Andersson Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230113041104.4189152-1-quic_bjorande@quicinc.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index ed884c85e262..d7ad49aa997e 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -250,6 +250,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,sc7280-mdss" }, { .compatible = "qcom,sc7280-mss-pil" }, { .compatible = "qcom,sc8180x-mdss" }, + { .compatible = "qcom,sc8280xp-mdss" }, { .compatible = "qcom,sm8150-mdss" }, { .compatible = "qcom,sm8250-mdss" }, { .compatible = "qcom,sdm845-mdss" }, -- cgit v1.2.3 From a5bf3cfce8cb77d9d24613ab52d520896f83dd48 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 20 Jan 2023 18:42:50 +0100 Subject: iommu: Implement of_iommu_get_resv_regions() This is an implementation that IOMMU drivers can use to obtain reserved memory regions from a device tree node. It uses the reserved-memory DT bindings to find the regions associated with a given device. If these regions are marked accordingly, identity mappings will be created for them in the IOMMU domain that the devices will be attached to. 
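For driver authors, wiring the helper up is essentially a one-liner in the .get_resv_regions() callback. A minimal sketch follows; the mydrv names are hypothetical, and only of_iommu_get_resv_regions() comes from this patch:

  #include <linux/iommu.h>
  #include <linux/of_iommu.h>

  /* Hypothetical driver hook: forward to the DT helper, then add any
   * driver-specific regions as usual. */
  static void mydrv_get_resv_regions(struct device *dev, struct list_head *head)
  {
          of_iommu_get_resv_regions(dev, head);
          /* e.g. append an MSI window or other fixed reservations here */
  }

  static const struct iommu_ops mydrv_iommu_ops = {
          .get_resv_regions = mydrv_get_resv_regions,
          /* ... remaining ops ... */
  };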
Cc: Frank Rowand Cc: devicetree@vger.kernel.org Reviewed-by: Rob Herring Acked-by: Robin Murphy Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20230120174251.4004100-4-thierry.reding@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/of_iommu.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/of_iommu.h | 8 +++++ 2 files changed, 102 insertions(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 00d98f08732f..3c2e2a13cf9c 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -171,3 +172,96 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, return ops; } + +static enum iommu_resv_type iommu_resv_region_get_type(struct device *dev, struct resource *phys, + phys_addr_t start, size_t length) +{ + phys_addr_t end = start + length - 1; + + /* + * IOMMU regions without an associated physical region cannot be + * mapped and are simply reservations. + */ + if (phys->start >= phys->end) + return IOMMU_RESV_RESERVED; + + /* may be IOMMU_RESV_DIRECT_RELAXABLE for certain cases */ + if (start == phys->start && end == phys->end) + return IOMMU_RESV_DIRECT; + + dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", &phys, + &start, &end); + return IOMMU_RESV_RESERVED; +} + +/** + * of_iommu_get_resv_regions - reserved region driver helper for device tree + * @dev: device for which to get reserved regions + * @list: reserved region list + * + * IOMMU drivers can use this to implement their .get_resv_regions() callback + * for memory regions attached to a device tree node. See the reserved-memory + * device tree bindings on how to use these: + * + * Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt + */ +void of_iommu_get_resv_regions(struct device *dev, struct list_head *list) +{ +#if IS_ENABLED(CONFIG_OF_ADDRESS) + struct of_phandle_iterator it; + int err; + + of_for_each_phandle(&it, err, dev->of_node, "memory-region", NULL, 0) { + const __be32 *maps, *end; + struct resource phys; + int size; + + memset(&phys, 0, sizeof(phys)); + + /* + * The "reg" property is optional and can be omitted by reserved-memory regions + * that represent reservations in the IOVA space, which are regions that should + * not be mapped. 
+ */ + if (of_find_property(it.node, "reg", NULL)) { + err = of_address_to_resource(it.node, 0, &phys); + if (err < 0) { + dev_err(dev, "failed to parse memory region %pOF: %d\n", + it.node, err); + continue; + } + } + + maps = of_get_property(it.node, "iommu-addresses", &size); + if (!maps) + continue; + + end = maps + size / sizeof(__be32); + + while (maps < end) { + struct device_node *np; + u32 phandle; + + phandle = be32_to_cpup(maps++); + np = of_find_node_by_phandle(phandle); + + if (np == dev->of_node) { + int prot = IOMMU_READ | IOMMU_WRITE; + struct iommu_resv_region *region; + enum iommu_resv_type type; + phys_addr_t iova; + size_t length; + + maps = of_translate_dma_region(np, maps, &iova, &length); + type = iommu_resv_region_get_type(dev, &phys, iova, length); + + region = iommu_alloc_resv_region(iova, length, prot, type, + GFP_KERNEL); + if (region) + list_add_tail(&region->list, list); + } + } + } +#endif +} +EXPORT_SYMBOL(of_iommu_get_resv_regions); diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index 55c1eb300a86..9a5e6b410dd2 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -12,6 +12,9 @@ extern const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id); +extern void of_iommu_get_resv_regions(struct device *dev, + struct list_head *list); + #else static inline const struct iommu_ops *of_iommu_configure(struct device *dev, @@ -21,6 +24,11 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev, return NULL; } +static inline void of_iommu_get_resv_regions(struct device *dev, + struct list_head *list) +{ +} + #endif /* CONFIG_OF_IOMMU */ #endif /* __OF_IOMMU_H */ -- cgit v1.2.3 From 5cef282e295f7cf623672470d040716d1e3eacf2 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 20 Jan 2023 18:42:51 +0100 Subject: iommu: dma: Use of_iommu_get_resv_regions() For device tree nodes, use the standard of_iommu_get_resv_regions() implementation to obtain the reserved memory regions associated with a device. Cc: Rob Herring Cc: Frank Rowand Cc: devicetree@vger.kernel.org Acked-by: Robin Murphy Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20230120174251.4004100-5-thierry.reding@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index f798c44e0903..8e023f032b76 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -391,6 +392,8 @@ void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode)) iort_iommu_get_resv_regions(dev, list); + if (dev->of_node) + of_iommu_get_resv_regions(dev, list); } EXPORT_SYMBOL(iommu_dma_get_resv_regions); -- cgit v1.2.3 From 1369459b2e219a6f4c861404c4f195cd81dcbb40 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Jan 2023 16:35:54 -0400 Subject: iommu: Add a gfp parameter to iommu_map() The internal mechanisms support this, but instead of exposing the gfp to the caller it is hard-wired inside iommu_map() and iommu_map_atomic(). Fix this instead of adding more variants for GFP_KERNEL_ACCOUNT.
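The conversion pattern for callers is mechanical; a hedged sketch of the new calling convention (the function and names here are illustrative, only the iommu_map() signature comes from this patch):

  #include <linux/gfp.h>
  #include <linux/iommu.h>
  #include <linux/types.h>

  /* Callers now state their allocation context explicitly instead of
   * choosing between iommu_map() and iommu_map_atomic(). */
  static int example_map(struct iommu_domain *dom, unsigned long iova,
                         phys_addr_t pa, size_t sz, bool atomic_ctx)
  {
          gfp_t gfp = atomic_ctx ? GFP_ATOMIC : GFP_KERNEL;

          return iommu_map(dom, iova, pa, sz, IOMMU_READ | IOMMU_WRITE, gfp);
  }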
Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Reviewed-by: Mathieu Poirier Link: https://lore.kernel.org/r/1-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- arch/arm/mm/dma-mapping.c | 11 +++++++---- .../gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c | 3 ++- drivers/gpu/drm/tegra/drm.c | 2 +- drivers/gpu/host1x/cdma.c | 2 +- drivers/infiniband/hw/usnic/usnic_uiom.c | 4 ++-- drivers/iommu/dma-iommu.c | 2 +- drivers/iommu/iommu.c | 22 +++++++++++----------- drivers/iommu/iommufd/pages.c | 6 ++++-- drivers/media/platform/qcom/venus/firmware.c | 2 +- drivers/net/ipa/ipa_mem.c | 6 ++++-- drivers/net/wireless/ath/ath10k/snoc.c | 2 +- drivers/net/wireless/ath/ath11k/ahb.c | 4 ++-- drivers/remoteproc/remoteproc_core.c | 5 +++-- drivers/vfio/vfio_iommu_type1.c | 9 +++++---- drivers/vhost/vdpa.c | 2 +- include/linux/iommu.h | 4 ++-- 16 files changed, 48 insertions(+), 38 deletions(-) (limited to 'drivers/iommu') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c135f6e37a00..8bc01071474a 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -984,7 +984,8 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size, len = (j - i) << PAGE_SHIFT; ret = iommu_map(mapping->domain, iova, phys, len, - __dma_info_to_prot(DMA_BIDIRECTIONAL, attrs)); + __dma_info_to_prot(DMA_BIDIRECTIONAL, attrs), + GFP_KERNEL); if (ret < 0) goto fail; iova += len; @@ -1207,7 +1208,8 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, prot = __dma_info_to_prot(dir, attrs); - ret = iommu_map(mapping->domain, iova, phys, len, prot); + ret = iommu_map(mapping->domain, iova, phys, len, prot, + GFP_KERNEL); if (ret < 0) goto fail; count += len >> PAGE_SHIFT; @@ -1379,7 +1381,8 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, prot = __dma_info_to_prot(dir, attrs); - ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot); + ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, + prot, GFP_KERNEL); if (ret < 0) goto fail; @@ -1443,7 +1446,7 @@ static dma_addr_t arm_iommu_map_resource(struct device *dev, prot = __dma_info_to_prot(dir, attrs) | IOMMU_MMIO; - ret = iommu_map(mapping->domain, dma_addr, addr, len, prot); + ret = iommu_map(mapping->domain, dma_addr, addr, len, prot, GFP_KERNEL); if (ret < 0) goto fail; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index 648ecf5a8fbc..a4ac94a2ab57 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -475,7 +475,8 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, u32 offset = (r->offset + i) << imem->iommu_pgshift; ret = iommu_map(imem->domain, offset, node->dma_addrs[i], - PAGE_SIZE, IOMMU_READ | IOMMU_WRITE); + PAGE_SIZE, IOMMU_READ | IOMMU_WRITE, + GFP_KERNEL); if (ret < 0) { nvkm_error(subdev, "IOMMU mapping failure: %d\n", ret); diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 7bd2e65c2a16..6ca9f396e55b 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1057,7 +1057,7 @@ void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *dma) *dma = iova_dma_addr(&tegra->carveout.domain, alloc); err = iommu_map(tegra->domain, *dma, virt_to_phys(virt), - size, IOMMU_READ | IOMMU_WRITE); + size, IOMMU_READ | IOMMU_WRITE, GFP_KERNEL); if (err < 0) goto free_iova; diff --git 
a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 103fda055394..4ddfcd2138c9 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -105,7 +105,7 @@ static int host1x_pushbuffer_init(struct push_buffer *pb) pb->dma = iova_dma_addr(&host1x->iova, alloc); err = iommu_map(host1x->domain, pb->dma, pb->phys, size, - IOMMU_READ); + IOMMU_READ, GFP_KERNEL); if (err) goto iommu_free_iova; } else { diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index c301b3be9f30..aeeaca65ace9 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -277,7 +277,7 @@ iter_chunk: usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x", va_start, &pa_start, size, flags); err = iommu_map(pd->domain, va_start, pa_start, - size, flags); + size, flags, GFP_KERNEL); if (err) { usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", va_start, &pa_start, size, err); @@ -294,7 +294,7 @@ iter_chunk: usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n", va_start, &pa_start, size, flags); err = iommu_map(pd->domain, va_start, pa_start, - size, flags); + size, flags, GFP_KERNEL); if (err) { usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", va_start, &pa_start, size, err); diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index f798c44e0903..8bdb65e7686f 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1615,7 +1615,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, if (!iova) goto out_free_page; - if (iommu_map(domain, iova, msi_addr, size, prot)) + if (iommu_map(domain, iova, msi_addr, size, prot, GFP_KERNEL)) goto out_free_iova; INIT_LIST_HEAD(&msi_page->list); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index de91dd88705b..4d596b8b89e8 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -930,7 +930,7 @@ map_end: if (map_size) { ret = iommu_map(domain, addr - map_size, addr - map_size, map_size, - entry->prot); + entry->prot, GFP_KERNEL); if (ret) goto out; map_size = 0; @@ -2360,31 +2360,31 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, return ret; } -static int _iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +int iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { const struct iommu_domain_ops *ops = domain->ops; int ret; + might_sleep_if(gfpflags_allow_blocking(gfp)); + + /* Discourage passing strange GFP flags */ + if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | + __GFP_HIGHMEM))) + return -EINVAL; + ret = __iommu_map(domain, iova, paddr, size, prot, gfp); if (ret == 0 && ops->iotlb_sync_map) ops->iotlb_sync_map(domain, iova, size); return ret; } - -int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot) -{ - might_sleep(); - return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); -} EXPORT_SYMBOL_GPL(iommu_map); int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { - return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); + return iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); } EXPORT_SYMBOL_GPL(iommu_map_atomic); diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 1e1d3509efae..22cc3bb0c6c5 100644 --- a/drivers/iommu/iommufd/pages.c +++ 
b/drivers/iommu/iommufd/pages.c @@ -456,7 +456,8 @@ static int batch_iommu_map_small(struct iommu_domain *domain, size % PAGE_SIZE); while (size) { - rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot); + rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot, + GFP_KERNEL); if (rc) goto err_unmap; iova += PAGE_SIZE; @@ -500,7 +501,8 @@ static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain, else rc = iommu_map(domain, iova, PFN_PHYS(batch->pfns[cur]) + page_offset, - next_iova - iova, area->iommu_prot); + next_iova - iova, area->iommu_prot, + GFP_KERNEL); if (rc) goto err_unmap; iova = next_iova; diff --git a/drivers/media/platform/qcom/venus/firmware.c b/drivers/media/platform/qcom/venus/firmware.c index 142d4c74017c..07d4dceb5e72 100644 --- a/drivers/media/platform/qcom/venus/firmware.c +++ b/drivers/media/platform/qcom/venus/firmware.c @@ -158,7 +158,7 @@ static int venus_boot_no_tz(struct venus_core *core, phys_addr_t mem_phys, core->fw.mapped_mem_size = mem_size; ret = iommu_map(iommu, VENUS_FW_START_ADDR, mem_phys, mem_size, - IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV); + IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV, GFP_KERNEL); if (ret) { dev_err(dev, "could not map video firmware region\n"); return ret; diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c index 9ec5af323f73..991a7d39f066 100644 --- a/drivers/net/ipa/ipa_mem.c +++ b/drivers/net/ipa/ipa_mem.c @@ -466,7 +466,8 @@ static int ipa_imem_init(struct ipa *ipa, unsigned long addr, size_t size) size = PAGE_ALIGN(size + addr - phys); iova = phys; /* We just want a direct mapping */ - ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE); + ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE, + GFP_KERNEL); if (ret) return ret; @@ -574,7 +575,8 @@ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size) size = PAGE_ALIGN(size + addr - phys); iova = phys; /* We just want a direct mapping */ - ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE); + ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE, + GFP_KERNEL); if (ret) return ret; diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index cfcb759a87de..9a82f0336d95 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -1639,7 +1639,7 @@ static int ath10k_fw_init(struct ath10k *ar) ret = iommu_map(iommu_dom, ar_snoc->fw.fw_start_addr, ar->msa.paddr, ar->msa.mem_size, - IOMMU_READ | IOMMU_WRITE); + IOMMU_READ | IOMMU_WRITE, GFP_KERNEL); if (ret) { ath10k_err(ar, "failed to map firmware region: %d\n", ret); goto err_iommu_detach; diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c index d34a4d6325b2..df8fdc7067f9 100644 --- a/drivers/net/wireless/ath/ath11k/ahb.c +++ b/drivers/net/wireless/ath/ath11k/ahb.c @@ -1021,7 +1021,7 @@ static int ath11k_ahb_fw_resources_init(struct ath11k_base *ab) ret = iommu_map(iommu_dom, ab_ahb->fw.msa_paddr, ab_ahb->fw.msa_paddr, ab_ahb->fw.msa_size, - IOMMU_READ | IOMMU_WRITE); + IOMMU_READ | IOMMU_WRITE, GFP_KERNEL); if (ret) { ath11k_err(ab, "failed to map firmware region: %d\n", ret); goto err_iommu_detach; @@ -1029,7 +1029,7 @@ static int ath11k_ahb_fw_resources_init(struct ath11k_base *ab) ret = iommu_map(iommu_dom, ab_ahb->fw.ce_paddr, ab_ahb->fw.ce_paddr, ab_ahb->fw.ce_size, - IOMMU_READ | IOMMU_WRITE); + IOMMU_READ | IOMMU_WRITE, GFP_KERNEL); if (ret) { ath11k_err(ab, "failed to map firmware CE region: %d\n", ret); goto 
err_iommu_unmap; diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 1cd4815a6dd1..80072b6b6283 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -643,7 +643,8 @@ static int rproc_handle_devmem(struct rproc *rproc, void *ptr, if (!mapping) return -ENOMEM; - ret = iommu_map(rproc->domain, rsc->da, rsc->pa, rsc->len, rsc->flags); + ret = iommu_map(rproc->domain, rsc->da, rsc->pa, rsc->len, rsc->flags, + GFP_KERNEL); if (ret) { dev_err(dev, "failed to map devmem: %d\n", ret); goto out; @@ -737,7 +738,7 @@ static int rproc_alloc_carveout(struct rproc *rproc, } ret = iommu_map(rproc->domain, mem->da, dma, mem->len, - mem->flags); + mem->flags, GFP_KERNEL); if (ret) { dev_err(dev, "iommu_map failed: %d\n", ret); goto free_mapping; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 23c24fe98c00..e14f86a8ef52 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1480,7 +1480,8 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova, list_for_each_entry(d, &iommu->domain_list, next) { ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT, - npage << PAGE_SHIFT, prot | IOMMU_CACHE); + npage << PAGE_SHIFT, prot | IOMMU_CACHE, + GFP_KERNEL); if (ret) goto unwind; @@ -1777,8 +1778,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, size = npage << PAGE_SHIFT; } - ret = iommu_map(domain->domain, iova, phys, - size, dma->prot | IOMMU_CACHE); + ret = iommu_map(domain->domain, iova, phys, size, + dma->prot | IOMMU_CACHE, GFP_KERNEL); if (ret) { if (!dma->iommu_mapped) { vfio_unpin_pages_remote(dma, iova, @@ -1866,7 +1867,7 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain) return; ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2, - IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE); + IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE, GFP_KERNEL); if (!ret) { size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE); diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index ec32f785dfde..fd1536de5b1d 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -792,7 +792,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, r = ops->set_map(vdpa, asid, iotlb); } else { r = iommu_map(v->domain, iova, pa, size, - perm_to_iommu_flags(perm)); + perm_to_iommu_flags(perm), GFP_KERNEL); } if (r) { vhost_iotlb_del_range(iotlb, iova, iova + size - 1); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 46e1347bfa22..d2020994f292 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -467,7 +467,7 @@ extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot); + phys_addr_t paddr, size_t size, int prot, gfp_t gfp); extern int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, @@ -773,7 +773,7 @@ static inline struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) } static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot) + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { return -ENODEV; } -- 
cgit v1.2.3 From 4dc6376af596d9b2a46fa9baf94c9f2fa5a3d246 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Jan 2023 16:35:55 -0400 Subject: iommu: Remove iommu_map_atomic() There is only one call site and it can now just pass the GFP_ATOMIC to the normal iommu_map(). Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 2 +- drivers/iommu/iommu.c | 7 ------- include/linux/iommu.h | 9 --------- 3 files changed, 1 insertion(+), 17 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 8bdb65e7686f..7016db569f81 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -713,7 +713,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, if (!iova) return DMA_MAPPING_ERROR; - if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) { + if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) { iommu_dma_free_iova(cookie, iova, size, NULL); return DMA_MAPPING_ERROR; } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 4d596b8b89e8..19b1c3315a2c 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2381,13 +2381,6 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, } EXPORT_SYMBOL_GPL(iommu_map); -int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot) -{ - return iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); -} -EXPORT_SYMBOL_GPL(iommu_map_atomic); - static size_t __iommu_unmap_pages(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d2020994f292..521cd79700f4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -468,8 +468,6 @@ extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); -extern int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); extern size_t iommu_unmap_fast(struct iommu_domain *domain, @@ -778,13 +776,6 @@ static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, return -ENODEV; } -static inline int iommu_map_atomic(struct iommu_domain *domain, - unsigned long iova, phys_addr_t paddr, - size_t size, int prot) -{ - return -ENODEV; -} - static inline size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { -- cgit v1.2.3 From f2b2c051be6262edc3c6fce50d3d4f01b59ba228 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Jan 2023 16:35:56 -0400 Subject: iommu: Add a gfp parameter to iommu_map_sg() Follow the pattern for iommu_map() and remove iommu_map_sg_atomic(). This allows __iommu_dma_alloc_noncontiguous() to use a GFP_KERNEL allocation here, based on the provided gfp flags. 
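For former iommu_map_sg_atomic() users the conversion is again a single line; an illustrative sketch (names hypothetical, signature as introduced by this patch):

  #include <linux/iommu.h>
  #include <linux/scatterlist.h>

  static ssize_t example_map_sg(struct iommu_domain *dom, unsigned long iova,
                                struct scatterlist *sg, unsigned int nents,
                                int prot)
  {
          /* was: iommu_map_sg_atomic(dom, iova, sg, nents, prot); */
          return iommu_map_sg(dom, iova, sg, nents, prot, GFP_ATOMIC);
  }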
Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 5 +++-- drivers/iommu/iommu.c | 26 ++++++++++---------------- include/linux/iommu.h | 18 +++++------------- 3 files changed, 18 insertions(+), 31 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 7016db569f81..72cfa24503b8 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -833,7 +833,8 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, arch_dma_prep_coherent(sg_page(sg), sg->length); } - ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot); + ret = iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, ioprot, + GFP_ATOMIC); if (ret < 0 || ret < size) goto out_free_sg; @@ -1281,7 +1282,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, * We'll leave any physical concatenation to the IOMMU driver's * implementation - it knows better than we do. */ - ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot); + ret = iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC); if (ret < 0 || ret < iova_len) goto out_free_iova; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 19b1c3315a2c..1b7d0698b045 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2470,9 +2470,9 @@ size_t iommu_unmap_fast(struct iommu_domain *domain, } EXPORT_SYMBOL_GPL(iommu_unmap_fast); -static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot, - gfp_t gfp) +ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, + struct scatterlist *sg, unsigned int nents, int prot, + gfp_t gfp) { const struct iommu_domain_ops *ops = domain->ops; size_t len = 0, mapped = 0; @@ -2480,6 +2480,13 @@ static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, unsigned int i = 0; int ret; + might_sleep_if(gfpflags_allow_blocking(gfp)); + + /* Discourage passing strange GFP flags */ + if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | + __GFP_HIGHMEM))) + return -EINVAL; + while (i <= nents) { phys_addr_t s_phys = sg_phys(sg); @@ -2519,21 +2526,8 @@ out_err: return ret; } - -ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot) -{ - might_sleep(); - return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_KERNEL); -} EXPORT_SYMBOL_GPL(iommu_map_sg); -ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot) -{ - return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC); -} - /** * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework * @domain: the iommu domain where the fault has happened diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 521cd79700f4..d5c16dc33c87 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -474,10 +474,8 @@ extern size_t iommu_unmap_fast(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather); extern ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot); -extern ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, - unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot); + 
struct scatterlist *sg, unsigned int nents, + int prot, gfp_t gfp); extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova); extern void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token); @@ -791,14 +789,7 @@ static inline size_t iommu_unmap_fast(struct iommu_domain *domain, static inline ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot) -{ - return -ENODEV; -} - -static inline ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, - unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot) + unsigned int nents, int prot, gfp_t gfp) { return -ENODEV; } @@ -1109,7 +1100,8 @@ iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, static inline size_t iommu_map_sgtable(struct iommu_domain *domain, unsigned long iova, struct sg_table *sgt, int prot) { - return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot); + return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot, + GFP_KERNEL); } #ifdef CONFIG_IOMMU_DEBUGFS -- cgit v1.2.3 From 96d57808808595d33f98cef5b3c0f75dde6a72f2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Jan 2023 16:35:57 -0400 Subject: iommu/dma: Use the gfp parameter in __iommu_dma_alloc_noncontiguous() This function does an allocation of a buffer to return to the caller and then goes on to allocate some internal memory, eg the scatterlist and IOPTEs. Instead of hard wiring GFP_KERNEL and a wrong GFP_ATOMIC, continue to use the passed in gfp flags for all of the allocations. Clear the zone and policy bits that are only relevant for the buffer allocation before re-using them for internal allocations. Auditing says this is never called from an atomic context, so the GFP_ATOMIC is the incorrect flag. Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 72cfa24503b8..c99e4bc55d8c 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -822,7 +822,14 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, if (!iova) goto out_free_pages; - if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL)) + /* + * Remove the zone/policy flags from the GFP - these are applied to the + * __iommu_dma_alloc_pages() but are not used for the supporting + * internal allocations that follow. + */ + gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM | __GFP_COMP); + + if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, gfp)) goto out_free_iova; if (!(ioprot & IOMMU_CACHE)) { @@ -834,7 +841,7 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, } ret = iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, ioprot, - GFP_ATOMIC); + gfp); if (ret < 0 || ret < size) goto out_free_sg; -- cgit v1.2.3 From e787a38e31e57c35682d363e71891956d804f7e3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Jan 2023 16:35:58 -0400 Subject: iommufd: Use GFP_KERNEL_ACCOUNT for iommu_map() iommufd follows the same design as KVM and uses memory cgroups to limit the amount of kernel memory a iommufd file descriptor can pin down. The various internal data structures already use GFP_KERNEL_ACCOUNT. 
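For context on the flag itself: GFP_KERNEL_ACCOUNT is GFP_KERNEL with __GFP_ACCOUNT set, so the substitution below changes nothing about how the allocation behaves except that the pages are charged to the allocating task's memory cgroup. A sketch at an arbitrary allocation site ('size' is a placeholder):

	/*
	 * Charged to current's memcg: userspace cannot pin unbounded
	 * kernel memory through this path once the cgroup limit is hit.
	 */
	void *buf = kzalloc(size, GFP_KERNEL_ACCOUNT);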
However, one of the biggest consumers of kernel memory is the IOPTEs stored under the iommu_domain. Many drivers will allocate these at iommu_map() time and will trivially do the right thing if we pass in GFP_KERNEL_ACCOUNT. Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/5-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommufd/pages.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 22cc3bb0c6c5..f8d92c9bb65b 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -457,7 +457,7 @@ static int batch_iommu_map_small(struct iommu_domain *domain, while (size) { rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot, - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (rc) goto err_unmap; iova += PAGE_SIZE; @@ -502,7 +502,7 @@ static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain, rc = iommu_map(domain, iova, PFN_PHYS(batch->pfns[cur]) + page_offset, next_iova - iova, area->iommu_prot, - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (rc) goto err_unmap; iova = next_iova; -- cgit v1.2.3 From 2552d3a2292aa9bc14e84afe846027e9bd98eb78 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Jan 2023 16:35:59 -0400 Subject: iommu/intel: Add a gfp parameter to alloc_pgtable_page() This is eventually called by iommufd through intel_iommu_map_pages() and it should not be forced to atomic. Push the GFP_ATOMIC to all callers. Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/6-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 14 +++++++------- drivers/iommu/intel/iommu.h | 2 +- drivers/iommu/intel/pasid.c | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 59df7e42fd53..aa29561d3549 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -362,12 +362,12 @@ static int __init intel_iommu_setup(char *str) } __setup("intel_iommu=", intel_iommu_setup); -void *alloc_pgtable_page(int node) +void *alloc_pgtable_page(int node, gfp_t gfp) { struct page *page; void *vaddr = NULL; - page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0); + page = alloc_pages_node(node, gfp | __GFP_ZERO, 0); if (page) vaddr = page_address(page); return vaddr; @@ -612,7 +612,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, if (!alloc) return NULL; - context = alloc_pgtable_page(iommu->node); + context = alloc_pgtable_page(iommu->node, GFP_ATOMIC); if (!context) return NULL; @@ -935,7 +935,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if (!dma_pte_present(pte)) { uint64_t pteval; - tmp_page = alloc_pgtable_page(domain->nid); + tmp_page = alloc_pgtable_page(domain->nid, GFP_ATOMIC); if (!tmp_page) return NULL; @@ -1186,7 +1186,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) { struct root_entry *root; - root = (struct root_entry *)alloc_pgtable_page(iommu->node); + root = (struct root_entry *)alloc_pgtable_page(iommu->node, GFP_ATOMIC); if (!root) { pr_err("Allocating root entry for %s failed\n", iommu->name); @@ -2676,7 +2676,7 @@ static int copy_context_table(struct intel_iommu *iommu, if (!old_ce) goto out; - new_ce = alloc_pgtable_page(iommu->node); + new_ce = 
alloc_pgtable_page(iommu->node, GFP_ATOMIC); if (!new_ce) goto out_unmap; @@ -4136,7 +4136,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->max_addr = 0; /* always allocate the top pgd */ - domain->pgd = alloc_pgtable_page(domain->nid); + domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC); if (!domain->pgd) return -ENOMEM; domain_flush_cache(domain, domain->pgd, PAGE_SIZE); diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 06e61e474856..ca9a035e0110 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -737,7 +737,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, extern int dmar_ir_support(void); -void *alloc_pgtable_page(int node); +void *alloc_pgtable_page(int node, gfp_t gfp); void free_pgtable_page(void *vaddr); void iommu_flush_write_buffer(struct intel_iommu *iommu); struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index fb3c7020028d..c5bf74e9372d 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -200,7 +200,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) retry: entries = get_pasid_table_from_pde(&dir[dir_index]); if (!entries) { - entries = alloc_pgtable_page(info->iommu->node); + entries = alloc_pgtable_page(info->iommu->node, GFP_ATOMIC); if (!entries) return NULL; -- cgit v1.2.3 From 2d4d767659ff4d48464836a0a6de47e7540c6b13 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Jan 2023 16:36:00 -0400 Subject: iommu/intel: Support the gfp argument to the map_pages op Flow it down to alloc_pgtable_page() via pfn_to_dma_pte() and __domain_mapping(). Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/7-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index aa29561d3549..e95f7703ce7b 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -908,7 +908,8 @@ pgtable_walk: #endif static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, - unsigned long pfn, int *target_level) + unsigned long pfn, int *target_level, + gfp_t gfp) { struct dma_pte *parent, *pte; int level = agaw_to_level(domain->agaw); @@ -935,7 +936,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if (!dma_pte_present(pte)) { uint64_t pteval; - tmp_page = alloc_pgtable_page(domain->nid, GFP_ATOMIC); + tmp_page = alloc_pgtable_page(domain->nid, gfp); if (!tmp_page) return NULL; @@ -2150,7 +2151,8 @@ static void switch_to_super_page(struct dmar_domain *domain, while (start_pfn <= end_pfn) { if (!pte) - pte = pfn_to_dma_pte(domain, start_pfn, &level); + pte = pfn_to_dma_pte(domain, start_pfn, &level, + GFP_ATOMIC); if (dma_pte_present(pte)) { dma_pte_free_pagetable(domain, start_pfn, @@ -2172,7 +2174,8 @@ static void switch_to_super_page(struct dmar_domain *domain, static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, - unsigned long phys_pfn, unsigned long nr_pages, int prot) + unsigned long phys_pfn, unsigned long nr_pages, int prot, + gfp_t gfp) { struct dma_pte *first_pte = NULL, *pte = NULL; unsigned int largepage_lvl = 0; @@ -2202,7 +2205,8 @@ __domain_mapping(struct 
dmar_domain *domain, unsigned long iov_pfn, largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, nr_pages); - pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); + pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl, + gfp); if (!pte) return -ENOMEM; first_pte = pte; @@ -2368,7 +2372,7 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, return __domain_mapping(domain, first_vpfn, first_vpfn, last_vpfn - first_vpfn + 1, - DMA_PTE_READ|DMA_PTE_WRITE); + DMA_PTE_READ|DMA_PTE_WRITE, GFP_ATOMIC); } static int md_domain_init(struct dmar_domain *domain, int guest_width); @@ -4298,7 +4302,7 @@ static int intel_iommu_map(struct iommu_domain *domain, the low bits of hpa would take us onto the next page */ size = aligned_nrpages(hpa, size); return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, - hpa >> VTD_PAGE_SHIFT, size, prot); + hpa >> VTD_PAGE_SHIFT, size, prot, gfp); } static int intel_iommu_map_pages(struct iommu_domain *domain, @@ -4333,7 +4337,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, /* Cope with horrid API which requires us to unmap more than the size argument if it happens to be a large-page mapping. */ - BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level)); + BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level, + GFP_ATOMIC)); if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) size = VTD_PAGE_SIZE << level_to_offset_bits(level); @@ -4392,7 +4397,8 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, int level = 0; u64 phys = 0; - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level, + GFP_ATOMIC); if (pte && dma_pte_present(pte)) phys = dma_pte_addr(pte) + (iova & (BIT_MASK(level_to_offset_bits(level) + -- cgit v1.2.3 From 4951eb262384c24602b61b97d0bedcb54c3e4c9f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Jan 2023 16:36:01 -0400 Subject: iommu/intel: Use GFP_KERNEL in sleepable contexts These contexts are sleepable, so use the proper annotation. The GFP_ATOMIC was added mechanically in the prior patches. 
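This two-step conversion (thread GFP_ATOMIC through mechanically, then relax the call sites that are provably sleepable) is self-checking, because the core map paths updated earlier in the series already assert the caller's context:

	might_sleep_if(gfpflags_allow_blocking(gfp));

With CONFIG_DEBUG_ATOMIC_SLEEP enabled, a site wrongly relaxed to GFP_KERNEL while actually running atomically warns on first use, so each relaxation can be validated at runtime instead of by audit alone.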
Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/8-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index e95f7703ce7b..a1a66798e1f0 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2372,7 +2372,7 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, return __domain_mapping(domain, first_vpfn, first_vpfn, last_vpfn - first_vpfn + 1, - DMA_PTE_READ|DMA_PTE_WRITE, GFP_ATOMIC); + DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL); } static int md_domain_init(struct dmar_domain *domain, int guest_width); @@ -2680,7 +2680,7 @@ static int copy_context_table(struct intel_iommu *iommu, if (!old_ce) goto out; - new_ce = alloc_pgtable_page(iommu->node, GFP_ATOMIC); + new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL); if (!new_ce) goto out_unmap; -- cgit v1.2.3 From d3b82825217aab0ed56e2426a0d07af2e2ebd7df Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Jan 2023 16:36:02 -0400 Subject: iommu/s390: Push the gfp parameter to the kmem_cache_alloc()'s dma_alloc_cpu_table() and dma_alloc_page_table() are eventually called by iommufd through s390_iommu_map_pages() and it should not be forced to atomic. Thread the gfp parameter through the call chain starting from s390_iommu_map_pages(). Reviewed-by: Niklas Schnelle Reviewed-by: Matthew Rosato Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/9-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- arch/s390/include/asm/pci_dma.h | 5 +++-- arch/s390/pci/pci_dma.c | 31 +++++++++++++++++-------------- drivers/iommu/s390-iommu.c | 15 +++++++++------ 3 files changed, 29 insertions(+), 22 deletions(-) (limited to 'drivers/iommu') diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 91e63426bdc5..7119c04c51c5 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -186,9 +186,10 @@ static inline unsigned long *get_st_pto(unsigned long entry) /* Prototypes */ void dma_free_seg_table(unsigned long); -unsigned long *dma_alloc_cpu_table(void); +unsigned long *dma_alloc_cpu_table(gfp_t gfp); void dma_cleanup_tables(unsigned long *); -unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr); +unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, + gfp_t gfp); void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags); extern const struct dma_map_ops s390_pci_dma_ops; diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index ea478d11fbd1..2f6d05d6da4f 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -27,11 +27,11 @@ static int zpci_refresh_global(struct zpci_dev *zdev) zdev->iommu_pages * PAGE_SIZE); } -unsigned long *dma_alloc_cpu_table(void) +unsigned long *dma_alloc_cpu_table(gfp_t gfp) { unsigned long *table, *entry; - table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC); + table = kmem_cache_alloc(dma_region_table_cache, gfp); if (!table) return NULL; @@ -45,11 +45,11 @@ static void dma_free_cpu_table(void *table) kmem_cache_free(dma_region_table_cache, table); } -static unsigned long *dma_alloc_page_table(void) +static unsigned long *dma_alloc_page_table(gfp_t gfp) { unsigned long *table, *entry; - table = kmem_cache_alloc(dma_page_table_cache, 
GFP_ATOMIC); + table = kmem_cache_alloc(dma_page_table_cache, gfp); if (!table) return NULL; @@ -63,7 +63,7 @@ static void dma_free_page_table(void *table) kmem_cache_free(dma_page_table_cache, table); } -static unsigned long *dma_get_seg_table_origin(unsigned long *rtep) +static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp) { unsigned long old_rte, rte; unsigned long *sto; @@ -72,7 +72,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *rtep) if (reg_entry_isvalid(rte)) { sto = get_rt_sto(rte); } else { - sto = dma_alloc_cpu_table(); + sto = dma_alloc_cpu_table(gfp); if (!sto) return NULL; @@ -90,7 +90,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *rtep) return sto; } -static unsigned long *dma_get_page_table_origin(unsigned long *step) +static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp) { unsigned long old_ste, ste; unsigned long *pto; @@ -99,7 +99,7 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step) if (reg_entry_isvalid(ste)) { pto = get_st_pto(ste); } else { - pto = dma_alloc_page_table(); + pto = dma_alloc_page_table(gfp); if (!pto) return NULL; set_st_pto(&ste, virt_to_phys(pto)); @@ -116,18 +116,19 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step) return pto; } -unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) +unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, + gfp_t gfp) { unsigned long *sto, *pto; unsigned int rtx, sx, px; rtx = calc_rtx(dma_addr); - sto = dma_get_seg_table_origin(&rto[rtx]); + sto = dma_get_seg_table_origin(&rto[rtx], gfp); if (!sto) return NULL; sx = calc_sx(dma_addr); - pto = dma_get_page_table_origin(&sto[sx]); + pto = dma_get_page_table_origin(&sto[sx], gfp); if (!pto) return NULL; @@ -170,7 +171,8 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, return -EINVAL; for (i = 0; i < nr_pages; i++) { - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); + entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, + GFP_ATOMIC); if (!entry) { rc = -ENOMEM; goto undo_cpu_trans; @@ -186,7 +188,8 @@ undo_cpu_trans: while (i-- > 0) { page_addr -= PAGE_SIZE; dma_addr -= PAGE_SIZE; - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); + entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, + GFP_ATOMIC); if (!entry) break; dma_update_cpu_trans(entry, page_addr, flags); @@ -576,7 +579,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) spin_lock_init(&zdev->iommu_bitmap_lock); - zdev->dma_table = dma_alloc_cpu_table(); + zdev->dma_table = dma_alloc_cpu_table(GFP_ATOMIC); if (!zdev->dma_table) { rc = -ENOMEM; goto out; diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index ed33c6cce083..654ec4411fe3 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -52,7 +52,7 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type) if (!s390_domain) return NULL; - s390_domain->dma_table = dma_alloc_cpu_table(); + s390_domain->dma_table = dma_alloc_cpu_table(GFP_ATOMIC); if (!s390_domain->dma_table) { kfree(s390_domain); return NULL; @@ -260,7 +260,8 @@ static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain, static int s390_iommu_validate_trans(struct s390_domain *s390_domain, phys_addr_t pa, dma_addr_t dma_addr, - unsigned long nr_pages, int flags) + unsigned long nr_pages, int flags, + gfp_t gfp) { phys_addr_t page_addr = pa & PAGE_MASK; unsigned long *entry; @@ -268,7 +269,8 @@ static int 
s390_iommu_validate_trans(struct s390_domain *s390_domain, int rc; for (i = 0; i < nr_pages; i++) { - entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); + entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr, + gfp); if (unlikely(!entry)) { rc = -ENOMEM; goto undo_cpu_trans; @@ -284,7 +286,7 @@ undo_cpu_trans: while (i-- > 0) { dma_addr -= PAGE_SIZE; entry = dma_walk_cpu_trans(s390_domain->dma_table, - dma_addr); + dma_addr, gfp); if (!entry) break; dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID); @@ -301,7 +303,8 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain, int rc = 0; for (i = 0; i < nr_pages; i++) { - entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); + entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr, + GFP_ATOMIC); if (unlikely(!entry)) { rc = -EINVAL; break; @@ -339,7 +342,7 @@ static int s390_iommu_map_pages(struct iommu_domain *domain, flags |= ZPCI_TABLE_PROTECTED; rc = s390_iommu_validate_trans(s390_domain, paddr, iova, - pgcount, flags); + pgcount, flags, gfp); if (!rc) *mapped = size; -- cgit v1.2.3 From 429f27e36874e34727a1f8495be2ea3a7060732c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Jan 2023 16:36:03 -0400 Subject: iommu/s390: Use GFP_KERNEL in sleepable contexts These contexts are sleepable, so use the proper annotation. The GFP_ATOMIC was added mechanically in the prior patches. Reviewed-by: Niklas Schnelle Reviewed-by: Matthew Rosato Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/10-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- arch/s390/pci/pci_dma.c | 2 +- drivers/iommu/s390-iommu.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/iommu') diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 2f6d05d6da4f..2d9b01d7ca4c 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -579,7 +579,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) spin_lock_init(&zdev->iommu_bitmap_lock); - zdev->dma_table = dma_alloc_cpu_table(GFP_ATOMIC); + zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL); if (!zdev->dma_table) { rc = -ENOMEM; goto out; diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 654ec4411fe3..7dcfffed260e 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -52,7 +52,7 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type) if (!s390_domain) return NULL; - s390_domain->dma_table = dma_alloc_cpu_table(GFP_ATOMIC); + s390_domain->dma_table = dma_alloc_cpu_table(GFP_KERNEL); if (!s390_domain->dma_table) { kfree(s390_domain); return NULL; -- cgit v1.2.3 From c64074bfe2abbe18a57eb19c95f686298e8692b5 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Tue, 26 Jul 2022 23:07:38 +0300 Subject: iommu/exynos: Abstract getting the fault info Fault info obtaining is implemented for SysMMU v1..v5 in a very hardware specific way, as it relies on: - interrupt bits being tied to read or write access - having separate registers for the fault address w.r.t. AR/AW ops Newer SysMMU versions (like SysMMU v7) have different way of providing the fault info via registers: - the transaction type (read or write) should be read from the register (instead of hard-coding it w.r.t. corresponding interrupt status bit) - there is only one single register for storing the fault address Because of that, it is not possible to add newer SysMMU support into existing paradigm. 
Also it's not very effective performance-wise: - checking SysMMU version in ISR each time is not necessary - performing linear search to find the fault info by interrupt bit can be replaced with a single lookup operation Pave the way for adding support for new SysMMU versions by abstracting the getting of fault info in ISR. While at it, do some related style cleanups as well. This is mostly a refactoring patch, but there are some minor functional changes: - fault message format is a bit different; now instead of AR/AW prefixes for the fault's name, the request direction is printed as [READ]/[WRITE]. It has to be done to prepare an abstraction for SysMMU v7 support - don't panic on unknown interrupts; print corresponding message and continue - if fault wasn't recovered, panic with some sane message instead of just doing BUG_ON() The whole fault message looks like this now: [READ] PAGE FAULT occurred at 0x12341000 Signed-off-by: Sam Protsenko Acked-by: Marek Szyprowski Link: https://lore.kernel.org/r/20220726200739.30017-2-semen.protsenko@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 162 ++++++++++++++++++++++++++----------------- 1 file changed, 100 insertions(+), 62 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index c1d579c24740..75cade7382ab 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -185,38 +185,36 @@ static sysmmu_pte_t *page_entry(sysmmu_pte_t *sent, sysmmu_iova_t iova) lv2table_base(sent)) + lv2ent_offset(iova); } -/* - * IOMMU fault information register - */ -struct sysmmu_fault_info { - unsigned int bit; /* bit number in STATUS register */ - unsigned short addr_reg; /* register to read VA fault address */ +struct sysmmu_fault { + sysmmu_iova_t addr; /* IOVA address that caused fault */ + const char *name; /* human readable fault name */ + unsigned int type; /* fault type for report_iommu_fault() */ +}; + +struct sysmmu_v1_fault_info { + unsigned short addr_reg; /* register to read IOVA fault address */ const char *name; /* human readable fault name */ unsigned int type; /* fault type for report_iommu_fault */ }; -static const struct sysmmu_fault_info sysmmu_faults[] = { - { 0, REG_PAGE_FAULT_ADDR, "PAGE", IOMMU_FAULT_READ }, - { 1, REG_AR_FAULT_ADDR, "AR MULTI-HIT", IOMMU_FAULT_READ }, - { 2, REG_AW_FAULT_ADDR, "AW MULTI-HIT", IOMMU_FAULT_WRITE }, - { 3, REG_DEFAULT_SLAVE_ADDR, "BUS ERROR", IOMMU_FAULT_READ }, - { 4, REG_AR_FAULT_ADDR, "AR SECURITY PROTECTION", IOMMU_FAULT_READ }, - { 5, REG_AR_FAULT_ADDR, "AR ACCESS PROTECTION", IOMMU_FAULT_READ }, - { 6, REG_AW_FAULT_ADDR, "AW SECURITY PROTECTION", IOMMU_FAULT_WRITE }, - { 7, REG_AW_FAULT_ADDR, "AW ACCESS PROTECTION", IOMMU_FAULT_WRITE }, +static const struct sysmmu_v1_fault_info sysmmu_v1_faults[] = { + { REG_PAGE_FAULT_ADDR, "PAGE", IOMMU_FAULT_READ }, + { REG_AR_FAULT_ADDR, "MULTI-HIT", IOMMU_FAULT_READ }, + { REG_AW_FAULT_ADDR, "MULTI-HIT", IOMMU_FAULT_WRITE }, + { REG_DEFAULT_SLAVE_ADDR, "BUS ERROR", IOMMU_FAULT_READ }, + { REG_AR_FAULT_ADDR, "SECURITY PROTECTION", IOMMU_FAULT_READ }, + { REG_AR_FAULT_ADDR, "ACCESS PROTECTION", IOMMU_FAULT_READ }, + { REG_AW_FAULT_ADDR, "SECURITY PROTECTION", IOMMU_FAULT_WRITE }, + { REG_AW_FAULT_ADDR, "ACCESS PROTECTION", IOMMU_FAULT_WRITE }, }; -static const struct sysmmu_fault_info sysmmu_v5_faults[] = { - { 0, REG_V5_FAULT_AR_VA, "AR PTW", IOMMU_FAULT_READ }, - { 1, REG_V5_FAULT_AR_VA, "AR PAGE", IOMMU_FAULT_READ }, - { 2, REG_V5_FAULT_AR_VA, "AR 
MULTI-HIT", IOMMU_FAULT_READ }, - { 3, REG_V5_FAULT_AR_VA, "AR ACCESS PROTECTION", IOMMU_FAULT_READ }, - { 4, REG_V5_FAULT_AR_VA, "AR SECURITY PROTECTION", IOMMU_FAULT_READ }, - { 16, REG_V5_FAULT_AW_VA, "AW PTW", IOMMU_FAULT_WRITE }, - { 17, REG_V5_FAULT_AW_VA, "AW PAGE", IOMMU_FAULT_WRITE }, - { 18, REG_V5_FAULT_AW_VA, "AW MULTI-HIT", IOMMU_FAULT_WRITE }, - { 19, REG_V5_FAULT_AW_VA, "AW ACCESS PROTECTION", IOMMU_FAULT_WRITE }, - { 20, REG_V5_FAULT_AW_VA, "AW SECURITY PROTECTION", IOMMU_FAULT_WRITE }, +/* SysMMU v5 has the same faults for AR (0..4 bits) and AW (16..20 bits) */ +static const char * const sysmmu_v5_fault_names[] = { + "PTW", + "PAGE", + "MULTI-HIT", + "ACCESS PROTECTION", + "SECURITY PROTECTION" }; /* @@ -246,9 +244,12 @@ struct exynos_iommu_domain { struct iommu_domain domain; /* generic domain data structure */ }; +struct sysmmu_drvdata; + /* * SysMMU version specific data. Contains offsets for the registers which can * be found in different SysMMU variants, but have different offset values. + * Also contains version specific callbacks to abstract the hardware. */ struct sysmmu_variant { u32 pt_base; /* page table base address (physical) */ @@ -259,6 +260,9 @@ struct sysmmu_variant { u32 flush_end; /* end address of range invalidation */ u32 int_status; /* interrupt status information */ u32 int_clear; /* clear the interrupt */ + + int (*get_fault_info)(struct sysmmu_drvdata *data, unsigned int itype, + struct sysmmu_fault *fault); }; /* @@ -293,6 +297,46 @@ struct sysmmu_drvdata { #define SYSMMU_REG(data, reg) ((data)->sfrbase + (data)->variant->reg) +static int exynos_sysmmu_v1_get_fault_info(struct sysmmu_drvdata *data, + unsigned int itype, + struct sysmmu_fault *fault) +{ + const struct sysmmu_v1_fault_info *finfo; + + if (itype >= ARRAY_SIZE(sysmmu_v1_faults)) + return -ENXIO; + + finfo = &sysmmu_v1_faults[itype]; + fault->addr = readl(data->sfrbase + finfo->addr_reg); + fault->name = finfo->name; + fault->type = finfo->type; + + return 0; +} + +static int exynos_sysmmu_v5_get_fault_info(struct sysmmu_drvdata *data, + unsigned int itype, + struct sysmmu_fault *fault) +{ + unsigned int addr_reg; + + if (itype < ARRAY_SIZE(sysmmu_v5_fault_names)) { + fault->type = IOMMU_FAULT_READ; + addr_reg = REG_V5_FAULT_AR_VA; + } else if (itype >= 16 && itype <= 20) { + fault->type = IOMMU_FAULT_WRITE; + addr_reg = REG_V5_FAULT_AW_VA; + itype -= 16; + } else { + return -ENXIO; + } + + fault->name = sysmmu_v5_fault_names[itype]; + fault->addr = readl(data->sfrbase + addr_reg); + + return 0; +} + /* SysMMU v1..v3 */ static const struct sysmmu_variant sysmmu_v1_variant = { .flush_all = 0x0c, @@ -300,6 +344,8 @@ static const struct sysmmu_variant sysmmu_v1_variant = { .pt_base = 0x14, .int_status = 0x18, .int_clear = 0x1c, + + .get_fault_info = exynos_sysmmu_v1_get_fault_info, }; /* SysMMU v5 and v7 (non-VM capable) */ @@ -312,6 +358,8 @@ static const struct sysmmu_variant sysmmu_v5_variant = { .flush_end = 0x24, .int_status = 0x60, .int_clear = 0x64, + + .get_fault_info = exynos_sysmmu_v5_get_fault_info, }; /* SysMMU v7: VM capable register set */ @@ -324,6 +372,8 @@ static const struct sysmmu_variant sysmmu_v7_vm_variant = { .flush_end = 0x8024, .int_status = 0x60, .int_clear = 0x64, + + .get_fault_info = exynos_sysmmu_v5_get_fault_info, }; static struct exynos_iommu_domain *to_exynos_domain(struct iommu_domain *dom) @@ -453,68 +503,56 @@ static void __sysmmu_get_version(struct sysmmu_drvdata *data) } static void show_fault_information(struct sysmmu_drvdata *data, - const struct 
sysmmu_fault_info *finfo, - sysmmu_iova_t fault_addr) + const struct sysmmu_fault *fault) { sysmmu_pte_t *ent; - dev_err(data->sysmmu, "%s: %s FAULT occurred at %#x\n", - dev_name(data->master), finfo->name, fault_addr); + dev_err(data->sysmmu, "%s: [%s] %s FAULT occurred at %#x\n", + dev_name(data->master), + fault->type == IOMMU_FAULT_READ ? "READ" : "WRITE", + fault->name, fault->addr); dev_dbg(data->sysmmu, "Page table base: %pa\n", &data->pgtable); - ent = section_entry(phys_to_virt(data->pgtable), fault_addr); + ent = section_entry(phys_to_virt(data->pgtable), fault->addr); dev_dbg(data->sysmmu, "\tLv1 entry: %#x\n", *ent); if (lv1ent_page(ent)) { - ent = page_entry(ent, fault_addr); + ent = page_entry(ent, fault->addr); dev_dbg(data->sysmmu, "\t Lv2 entry: %#x\n", *ent); } } static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) { - /* SYSMMU is in blocked state when interrupt occurred. */ struct sysmmu_drvdata *data = dev_id; - const struct sysmmu_fault_info *finfo; - unsigned int i, n, itype; - sysmmu_iova_t fault_addr; + unsigned int itype; + struct sysmmu_fault fault; int ret = -ENOSYS; WARN_ON(!data->active); - if (MMU_MAJ_VER(data->version) < 5) { - finfo = sysmmu_faults; - n = ARRAY_SIZE(sysmmu_faults); - } else { - finfo = sysmmu_v5_faults; - n = ARRAY_SIZE(sysmmu_v5_faults); - } - spin_lock(&data->lock); - clk_enable(data->clk_master); itype = __ffs(readl(SYSMMU_REG(data, int_status))); - for (i = 0; i < n; i++, finfo++) - if (finfo->bit == itype) - break; - /* unknown/unsupported fault */ - BUG_ON(i == n); - - /* print debug message */ - fault_addr = readl(data->sfrbase + finfo->addr_reg); - show_fault_information(data, finfo, fault_addr); - - if (data->domain) - ret = report_iommu_fault(&data->domain->domain, - data->master, fault_addr, finfo->type); - /* fault is not recovered by fault handler */ - BUG_ON(ret != 0); + ret = data->variant->get_fault_info(data, itype, &fault); + if (ret) { + dev_err(data->sysmmu, "Unhandled interrupt bit %u\n", itype); + goto out; + } + show_fault_information(data, &fault); + if (data->domain) { + ret = report_iommu_fault(&data->domain->domain, data->master, + fault.addr, fault.type); + } + if (ret) + panic("Unrecoverable System MMU Fault!"); + +out: writel(1 << itype, SYSMMU_REG(data, int_clear)); + /* SysMMU is in blocked state when interrupt occurred */ sysmmu_unblock(data); - clk_disable(data->clk_master); - spin_unlock(&data->lock); return IRQ_HANDLED; -- cgit v1.2.3 From 2f599c3ff4e6f89dfd27fe2f4e1a4c040ba5f92f Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Tue, 26 Jul 2022 23:07:39 +0300 Subject: iommu/exynos: Implement fault handling on SysMMU v7 SysMMU v7 has a bit different registers for getting the fault info: - there is one single register (MMU_FAULT_VA) to get the fault address - fault access type (R/W) can be read from MMU_FAULT_TRANS_INFO register now - interrupt status register has different bits w.r.t. previous SysMMU versions - VM and non-VM layouts have different register addresses Add correct fault handling implementation for SysMMU v7, according to all mentioned differences. Only VID #0 (default) is handled, as VM domains support is not implemented yet. 
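Taken together with the previous patch, the interrupt path is now one indirect call instead of a version check plus a linear table scan. The dispatch shape, condensed from the two patches with error handling trimmed:

	/* Chosen once, at probe time, in __sysmmu_get_version(): */
	data->variant = data->has_vcr ? &sysmmu_v7_vm_variant
				      : &sysmmu_v7_variant;

	/* The ISR no longer cares which SysMMU generation it runs on: */
	struct sysmmu_fault fault;
	unsigned int itype = __ffs(readl(SYSMMU_REG(data, int_status)));

	if (data->variant->get_fault_info(data, itype, &fault))
		dev_err(data->sysmmu, "Unhandled interrupt bit %u\n", itype);

Supporting a future register layout now means adding one sysmmu_variant instance and one callback, with no ISR changes.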
Signed-off-by: Sam Protsenko Link: https://lore.kernel.org/r/20220726200739.30017-3-semen.protsenko@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 48 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 75cade7382ab..69fd7fa3e49a 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -217,6 +217,13 @@ static const char * const sysmmu_v5_fault_names[] = { "SECURITY PROTECTION" }; +static const char * const sysmmu_v7_fault_names[] = { + "PTW", + "PAGE", + "ACCESS PROTECTION", + "RESERVED" +}; + /* * This structure is attached to dev->iommu->priv of the master device * on device add, contains a list of SYSMMU controllers defined by device tree, @@ -260,6 +267,8 @@ struct sysmmu_variant { u32 flush_end; /* end address of range invalidation */ u32 int_status; /* interrupt status information */ u32 int_clear; /* clear the interrupt */ + u32 fault_va; /* IOVA address that caused fault */ + u32 fault_info; /* fault transaction info */ int (*get_fault_info)(struct sysmmu_drvdata *data, unsigned int itype, struct sysmmu_fault *fault); @@ -337,6 +346,19 @@ static int exynos_sysmmu_v5_get_fault_info(struct sysmmu_drvdata *data, return 0; } +static int exynos_sysmmu_v7_get_fault_info(struct sysmmu_drvdata *data, + unsigned int itype, + struct sysmmu_fault *fault) +{ + u32 info = readl(SYSMMU_REG(data, fault_info)); + + fault->addr = readl(SYSMMU_REG(data, fault_va)); + fault->name = sysmmu_v7_fault_names[itype % 4]; + fault->type = (info & BIT(20)) ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ; + + return 0; +} + /* SysMMU v1..v3 */ static const struct sysmmu_variant sysmmu_v1_variant = { .flush_all = 0x0c, @@ -348,7 +370,7 @@ static const struct sysmmu_variant sysmmu_v1_variant = { .get_fault_info = exynos_sysmmu_v1_get_fault_info, }; -/* SysMMU v5 and v7 (non-VM capable) */ +/* SysMMU v5 */ static const struct sysmmu_variant sysmmu_v5_variant = { .pt_base = 0x0c, .flush_all = 0x10, @@ -362,7 +384,23 @@ static const struct sysmmu_variant sysmmu_v5_variant = { .get_fault_info = exynos_sysmmu_v5_get_fault_info, }; -/* SysMMU v7: VM capable register set */ +/* SysMMU v7: non-VM capable register layout */ +static const struct sysmmu_variant sysmmu_v7_variant = { + .pt_base = 0x0c, + .flush_all = 0x10, + .flush_entry = 0x14, + .flush_range = 0x18, + .flush_start = 0x20, + .flush_end = 0x24, + .int_status = 0x60, + .int_clear = 0x64, + .fault_va = 0x70, + .fault_info = 0x78, + + .get_fault_info = exynos_sysmmu_v7_get_fault_info, +}; + +/* SysMMU v7: VM capable register layout */ static const struct sysmmu_variant sysmmu_v7_vm_variant = { .pt_base = 0x800c, .flush_all = 0x8010, @@ -372,8 +410,10 @@ static const struct sysmmu_variant sysmmu_v7_vm_variant = { .flush_end = 0x8024, .int_status = 0x60, .int_clear = 0x64, + .fault_va = 0x1000, + .fault_info = 0x1004, - .get_fault_info = exynos_sysmmu_v5_get_fault_info, + .get_fault_info = exynos_sysmmu_v7_get_fault_info, }; static struct exynos_iommu_domain *to_exynos_domain(struct iommu_domain *dom) @@ -496,7 +536,7 @@ static void __sysmmu_get_version(struct sysmmu_drvdata *data) if (data->has_vcr) data->variant = &sysmmu_v7_vm_variant; else - data->variant = &sysmmu_v5_variant; + data->variant = &sysmmu_v7_variant; } __sysmmu_disable_clocks(data); -- cgit v1.2.3 From 189d496b48b1731f21f19ff3b7109ec724f5dcb2 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 23 Jan 
2023 10:31:01 +0100 Subject: iommu/exynos: Add missing set_platform_dma_ops callback Add set_platform_dma_ops() required for proper driver operation on ARM 32bit arch after recent changes in the IOMMU framework (detach ops removal). Fixes: c1fe9119ee70 ("iommu: Add set_platform_dma_ops callbacks") Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20230123093102.12392-1-m.szyprowski@samsung.com Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 29ec713e8a21..7eaa58ced4ae 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1346,8 +1346,10 @@ static void exynos_iommu_release_device(struct device *dev) struct iommu_group *group = iommu_group_get(dev); if (group) { +#ifndef CONFIG_ARM WARN_ON(owner->domain != iommu_group_default_domain(group)); +#endif exynos_iommu_detach_device(owner->domain, dev); iommu_group_put(group); } @@ -1398,6 +1400,9 @@ static int exynos_iommu_of_xlate(struct device *dev, static const struct iommu_ops exynos_iommu_ops = { .domain_alloc = exynos_iommu_domain_alloc, .device_group = generic_device_group, +#ifdef CONFIG_ARM + .set_platform_dma_ops = exynos_iommu_release_device, +#endif .probe_device = exynos_iommu_probe_device, .release_device = exynos_iommu_release_device, .pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE, -- cgit v1.2.3 From cf5c1c87c2391649e05e58ecc6dfc3dc5ebebc05 Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Sat, 28 Jan 2023 12:35:32 +0100 Subject: iommu/dart: Fix apple_dart_device_group for PCI groups pci_device_group() can return an already existing IOMMU group if the PCI device's pagetables have to be shared with another one due to bus toplogy, isolation features and/or DMA alias quirks. apple_dart_device_group() however assumes that the group has just been created and overwrites its iommudata which will eventually lead to apple_dart_release_group leaving stale entries in sid2group. Fix that by merging the iommudata if the returned group already exists. Fixes: f0b636804c7c ("iommu/dart: Clear sid2group entry when a group is freed") Signed-off-by: Sven Peter Reviewed-by: Eric Curtin Link: https://lore.kernel.org/r/20230128113532.94651-1-sven@svenpeter.dev Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 51 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 7 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 42666617803d..1fc64a42c890 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -835,6 +835,29 @@ static void apple_dart_release_group(void *iommu_data) mutex_unlock(&apple_dart_groups_lock); } +static int apple_dart_merge_master_cfg(struct apple_dart_master_cfg *dst, + struct apple_dart_master_cfg *src) +{ + /* + * We know that this function is only called for groups returned from + * pci_device_group and that all Apple Silicon platforms never spread + * PCIe devices from the same bus across multiple DARTs such that we can + * just assume that both src and dst only have the same single DART. 
+ */ + if (src->stream_maps[1].dart) + return -EINVAL; + if (dst->stream_maps[1].dart) + return -EINVAL; + if (src->stream_maps[0].dart != dst->stream_maps[0].dart) + return -EINVAL; + + bitmap_or(dst->stream_maps[0].sidmap, + dst->stream_maps[0].sidmap, + src->stream_maps[0].sidmap, + dst->stream_maps[0].dart->num_streams); + return 0; +} + static struct iommu_group *apple_dart_device_group(struct device *dev) { int i, sid; @@ -876,14 +899,28 @@ static struct iommu_group *apple_dart_device_group(struct device *dev) if (!group) goto out; - group_master_cfg = kmemdup(cfg, sizeof(*group_master_cfg), GFP_KERNEL); - if (!group_master_cfg) { - iommu_group_put(group); - goto out; - } + group_master_cfg = iommu_group_get_iommudata(group); + if (group_master_cfg) { + int ret; - iommu_group_set_iommudata(group, group_master_cfg, - apple_dart_release_group); + ret = apple_dart_merge_master_cfg(group_master_cfg, cfg); + if (ret) { + dev_err(dev, "Failed to merge DART IOMMU grups.\n"); + iommu_group_put(group); + res = ERR_PTR(ret); + goto out; + } + } else { + group_master_cfg = kmemdup(cfg, sizeof(*group_master_cfg), + GFP_KERNEL); + if (!group_master_cfg) { + iommu_group_put(group); + goto out; + } + + iommu_group_set_iommudata(group, group_master_cfg, + apple_dart_release_group); + } for_each_stream_map(i, cfg, stream_map) for_each_set_bit(sid, stream_map->sidmap, stream_map->dart->num_streams) -- cgit v1.2.3 From b6b26d86c61c441144c72f842f7469bb686e1211 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 2 Feb 2023 08:26:56 +0000 Subject: iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter The 'acpiid' buffer in the parse_ivrs_acpihid function may overflow, because the string specifier in the format string sscanf() has no width limitation. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with SVACE. 
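The underlying hazard is that a bare %s conversion in sscanf() copies until the next whitespace with no bound, so an overlong ivrs_acpihid= argument runs past the fixed-size 'acpiid' buffer. The patch below guards with explicit strlen() checks; where the limit is a plain constant, the conventional alternative is a field width one less than the buffer size. A standalone userspace sketch of that idiom, with a made-up input string:

	#include <stdio.h>

	int main(void)
	{
		char buf[16];

		/* Unbounded: a token longer than 15 bytes would overflow buf. */
		/* sscanf(input, "%s", buf); */

		/* Bounded: at most 15 bytes are stored, plus the NUL. */
		if (sscanf("AMDI0020:00@0000:00:14.5", "%15[^@]", buf) == 1)
			printf("acpiid: %s\n", buf);
		return 0;
	}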
Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter") Cc: stable@vger.kernel.org Signed-off-by: Ilia.Gavrilov Reviewed-by: Kim Phillips Link: https://lore.kernel.org/r/20230202082719.1513849-1-Ilia.Gavrilov@infotecs.ru Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 467b194975b3..19a46b9f7357 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3475,15 +3475,26 @@ found: return 1; } +#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN) + static int __init parse_ivrs_acpihid(char *str) { u32 seg = 0, bus, dev, fn; char *hid, *uid, *p, *addr; - char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0}; + char acpiid[ACPIID_LEN] = {0}; int i; addr = strchr(str, '@'); if (!addr) { + addr = strchr(str, '='); + if (!addr) + goto not_found; + + ++addr; + + if (strlen(addr) > ACPIID_LEN) + goto not_found; + if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 || sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) { pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n", @@ -3496,6 +3507,9 @@ static int __init parse_ivrs_acpihid(char *str) /* We have the '@', make it the terminator to get just the acpiid */ *addr++ = 0; + if (strlen(str) > ACPIID_LEN + 1) + goto not_found; + if (sscanf(str, "=%s", acpiid) != 1) goto not_found; -- cgit v1.2.3 From d82e6ae67ac2f9d6ba51690353c477b340bba6b5 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 31 Jan 2023 15:37:29 +0800 Subject: iommu/vt-d: Remove include/linux/intel-svm.h There's no need to have a public header for Intel SVA implementation. The device driver should interact with Intel SVA implementation via the IOMMU generic APIs. 
Reviewed-by: Kevin Tian Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230109014955.147068-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- MAINTAINERS | 1 - drivers/iommu/intel/iommu.c | 1 - drivers/iommu/intel/iommu.h | 5 +++++ drivers/iommu/intel/svm.c | 1 - include/linux/intel-svm.h | 16 ---------------- 5 files changed, 5 insertions(+), 19 deletions(-) delete mode 100644 include/linux/intel-svm.h (limited to 'drivers/iommu') diff --git a/MAINTAINERS b/MAINTAINERS index a36df9ed283d..43fd97f0e1df 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10459,7 +10459,6 @@ L: iommu@lists.linux.dev S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/intel/ -F: include/linux/intel-svm.h INTEL IPU3 CSI-2 CIO2 DRIVER M: Yong Zhi diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 59df7e42fd53..317af67b6098 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 06e61e474856..f89f63d7a72a 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -438,6 +438,11 @@ struct q_inval { int free_cnt; }; +/* Page Request Queue depth */ +#define PRQ_ORDER 4 +#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) +#define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5) + struct dmar_pci_notify_info; #ifdef CONFIG_IRQ_REMAP diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index c76b66263467..d38a54396c23 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h deleted file mode 100644 index f9a0d44f6fdb..000000000000 --- a/include/linux/intel-svm.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright © 2015 Intel Corporation. - * - * Authors: David Woodhouse - */ - -#ifndef __INTEL_SVM_H__ -#define __INTEL_SVM_H__ - -/* Page Request Queue depth */ -#define PRQ_ORDER 4 -#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) -#define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5) - -#endif /* __INTEL_SVM_H__ */ -- cgit v1.2.3 From 557abbd60c1676cbde5326a17dd517ec48f80bb6 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 31 Jan 2023 15:37:30 +0800 Subject: iommu/vt-d: Remove unused fields in svm structures They aren't used anywhere. Remove them to avoid dead code. 
Reviewed-by: Kevin Tian Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230109014955.147068-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.h | 4 ---- drivers/iommu/intel/svm.c | 2 -- 2 files changed, 6 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index f89f63d7a72a..e7c732979364 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -762,18 +762,14 @@ struct intel_svm_dev { struct device *dev; struct intel_iommu *iommu; struct iommu_sva sva; - u32 pasid; int users; u16 did; - u16 dev_iotlb:1; u16 sid, qdep; }; struct intel_svm { struct mmu_notifier notifier; struct mm_struct *mm; - - unsigned int flags; u32 pasid; struct list_head devs; }; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index d38a54396c23..d1e445f03aa6 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -351,11 +351,9 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); sdev->users = 1; - sdev->pasid = svm->pasid; sdev->sva.dev = dev; init_rcu_head(&sdev->rcu); if (info->ats_enabled) { - sdev->dev_iotlb = 1; sdev->qdep = info->ats_qdep; if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) sdev->qdep = 0; -- cgit v1.2.3 From 49cab9d2b846bf69ef1164156ea2f4c7dec02f9f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 31 Jan 2023 15:37:31 +0800 Subject: iommu/vt-d: Remove users from intel_svm_dev It was used as a reference counter of an existing bond between device and user application memory address. Commit be51b1d6bbff ("iommu/sva: Refactoring iommu_sva_bind/unbind_device()") has added this in iommu core. Remove it to avoid duplicate code. 
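The shape of this cleanup is worth spelling out: once shared code owns the bond's lifetime, a per-driver 'users' counter is double bookkeeping. A generic sketch of the division of labour (not the actual iommu core code; driver_bind/driver_unbind are illustrative names):

	/* Core layer: one refcounted bond per (device, mm) pair. */
	if (existing_bond)
		refcount_inc(&existing_bond->users);	/* no driver call */
	else
		driver_bind(dev, mm);			/* first user only */

	/* ...and on release: */
	if (refcount_dec_and_test(&bond->users))
		driver_unbind(dev, mm);			/* last user only */

The driver then only ever sees the first bind and the last unbind, which is why the counter and the 'success:' shortcut in intel_svm_bind_mm() can go.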
Reviewed-by: Kevin Tian Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230109014955.147068-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.h | 1 - drivers/iommu/intel/svm.c | 62 ++++++++++++++++++++------------------------- 2 files changed, 27 insertions(+), 36 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index e7c732979364..2a1619ff0d79 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -762,7 +762,6 @@ struct intel_svm_dev { struct device *dev; struct intel_iommu *iommu; struct iommu_sva sva; - int users; u16 did; u16 sid, qdep; }; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index d1e445f03aa6..c7dc53e40c26 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -333,13 +333,6 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, } } - /* Find the matching device in svm list */ - sdev = svm_lookup_device_by_dev(svm, dev); - if (sdev) { - sdev->users++; - goto success; - } - sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); if (!sdev) { ret = -ENOMEM; @@ -350,7 +343,6 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, sdev->iommu = iommu; sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); - sdev->users = 1; sdev->sva.dev = dev; init_rcu_head(&sdev->rcu); if (info->ats_enabled) { @@ -367,7 +359,7 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, goto free_sdev; list_add_rcu(&sdev->list, &svm->devs); -success: + return &sdev->sva; free_sdev: @@ -401,32 +393,32 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) mm = svm->mm; if (sdev) { - sdev->users--; - if (!sdev->users) { - list_del_rcu(&sdev->list); - /* Flush the PASID cache and IOTLB for this device. - * Note that we do depend on the hardware *not* using - * the PASID any more. Just as we depend on other - * devices never using PASIDs that they have no right - * to use. We have a *shared* PASID table, because it's - * large and has to be physically contiguous. So it's - * hard to be as defensive as we might like. */ - intel_pasid_tear_down_entry(iommu, dev, - svm->pasid, false); - intel_svm_drain_prq(dev, svm->pasid); - kfree_rcu(sdev, rcu); - - if (list_empty(&svm->devs)) { - if (svm->notifier.ops) - mmu_notifier_unregister(&svm->notifier, mm); - pasid_private_remove(svm->pasid); - /* We mandate that no page faults may be outstanding - * for the PASID when intel_svm_unbind_mm() is called. - * If that is not obeyed, subtle errors will happen. - * Let's make them less subtle... */ - memset(svm, 0x6b, sizeof(*svm)); - kfree(svm); - } + list_del_rcu(&sdev->list); + /* + * Flush the PASID cache and IOTLB for this device. + * Note that we do depend on the hardware *not* using + * the PASID any more. Just as we depend on other + * devices never using PASIDs that they have no right + * to use. We have a *shared* PASID table, because it's + * large and has to be physically contiguous. So it's + * hard to be as defensive as we might like. + */ + intel_pasid_tear_down_entry(iommu, dev, svm->pasid, false); + intel_svm_drain_prq(dev, svm->pasid); + kfree_rcu(sdev, rcu); + + if (list_empty(&svm->devs)) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + pasid_private_remove(svm->pasid); + /* + * We mandate that no page faults may be outstanding + * for the PASID when intel_svm_unbind_mm() is called. 
+ * If that is not obeyed, subtle errors will happen. + * Let's make them less subtle... + */ + memset(svm, 0x6b, sizeof(*svm)); + kfree(svm); } } out: -- cgit v1.2.3 From ec9ab12dee30d09bcffdd25943076611654316eb Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 31 Jan 2023 15:37:32 +0800 Subject: iommu/vt-d: Remove sva from intel_svm_dev After commit be51b1d6bbff ("iommu/sva: Refactoring iommu_sva_bind/unbind_device()"), the iommu driver doesn't need to return an iommu_sva pointer anymore. This removes the sva field from intel_svm_dev and cleanups the code accordingly. Reviewed-by: Kevin Tian Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230109014955.147068-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.h | 1 - drivers/iommu/intel/svm.c | 23 +++++++++-------------- 2 files changed, 9 insertions(+), 15 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 2a1619ff0d79..f0222ad38757 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -761,7 +761,6 @@ struct intel_svm_dev { struct rcu_head rcu; struct device *dev; struct intel_iommu *iommu; - struct iommu_sva sva; u16 did; u16 sid, qdep; }; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index c7dc53e40c26..e7b9bedebcc0 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -298,9 +298,8 @@ out: return 0; } -static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, - struct device *dev, - struct mm_struct *mm) +static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, + struct mm_struct *mm) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_svm_dev *sdev; @@ -312,7 +311,7 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, if (!svm) { svm = kzalloc(sizeof(*svm), GFP_KERNEL); if (!svm) - return ERR_PTR(-ENOMEM); + return -ENOMEM; svm->pasid = mm->pasid; svm->mm = mm; @@ -322,14 +321,14 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, ret = mmu_notifier_register(&svm->notifier, mm); if (ret) { kfree(svm); - return ERR_PTR(ret); + return ret; } ret = pasid_private_add(svm->pasid, svm); if (ret) { mmu_notifier_unregister(&svm->notifier, mm); kfree(svm); - return ERR_PTR(ret); + return ret; } } @@ -343,7 +342,6 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, sdev->iommu = iommu; sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); - sdev->sva.dev = dev; init_rcu_head(&sdev->rcu); if (info->ats_enabled) { sdev->qdep = info->ats_qdep; @@ -360,7 +358,7 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, list_add_rcu(&sdev->list, &svm->devs); - return &sdev->sva; + return 0; free_sdev: kfree(sdev); @@ -371,7 +369,7 @@ free_svm: kfree(svm); } - return ERR_PTR(ret); + return ret; } /* Caller must hold pasid_mutex */ @@ -843,13 +841,10 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; struct mm_struct *mm = domain->mm; - struct iommu_sva *sva; - int ret = 0; + int ret; mutex_lock(&pasid_mutex); - sva = intel_svm_bind_mm(iommu, dev, mm); - if (IS_ERR(sva)) - ret = PTR_ERR(sva); + ret = intel_svm_bind_mm(iommu, dev, mm); mutex_unlock(&pasid_mutex); return ret; -- cgit v1.2.3 From e06d24435596c8afcaa81c0c498f5b0ec4ee2b7c Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 31 Jan 2023 15:37:33 +0800 
Subject: iommu/vt-d: Set No Execute Enable bit in PASID table entry Setup No Execute Enable bit (Bit 133) of a scalable mode PASID entry. This is to allow the use of XD bit of the first level page table. Fixes: ddf09b6d43ec ("iommu/vt-d: Setup pasid entries for iova over first level") Signed-off-by: Ashok Raj Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20230126095438.354205-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/pasid.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index fb3c7020028d..ec964ac7d797 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -364,6 +364,16 @@ static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value) pasid_set_bits(&pe->val[1], 1 << 23, value << 23); } +/* + * Setup No Execute Enable bit (Bit 133) of a scalable mode PASID + * entry. It is required when XD bit of the first level page table + * entry is about to be set. + */ +static inline void pasid_set_nxe(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 5, 1 << 5); +} + /* * Setup the Page Snoop (PGSNP) field (Bit 88) of a scalable mode * PASID entry. @@ -557,6 +567,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, pasid_set_domain_id(pte, did); pasid_set_address_width(pte, iommu->agaw); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + pasid_set_nxe(pte); /* Setup Present and PASID Granular Transfer Type: */ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY); -- cgit v1.2.3 From 4db96bfe9d7772d6ddedd62ce478895999043fd7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 31 Jan 2023 15:37:34 +0800 Subject: iommu/vt-d: Support size of the register set in DRHD A new field, which indicates the size of the remapping hardware register set for this remapping unit, is introduced in the DMA-remapping hardware unit definition (DRHD) structure with the VT-d Spec 4.0. With this information, SW doesn't need to 'guess' the size of the register set anymore. Update the struct acpi_dmar_hardware_unit to reflect the field. Store the size of the register set in struct dmar_drhd_unit for each dmar device. The 'size' information is ResvZ for the old BIOS and platforms. Fall back to the old guessing method. There is nothing changed. Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230128200428.1459118-2-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 11 +++++++---- include/acpi/actbl1.h | 2 +- include/linux/dmar.h | 1 + 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index b00a0ceb2d13..3a40fef1ec1b 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -427,6 +427,8 @@ static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg) memcpy(dmaru->hdr, header, header->length); dmaru->reg_base_addr = drhd->address; dmaru->segment = drhd->segment; + /* The size of the register set is 2 ^ N 4 KB pages. 
*/ + dmaru->reg_size = 1UL << (drhd->size + 12); dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1), ((void *)drhd) + drhd->header.length, @@ -956,17 +958,18 @@ static void unmap_iommu(struct intel_iommu *iommu) /** * map_iommu: map the iommu's registers * @iommu: the iommu to map - * @phys_addr: the physical address of the base resgister + * @drhd: DMA remapping hardware definition structure * * Memory map the iommu's registers. Start w/ a single page, and * possibly expand if that turns out to be insufficent. */ -static int map_iommu(struct intel_iommu *iommu, u64 phys_addr) +static int map_iommu(struct intel_iommu *iommu, struct dmar_drhd_unit *drhd) { + u64 phys_addr = drhd->reg_base_addr; int map_size, err=0; iommu->reg_phys = phys_addr; - iommu->reg_size = VTD_PAGE_SIZE; + iommu->reg_size = drhd->reg_size; if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) { pr_err("Can't reserve memory\n"); @@ -1050,7 +1053,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) } sprintf(iommu->name, "dmar%d", iommu->seq_id); - err = map_iommu(iommu, drhd->reg_base_addr); + err = map_iommu(iommu, drhd); if (err) { pr_err("Failed to map %s\n", iommu->name); goto error_free_seq_id; diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 4175dce3967c..bdded0ac46eb 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -802,7 +802,7 @@ struct acpi_dmar_pci_path { struct acpi_dmar_hardware_unit { struct acpi_dmar_header header; u8 flags; - u8 reserved; + u8 size; /* Size of the register set */ u16 segment; u64 address; /* Register Base Address */ }; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index d81a51978d01..725d5e6acec0 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -39,6 +39,7 @@ struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ struct acpi_dmar_header *hdr; /* ACPI header */ u64 reg_base_addr; /* register base address*/ + unsigned long reg_size; /* size of register set */ struct dmar_dev_scope *devices;/* target device array */ int devices_cnt; /* target device count */ u16 segment; /* PCI domain */ -- cgit v1.2.3 From a6a5006dad572a53b5df3f47e1471d207ae9ba49 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 31 Jan 2023 15:37:35 +0800 Subject: iommu/vt-d: Retrieve IOMMU perfmon capability information The performance monitoring infrastructure, perfmon, is to support collection of information about key events occurring during operation of the remapping hardware, to aid performance tuning and debug. Each remapping hardware unit has capability registers that indicate support for performance monitoring features and enumerate the capabilities. Add alloc_iommu_pmu() to retrieve IOMMU perfmon capability information for each iommu unit. The information is stored in the iommu->pmu data structure. Capability registers are read-only, so it's safe to prefetch and store them in the pmu structure. This could avoid unnecessary VMEXIT when this code is running in the virtualization environment. Add free_iommu_pmu() to free the saved capability information when freeing the iommu unit. Add a kernel config option for the IOMMU perfmon feature. Unless a user explicitly uses the perf tool to monitor the IOMMU perfmon event, there isn't any impact for the existing IOMMU. Enable it by default. 
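For illustration only (not part of the patch): the caching pattern the message describes amounts to one MMIO read per capability register at probe time, with all later decoding done on the cached copy, so no further register reads (and hence no extra VMEXITs in a guest) are needed. A minimal sketch, assuming the pcap_*() helpers this patch adds and the driver's internal headers; the struct and function names below are invented:

/* Hypothetical sketch: read DMAR_PERFCAP_REG once, decode from the cache. */
struct perfcap_cache {
	u64 perfcap;				/* cached register value */
};

static void perfcap_cache_init(struct perfcap_cache *c, struct intel_iommu *iommu)
{
	c->perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG);	/* single MMIO read */
}

static u32 perfcap_cache_num_cntr(struct perfcap_cache *c)
{
	return pcap_num_cntr(c->perfcap);	/* pure bit decode, no MMIO */
}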
Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230128200428.1459118-3-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/Kconfig | 11 +++ drivers/iommu/intel/Makefile | 1 + drivers/iommu/intel/dmar.c | 7 ++ drivers/iommu/intel/iommu.h | 43 ++++++++++- drivers/iommu/intel/perfmon.c | 172 ++++++++++++++++++++++++++++++++++++++++++ drivers/iommu/intel/perfmon.h | 40 ++++++++++ 6 files changed, 273 insertions(+), 1 deletion(-) create mode 100644 drivers/iommu/intel/perfmon.c create mode 100644 drivers/iommu/intel/perfmon.h (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index b7dff5092fd2..12e1e90fdae1 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -96,4 +96,15 @@ config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON passing intel_iommu=sm_on to the kernel. If not sure, please use the default value. +config INTEL_IOMMU_PERF_EVENTS + def_bool y + bool "Intel IOMMU performance events" + depends on INTEL_IOMMU && PERF_EVENTS + help + Selecting this option will enable the performance monitoring + infrastructure in the Intel IOMMU. It collects information about + key events occurring during operation of the remapping hardware, + to aid performance tuning and debug. These are available on modern + processors which support Intel VT-d 4.0 and later. + endif # INTEL_IOMMU diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index fa0dae16441c..7af3b8a4f2a0 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_DMAR_PERF) += perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o +obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 3a40fef1ec1b..7a03cadb13ff 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -34,6 +34,7 @@ #include "../irq_remapping.h" #include "perf.h" #include "trace.h" +#include "perfmon.h" typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *); struct dmar_res_callback { @@ -1106,6 +1107,9 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) if (sts & DMA_GSTS_QIES) iommu->gcmd |= DMA_GCMD_QIE; + if (alloc_iommu_pmu(iommu)) + pr_debug("Cannot alloc PMU for iommu (seq_id = %d)\n", iommu->seq_id); + raw_spin_lock_init(&iommu->register_lock); /* @@ -1140,6 +1144,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) err_sysfs: iommu_device_sysfs_remove(&iommu->iommu); err_unmap: + free_iommu_pmu(iommu); unmap_iommu(iommu); error_free_seq_id: ida_free(&dmar_seq_ids, iommu->seq_id); @@ -1155,6 +1160,8 @@ static void free_iommu(struct intel_iommu *iommu) iommu_device_sysfs_remove(&iommu->iommu); } + free_iommu_pmu(iommu); + if (iommu->irq) { if (iommu->pr_irq) { free_irq(iommu->pr_irq, iommu); diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index f0222ad38757..f03d4b6bf49a 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -125,6 +125,11 @@ #define DMAR_MTRR_PHYSMASK8_REG 0x208 #define DMAR_MTRR_PHYSBASE9_REG 0x210 #define DMAR_MTRR_PHYSMASK9_REG 0x218 +#define DMAR_PERFCAP_REG 0x300 +#define DMAR_PERFCFGOFF_REG 0x310 +#define DMAR_PERFOVFOFF_REG 0x318 +#define DMAR_PERFCNTROFF_REG 0x31c +#define DMAR_PERFEVNTCAP_REG 0x380 #define DMAR_VCCAP_REG 0xe30 /* Virtual command capability register */ #define DMAR_VCMD_REG 0xe00 /* Virtual command 
register */ #define DMAR_VCRSP_REG 0xe10 /* Virtual command response register */ @@ -148,6 +153,7 @@ */ #define cap_esrtps(c) (((c) >> 63) & 1) #define cap_esirtps(c) (((c) >> 62) & 1) +#define cap_ecmds(c) (((c) >> 61) & 1) #define cap_fl5lp_support(c) (((c) >> 60) & 1) #define cap_pi_support(c) (((c) >> 59) & 1) #define cap_fl1gp_support(c) (((c) >> 56) & 1) @@ -179,7 +185,8 @@ * Extended Capability Register */ -#define ecap_rps(e) (((e) >> 49) & 0x1) +#define ecap_pms(e) (((e) >> 51) & 0x1) +#define ecap_rps(e) (((e) >> 49) & 0x1) #define ecap_smpwc(e) (((e) >> 48) & 0x1) #define ecap_flts(e) (((e) >> 47) & 0x1) #define ecap_slts(e) (((e) >> 46) & 0x1) @@ -210,6 +217,22 @@ #define ecap_max_handle_mask(e) (((e) >> 20) & 0xf) #define ecap_sc_support(e) (((e) >> 7) & 0x1) /* Snooping Control */ +/* + * Decoding Perf Capability Register + */ +#define pcap_num_cntr(p) ((p) & 0xffff) +#define pcap_cntr_width(p) (((p) >> 16) & 0x7f) +#define pcap_num_event_group(p) (((p) >> 24) & 0x1f) +#define pcap_filters_mask(p) (((p) >> 32) & 0x1f) +#define pcap_interrupt(p) (((p) >> 50) & 0x1) +/* The counter stride is calculated as 2 ^ (x+10) bytes */ +#define pcap_cntr_stride(p) (1ULL << ((((p) >> 52) & 0x7) + 10)) + +/* + * Decoding Perf Event Capability Register + */ +#define pecap_es(p) ((p) & 0xfffffff) + /* Virtual command interface capability */ #define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */ @@ -559,6 +582,22 @@ struct dmar_domain { iommu core */ }; +struct iommu_pmu { + struct intel_iommu *iommu; + u32 num_cntr; /* Number of counters */ + u32 num_eg; /* Number of event group */ + u32 cntr_width; /* Counter width */ + u32 cntr_stride; /* Counter Stride */ + u32 filter; /* Bitmask of filter support */ + void __iomem *base; /* the PerfMon base address */ + void __iomem *cfg_reg; /* counter configuration base address */ + void __iomem *cntr_reg; /* counter 0 address*/ + void __iomem *overflow; /* overflow status register */ + + u64 *evcap; /* Indicates all supported events */ + u32 **cntr_evcap; /* Supported events of each counter. */ +}; + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 reg_phys; /* physical address of hw register set */ @@ -605,6 +644,8 @@ struct intel_iommu { struct dmar_drhd_unit *drhd; void *perf_statistic; + + struct iommu_pmu *pmu; }; /* PCI domain-device relationship */ diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c new file mode 100644 index 000000000000..db5791a54455 --- /dev/null +++ b/drivers/iommu/intel/perfmon.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Support Intel IOMMU PerfMon + * Copyright(c) 2023 Intel Corporation. + */ +#define pr_fmt(fmt) "DMAR: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include +#include "iommu.h" +#include "perfmon.h" + +static inline void __iomem * +get_perf_reg_address(struct intel_iommu *iommu, u32 offset) +{ + u32 off = dmar_readl(iommu->reg + offset); + + return iommu->reg + off; +} + +int alloc_iommu_pmu(struct intel_iommu *iommu) +{ + struct iommu_pmu *iommu_pmu; + int i, j, ret; + u64 perfcap; + u32 cap; + + if (!ecap_pms(iommu->ecap)) + return 0; + + /* The IOMMU PMU requires the ECMD support as well */ + if (!cap_ecmds(iommu->cap)) + return -ENODEV; + + perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG); + /* The performance monitoring is not supported. 
*/ + if (!perfcap) + return -ENODEV; + + /* Sanity check for the number of the counters and event groups */ + if (!pcap_num_cntr(perfcap) || !pcap_num_event_group(perfcap)) + return -ENODEV; + + /* The interrupt on overflow is required */ + if (!pcap_interrupt(perfcap)) + return -ENODEV; + + iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL); + if (!iommu_pmu) + return -ENOMEM; + + iommu_pmu->num_cntr = pcap_num_cntr(perfcap); + iommu_pmu->cntr_width = pcap_cntr_width(perfcap); + iommu_pmu->filter = pcap_filters_mask(perfcap); + iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap); + iommu_pmu->num_eg = pcap_num_event_group(perfcap); + + iommu_pmu->evcap = kcalloc(iommu_pmu->num_eg, sizeof(u64), GFP_KERNEL); + if (!iommu_pmu->evcap) { + ret = -ENOMEM; + goto free_pmu; + } + + /* Parse event group capabilities */ + for (i = 0; i < iommu_pmu->num_eg; i++) { + u64 pcap; + + pcap = dmar_readq(iommu->reg + DMAR_PERFEVNTCAP_REG + + i * IOMMU_PMU_CAP_REGS_STEP); + iommu_pmu->evcap[i] = pecap_es(pcap); + } + + iommu_pmu->cntr_evcap = kcalloc(iommu_pmu->num_cntr, sizeof(u32 *), GFP_KERNEL); + if (!iommu_pmu->cntr_evcap) { + ret = -ENOMEM; + goto free_pmu_evcap; + } + for (i = 0; i < iommu_pmu->num_cntr; i++) { + iommu_pmu->cntr_evcap[i] = kcalloc(iommu_pmu->num_eg, sizeof(u32), GFP_KERNEL); + if (!iommu_pmu->cntr_evcap[i]) { + ret = -ENOMEM; + goto free_pmu_cntr_evcap; + } + /* + * Set to the global capabilities, will adjust according + * to per-counter capabilities later. + */ + for (j = 0; j < iommu_pmu->num_eg; j++) + iommu_pmu->cntr_evcap[i][j] = (u32)iommu_pmu->evcap[j]; + } + + iommu_pmu->cfg_reg = get_perf_reg_address(iommu, DMAR_PERFCFGOFF_REG); + iommu_pmu->cntr_reg = get_perf_reg_address(iommu, DMAR_PERFCNTROFF_REG); + iommu_pmu->overflow = get_perf_reg_address(iommu, DMAR_PERFOVFOFF_REG); + + /* + * Check per-counter capabilities. All counters should have the + * same capabilities on Interrupt on Overflow Support and Counter + * Width. + */ + for (i = 0; i < iommu_pmu->num_cntr; i++) { + cap = dmar_readl(iommu_pmu->cfg_reg + + i * IOMMU_PMU_CFG_OFFSET + + IOMMU_PMU_CFG_CNTRCAP_OFFSET); + if (!iommu_cntrcap_pcc(cap)) + continue; + + /* + * It's possible that some counters have a different + * capability because of e.g., HW bug. Check the corner + * case here and simply drop those counters. + */ + if ((iommu_cntrcap_cw(cap) != iommu_pmu->cntr_width) || + !iommu_cntrcap_ios(cap)) { + iommu_pmu->num_cntr = i; + pr_warn("PMU counter capability inconsistent, counter number reduced to %d\n", + iommu_pmu->num_cntr); + } + + /* Clear the pre-defined events group */ + for (j = 0; j < iommu_pmu->num_eg; j++) + iommu_pmu->cntr_evcap[i][j] = 0; + + /* Override with per-counter event capabilities */ + for (j = 0; j < iommu_cntrcap_egcnt(cap); j++) { + cap = dmar_readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET + + IOMMU_PMU_CFG_CNTREVCAP_OFFSET + + (j * IOMMU_PMU_OFF_REGS_STEP)); + iommu_pmu->cntr_evcap[i][iommu_event_group(cap)] = iommu_event_select(cap); + /* + * Some events may only be supported by a specific counter. + * Track them in the evcap as well. 
+ */ + iommu_pmu->evcap[iommu_event_group(cap)] |= iommu_event_select(cap); + } + } + + iommu_pmu->iommu = iommu; + iommu->pmu = iommu_pmu; + + return 0; + +free_pmu_cntr_evcap: + for (i = 0; i < iommu_pmu->num_cntr; i++) + kfree(iommu_pmu->cntr_evcap[i]); + kfree(iommu_pmu->cntr_evcap); +free_pmu_evcap: + kfree(iommu_pmu->evcap); +free_pmu: + kfree(iommu_pmu); + + return ret; +} + +void free_iommu_pmu(struct intel_iommu *iommu) +{ + struct iommu_pmu *iommu_pmu = iommu->pmu; + + if (!iommu_pmu) + return; + + if (iommu_pmu->evcap) { + int i; + + for (i = 0; i < iommu_pmu->num_cntr; i++) + kfree(iommu_pmu->cntr_evcap[i]); + kfree(iommu_pmu->cntr_evcap); + } + kfree(iommu_pmu->evcap); + kfree(iommu_pmu); + iommu->pmu = NULL; +} diff --git a/drivers/iommu/intel/perfmon.h b/drivers/iommu/intel/perfmon.h new file mode 100644 index 000000000000..4b0d9c1fea6f --- /dev/null +++ b/drivers/iommu/intel/perfmon.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * PERFCFGOFF_REG, PERFFRZOFF_REG + * PERFOVFOFF_REG, PERFCNTROFF_REG + */ +#define IOMMU_PMU_NUM_OFF_REGS 4 +#define IOMMU_PMU_OFF_REGS_STEP 4 + +#define IOMMU_PMU_CFG_OFFSET 0x100 +#define IOMMU_PMU_CFG_CNTRCAP_OFFSET 0x80 +#define IOMMU_PMU_CFG_CNTREVCAP_OFFSET 0x84 +#define IOMMU_PMU_CFG_SIZE 0x8 +#define IOMMU_PMU_CFG_FILTERS_OFFSET 0x4 + +#define IOMMU_PMU_CAP_REGS_STEP 8 + +#define iommu_cntrcap_pcc(p) ((p) & 0x1) +#define iommu_cntrcap_cw(p) (((p) >> 8) & 0xff) +#define iommu_cntrcap_ios(p) (((p) >> 16) & 0x1) +#define iommu_cntrcap_egcnt(p) (((p) >> 28) & 0xf) + +#define iommu_event_select(p) ((p) & 0xfffffff) +#define iommu_event_group(p) (((p) >> 28) & 0xf) + +#ifdef CONFIG_INTEL_IOMMU_PERF_EVENTS +int alloc_iommu_pmu(struct intel_iommu *iommu); +void free_iommu_pmu(struct intel_iommu *iommu); +#else +static inline int +alloc_iommu_pmu(struct intel_iommu *iommu) +{ + return 0; +} + +static inline void +free_iommu_pmu(struct intel_iommu *iommu) +{ +} +#endif /* CONFIG_INTEL_IOMMU_PERF_EVENTS */ -- cgit v1.2.3 From dc57875866ab9f6e0c366a6fd342217f71847b8b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 31 Jan 2023 15:37:36 +0800 Subject: iommu/vt-d: Support Enhanced Command Interface The Enhanced Command Register is to submit command and operand of enhanced commands to DMA Remapping hardware. It can support up to 256 enhanced commands. There is a HW register to indicate the availability of all 256 enhanced commands. Each bit stands for one command. But there isn't an existing interface to read/write all 256 bits. Introduce the u64 ecmdcap[4] to store the existence of each enhanced command. Read 4 times to get all of them in map_iommu(). Add a helper to facilitate an enhanced command launch. Make sure the hardware completes the command. Also add a helper to facilitate the check of PMU essentials. These helpers will be used later.
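A hypothetical usage sketch (not part of the patch) showing how a caller of the new submit helper distinguishes the three outcome classes its return value encodes; the wrapper function name is invented for illustration:

static int example_pmu_enable_counter(struct intel_iommu *iommu, u64 cntr)
{
	/* Submit DMA_ECMD_ENABLE with the counter index as operand A. */
	int ret = ecmd_submit_sync(iommu, DMA_ECMD_ENABLE, cntr, 0);

	if (ret < 0)		/* software error, e.g. -ENODEV, -EBUSY, -ETIMEDOUT */
		return ret;
	if (ret > 0)		/* hardware failure status code (VT-d spec Table 48) */
		return -EIO;

	return 0;		/* command completed without error */
}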
Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230128200428.1459118-4-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 10 ++++++++ drivers/iommu/intel/iommu.c | 56 +++++++++++++++++++++++++++++++++++++++++++++ drivers/iommu/intel/iommu.h | 33 ++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 7a03cadb13ff..0e429bab436f 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1017,6 +1017,16 @@ static int map_iommu(struct intel_iommu *iommu, struct dmar_drhd_unit *drhd) goto release; } } + + if (cap_ecmds(iommu->cap)) { + int i; + + for (i = 0; i < DMA_MAX_NUM_ECMDCAP; i++) { + iommu->ecmdcap[i] = dmar_readq(iommu->reg + DMAR_ECCAP_REG + + i * DMA_ECMD_REG_STEP); + } + } + err = 0; goto out; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 317af67b6098..e314c30d371a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5022,3 +5022,59 @@ void quirk_extra_dev_tlb_flush(struct device_domain_info *info, pasid, qdep, address, mask); } } + +#define ecmd_get_status_code(res) (((res) & 0xff) >> 1) + +/* + * Function to submit a command to the enhanced command interface. The + * valid enhanced command descriptions are defined in Table 47 of the + * VT-d spec. The VT-d hardware implementation may support some but not + * all commands, which can be determined by checking the Enhanced + * Command Capability Register. + * + * Return values: + * - 0: Command successful without any error; + * - Negative: software error value; + * - Nonzero positive: failure status code defined in Table 48. + */ +int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob) +{ + unsigned long flags; + u64 res; + int ret; + + if (!cap_ecmds(iommu->cap)) + return -ENODEV; + + raw_spin_lock_irqsave(&iommu->register_lock, flags); + + res = dmar_readq(iommu->reg + DMAR_ECRSP_REG); + if (res & DMA_ECMD_ECRSP_IP) { + ret = -EBUSY; + goto err; + } + + /* + * Unconditionally write the operand B, because + * - There is no side effect if an ecmd doesn't require an + * operand B, but we set the register to some value. + * - It's not invoked in any critical path. The extra MMIO + * write doesn't bring any performance concerns. 
+ */ + dmar_writeq(iommu->reg + DMAR_ECEO_REG, ob); + dmar_writeq(iommu->reg + DMAR_ECMD_REG, ecmd | (oa << DMA_ECMD_OA_SHIFT)); + + IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, dmar_readq, + !(res & DMA_ECMD_ECRSP_IP), res); + + if (res & DMA_ECMD_ECRSP_IP) { + ret = -ETIMEDOUT; + goto err; + } + + ret = ecmd_get_status_code(res); +err: + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); + + return ret; +} diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index f03d4b6bf49a..f918e83bf91c 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -130,6 +130,10 @@ #define DMAR_PERFOVFOFF_REG 0x318 #define DMAR_PERFCNTROFF_REG 0x31c #define DMAR_PERFEVNTCAP_REG 0x380 +#define DMAR_ECMD_REG 0x400 +#define DMAR_ECEO_REG 0x408 +#define DMAR_ECRSP_REG 0x410 +#define DMAR_ECCAP_REG 0x430 #define DMAR_VCCAP_REG 0xe30 /* Virtual command capability register */ #define DMAR_VCMD_REG 0xe00 /* Virtual command register */ #define DMAR_VCRSP_REG 0xe10 /* Virtual command response register */ @@ -304,6 +308,26 @@ #define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16) #define DMA_CCMD_DID(d) ((u64)((d) & 0xffff)) +/* ECMD_REG */ +#define DMA_MAX_NUM_ECMD 256 +#define DMA_MAX_NUM_ECMDCAP (DMA_MAX_NUM_ECMD / 64) +#define DMA_ECMD_REG_STEP 8 +#define DMA_ECMD_ENABLE 0xf0 +#define DMA_ECMD_DISABLE 0xf1 +#define DMA_ECMD_FREEZE 0xf4 +#define DMA_ECMD_UNFREEZE 0xf5 +#define DMA_ECMD_OA_SHIFT 16 +#define DMA_ECMD_ECRSP_IP 0x1 +#define DMA_ECMD_ECCAP3 3 +#define DMA_ECMD_ECCAP3_ECNTS BIT_ULL(48) +#define DMA_ECMD_ECCAP3_DCNTS BIT_ULL(49) +#define DMA_ECMD_ECCAP3_FCNTS BIT_ULL(52) +#define DMA_ECMD_ECCAP3_UFCNTS BIT_ULL(53) +#define DMA_ECMD_ECCAP3_ESSENTIAL (DMA_ECMD_ECCAP3_ECNTS | \ + DMA_ECMD_ECCAP3_DCNTS | \ + DMA_ECMD_ECCAP3_FCNTS | \ + DMA_ECMD_ECCAP3_UFCNTS) + /* FECTL_REG */ #define DMA_FECTL_IM (((u32)1) << 31) @@ -605,6 +629,7 @@ struct intel_iommu { u64 cap; u64 ecap; u64 vccap; + u64 ecmdcap[DMA_MAX_NUM_ECMDCAP]; u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ raw_spinlock_t register_lock; /* protect register handling */ int seq_id; /* sequence id of the iommu */ @@ -840,6 +865,14 @@ extern const struct iommu_ops intel_iommu_ops; extern int intel_iommu_sm; extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); +int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob); + +static inline bool ecmd_has_pmu_essential(struct intel_iommu *iommu) +{ + return (iommu->ecmdcap[DMA_ECMD_ECCAP3] & DMA_ECMD_ECCAP3_ESSENTIAL) == + DMA_ECMD_ECCAP3_ESSENTIAL; +} + extern int dmar_disabled; extern int intel_iommu_enabled; #else -- cgit v1.2.3 From 7232ab8b89e9ea32f07370643635a13641ce9c3c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 31 Jan 2023 15:37:37 +0800 Subject: iommu/vt-d: Add IOMMU perfmon support Implement the IOMMU performance monitor capability, which supports the collection of information about key events occurring during operation of the remapping hardware, to aid performance tuning and debug. The IOMMU perfmon support is implemented as part of the IOMMU driver and interfaces with the Linux perf subsystem. The IOMMU PMU has the following unique features compared with the other PMUs. - Support counting. Not support sampling. - Does not support per-thread counting. The scope is system-wide. - Support per-counter capability register. The event constraints can be enumerated. - The available event and event group can also be enumerated. 
- Extra Enhanced Commands are introduced to control the counters. Add a new variable, struct iommu_pmu *pmu, in the struct intel_iommu to track the PMU related information. Add iommu_pmu_register() and iommu_pmu_unregister() to register and unregister an IOMMU PMU. The register function sets up the IOMMU PMU ops and invokes the standard perf_pmu_register() interface to register a PMU in the perf subsystem. This patch only exposes the functions. The following patch will enable them in the IOMMU driver. The IOMMU PMUs can be found under /sys/bus/event_source/devices/dmar* The available filters and event format can be found in the format folder $ ls /sys/bus/event_source/devices/dmar1/format/ event event_group filter_ats filter_ats_en filter_page_table filter_page_table_en The supported events can be found in the events folder $ ls /sys/bus/event_source/devices/dmar1/events/ ats_blocked fs_nonleaf_hit int_cache_hit_posted iommu_mem_blocked iotlb_hit pasid_cache_lookup ss_nonleaf_hit ctxt_cache_hit fs_nonleaf_lookup int_cache_lookup iommu_mrds iotlb_lookup pg_req_posted ss_nonleaf_lookup ctxt_cache_lookup int_cache_hit_nonposted iommu_clocks iommu_requests pasid_cache_hit pw_occupancy The command below illustrates filter usage with a simple example. $ perf stat -e dmar1/iommu_requests,filter_ats_en=0x1,filter_ats=0x1/ -a sleep 1 Performance counter stats for 'system wide': 368,947 dmar1/iommu_requests,filter_ats_en=0x1,filter_ats=0x1/ 1.002592074 seconds time elapsed Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230128200428.1459118-5-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- .../testing/sysfs-bus-event_source-devices-iommu | 29 ++ drivers/iommu/intel/iommu.h | 15 + drivers/iommu/intel/perfmon.c | 526 +++++++++++++++++++++ drivers/iommu/intel/perfmon.h | 24 + 4 files changed, 594 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu (limited to 'drivers/iommu') diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu b/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu new file mode 100644 index 000000000000..988210a0e8ce --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu @@ -0,0 +1,29 @@ +What: /sys/bus/event_source/devices/dmar*/format +Date: Jan 2023 +KernelVersion: 6.3 +Contact: Kan Liang +Description: Read-only. Attribute group to describe the magic bits + that go into perf_event_attr.config, + perf_event_attr.config1 or perf_event_attr.config2 for + the IOMMU pmu. (See also + ABI/testing/sysfs-bus-event_source-devices-format). + + Each attribute in this group defines a bit range in + perf_event_attr.config, perf_event_attr.config1, + or perf_event_attr.config2.
All supported attributes + are listed below (See the VT-d Spec 4.0 for possible + attribute values):: + + event = "config:0-27" - event ID + event_group = "config:28-31" - event group ID + + filter_requester_en = "config1:0" - Enable Requester ID filter + filter_domain_en = "config1:1" - Enable Domain ID filter + filter_pasid_en = "config1:2" - Enable PASID filter + filter_ats_en = "config1:3" - Enable Address Type filter + filter_page_table_en= "config1:4" - Enable Page Table Level filter + filter_requester_id = "config1:16-31" - Requester ID filter + filter_domain = "config1:32-47" - Domain ID filter + filter_pasid = "config2:0-21" - PASID filter + filter_ats = "config2:24-28" - Address Type filter + filter_page_table = "config2:32-36" - Page Table Level filter diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index f918e83bf91c..d56b3c386366 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -606,6 +607,16 @@ struct dmar_domain { iommu core */ }; +/* + * In theory, the VT-d 4.0 spec can support up to 2 ^ 16 counters. + * But in practice, there are only 14 counters for the existing + * platform. Setting the max number of counters to 64 should be good + * enough for a long time. Also, supporting more than 64 counters + * requires more extras, e.g., extra freeze and overflow registers, + * which is not necessary for now. + */ +#define IOMMU_PMU_IDX_MAX 64 + struct iommu_pmu { struct intel_iommu *iommu; u32 num_cntr; /* Number of counters */ @@ -620,6 +631,10 @@ struct iommu_pmu { u64 *evcap; /* Indicates all supported events */ u32 **cntr_evcap; /* Supported events of each counter. */ + + struct pmu pmu; + DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX); + struct perf_event *event_list[IOMMU_PMU_IDX_MAX]; }; struct intel_iommu { diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c index db5791a54455..df9b78736462 100644 --- a/drivers/iommu/intel/perfmon.c +++ b/drivers/iommu/intel/perfmon.c @@ -10,6 +10,504 @@ #include "iommu.h" #include "perfmon.h" +PMU_FORMAT_ATTR(event, "config:0-27"); /* ES: Events Select */ +PMU_FORMAT_ATTR(event_group, "config:28-31"); /* EGI: Event Group Index */ + +static struct attribute *iommu_pmu_format_attrs[] = { + &format_attr_event_group.attr, + &format_attr_event.attr, + NULL +}; + +static struct attribute_group iommu_pmu_format_attr_group = { + .name = "format", + .attrs = iommu_pmu_format_attrs, +}; + +/* The available events are added in attr_update later */ +static struct attribute *attrs_empty[] = { + NULL +}; + +static struct attribute_group iommu_pmu_events_attr_group = { + .name = "events", + .attrs = attrs_empty, +}; + +static const struct attribute_group *iommu_pmu_attr_groups[] = { + &iommu_pmu_format_attr_group, + &iommu_pmu_events_attr_group, + NULL +}; + +static inline struct iommu_pmu *dev_to_iommu_pmu(struct device *dev) +{ + /* + * The perf_event creates its own dev for each PMU. 
+ * See pmu_dev_alloc() + */ + return container_of(dev_get_drvdata(dev), struct iommu_pmu, pmu); +} + +#define IOMMU_PMU_ATTR(_name, _format, _filter) \ + PMU_FORMAT_ATTR(_name, _format); \ + \ +static struct attribute *_name##_attr[] = { \ + &format_attr_##_name.attr, \ + NULL \ +}; \ + \ +static umode_t \ +_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i) \ +{ \ + struct device *dev = kobj_to_dev(kobj); \ + struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev); \ + \ + if (!iommu_pmu) \ + return 0; \ + return (iommu_pmu->filter & _filter) ? attr->mode : 0; \ +} \ + \ +static struct attribute_group _name = { \ + .name = "format", \ + .attrs = _name##_attr, \ + .is_visible = _name##_is_visible, \ +}; + +IOMMU_PMU_ATTR(filter_requester_id_en, "config1:0", IOMMU_PMU_FILTER_REQUESTER_ID); +IOMMU_PMU_ATTR(filter_domain_en, "config1:1", IOMMU_PMU_FILTER_DOMAIN); +IOMMU_PMU_ATTR(filter_pasid_en, "config1:2", IOMMU_PMU_FILTER_PASID); +IOMMU_PMU_ATTR(filter_ats_en, "config1:3", IOMMU_PMU_FILTER_ATS); +IOMMU_PMU_ATTR(filter_page_table_en, "config1:4", IOMMU_PMU_FILTER_PAGE_TABLE); +IOMMU_PMU_ATTR(filter_requester_id, "config1:16-31", IOMMU_PMU_FILTER_REQUESTER_ID); +IOMMU_PMU_ATTR(filter_domain, "config1:32-47", IOMMU_PMU_FILTER_DOMAIN); +IOMMU_PMU_ATTR(filter_pasid, "config2:0-21", IOMMU_PMU_FILTER_PASID); +IOMMU_PMU_ATTR(filter_ats, "config2:24-28", IOMMU_PMU_FILTER_ATS); +IOMMU_PMU_ATTR(filter_page_table, "config2:32-36", IOMMU_PMU_FILTER_PAGE_TABLE); + +#define iommu_pmu_en_requester_id(e) ((e) & 0x1) +#define iommu_pmu_en_domain(e) (((e) >> 1) & 0x1) +#define iommu_pmu_en_pasid(e) (((e) >> 2) & 0x1) +#define iommu_pmu_en_ats(e) (((e) >> 3) & 0x1) +#define iommu_pmu_en_page_table(e) (((e) >> 4) & 0x1) +#define iommu_pmu_get_requester_id(filter) (((filter) >> 16) & 0xffff) +#define iommu_pmu_get_domain(filter) (((filter) >> 32) & 0xffff) +#define iommu_pmu_get_pasid(filter) ((filter) & 0x3fffff) +#define iommu_pmu_get_ats(filter) (((filter) >> 24) & 0x1f) +#define iommu_pmu_get_page_table(filter) (((filter) >> 32) & 0x1f) + +#define iommu_pmu_set_filter(_name, _config, _filter, _idx, _econfig) \ +{ \ + if ((iommu_pmu->filter & _filter) && iommu_pmu_en_##_name(_econfig)) { \ + dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + \ + IOMMU_PMU_CFG_SIZE + \ + (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET, \ + iommu_pmu_get_##_name(_config) | IOMMU_PMU_FILTER_EN);\ + } \ +} + +#define iommu_pmu_clear_filter(_filter, _idx) \ +{ \ + if (iommu_pmu->filter & _filter) { \ + dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + \ + IOMMU_PMU_CFG_SIZE + \ + (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET, \ + 0); \ + } \ +} + +/* + * Define the event attr related functions + * Input: _name: event attr name + * _string: string of the event in sysfs + * _g_idx: event group encoding + * _event: event encoding + */ +#define IOMMU_PMU_EVENT_ATTR(_name, _string, _g_idx, _event) \ + PMU_EVENT_ATTR_STRING(_name, event_attr_##_name, _string) \ + \ +static struct attribute *_name##_attr[] = { \ + &event_attr_##_name.attr.attr, \ + NULL \ +}; \ + \ +static umode_t \ +_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i) \ +{ \ + struct device *dev = kobj_to_dev(kobj); \ + struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev); \ + \ + if (!iommu_pmu) \ + return 0; \ + return (iommu_pmu->evcap[_g_idx] & _event) ? 
attr->mode : 0; \ +} \ + \ +static struct attribute_group _name = { \ + .name = "events", \ + .attrs = _name##_attr, \ + .is_visible = _name##_is_visible, \ +}; + +IOMMU_PMU_EVENT_ATTR(iommu_clocks, "event_group=0x0,event=0x001", 0x0, 0x001) +IOMMU_PMU_EVENT_ATTR(iommu_requests, "event_group=0x0,event=0x002", 0x0, 0x002) +IOMMU_PMU_EVENT_ATTR(pw_occupancy, "event_group=0x0,event=0x004", 0x0, 0x004) +IOMMU_PMU_EVENT_ATTR(ats_blocked, "event_group=0x0,event=0x008", 0x0, 0x008) +IOMMU_PMU_EVENT_ATTR(iommu_mrds, "event_group=0x1,event=0x001", 0x1, 0x001) +IOMMU_PMU_EVENT_ATTR(iommu_mem_blocked, "event_group=0x1,event=0x020", 0x1, 0x020) +IOMMU_PMU_EVENT_ATTR(pg_req_posted, "event_group=0x1,event=0x040", 0x1, 0x040) +IOMMU_PMU_EVENT_ATTR(ctxt_cache_lookup, "event_group=0x2,event=0x001", 0x2, 0x001) +IOMMU_PMU_EVENT_ATTR(ctxt_cache_hit, "event_group=0x2,event=0x002", 0x2, 0x002) +IOMMU_PMU_EVENT_ATTR(pasid_cache_lookup, "event_group=0x2,event=0x004", 0x2, 0x004) +IOMMU_PMU_EVENT_ATTR(pasid_cache_hit, "event_group=0x2,event=0x008", 0x2, 0x008) +IOMMU_PMU_EVENT_ATTR(ss_nonleaf_lookup, "event_group=0x2,event=0x010", 0x2, 0x010) +IOMMU_PMU_EVENT_ATTR(ss_nonleaf_hit, "event_group=0x2,event=0x020", 0x2, 0x020) +IOMMU_PMU_EVENT_ATTR(fs_nonleaf_lookup, "event_group=0x2,event=0x040", 0x2, 0x040) +IOMMU_PMU_EVENT_ATTR(fs_nonleaf_hit, "event_group=0x2,event=0x080", 0x2, 0x080) +IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_lookup, "event_group=0x2,event=0x100", 0x2, 0x100) +IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_hit, "event_group=0x2,event=0x200", 0x2, 0x200) +IOMMU_PMU_EVENT_ATTR(iotlb_lookup, "event_group=0x3,event=0x001", 0x3, 0x001) +IOMMU_PMU_EVENT_ATTR(iotlb_hit, "event_group=0x3,event=0x002", 0x3, 0x002) +IOMMU_PMU_EVENT_ATTR(hpt_leaf_lookup, "event_group=0x3,event=0x004", 0x3, 0x004) +IOMMU_PMU_EVENT_ATTR(hpt_leaf_hit, "event_group=0x3,event=0x008", 0x3, 0x008) +IOMMU_PMU_EVENT_ATTR(int_cache_lookup, "event_group=0x4,event=0x001", 0x4, 0x001) +IOMMU_PMU_EVENT_ATTR(int_cache_hit_nonposted, "event_group=0x4,event=0x002", 0x4, 0x002) +IOMMU_PMU_EVENT_ATTR(int_cache_hit_posted, "event_group=0x4,event=0x004", 0x4, 0x004) + +static const struct attribute_group *iommu_pmu_attr_update[] = { + &filter_requester_id_en, + &filter_domain_en, + &filter_pasid_en, + &filter_ats_en, + &filter_page_table_en, + &filter_requester_id, + &filter_domain, + &filter_pasid, + &filter_ats, + &filter_page_table, + &iommu_clocks, + &iommu_requests, + &pw_occupancy, + &ats_blocked, + &iommu_mrds, + &iommu_mem_blocked, + &pg_req_posted, + &ctxt_cache_lookup, + &ctxt_cache_hit, + &pasid_cache_lookup, + &pasid_cache_hit, + &ss_nonleaf_lookup, + &ss_nonleaf_hit, + &fs_nonleaf_lookup, + &fs_nonleaf_hit, + &hpt_nonleaf_lookup, + &hpt_nonleaf_hit, + &iotlb_lookup, + &iotlb_hit, + &hpt_leaf_lookup, + &hpt_leaf_hit, + &int_cache_lookup, + &int_cache_hit_nonposted, + &int_cache_hit_posted, + NULL +}; + +static inline void __iomem * +iommu_event_base(struct iommu_pmu *iommu_pmu, int idx) +{ + return iommu_pmu->cntr_reg + idx * iommu_pmu->cntr_stride; +} + +static inline void __iomem * +iommu_config_base(struct iommu_pmu *iommu_pmu, int idx) +{ + return iommu_pmu->cfg_reg + idx * IOMMU_PMU_CFG_OFFSET; +} + +static inline struct iommu_pmu *iommu_event_to_pmu(struct perf_event *event) +{ + return container_of(event->pmu, struct iommu_pmu, pmu); +} + +static inline u64 iommu_event_config(struct perf_event *event) +{ + u64 config = event->attr.config; + + return (iommu_event_select(config) << IOMMU_EVENT_CFG_ES_SHIFT) | + (iommu_event_group(config) << 
IOMMU_EVENT_CFG_EGI_SHIFT) | + IOMMU_EVENT_CFG_INT; +} + +static inline bool is_iommu_pmu_event(struct iommu_pmu *iommu_pmu, + struct perf_event *event) +{ + return event->pmu == &iommu_pmu->pmu; +} + +static int iommu_pmu_validate_event(struct perf_event *event) +{ + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); + u32 event_group = iommu_event_group(event->attr.config); + + if (event_group >= iommu_pmu->num_eg) + return -EINVAL; + + return 0; +} + +static int iommu_pmu_validate_group(struct perf_event *event) +{ + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); + struct perf_event *sibling; + int nr = 0; + + /* + * All events in a group must be scheduled simultaneously. + * Check whether there is enough counters for all the events. + */ + for_each_sibling_event(sibling, event->group_leader) { + if (!is_iommu_pmu_event(iommu_pmu, sibling) || + sibling->state <= PERF_EVENT_STATE_OFF) + continue; + + if (++nr > iommu_pmu->num_cntr) + return -EINVAL; + } + + return 0; +} + +static int iommu_pmu_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* sampling not supported */ + if (event->attr.sample_period) + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + if (iommu_pmu_validate_event(event)) + return -EINVAL; + + hwc->config = iommu_event_config(event); + + return iommu_pmu_validate_group(event); +} + +static void iommu_pmu_event_update(struct perf_event *event) +{ + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); + struct hw_perf_event *hwc = &event->hw; + u64 prev_count, new_count, delta; + int shift = 64 - iommu_pmu->cntr_width; + +again: + prev_count = local64_read(&hwc->prev_count); + new_count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx)); + if (local64_xchg(&hwc->prev_count, new_count) != prev_count) + goto again; + + /* + * The counter width is enumerated. Always shift the counter + * before using it. + */ + delta = (new_count << shift) - (prev_count << shift); + delta >>= shift; + + local64_add(delta, &event->count); +} + +static void iommu_pmu_start(struct perf_event *event, int flags) +{ + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); + struct intel_iommu *iommu = iommu_pmu->iommu; + struct hw_perf_event *hwc = &event->hw; + u64 count; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(hwc->idx < 0 || hwc->idx >= IOMMU_PMU_IDX_MAX)) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + hwc->state = 0; + + /* Always reprogram the period */ + count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx)); + local64_set((&hwc->prev_count), count); + + /* + * The error of ecmd will be ignored. + * - The existing perf_event subsystem doesn't handle the error. + * Only IOMMU PMU returns runtime HW error. We don't want to + * change the existing generic interfaces for the specific case. + * - It's a corner case caused by HW, which is very unlikely to + * happen. There is nothing SW can do. + * - The worst case is that the user will get with + * perf command, which can give the user some hints. 
+ */ + ecmd_submit_sync(iommu, DMA_ECMD_ENABLE, hwc->idx, 0); + + perf_event_update_userpage(event); +} + +static void iommu_pmu_stop(struct perf_event *event, int flags) +{ + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); + struct intel_iommu *iommu = iommu_pmu->iommu; + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->state & PERF_HES_STOPPED)) { + ecmd_submit_sync(iommu, DMA_ECMD_DISABLE, hwc->idx, 0); + + iommu_pmu_event_update(event); + + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } +} + +static inline int +iommu_pmu_validate_per_cntr_event(struct iommu_pmu *iommu_pmu, + int idx, struct perf_event *event) +{ + u32 event_group = iommu_event_group(event->attr.config); + u32 select = iommu_event_select(event->attr.config); + + if (!(iommu_pmu->cntr_evcap[idx][event_group] & select)) + return -EINVAL; + + return 0; +} + +static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx; + + /* + * The counters which support limited events are usually at the end. + * Schedule them first to accommodate more events. + */ + for (idx = iommu_pmu->num_cntr - 1; idx >= 0; idx--) { + if (test_and_set_bit(idx, iommu_pmu->used_mask)) + continue; + /* Check per-counter event capabilities */ + if (!iommu_pmu_validate_per_cntr_event(iommu_pmu, idx, event)) + break; + clear_bit(idx, iommu_pmu->used_mask); + } + if (idx < 0) + return -EINVAL; + + iommu_pmu->event_list[idx] = event; + hwc->idx = idx; + + /* config events */ + dmar_writeq(iommu_config_base(iommu_pmu, idx), hwc->config); + + iommu_pmu_set_filter(requester_id, event->attr.config1, + IOMMU_PMU_FILTER_REQUESTER_ID, idx, + event->attr.config1); + iommu_pmu_set_filter(domain, event->attr.config1, + IOMMU_PMU_FILTER_DOMAIN, idx, + event->attr.config1); + iommu_pmu_set_filter(pasid, event->attr.config1, + IOMMU_PMU_FILTER_PASID, idx, + event->attr.config1); + iommu_pmu_set_filter(ats, event->attr.config2, + IOMMU_PMU_FILTER_ATS, idx, + event->attr.config1); + iommu_pmu_set_filter(page_table, event->attr.config2, + IOMMU_PMU_FILTER_PAGE_TABLE, idx, + event->attr.config1); + + return 0; +} + +static int iommu_pmu_add(struct perf_event *event, int flags) +{ + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); + struct hw_perf_event *hwc = &event->hw; + int ret; + + ret = iommu_pmu_assign_event(iommu_pmu, event); + if (ret < 0) + return ret; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + iommu_pmu_start(event, 0); + + return 0; +} + +static void iommu_pmu_del(struct perf_event *event, int flags) +{ + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); + int idx = event->hw.idx; + + iommu_pmu_stop(event, PERF_EF_UPDATE); + + iommu_pmu_clear_filter(IOMMU_PMU_FILTER_REQUESTER_ID, idx); + iommu_pmu_clear_filter(IOMMU_PMU_FILTER_DOMAIN, idx); + iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PASID, idx); + iommu_pmu_clear_filter(IOMMU_PMU_FILTER_ATS, idx); + iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PAGE_TABLE, idx); + + iommu_pmu->event_list[idx] = NULL; + event->hw.idx = -1; + clear_bit(idx, iommu_pmu->used_mask); + + perf_event_update_userpage(event); +} + +static void iommu_pmu_enable(struct pmu *pmu) +{ + struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu); + struct intel_iommu *iommu = iommu_pmu->iommu; + + ecmd_submit_sync(iommu, DMA_ECMD_UNFREEZE, 0, 0); +} + +static void iommu_pmu_disable(struct pmu *pmu) +{ + struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu); 
+ struct intel_iommu *iommu = iommu_pmu->iommu; + + ecmd_submit_sync(iommu, DMA_ECMD_FREEZE, 0, 0); +} + +static int __iommu_pmu_register(struct intel_iommu *iommu) +{ + struct iommu_pmu *iommu_pmu = iommu->pmu; + + iommu_pmu->pmu.name = iommu->name; + iommu_pmu->pmu.task_ctx_nr = perf_invalid_context; + iommu_pmu->pmu.event_init = iommu_pmu_event_init; + iommu_pmu->pmu.pmu_enable = iommu_pmu_enable; + iommu_pmu->pmu.pmu_disable = iommu_pmu_disable; + iommu_pmu->pmu.add = iommu_pmu_add; + iommu_pmu->pmu.del = iommu_pmu_del; + iommu_pmu->pmu.start = iommu_pmu_start; + iommu_pmu->pmu.stop = iommu_pmu_stop; + iommu_pmu->pmu.read = iommu_pmu_event_update; + iommu_pmu->pmu.attr_groups = iommu_pmu_attr_groups; + iommu_pmu->pmu.attr_update = iommu_pmu_attr_update; + iommu_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; + iommu_pmu->pmu.module = THIS_MODULE; + + return perf_pmu_register(&iommu_pmu->pmu, iommu_pmu->pmu.name, -1); +} + static inline void __iomem * get_perf_reg_address(struct intel_iommu *iommu, u32 offset) { @@ -45,11 +543,21 @@ int alloc_iommu_pmu(struct intel_iommu *iommu) if (!pcap_interrupt(perfcap)) return -ENODEV; + /* Check required Enhanced Command Capability */ + if (!ecmd_has_pmu_essential(iommu)) + return -ENODEV; + iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL); if (!iommu_pmu) return -ENOMEM; iommu_pmu->num_cntr = pcap_num_cntr(perfcap); + if (iommu_pmu->num_cntr > IOMMU_PMU_IDX_MAX) { + pr_warn_once("The number of IOMMU counters %d > max(%d), clipping!", + iommu_pmu->num_cntr, IOMMU_PMU_IDX_MAX); + iommu_pmu->num_cntr = IOMMU_PMU_IDX_MAX; + } + iommu_pmu->cntr_width = pcap_cntr_width(perfcap); iommu_pmu->filter = pcap_filters_mask(perfcap); iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap); @@ -170,3 +678,21 @@ void free_iommu_pmu(struct intel_iommu *iommu) kfree(iommu_pmu); iommu->pmu = NULL; } + +void iommu_pmu_register(struct intel_iommu *iommu) +{ + if (!iommu->pmu) + return; + + if (__iommu_pmu_register(iommu)) { + pr_err("Failed to register PMU for iommu (seq_id = %d)\n", + iommu->seq_id); + free_iommu_pmu(iommu); + } +} + +void iommu_pmu_unregister(struct intel_iommu *iommu) +{ + if (iommu->pmu) + perf_pmu_unregister(&iommu->pmu->pmu); +} diff --git a/drivers/iommu/intel/perfmon.h b/drivers/iommu/intel/perfmon.h index 4b0d9c1fea6f..58606af9a2b9 100644 --- a/drivers/iommu/intel/perfmon.h +++ b/drivers/iommu/intel/perfmon.h @@ -7,6 +7,14 @@ #define IOMMU_PMU_NUM_OFF_REGS 4 #define IOMMU_PMU_OFF_REGS_STEP 4 +#define IOMMU_PMU_FILTER_REQUESTER_ID 0x01 +#define IOMMU_PMU_FILTER_DOMAIN 0x02 +#define IOMMU_PMU_FILTER_PASID 0x04 +#define IOMMU_PMU_FILTER_ATS 0x08 +#define IOMMU_PMU_FILTER_PAGE_TABLE 0x10 + +#define IOMMU_PMU_FILTER_EN BIT(31) + #define IOMMU_PMU_CFG_OFFSET 0x100 #define IOMMU_PMU_CFG_CNTRCAP_OFFSET 0x80 #define IOMMU_PMU_CFG_CNTREVCAP_OFFSET 0x84 @@ -20,12 +28,18 @@ #define iommu_cntrcap_ios(p) (((p) >> 16) & 0x1) #define iommu_cntrcap_egcnt(p) (((p) >> 28) & 0xf) +#define IOMMU_EVENT_CFG_EGI_SHIFT 8 +#define IOMMU_EVENT_CFG_ES_SHIFT 32 +#define IOMMU_EVENT_CFG_INT BIT_ULL(1) + #define iommu_event_select(p) ((p) & 0xfffffff) #define iommu_event_group(p) (((p) >> 28) & 0xf) #ifdef CONFIG_INTEL_IOMMU_PERF_EVENTS int alloc_iommu_pmu(struct intel_iommu *iommu); void free_iommu_pmu(struct intel_iommu *iommu); +void iommu_pmu_register(struct intel_iommu *iommu); +void iommu_pmu_unregister(struct intel_iommu *iommu); #else static inline int alloc_iommu_pmu(struct intel_iommu *iommu) @@ -37,4 +51,14 @@ static inline void free_iommu_pmu(struct intel_iommu 
*iommu) { } + +static inline void +iommu_pmu_register(struct intel_iommu *iommu) +{ +} + +static inline void +iommu_pmu_unregister(struct intel_iommu *iommu) +{ +} #endif /* CONFIG_INTEL_IOMMU_PERF_EVENTS */ -- cgit v1.2.3 From 46284c6ceb5e4dfddcb00dafb7c2f3c1437fdca4 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 31 Jan 2023 15:37:38 +0800 Subject: iommu/vt-d: Support cpumask for IOMMU perfmon The perf subsystem assumes that all counters are by default per-CPU. So the user space tool reads a counter from each CPU. However, the IOMMU counters are system-wide and can be read from any CPU. Here we use a CPU mask to restrict counting to one CPU to handle the issue. (with CPU hotplug notifier to choose a different CPU if the chosen one is taken off-line). The CPU is exposed to /sys/bus/event_source/devices/dmar*/cpumask for the user space perf tool. Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230128200428.1459118-6-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- .../testing/sysfs-bus-event_source-devices-iommu | 8 ++ drivers/iommu/intel/perfmon.c | 113 +++++++++++++++++++-- include/linux/cpuhotplug.h | 1 + 3 files changed, 114 insertions(+), 8 deletions(-) (limited to 'drivers/iommu') diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu b/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu index 988210a0e8ce..d7af4919302e 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu @@ -27,3 +27,11 @@ Description: Read-only. Attribute group to describe the magic bits filter_pasid = "config2:0-21" - PASID filter filter_ats = "config2:24-28" - Address Type filter filter_page_table = "config2:32-36" - Page Table Level filter + +What: /sys/bus/event_source/devices/dmar*/cpumask +Date: Jan 2023 +KernelVersion: 6.3 +Contact: Kan Liang +Description: Read-only. This file always returns the CPU to which the + IOMMU pmu is bound for access to all IOMMU pmu performance + monitoring events. 
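For example, the bound CPU can be queried directly from user space (device name and output shown here are hypothetical):

$ cat /sys/bus/event_source/devices/dmar0/cpumask
0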
diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c index df9b78736462..322d362b85e4 100644 --- a/drivers/iommu/intel/perfmon.c +++ b/drivers/iommu/intel/perfmon.c @@ -34,9 +34,28 @@ static struct attribute_group iommu_pmu_events_attr_group = { .attrs = attrs_empty, }; +static cpumask_t iommu_pmu_cpu_mask; + +static ssize_t +cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &iommu_pmu_cpu_mask); +} +static DEVICE_ATTR_RO(cpumask); + +static struct attribute *iommu_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +static struct attribute_group iommu_pmu_cpumask_attr_group = { + .attrs = iommu_pmu_cpumask_attrs, +}; + static const struct attribute_group *iommu_pmu_attr_groups[] = { &iommu_pmu_format_attr_group, &iommu_pmu_events_attr_group, + &iommu_pmu_cpumask_attr_group, NULL }; @@ -679,20 +698,98 @@ void free_iommu_pmu(struct intel_iommu *iommu) iommu->pmu = NULL; } +static int iommu_pmu_cpu_online(unsigned int cpu) +{ + if (cpumask_empty(&iommu_pmu_cpu_mask)) + cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask); + + return 0; +} + +static int iommu_pmu_cpu_offline(unsigned int cpu) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + int target; + + if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask)) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + + if (target < nr_cpu_ids) + cpumask_set_cpu(target, &iommu_pmu_cpu_mask); + else + target = -1; + + rcu_read_lock(); + + for_each_iommu(iommu, drhd) { + if (!iommu->pmu) + continue; + perf_pmu_migrate_context(&iommu->pmu->pmu, cpu, target); + } + rcu_read_unlock(); + + return 0; +} + +static int nr_iommu_pmu; + +static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu) +{ + int ret; + + if (nr_iommu_pmu++) + return 0; + + ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE, + "driver/iommu/intel/perfmon:online", + iommu_pmu_cpu_online, + iommu_pmu_cpu_offline); + if (ret) + nr_iommu_pmu = 0; + + return ret; +} + +static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu) +{ + if (--nr_iommu_pmu) + return; + + cpuhp_remove_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE); +} + void iommu_pmu_register(struct intel_iommu *iommu) { - if (!iommu->pmu) + struct iommu_pmu *iommu_pmu = iommu->pmu; + + if (!iommu_pmu) return; - if (__iommu_pmu_register(iommu)) { - pr_err("Failed to register PMU for iommu (seq_id = %d)\n", - iommu->seq_id); - free_iommu_pmu(iommu); - } + if (__iommu_pmu_register(iommu)) + goto err; + + if (iommu_pmu_cpuhp_setup(iommu_pmu)) + goto unregister; + + return; + +unregister: + perf_pmu_unregister(&iommu_pmu->pmu); +err: + pr_err("Failed to register PMU for iommu (seq_id = %d)\n", iommu->seq_id); + free_iommu_pmu(iommu); } void iommu_pmu_unregister(struct intel_iommu *iommu) { - if (iommu->pmu) - perf_pmu_unregister(&iommu->pmu->pmu); + struct iommu_pmu *iommu_pmu = iommu->pmu; + + if (!iommu_pmu) + return; + + iommu_pmu_cpuhp_free(iommu_pmu); + perf_pmu_unregister(&iommu_pmu->pmu); } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 6c6859bfc454..f2ea348ce3b0 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -221,6 +221,7 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_CQM_ONLINE, CPUHP_AP_PERF_X86_CSTATE_ONLINE, CPUHP_AP_PERF_X86_IDXD_ONLINE, + CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, -- cgit v1.2.3 From 4a0d4265659b1078db3432cb80ceaf26ad921704 Mon Sep 17 
00:00:00 2001 From: Kan Liang Date: Tue, 31 Jan 2023 15:37:39 +0800 Subject: iommu/vt-d: Add IOMMU perfmon overflow handler support When a counter is enabled and an event occurrence causes its value to increment and roll over to or past zero, this is termed a counter overflow. The overflow can trigger an interrupt. The IOMMU perfmon needs to handle the case properly. New HW IRQs are allocated for each IOMMU device for perfmon. The IRQ IDs are after the SVM range. In the overflow handler, the counter is not frozen. It's very unlikely that the same counter overflows again during the period. But it's possible that other counters overflow at the same time. Read the overflow register at the end of the handler and check whether there are more. Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230128200428.1459118-7-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 2 ++ drivers/iommu/intel/iommu.h | 11 +++++- drivers/iommu/intel/perfmon.c | 82 +++++++++++++++++++++++++++++++++++++++++++ drivers/iommu/intel/svm.c | 2 +- 4 files changed, 95 insertions(+), 2 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 0e429bab436f..43db6ebe8b57 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1879,6 +1879,8 @@ static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq) return DMAR_FECTL_REG; else if (iommu->pr_irq == irq) return DMAR_PECTL_REG; + else if (iommu->perf_irq == irq) + return DMAR_PERFINTRCTL_REG; else BUG(); } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index d56b3c386366..d7c61eb20f6f 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -130,6 +130,8 @@ #define DMAR_PERFCFGOFF_REG 0x310 #define DMAR_PERFOVFOFF_REG 0x318 #define DMAR_PERFCNTROFF_REG 0x31c +#define DMAR_PERFINTRSTS_REG 0x324 +#define DMAR_PERFINTRCTL_REG 0x328 #define DMAR_PERFEVNTCAP_REG 0x380 #define DMAR_ECMD_REG 0x400 #define DMAR_ECEO_REG 0x408 @@ -357,6 +359,9 @@ #define DMA_VCS_PAS ((u64)1) +/* PERFINTRSTS_REG */ +#define DMA_PERFINTRSTS_PIS ((u32)1) + #define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ do { \ cycles_t start_time = get_cycles(); \ @@ -635,8 +640,12 @@ struct iommu_pmu { struct pmu pmu; DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX); struct perf_event *event_list[IOMMU_PMU_IDX_MAX]; + unsigned char irq_name[16]; }; +#define IOMMU_IRQ_ID_OFFSET_PRQ (DMAR_UNITS_SUPPORTED) +#define IOMMU_IRQ_ID_OFFSET_PERF (2 * DMAR_UNITS_SUPPORTED) + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 reg_phys; /* physical address of hw register set */ @@ -650,7 +659,7 @@ struct intel_iommu { int seq_id; /* sequence id of the iommu */ int agaw; /* agaw of this iommu */ int msagaw; /* max sagaw of this iommu */ - unsigned int irq, pr_irq; + unsigned int irq, pr_irq, perf_irq; u16 segment; /* PCI segment# */ unsigned char name[13]; /* Device Name */ diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c index 322d362b85e4..e17d9743a0d8 100644 --- a/drivers/iommu/intel/perfmon.c +++ b/drivers/iommu/intel/perfmon.c @@ -505,6 +505,49 @@ static void iommu_pmu_disable(struct pmu *pmu) ecmd_submit_sync(iommu, DMA_ECMD_FREEZE, 0, 0); } +static void iommu_pmu_counter_overflow(struct iommu_pmu *iommu_pmu) +{ + struct perf_event *event; + u64 status; + int i; + + /* + * Two counters may overflow very close together.
Always check + * whether there are more to handle. + */ + while ((status = dmar_readq(iommu_pmu->overflow))) { + for_each_set_bit(i, (unsigned long *)&status, iommu_pmu->num_cntr) { + /* + * Find the assigned event of the counter. + * Accumulate the value into the event->count. + */ + event = iommu_pmu->event_list[i]; + if (!event) { + pr_warn_once("Cannot find the assigned event for counter %d\n", i); + continue; + } + iommu_pmu_event_update(event); + } + + dmar_writeq(iommu_pmu->overflow, status); + } +} + +static irqreturn_t iommu_pmu_irq_handler(int irq, void *dev_id) +{ + struct intel_iommu *iommu = dev_id; + + if (!dmar_readl(iommu->reg + DMAR_PERFINTRSTS_REG)) + return IRQ_NONE; + + iommu_pmu_counter_overflow(iommu->pmu); + + /* Clear the status bit */ + dmar_writel(iommu->reg + DMAR_PERFINTRSTS_REG, DMA_PERFINTRSTS_PIS); + + return IRQ_HANDLED; +} + static int __iommu_pmu_register(struct intel_iommu *iommu) { struct iommu_pmu *iommu_pmu = iommu->pmu; @@ -698,6 +741,38 @@ void free_iommu_pmu(struct intel_iommu *iommu) iommu->pmu = NULL; } +static int iommu_pmu_set_interrupt(struct intel_iommu *iommu) +{ + struct iommu_pmu *iommu_pmu = iommu->pmu; + int irq, ret; + + irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PERF + iommu->seq_id, iommu->node, iommu); + if (irq <= 0) + return -EINVAL; + + snprintf(iommu_pmu->irq_name, sizeof(iommu_pmu->irq_name), "dmar%d-perf", iommu->seq_id); + + iommu->perf_irq = irq; + ret = request_threaded_irq(irq, NULL, iommu_pmu_irq_handler, + IRQF_ONESHOT, iommu_pmu->irq_name, iommu); + if (ret) { + dmar_free_hwirq(irq); + iommu->perf_irq = 0; + return ret; + } + return 0; +} + +static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu) +{ + if (!iommu->perf_irq) + return; + + free_irq(iommu->perf_irq, iommu); + dmar_free_hwirq(iommu->perf_irq); + iommu->perf_irq = 0; +} + static int iommu_pmu_cpu_online(unsigned int cpu) { if (cpumask_empty(&iommu_pmu_cpu_mask)) @@ -774,8 +849,14 @@ void iommu_pmu_register(struct intel_iommu *iommu) if (iommu_pmu_cpuhp_setup(iommu_pmu)) goto unregister; + /* Set interrupt for overflow */ + if (iommu_pmu_set_interrupt(iommu)) + goto cpuhp_free; + return; +cpuhp_free: + iommu_pmu_cpuhp_free(iommu_pmu); unregister: perf_pmu_unregister(&iommu_pmu->pmu); err: @@ -790,6 +871,7 @@ void iommu_pmu_unregister(struct intel_iommu *iommu) if (!iommu_pmu) return; + iommu_pmu_unset_interrupt(iommu); iommu_pmu_cpuhp_free(iommu_pmu); perf_pmu_unregister(&iommu_pmu->pmu); } diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index e7b9bedebcc0..7367f56c3bad 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -78,7 +78,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) } iommu->prq = page_address(pages); - irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu); + irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu); if (irq <= 0) { pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n", iommu->name); -- cgit v1.2.3 From d8a7c0cf05a2ac79d3cbd0c0ed7d01ec01f75980 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 31 Jan 2023 15:37:40 +0800 Subject: iommu/vt-d: Enable IOMMU perfmon support Register and enable an IOMMU perfmon for each active IOMMU device. The failure of IOMMU perfmon registration doesn't impact other functionalities of an IOMMU device. 
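One detail worth spelling out, since it underpins both the overflow handler above and iommu_pmu_event_update() earlier in the series: the counter width W is enumerated rather than fixed, so deltas are computed by shifting both samples left by (64 - W) before subtracting, which makes the unsigned subtraction wrap correctly when the W-bit hardware counter rolls over. A minimal standalone sketch (function name invented for illustration):

static u64 example_counter_delta(u64 prev, u64 now, unsigned int width)
{
	unsigned int shift = 64 - width;

	/*
	 * Example: for a 48-bit counter, prev = 0xffffffffffff and
	 * now = 0x3 yield a delta of 4, not a huge bogus value.
	 */
	return ((now << shift) - (prev << shift)) >> shift;
}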
Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230128200428.1459118-8-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 3 +++ drivers/iommu/intel/iommu.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 43db6ebe8b57..6acfe879589c 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1144,6 +1144,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); if (err) goto err_sysfs; + + iommu_pmu_register(iommu); } drhd->iommu = iommu; @@ -1166,6 +1168,7 @@ error: static void free_iommu(struct intel_iommu *iommu) { if (intel_iommu_enabled && !iommu->drhd->ignored) { + iommu_pmu_unregister(iommu); iommu_device_unregister(&iommu->iommu); iommu_device_sysfs_remove(&iommu->iommu); } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index e314c30d371a..691b306fada5 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -29,6 +29,7 @@ #include "../iommu-sva.h" #include "pasid.h" #include "cap_audit.h" +#include "perfmon.h" #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE @@ -4012,6 +4013,8 @@ int __init intel_iommu_init(void) intel_iommu_groups, "%s", iommu->name); iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); + + iommu_pmu_register(iommu); } up_read(&dmar_global_lock); -- cgit v1.2.3 From 9e6a1825aca8568f624abeeb047969c72aa4c183 Mon Sep 17 00:00:00 2001 From: Eric Curtin Date: Wed, 1 Feb 2023 12:42:58 +0000 Subject: iommu: dart: DART_T8110_ERROR range should be 0 to 5 This was detected by smatch as one "else if" statement could never be reached. Confirmed bit order by comparing with python implementation [1]. drivers/iommu/apple-dart.c:991 apple_dart_t8110_irq() warn: duplicate check 'error_code == ((((1))) << (3))' (previous on line 989) Link: https://github.com/AsahiLinux/m1n1/commit/96b2d584feec1e3f7bfa [1] Fixes: d8bcc870d99d ("iommu: dart: Add t8110 DART support") Reported-by: Dan Carpenter Signed-off-by: Eric Curtin Reviewed-by: Sven Peter Link: https://lore.kernel.org/r/20230201124257.7801-1-ecurtin@redhat.com Signed-off-by: Joerg Roedel --- drivers/iommu/apple-dart.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 1fc64a42c890..abedcffed237 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -111,8 +111,8 @@ #define DART_T8110_ERROR_MASK 0x104 -#define DART_T8110_ERROR_READ_FAULT BIT(4) -#define DART_T8110_ERROR_WRITE_FAULT BIT(3) +#define DART_T8110_ERROR_READ_FAULT BIT(5) +#define DART_T8110_ERROR_WRITE_FAULT BIT(4) #define DART_T8110_ERROR_NO_PTE BIT(3) #define DART_T8110_ERROR_NO_PMD BIT(2) #define DART_T8110_ERROR_NO_PGD BIT(1) -- cgit v1.2.3 From 4762315d1c971312d55848fdc85eee7f0b09c8f2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 8 Feb 2023 17:03:59 -0800 Subject: iommu/of: mark an unused function as __maybe_unused When CONFIG_OF_ADDRESS is not set, there is a build warning/error about an unused function. Annotate the function to quieten the warning/error. 
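As a standalone illustration of the annotation (a hypothetical function, not the driver's code), __maybe_unused tells the compiler that the symbol being unreferenced in some configurations is intentional:

	#include <linux/compiler_attributes.h>

	/* Only called when CONFIG_OF_ADDRESS=y; the attribute suppresses
	 * -Wunused-function in configs where the caller is compiled out. */
	static int __maybe_unused example_helper(int val)
	{
		return val * 2;
	}

The warning being silenced: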
../drivers/iommu/of_iommu.c:176:29: warning: 'iommu_resv_region_get_type' defined but not used [-Wunused-function]
  176 | static enum iommu_resv_type iommu_resv_region_get_type(struct device *dev, struct resource *phys,
      |                             ^~~~~~~~~~~~~~~~~~~~~~~~~~

Fixes: a5bf3cfce8cb ("iommu: Implement of_iommu_get_resv_regions()") Signed-off-by: Randy Dunlap Cc: Thierry Reding Cc: Joerg Roedel Cc: Will Deacon Cc: iommu@lists.linux.dev Reviewed-by: Thierry Reding Link: https://lore.kernel.org/r/20230209010359.23831-1-rdunlap@infradead.org [joro: Improve code formatting] Signed-off-by: Joerg Roedel --- drivers/iommu/of_iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 3c2e2a13cf9c..40f57d293a79 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -173,8 +173,10 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, return ops; } -static enum iommu_resv_type iommu_resv_region_get_type(struct device *dev, struct resource *phys, - phys_addr_t start, size_t length) +static enum iommu_resv_type __maybe_unused +iommu_resv_region_get_type(struct device *dev, + struct resource *phys, + phys_addr_t start, size_t length) { phys_addr_t end = start + length - 1;
-- cgit v1.2.3

From 4daa861174d56023c2068ddb03de0752f07fa199 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 15 Feb 2023 21:21:16 -0400 Subject: iommu: Fix error unwind in iommu_group_alloc()

If either iommu_group_create_file() call fails then the iommu_group is leaked. Destroy it on these error paths.

Found by kselftest/iommu/iommufd_fail_nth

Fixes: bc7d12b91bd3 ("iommu: Implement reserved_regions iommu-group sysfs file") Fixes: c52c72d3dee8 ("iommu: Add sysfs attribyte for domain type") Signed-off-by: Jason Gunthorpe Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/0-v1-8f616bee028d+8b-iommu_group_alloc_leak_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 42591766266d..989c0c963be3 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -798,12 +798,16 @@ struct iommu_group *iommu_group_alloc(void) ret = iommu_group_create_file(group, &iommu_group_attr_reserved_regions); - if (ret) + if (ret) { + kobject_put(group->devices_kobj); return ERR_PTR(ret); + } ret = iommu_group_create_file(group, &iommu_group_attr_type); - if (ret) + if (ret) { + kobject_put(group->devices_kobj); return ERR_PTR(ret); + } pr_debug("Allocated group %d\n", group->id);
-- cgit v1.2.3

From 18792e99ea2fea27c72eb1ecca1879e5e6be304d Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Tue, 7 Feb 2023 09:17:52 +0000 Subject: iommu/amd: Do not identity map v2 capable device when snp is enabled

Flow:
- Booted system with SNP enabled, memory encryption off and IOMMU DMA translation mode
- AMD driver detects v2 capable device and amd_iommu_def_domain_type() returns identity mode
- amd_iommu_domain_alloc() returns NULL as SNP is enabled
- System will fail to register device

On an SNP-enabled system, passthrough mode is not supported. The IOMMU default domain is set to translation mode. We need to return zero from amd_iommu_def_domain_type() so that it allocates a translation domain.
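For reference, the core consumes this hook roughly as follows (a simplified sketch of iommu_get_def_domain_type(); the real function has additional checks):

	static int iommu_get_def_domain_type(struct device *dev)
	{
		const struct iommu_ops *ops = dev_iommu_ops(dev);

		if (ops->def_domain_type)
			return ops->def_domain_type(dev);

		/* 0 = no preference, fall back to the default domain policy */
		return 0;
	}

A nonzero return forces that domain type for the device; returning zero lets the core allocate the configured default (translation) domain instead.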
Fixes: fb2accadaa94 ("iommu/amd: Introduce function to check and enable SNP") CC: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20230207091752.7656-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 33c134a2b581..80749b99472f 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2403,12 +2403,17 @@ static int amd_iommu_def_domain_type(struct device *dev) return 0; /* - * Do not identity map IOMMUv2 capable devices when memory encryption is - * active, because some of those devices (AMD GPUs) don't have the - * encryption bit in their DMA-mask and require remapping. + * Do not identity map IOMMUv2 capable devices when: + * - memory encryption is active, because some of those devices + * (AMD GPUs) don't have the encryption bit in their DMA-mask + * and require remapping. + * - SNP is enabled, because it prohibits DTE[Mode]=0. */ - if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT) && dev_data->iommu_v2) + if (dev_data->iommu_v2 && + !cc_platform_has(CC_ATTR_MEM_ENCRYPT) && + !amd_iommu_snp_en) { return IOMMU_DOMAIN_IDENTITY; + } return 0; }
-- cgit v1.2.3

From 996d120b4de2b0d6b592bd9fbbe6e244b81ab3cc Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 15 Feb 2023 05:26:42 +0000 Subject: iommu/amd: Improve page fault error reporting

If the IOMMU domain for a device group is not set up properly then we may hit an IOMMU page fault. The current page fault handler assumes that the domain is always set up and will hit a NULL pointer dereference (see below sample log). Let's check whether the domain is set up and log an appropriate message.

Sample log:
----------
amdgpu 0000:00:01.0: amdgpu: SE 1, SH per SE 1, CU per SH 8, active_cu_number 6
BUG: kernel NULL pointer dereference, address: 0000000000000058
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP NOPTI
CPU: 2 PID: 56 Comm: irq/24-AMD-Vi Not tainted 6.2.0-rc2+ #89
Hardware name: xxx
RIP: 0010:report_iommu_fault+0x11/0x90
[...]
Call Trace:
 amd_iommu_int_thread+0x60c/0x760
 ? __pfx_irq_thread_fn+0x10/0x10
 irq_thread_fn+0x1f/0x60
 irq_thread+0xea/0x1a0
 ? preempt_count_add+0x6a/0xa0
 ? __pfx_irq_thread_dtor+0x10/0x10
 ? __pfx_irq_thread+0x10/0x10
 kthread+0xe9/0x110
 ? __pfx_kthread+0x10/0x10
 ret_from_fork+0x2c/0x50

Reported-by: Matt Fagnani Suggested-by: Joerg Roedel Signed-off-by: Vasant Hegde Link: https://bugzilla.kernel.org/show_bug.cgi?id=216865 Link: https://lore.kernel.org/lkml/15d0f9ff-2a56-b3e9-5b45-e6b23300ae3b@leemhuis.info/ Link: https://lore.kernel.org/r/20230215052642.6016-3-vasant.hegde@amd.com Cc: stable@vger.kernel.org [joro: Edit commit message] Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 80749b99472f..f97c0bf930a7 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -558,6 +558,15 @@ static void amd_iommu_report_page_fault(struct amd_iommu *iommu, * prevent logging it.
*/ if (IS_IOMMU_MEM_TRANSACTION(flags)) { + /* Device not attached to domain properly */ + if (dev_data->domain == NULL) { + pr_err_ratelimited("Event logged [Device not attached to domain properly]\n"); + pr_err_ratelimited(" device=%04x:%02x:%02x.%x domain=0x%04x\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), + PCI_FUNC(devid), domain_id); + goto out; + } + if (!report_iommu_fault(&dev_data->domain->domain, &pdev->dev, address, IS_WRITE_REQUEST(flags) ?
-- cgit v1.2.3

From 60b1daa3b168fbc648ae2ad28a84759223e49e18 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 16 Feb 2023 21:08:13 +0800 Subject: iommu/vt-d: Fix error handling in sva enable/disable paths

Roll back all previous actions in error paths of intel_iommu_enable_sva() and intel_iommu_disable_sva().

Fixes: d5b9e4bfe0d8 ("iommu/vt-d: Report prq to io-pgfault framework") Reviewed-by: Kevin Tian Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230208051559.700109-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 691b306fada5..9107fac8c375 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4644,8 +4644,12 @@ static int intel_iommu_enable_sva(struct device *dev) return -EINVAL; ret = iopf_queue_add_device(iommu->iopf_queue, dev); - if (!ret) - ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); + if (ret) + return ret; + + ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); + if (ret) + iopf_queue_remove_device(iommu->iopf_queue, dev); return ret; } @@ -4657,8 +4661,12 @@ static int intel_iommu_disable_sva(struct device *dev) int ret; ret = iommu_unregister_device_fault_handler(dev); - if (!ret) - ret = iopf_queue_remove_device(iommu->iopf_queue, dev); + if (ret) + return ret; + + ret = iopf_queue_remove_device(iommu->iopf_queue, dev); + if (ret) + iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); return ret; }
-- cgit v1.2.3

From 16a75bbe480c3598b3af57a2504ea89b1e32c3ac Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 16 Feb 2023 21:08:14 +0800 Subject: iommu/vt-d: Avoid superfluous IOTLB tracking in lazy mode

Intel IOMMU driver implements IOTLB flush queue with domain selective or PASID selective invalidations. In this case there's no need to track IOVA page range and sync IOTLBs, which may cause a significant performance hit.

This patch adds a check to avoid IOVA gather page and IOTLB sync for the lazy path. The performance on a Sapphire Rapids 100Gb NIC improves as follows (as measured by iperf send):
w/o this fix: ~48 Gbits/s
with this fix: ~54 Gbits/s

Cc: Fixes: 2a2b8eaa5b25 ("iommu: Handle freelists when using deferred flushing in iommu drivers") Reviewed-by: Robin Murphy Reviewed-by: Kevin Tian Tested-by: Sanjay Kumar Signed-off-by: Sanjay Kumar Signed-off-by: Jacob Pan Link: https://lore.kernel.org/r/20230209175330.1783556-1-jacob.jun.pan@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 9107fac8c375..547977d535c5 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4348,7 +4348,12 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, if (dmar_domain->max_addr == iova + size) dmar_domain->max_addr = iova; - iommu_iotlb_gather_add_page(domain, gather, iova, size); + /* + * We do not use page-selective IOTLB invalidation in flush queue, + * so there is no need to track page and sync iotlb. + */ + if (!iommu_iotlb_gather_queued(gather)) + iommu_iotlb_gather_add_page(domain, gather, iova, size); return size; }
-- cgit v1.2.3

From 194b3348bdbb7db65375c72f3f774aee4cc6614e Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 16 Feb 2023 21:08:15 +0800 Subject: iommu/vt-d: Fix PASID directory pointer coherency

On platforms that do not support IOMMU Extended capability bit 0 Page-walk Coherency, CPU caches are not snooped when IOMMU is accessing any translation structures. IOMMU accesses go directly to memory. Intel IOMMU code was missing a flush for the PASID table directory that resulted in the unrecoverable fault as shown below.

This patch adds clflush calls whenever allocating and updating a PASID table directory to ensure cache coherency.

In the reverse direction, there's no need to clflush the PASID directory pointer when we deactivate a context entry, because the IOMMU hardware will not see the old PASID directory pointer after we clear the context entry. PASID directory entries are also never freed once allocated.
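The maintenance pattern is the usual one for non-coherent page-walks (a condensed view of the hunks below, assuming ecap_coherent() reports the Page-walk Coherency capability):

	/* After writing a translation structure in memory, flush the CPU
	 * cache lines covering it so a non-snooping IOMMU reads the new
	 * contents instead of stale memory. */
	if (!ecap_coherent(info->iommu->ecap))
		clflush_cache_range(entries, VTD_PAGE_SIZE);

The unrecoverable fault looked like this: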
DMAR: DRHD: handling fault status reg 3
DMAR: [DMA Read NO_PASID] Request device [00:0d.2] fault addr 0x1026a4000 [fault reason 0x51] SM: Present bit in Directory Entry is clear
DMAR: Dump dmar1 table entries for IOVA 0x1026a4000
DMAR: scalable mode root entry: hi 0x0000000102448001, low 0x0000000101b3e001
DMAR: context entry: hi 0x0000000000000000, low 0x0000000101b4d401
DMAR: pasid dir entry: 0x0000000101b4e001
DMAR: pasid table entry[0]: 0x0000000000000109
DMAR: pasid table entry[1]: 0x0000000000000001
DMAR: pasid table entry[2]: 0x0000000000000000
DMAR: pasid table entry[3]: 0x0000000000000000
DMAR: pasid table entry[4]: 0x0000000000000000
DMAR: pasid table entry[5]: 0x0000000000000000
DMAR: pasid table entry[6]: 0x0000000000000000
DMAR: pasid table entry[7]: 0x0000000000000000
DMAR: PTE not present at level 4

Cc: Fixes: 0bbeb01a4faf ("iommu/vt-d: Manage scalalble mode PASID tables") Reviewed-by: Kevin Tian Reported-by: Sukumar Ghorai Signed-off-by: Ashok Raj Signed-off-by: Jacob Pan Link: https://lore.kernel.org/r/20230209212843.1788125-1-jacob.jun.pan@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/pasid.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index ec964ac7d797..9d2f05cf6164 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -128,6 +128,9 @@ int intel_pasid_alloc_table(struct device *dev) pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3); info->pasid_table = pasid_table; + if (!ecap_coherent(info->iommu->ecap)) + clflush_cache_range(pasid_table->table, size); + return 0; } @@ -215,6 +218,10 @@ retry: free_pgtable_page(entries); goto retry; } + if (!ecap_coherent(info->iommu->ecap)) { + clflush_cache_range(entries, VTD_PAGE_SIZE); + clflush_cache_range(&dir[dir_index].val, sizeof(*dir)); + } } return &entries[index];
-- cgit v1.2.3

From 257ec290741924f8df678927d0dfecb1deebb9c5 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Thu, 16 Feb 2023 21:08:16 +0800 Subject: iommu/vt-d: Allow to use flush-queue when first level is default

Commit 29b32839725f ("iommu/vt-d: Do not use flush-queue when caching-mode is on") forced default domains to be strict mode as long as IOMMU caching-mode is flagged. The reason for doing this is that when vIOMMU uses VT-d caching mode to synchronize shadowing page tables, the strict mode shows better performance.

However, this optimization is orthogonal to the first-level page table because the Intel VT-d architecture does not define the caching mode of the first-level page table. Refer to VT-d spec, section 6.1, "When the CM field is reported as Set, any software updates to remapping structures other than first-stage mapping (including updates to not-present entries or present entries whose programming resulted in translation faults) requires explicit invalidation of the caches."

Exclude the first-level page table from this optimization.

Generally using first-stage translation in vIOMMU implies nested translation enabled in the physical IOMMU. In this case the first-stage page table is wholly captured by the guest. The vIOMMU only needs to transfer the cache invalidations on vIOMMU to the physical IOMMU. Forcing the default domain to strict mode will cause more frequent cache invalidations, resulting in performance degradation.
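The resulting policy is simply to exclude first-stage defaults from the caching-mode special case (a condensed view of the hunk below):

	/* Strict mode is only needed when a caching-mode vIOMMU shadows
	 * second-stage page tables; first-stage updates are invalidated
	 * explicitly regardless of CM. */
	if (cap_caching_mode(iommu->cap) &&
	    !first_level_by_default(IOMMU_DOMAIN_DMA))
		iommu_set_dma_strict();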
In a real performance benchmark test measured by iperf receive, the result on a Sapphire Rapids 100Gb NIC shows:
w/ this fix: ~51 Gbits/s
w/o this fix: ~39.3 Gbits/s

Theoretically a first-stage IOMMU page table can still be shadowed in the absence of caching mode, e.g. with the host write-protecting the guest IOMMU page table to synchronize changed PTEs with the physical IOMMU page table. In this case the shadowing overhead is decoupled from emulating IOTLB invalidation, and the overhead of the latter is solely decided by the frequency of IOTLB invalidations. Hence allowing the guest default DMA domain to be lazy can also benefit the overall performance by reducing the total VM-exit number.

Fixes: 29b32839725f ("iommu/vt-d: Do not use flush-queue when caching-mode is on") Reported-by: Sanjay Kumar Suggested-by: Sanjay Kumar Signed-off-by: Tina Zhang Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20230214025618.2292889-1-tina.zhang@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 547977d535c5..34b45ae99025 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4005,7 +4005,8 @@ int __init intel_iommu_init(void) * is likely to be much lower than the overhead of synchronizing * the virtual and physical IOMMU page-tables. */ - if (cap_caching_mode(iommu->cap)) { + if (cap_caching_mode(iommu->cap) && + !first_level_by_default(IOMMU_DOMAIN_DMA)) { pr_info_once("IOMMU batching disallowed due to virtualization\n"); iommu_set_dma_strict(); }
-- cgit v1.2.3

From 2cc73c5712f97de98c38c2fafc1f288354a9f3c3 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 15 Feb 2023 05:26:40 +0000 Subject: iommu: Attach device group to old domain in error path

iommu_attach_group() attaches all devices in a group to a domain and then sets the group domain (group->domain). The current code (__iommu_attach_group()) does not handle the error path. This creates a problem, as the device-to-domain attachment is left in an inconsistent state.

Flow:
- During boot, the iommu attaches devices to the default domain.
- Later some device driver (like amd/iommu_v2 or vfio) tries to attach a device to a new domain.
- In the iommu_attach_group() path we detach the device from the current domain, then try to attach the devices to the new domain.
- If attaching a device to the new domain fails, the device-to-domain link is broken.
- iommu_attach_group() returns an error.
- At this stage the iommu_attach_group() caller thinks attaching the device to the new domain failed and the devices are still attached to the old domain.
- But in reality the device-to-old-domain link is broken. It will result in all sorts of failures (like IO page faults) later.

To recover from this situation, we need to attach all devices back to the old domain. Also log a warning if attaching a device back to the old domain fails.
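The recovery follows the generic "restore last known-good state" shape (a sketch with a hypothetical attach_all() helper; the actual patch below uses __iommu_group_set_domain()):

	ret = attach_all(group, new_domain);	/* hypothetical helper */
	if (ret) {
		/*
		 * The new domain was rejected part-way: put every device
		 * back on the old, known-compatible domain and warn loudly
		 * if even that fails.
		 */
		if (attach_all(group, old_domain))
			WARN(1, "iommu driver failed to restore old domain\n");
	}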
Suggested-by: Lu Baolu Reported-by: Matt Fagnani Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Tested-by: Matt Fagnani Link: https://lore.kernel.org/r/20230215052642.6016-1-vasant.hegde@amd.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=216865 Link: https://lore.kernel.org/lkml/15d0f9ff-2a56-b3e9-5b45-e6b23300ae3b@leemhuis.info/ Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 989c0c963be3..1a0e41c8839b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2123,8 +2123,22 @@ static int __iommu_attach_group(struct iommu_domain *domain, ret = __iommu_group_for_each_dev(group, domain, iommu_group_do_attach_device); - if (ret == 0) + if (ret == 0) { group->domain = domain; + } else { + /* + * To recover from the case when certain device within the + * group fails to attach to the new domain, we need force + * attaching all devices back to the old domain. The old + * domain is compatible for all devices in the group, + * hence the iommu driver should always return success. + */ + struct iommu_domain *old_domain = group->domain; + + group->domain = NULL; + WARN(__iommu_group_set_domain(group, old_domain), + "iommu driver failed to attach a compatible domain"); + } return ret; }
-- cgit v1.2.3

From f451c7a5a3b818ecfeba2ba258570769998baf3a Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 15 Feb 2023 05:26:41 +0000 Subject: iommu/amd: Skip attach device domain is same as new domain

If device->domain is the same as the new domain, then we can skip the device attach process.

Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20230215052642.6016-2-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index f97c0bf930a7..2aff392d9895 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2184,6 +2184,13 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, struct amd_iommu *iommu = rlookup_amd_iommu(dev); int ret; + /* + * Skip attach device to domain if new domain is same as + * devices current domain + */ + if (dev_data->domain == domain) + return 0; + dev_data->defer_attach = false; if (dev_data->domain)
-- cgit v1.2.3