From efc30a8f15a7981737297f9e9c62fb814d74e268 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 9 Dec 2022 13:23:08 -0400 Subject: iommu: Add iommu_group_has_isolated_msi() Compute the isolated_msi over all the devices in the IOMMU group because iommufd and vfio both need to know that the entire group is isolated before granting access to it. Link: https://lore.kernel.org/r/2-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommu.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index de91dd88705b..4c2199a493d8 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "dma-iommu.h" @@ -1897,6 +1898,31 @@ bool device_iommu_capable(struct device *dev, enum iommu_cap cap) } EXPORT_SYMBOL_GPL(device_iommu_capable); +/** + * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi() + * for a group + * @group: Group to query + * + * IOMMU groups should not have differing values of + * msi_device_has_isolated_msi() for devices in a group. However nothing + * directly prevents this, so ensure mistakes don't result in isolation failures + * by checking that all the devices are the same. + */ +bool iommu_group_has_isolated_msi(struct iommu_group *group) +{ + struct group_device *group_dev; + bool ret = true; + + mutex_lock(&group->mutex); + list_for_each_entry(group_dev, &group->devices, list) { + ret &= msi_device_has_isolated_msi(group_dev->dev) || + device_iommu_capable(group_dev->dev, IOMMU_CAP_INTR_REMAP); + } + mutex_unlock(&group->mutex); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi); + /** * iommu_set_fault_handler() - set a fault handler for an iommu domain * @domain: iommu domain -- cgit v1.2.3 From 6b1a7a00427cb54a99237c5f27f1a2875c9960b0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 8 Dec 2022 11:10:55 -0400 Subject: vfio/type1: Convert to iommu_group_has_isolated_msi() Trivially use the new API. Link: https://lore.kernel.org/r/3-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- drivers/vfio/vfio_iommu_type1.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 23c24fe98c00..393b27a3bd87 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -37,7 +37,6 @@ #include #include #include -#include #include "vfio.h" #define DRIVER_VERSION "0.2" @@ -2160,12 +2159,6 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu, list_splice_tail(iova_copy, iova); } -/* Redundantly walks non-present capabilities to simplify caller */ -static int vfio_iommu_device_capable(struct device *dev, void *data) -{ - return device_iommu_capable(dev, (enum iommu_cap)data); -} - static int vfio_iommu_domain_alloc(struct device *dev, void *data) { struct iommu_domain **domain = data; @@ -2180,7 +2173,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, struct vfio_iommu *iommu = iommu_data; struct vfio_iommu_group *group; struct vfio_domain *domain, *d; - bool resv_msi, msi_remap; + bool resv_msi; phys_addr_t resv_msi_base = 0; struct iommu_domain_geometry *geo; LIST_HEAD(iova_copy); @@ -2278,11 +2271,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, INIT_LIST_HEAD(&domain->group_list); list_add(&group->next, &domain->group_list); - msi_remap = irq_domain_check_msi_remap() || - iommu_group_for_each_dev(iommu_group, (void *)IOMMU_CAP_INTR_REMAP, - vfio_iommu_device_capable); - - if (!allow_unsafe_interrupts && !msi_remap) { + if (!allow_unsafe_interrupts && + !iommu_group_has_isolated_msi(iommu_group)) { pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n", __func__); ret = -EPERM; -- cgit v1.2.3 From 25fc417f79381760382c34be699273554c8c5cc0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 8 Dec 2022 10:50:30 -0400 Subject: iommufd: Convert to msi_device_has_isolated_msi() Trivially use the new API. Link: https://lore.kernel.org/r/4-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index d81f93a321af..9f3b9674d72e 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -4,7 +4,6 @@ #include #include #include -#include #include "io_pagetable.h" #include "iommufd_private.h" @@ -169,8 +168,7 @@ static int iommufd_device_setup_msi(struct iommufd_device *idev, * operation from the device (eg a simple DMA) cannot trigger an * interrupt outside this iommufd context. */ - if (!device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP) && - !irq_domain_check_msi_remap()) { + if (!iommu_group_has_isolated_msi(idev->group)) { if (!allow_unsafe_interrupts) return -EPERM; -- cgit v1.2.3 From dcb83f6ec1bf08a44b3f19719b56e8dc18058ff5 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Nov 2022 20:12:43 -0400 Subject: genirq/msi: Rename IRQ_DOMAIN_MSI_REMAP to IRQ_DOMAIN_ISOLATED_MSI What x86 calls "interrupt remapping" is one way to achieve isolated MSI, make it clear this is talking about isolated MSI, no matter how it is achieved. This matches the new driver facing API name of msi_device_has_isolated_msi() No functional change. Link: https://lore.kernel.org/r/6-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Thomas Gleixner Signed-off-by: Jason Gunthorpe --- drivers/irqchip/irq-gic-v3-its.c | 4 ++-- include/linux/irqdomain.h | 6 ++++-- kernel/irq/msi.c | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 973ede0197e3..b4069f825a9b 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -4692,7 +4692,7 @@ static bool __maybe_unused its_enable_quirk_socionext_synquacer(void *data) } /* the pre-ITS breaks isolation, so disable MSI remapping */ - its->msi_domain_flags &= ~IRQ_DOMAIN_FLAG_MSI_REMAP; + its->msi_domain_flags &= ~IRQ_DOMAIN_FLAG_ISOLATED_MSI; return true; } return false; @@ -5074,7 +5074,7 @@ static int __init its_probe_one(struct resource *res, its->cmd_write = its->cmd_base; its->fwnode_handle = handle; its->get_msi_base = its_irq_get_msi_base; - its->msi_domain_flags = IRQ_DOMAIN_FLAG_MSI_REMAP; + its->msi_domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI; its_enable_quirks(its); diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index b04ce03d3bb6..0a3e974b7288 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -192,8 +192,10 @@ enum { /* Irq domain implements MSIs */ IRQ_DOMAIN_FLAG_MSI = (1 << 4), - /* Irq domain implements MSI remapping */ - IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), + /* + * Irq domain implements isolated MSI, see msi_device_has_isolated_msi() + */ + IRQ_DOMAIN_FLAG_ISOLATED_MSI = (1 << 5), /* Irq domain doesn't translate anything */ IRQ_DOMAIN_FLAG_NO_MAP = (1 << 6), diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index dfb5d40abac9..ac5e224a11b9 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -1645,7 +1645,7 @@ bool msi_device_has_isolated_msi(struct device *dev) struct irq_domain *domain = dev_get_msi_domain(dev); for (; domain; domain = domain->parent) - if (domain->flags & IRQ_DOMAIN_FLAG_MSI_REMAP) + if (domain->flags & IRQ_DOMAIN_FLAG_ISOLATED_MSI) return true; return false; } -- cgit v1.2.3 From f188bdb5f1df5b35374482ea0568de85f1f468ea Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Nov 2022 20:18:14 -0400 Subject: iommu/x86: Replace IOMMU_CAP_INTR_REMAP with IRQ_DOMAIN_FLAG_ISOLATED_MSI On x86 platforms when the HW can support interrupt remapping the iommu driver creates an irq_domain for the IR hardware and creates a child MSI irq_domain. When the global irq_remapping_enabled is set, the IR MSI domain is assigned to the PCI devices (by intel_irq_remap_add_device(), or amd_iommu_set_pci_msi_domain()) making those devices have the isolated MSI property. Due to how interrupt domains work, setting IRQ_DOMAIN_FLAG_ISOLATED_MSI on the parent IR domain will cause all struct devices attached to it to return true from msi_device_has_isolated_msi(). This replaces the IOMMU_CAP_INTR_REMAP flag as all places using IOMMU_CAP_INTR_REMAP also call msi_device_has_isolated_msi() Set the flag and delete the cap. Link: https://lore.kernel.org/r/7-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Acked-by: Thomas Gleixner Signed-off-by: Jason Gunthorpe --- drivers/iommu/amd/iommu.c | 5 ++--- drivers/iommu/intel/iommu.c | 2 -- drivers/iommu/intel/irq_remapping.c | 3 ++- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index cbeaab55c0db..321d50e9df5b 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2271,8 +2271,6 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap) switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: return true; - case IOMMU_CAP_INTR_REMAP: - return (irq_remapping_enabled == 1); case IOMMU_CAP_NOEXEC: return false; case IOMMU_CAP_PRE_BOOT_PROTECTION: @@ -3671,7 +3669,8 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu) } irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI); - iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT; + iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT | + IRQ_DOMAIN_FLAG_ISOLATED_MSI; if (amd_iommu_np_cache) iommu->ir_domain->msi_parent_ops = &virt_amdvi_msi_parent_ops; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 59df7e42fd53..7cfab5fd5e59 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4464,8 +4464,6 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap) switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: return true; - case IOMMU_CAP_INTR_REMAP: - return irq_remapping_enabled == 1; case IOMMU_CAP_PRE_BOOT_PROTECTION: return dmar_platform_optin(); case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index f58f5f57af78..6d01fa078c36 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -573,7 +573,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) } irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_DMAR); - iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT; + iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT | + IRQ_DOMAIN_FLAG_ISOLATED_MSI; if (cap_caching_mode(iommu->cap)) iommu->ir_domain->msi_parent_ops = &virt_dmar_msi_parent_ops; -- cgit v1.2.3 From bf210f793937a634bae6eda6a6d699c00b2b53d9 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Nov 2022 20:31:57 -0400 Subject: irq/s390: Add arch_is_isolated_msi() for s390 s390 doesn't use irq_domains, so it has no place to set IRQ_DOMAIN_FLAG_ISOLATED_MSI. Instead of continuing to abuse the iommu subsystem to convey this information add a simple define which s390 can make statically true. The define will cause msi_device_has_isolated() to return true. Remove IOMMU_CAP_INTR_REMAP from the s390 iommu driver. Link: https://lore.kernel.org/r/8-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Reviewed-by: Matthew Rosato Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- arch/s390/include/asm/msi.h | 17 +++++++++++++++++ drivers/iommu/s390-iommu.c | 2 -- include/linux/msi.h | 10 +++++++--- kernel/irq/msi.c | 2 +- 4 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 arch/s390/include/asm/msi.h (limited to 'drivers') diff --git a/arch/s390/include/asm/msi.h b/arch/s390/include/asm/msi.h new file mode 100644 index 000000000000..399343ed9ffb --- /dev/null +++ b/arch/s390/include/asm/msi.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_MSI_H +#define _ASM_S390_MSI_H +#include + +/* + * Work around S390 not using irq_domain at all so we can't set + * IRQ_DOMAIN_FLAG_ISOLATED_MSI. See for an explanation how it works: + * + * https://lore.kernel.org/r/31af8174-35e9-ebeb-b9ef-74c90d4bfd93@linux.ibm.com/ + * + * Note this is less isolated than the ARM/x86 versions as userspace can trigger + * MSI belonging to kernel devices within the same gisa. + */ +#define arch_is_isolated_msi() true + +#endif diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index ed33c6cce083..bb00580a30d8 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -34,8 +34,6 @@ static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap) switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: return true; - case IOMMU_CAP_INTR_REMAP: - return true; default: return false; } diff --git a/include/linux/msi.h b/include/linux/msi.h index e8a3f3a8a7f4..13c9b74a4575 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -48,6 +48,10 @@ typedef struct arch_msi_msg_data { } __attribute__ ((packed)) arch_msi_msg_data_t; #endif +#ifndef arch_is_isolated_msi +#define arch_is_isolated_msi() false +#endif + /** * msi_msg - Representation of a MSI message * @address_lo: Low 32 bits of msi message address @@ -657,10 +661,10 @@ static inline bool msi_device_has_isolated_msi(struct device *dev) /* * Arguably if the platform does not enable MSI support then it has * "isolated MSI", as an interrupt controller that cannot receive MSIs - * is inherently isolated by our definition. As nobody seems to needs - * this be conservative and return false anyhow. + * is inherently isolated by our definition. The default definition for + * arch_is_isolated_msi() is conservative and returns false anyhow. */ - return false; + return arch_is_isolated_msi(); } #endif /* CONFIG_GENERIC_MSI_IRQ */ diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index ac5e224a11b9..4dec57fc4ea6 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -1647,6 +1647,6 @@ bool msi_device_has_isolated_msi(struct device *dev) for (; domain; domain = domain->parent) if (domain->flags & IRQ_DOMAIN_FLAG_ISOLATED_MSI) return true; - return false; + return arch_is_isolated_msi(); } EXPORT_SYMBOL_GPL(msi_device_has_isolated_msi); -- cgit v1.2.3 From b062007c63eb4452f1122384e86d402531fb1d52 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Nov 2022 20:34:32 -0400 Subject: iommu: Remove IOMMU_CAP_INTR_REMAP No iommu driver implements this any more, get rid of it. Link: https://lore.kernel.org/r/9-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommu.c | 6 ++---- include/linux/iommu.h | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 4c2199a493d8..834e6ecf3e51 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1914,10 +1914,8 @@ bool iommu_group_has_isolated_msi(struct iommu_group *group) bool ret = true; mutex_lock(&group->mutex); - list_for_each_entry(group_dev, &group->devices, list) { - ret &= msi_device_has_isolated_msi(group_dev->dev) || - device_iommu_capable(group_dev->dev, IOMMU_CAP_INTR_REMAP); - } + list_for_each_entry(group_dev, &group->devices, list) + ret &= msi_device_has_isolated_msi(group_dev->dev); mutex_unlock(&group->mutex); return ret; } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9b7a9fa5ad28..933cc57bfc48 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -120,7 +120,6 @@ static inline bool iommu_is_dma_domain(struct iommu_domain *domain) enum iommu_cap { IOMMU_CAP_CACHE_COHERENCY, /* IOMMU_CACHE is supported */ - IOMMU_CAP_INTR_REMAP, /* IOMMU supports interrupt isolation */ IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */ IOMMU_CAP_PRE_BOOT_PROTECTION, /* Firmware says it used the IOMMU for DMA protection and we should too */ -- cgit v1.2.3 From 84798f2849942bb5e8817417adfdfa6241df2835 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 20 Jan 2023 04:20:40 -0800 Subject: iommufd: Add three missing structures in ucmd_buffer struct iommu_ioas_copy, struct iommu_option and struct iommu_vfio_ioas are missed in ucmd_buffer. Although they are smaller than the size of ucmd_buffer, it is safer to list them in ucmd_buffer explicitly. Fixes: aad37e71d5c4 ("iommufd: IOCTLs for the io_pagetable") Fixes: d624d6652a65 ("iommufd: vfio container FD ioctl compatibility") Link: https://lore.kernel.org/r/20230120122040.280219-1-yi.l.liu@intel.com Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 083e6fcbe10a..3fbe636c3d8a 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -252,9 +252,12 @@ union ucmd_buffer { struct iommu_destroy destroy; struct iommu_ioas_alloc alloc; struct iommu_ioas_allow_iovas allow_iovas; + struct iommu_ioas_copy ioas_copy; struct iommu_ioas_iova_ranges iova_ranges; struct iommu_ioas_map map; struct iommu_ioas_unmap unmap; + struct iommu_option option; + struct iommu_vfio_ioas vfio_ioas; #ifdef CONFIG_IOMMUFD_TEST struct iommu_test_cmd test; #endif -- cgit v1.2.3 From c9a397cee9f5c93a7f48e18038b14057044db6ba Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 18 Jan 2023 13:50:28 -0400 Subject: vfio: Support VFIO_NOIOMMU with iommufd Add a small amount of emulation to vfio_compat to accept the SET_IOMMU to VFIO_NOIOMMU_IOMMU and have vfio just ignore iommufd if it is working on a no-iommu enabled device. Move the enable_unsafe_noiommu_mode module out of container.c into vfio_main.c so that it is always available even if VFIO_CONTAINER=n. This passes Alex's mini-test: https://github.com/awilliam/tests/blob/master/vfio-noiommu-pci-device-open.c Link: https://lore.kernel.org/r/0-v3-480cd64a16f7+1ad0-iommufd_noiommu_jgg@nvidia.com Reviewed-by: Kevin Tian Acked-by: Alex Williamson Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/Kconfig | 2 +- drivers/iommu/iommufd/iommufd_private.h | 2 + drivers/iommu/iommufd/vfio_compat.c | 105 ++++++++++++++++++++++++++------ drivers/vfio/Kconfig | 2 +- drivers/vfio/container.c | 7 --- drivers/vfio/group.c | 7 ++- drivers/vfio/iommufd.c | 19 +++++- drivers/vfio/vfio.h | 8 ++- drivers/vfio/vfio_main.c | 7 +++ include/linux/iommufd.h | 12 +++- 10 files changed, 136 insertions(+), 35 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig index 8306616b6d81..ada693ea51a7 100644 --- a/drivers/iommu/iommufd/Kconfig +++ b/drivers/iommu/iommufd/Kconfig @@ -23,7 +23,7 @@ config IOMMUFD_VFIO_CONTAINER removed. IOMMUFD VFIO container emulation is known to lack certain features - of the native VFIO container, such as no-IOMMU support, peer-to-peer + of the native VFIO container, such as peer-to-peer DMA mapping, PPC IOMMU support, as well as other potentially undiscovered gaps. This option is currently intended for the purpose of testing IOMMUFD with unmodified userspace supporting VFIO diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 222e86591f8a..9d7f71510ca1 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -18,6 +18,8 @@ struct iommufd_ctx { struct xarray objects; u8 account_mode; + /* Compatibility with VFIO no iommu */ + u8 no_iommu_mode; struct iommufd_ioas *vfio_ioas; }; diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c index 3ceca0e8311c..8e38bb02d5e1 100644 --- a/drivers/iommu/iommufd/vfio_compat.c +++ b/drivers/iommu/iommufd/vfio_compat.c @@ -26,39 +26,84 @@ out_unlock: } /** - * iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use + * iommufd_vfio_compat_ioas_get_id - Ensure a compat IOAS exists + * @ictx: Context to operate on + * @out_ioas_id: The IOAS ID of the compatibility IOAS + * + * Return the ID of the current compatibility IOAS. The ID can be passed into + * other functions that take an ioas_id. + */ +int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) +{ + struct iommufd_ioas *ioas; + + ioas = get_compat_ioas(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + *out_ioas_id = ioas->obj.id; + iommufd_put_object(&ioas->obj); + return 0; +} +EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO); + +/** + * iommufd_vfio_compat_set_no_iommu - Called when a no-iommu device is attached + * @ictx: Context to operate on + * + * This allows selecting the VFIO_NOIOMMU_IOMMU and blocks normal types. + */ +int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) +{ + int ret; + + xa_lock(&ictx->objects); + if (!ictx->vfio_ioas) { + ictx->no_iommu_mode = 1; + ret = 0; + } else { + ret = -EINVAL; + } + xa_unlock(&ictx->objects); + return ret; +} +EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_set_no_iommu, IOMMUFD_VFIO); + +/** + * iommufd_vfio_compat_ioas_create - Ensure the compat IOAS is created * @ictx: Context to operate on - * @out_ioas_id: The ioas_id the caller should use * * The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate * on since they do not have an IOAS ID input in their ABI. Only attaching a - * group should cause a default creation of the internal ioas, this returns the - * existing ioas if it has already been assigned somehow. + * group should cause a default creation of the internal ioas, this does nothing + * if an existing ioas has already been assigned somehow. */ -int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) +int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx) { struct iommufd_ioas *ioas = NULL; - struct iommufd_ioas *out_ioas; + int ret; ioas = iommufd_ioas_alloc(ictx); if (IS_ERR(ioas)) return PTR_ERR(ioas); xa_lock(&ictx->objects); - if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) - out_ioas = ictx->vfio_ioas; - else { - out_ioas = ioas; - ictx->vfio_ioas = ioas; + /* + * VFIO won't allow attaching a container to both iommu and no iommu + * operation + */ + if (ictx->no_iommu_mode) { + ret = -EINVAL; + goto out_abort; } - xa_unlock(&ictx->objects); - *out_ioas_id = out_ioas->obj.id; - if (out_ioas != ioas) { - iommufd_put_object(&out_ioas->obj); - iommufd_object_abort(ictx, &ioas->obj); - return 0; + if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) { + ret = 0; + iommufd_put_object(&ictx->vfio_ioas->obj); + goto out_abort; } + ictx->vfio_ioas = ioas; + xa_unlock(&ictx->objects); + /* * An automatically created compat IOAS is treated as a userspace * created object. Userspace can learn the ID via IOMMU_VFIO_IOAS_GET, @@ -67,8 +112,13 @@ int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) */ iommufd_object_finalize(ictx, &ioas->obj); return 0; + +out_abort: + xa_unlock(&ictx->objects); + iommufd_object_abort(ictx, &ioas->obj); + return ret; } -EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_id, IOMMUFD_VFIO); +EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_create, IOMMUFD_VFIO); int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) { @@ -235,6 +285,9 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx, case VFIO_UNMAP_ALL: return 1; + case VFIO_NOIOMMU_IOMMU: + return IS_ENABLED(CONFIG_VFIO_NOIOMMU); + case VFIO_DMA_CC_IOMMU: return iommufd_vfio_cc_iommu(ictx); @@ -261,10 +314,24 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx, static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type) { + bool no_iommu_mode = READ_ONCE(ictx->no_iommu_mode); struct iommufd_ioas *ioas = NULL; int rc = 0; - if (type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) + /* + * Emulation for NOIOMMU is imperfect in that VFIO blocks almost all + * other ioctls. We let them keep working but they mostly fail since no + * IOAS should exist. + */ + if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && type == VFIO_NOIOMMU_IOMMU && + no_iommu_mode) { + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + return 0; + } + + if ((type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) || + no_iommu_mode) return -EINVAL; /* VFIO fails the set_iommu if there is no group */ diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index a8f544629467..89e06c981e43 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -32,6 +32,7 @@ config VFIO_IOMMU_SPAPR_TCE tristate depends on SPAPR_TCE_IOMMU default VFIO +endif config VFIO_NOIOMMU bool "VFIO No-IOMMU support" @@ -46,7 +47,6 @@ config VFIO_NOIOMMU this mode since there is no IOMMU to provide DMA translation. If you don't know what to do here, say N. -endif config VFIO_VIRQFD bool diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c index b7a9560ab25e..89f10becf962 100644 --- a/drivers/vfio/container.c +++ b/drivers/vfio/container.c @@ -29,13 +29,6 @@ static struct vfio { struct mutex iommu_drivers_lock; } vfio; -#ifdef CONFIG_VFIO_NOIOMMU -bool vfio_noiommu __read_mostly; -module_param_named(enable_unsafe_noiommu_mode, - vfio_noiommu, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)"); -#endif - static void *vfio_noiommu_open(unsigned long arg) { if (arg != VFIO_NOIOMMU_IOMMU) diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c index bb24b2f0271e..e166ad7ce6e7 100644 --- a/drivers/vfio/group.c +++ b/drivers/vfio/group.c @@ -133,9 +133,12 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group, iommufd = iommufd_ctx_from_file(f.file); if (!IS_ERR(iommufd)) { - u32 ioas_id; + if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && + group->type == VFIO_NO_IOMMU) + ret = iommufd_vfio_compat_set_no_iommu(iommufd); + else + ret = iommufd_vfio_compat_ioas_create(iommufd); - ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id); if (ret) { iommufd_ctx_put(group->iommufd); goto out_unlock; diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c index 4f82a6fa7c6c..db4efbd56042 100644 --- a/drivers/vfio/iommufd.c +++ b/drivers/vfio/iommufd.c @@ -18,6 +18,20 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) lockdep_assert_held(&vdev->dev_set->lock); + if (vfio_device_is_noiommu(vdev)) { + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + /* + * Require no compat ioas to be assigned to proceed. The basic + * statement is that the user cannot have done something that + * implies they expected translation to exist + */ + if (!iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id)) + return -EPERM; + return 0; + } + /* * If the driver doesn't provide this op then it means the device does * not do DMA at all. So nothing to do. @@ -29,7 +43,7 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) if (ret) return ret; - ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id); + ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id); if (ret) goto err_unbind; ret = vdev->ops->attach_ioas(vdev, &ioas_id); @@ -52,6 +66,9 @@ void vfio_iommufd_unbind(struct vfio_device *vdev) { lockdep_assert_held(&vdev->dev_set->lock); + if (vfio_device_is_noiommu(vdev)) + return; + if (vdev->ops->unbind_iommufd) vdev->ops->unbind_iommufd(vdev); } diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index f8219a438bfb..9e94abcf8ee1 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -10,10 +10,10 @@ #include #include #include +#include struct iommufd_ctx; struct iommu_group; -struct vfio_device; struct vfio_container; void vfio_device_put_registration(struct vfio_device *device); @@ -88,6 +88,12 @@ bool vfio_device_has_container(struct vfio_device *device); int __init vfio_group_init(void); void vfio_group_cleanup(void); +static inline bool vfio_device_is_noiommu(struct vfio_device *vdev) +{ + return IS_ENABLED(CONFIG_VFIO_NOIOMMU) && + vdev->group->type == VFIO_NO_IOMMU; +} + #if IS_ENABLED(CONFIG_VFIO_CONTAINER) /* events for the backend driver notify callback */ enum vfio_iommu_notify_type { diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 5177bb061b17..90541fc94988 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -45,6 +45,13 @@ static struct vfio { struct ida device_ida; } vfio; +#ifdef CONFIG_VFIO_NOIOMMU +bool vfio_noiommu __read_mostly; +module_param_named(enable_unsafe_noiommu_mode, + vfio_noiommu, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)"); +#endif + static DEFINE_XARRAY(vfio_device_set_xa); int vfio_assign_device_set(struct vfio_device *device, void *set_id) diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 650d45629647..c0b5b3ac34f1 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -57,7 +57,9 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length); int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, void *data, size_t len, unsigned int flags); -int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); +int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); +int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx); +int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx); #else /* !CONFIG_IOMMUFD */ static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) { @@ -89,8 +91,12 @@ static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long return -EOPNOTSUPP; } -static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, - u32 *out_ioas_id) +static inline int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx) +{ + return -EOPNOTSUPP; +} + +static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) { return -EOPNOTSUPP; } -- cgit v1.2.3 From b3551ead616318ea155558cdbe7e91495b8d9b33 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 13 Feb 2023 10:32:21 -0400 Subject: iommufd: Make sure to zero vfio_iommu_type1_info before copying to user Missed a zero initialization here. Most of the struct is filled with a copy_from_user(), however minsz for that copy is smaller than the actual struct by 8 bytes, thus we don't fill the padding. Cc: stable@vger.kernel.org # 6.1+ Fixes: d624d6652a65 ("iommufd: vfio container FD ioctl compatibility") Link: https://lore.kernel.org/r/0-v1-a74499ece799+1a-iommufd_get_info_leak_jgg@nvidia.com Reviewed-by: Kevin Tian Reported-by: syzbot+cb1e0978f6bf46b83a58@syzkaller.appspotmail.com Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/vfio_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c index 8e38bb02d5e1..514494a0025b 100644 --- a/drivers/iommu/iommufd/vfio_compat.c +++ b/drivers/iommu/iommufd/vfio_compat.c @@ -448,7 +448,7 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx, }; size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); struct vfio_info_cap_header __user *last_cap = NULL; - struct vfio_iommu_type1_info info; + struct vfio_iommu_type1_info info = {}; struct iommufd_ioas *ioas; size_t total_cap_size; int rc; -- cgit v1.2.3 From b4ff830eca097df51af10a9be29e8cc817327919 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 13 Feb 2023 14:02:42 -0400 Subject: iommufd: Do not add the same hwpt to the ioas->hwpt_list twice The hwpt is added to the hwpt_list only during its creation, it is never added again. This hunk is some missed leftover from rework. Adding it twice will corrupt the linked list in some cases. It effects HWPT specific attachment, which is something the test suite cannot cover until we can create a legitimate struct device with a non-system iommu "driver" (ie we need the bus removed from the iommu code) Cc: stable@vger.kernel.org Fixes: e8d57210035b ("iommufd: Add kAPI toward external drivers for physical devices") Link: https://lore.kernel.org/r/1-v1-4336b5cb2fe4+1d7-iommufd_hwpt_jgg@nvidia.com Reviewed-by: Kevin Tian Reported-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/device.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 9f3b9674d72e..a0c66f47a65a 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -344,10 +344,6 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) rc = iommufd_device_do_attach(idev, hwpt); if (rc) goto out_put_pt_obj; - - mutex_lock(&hwpt->ioas->mutex); - list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list); - mutex_unlock(&hwpt->ioas->mutex); break; } case IOMMUFD_OBJ_IOAS: { -- cgit v1.2.3