From ef3a3f6a294ba65fd906a291553935881796f8a5 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Tue, 31 Jan 2023 08:58:03 -0800 Subject: vfio/type1: exclude mdevs from VFIO_UPDATE_VADDR Disable the VFIO_UPDATE_VADDR capability if mediated devices are present. Their kernel threads could be blocked indefinitely by a misbehaving userland while trying to pin/unpin pages while vaddrs are being updated. Do not allow groups to be added to the container while vaddr's are invalid, so we never need to block user threads from pinning, and can delete the vaddr-waiting code in a subsequent patch. Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr") Cc: stable@vger.kernel.org Signed-off-by: Steve Sistare Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1675184289-267876-2-git-send-email-steven.sistare@oracle.com Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 23105eb036fa..0552e8dcf0cb 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -49,7 +49,11 @@ /* Supports VFIO_DMA_UNMAP_FLAG_ALL */ #define VFIO_UNMAP_ALL 9 -/* Supports the vaddr flag for DMA map and unmap */ +/* + * Supports the vaddr flag for DMA map and unmap. Not supported for mediated + * devices, so this capability is subject to change as groups are added or + * removed. + */ #define VFIO_UPDATE_VADDR 10 /* @@ -1343,8 +1347,7 @@ struct vfio_iommu_type1_info_dma_avail { * Map process virtual addresses to IO virtual addresses using the * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required. * - * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and - * unblock translation of host virtual addresses in the iova range. The vaddr + * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova. The vaddr * must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR. To * maintain memory consistency within the user application, the updated vaddr * must address the same memory object as originally mapped. Failure to do so @@ -1395,9 +1398,9 @@ struct vfio_bitmap { * must be 0. This cannot be combined with the get-dirty-bitmap flag. * * If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host - * virtual addresses in the iova range. Tasks that attempt to translate an - * iova's vaddr will block. DMA to already-mapped pages continues. This - * cannot be combined with the get-dirty-bitmap flag. + * virtual addresses in the iova range. DMA to already-mapped pages continues. + * Groups may not be added to the container while any addresses are invalid. + * This cannot be combined with the get-dirty-bitmap flag. */ struct vfio_iommu_type1_dma_unmap { __u32 argsz; -- cgit v1.2.3 From 2b48f52f2bff8e8926165983f3a3d7b89b33de08 Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Fri, 3 Feb 2023 16:50:26 -0500 Subject: vfio: fix deadlock between group lock and kvm lock After 51cdc8bc120e, we have another deadlock scenario between the kvm->lock and the vfio group_lock with two different codepaths acquiring the locks in different order. Specifically in vfio_open_device, vfio holds the vfio group_lock when issuing device->ops->open_device but some drivers (like vfio-ap) need to acquire kvm->lock during their open_device routine; Meanwhile, kvm_vfio_release will acquire the kvm->lock first before calling vfio_file_set_kvm which will acquire the vfio group_lock. To resolve this, let's remove the need for the vfio group_lock from the kvm_vfio_release codepath. This is done by introducing a new spinlock to protect modifications to the vfio group kvm pointer, and acquiring a kvm ref from within vfio while holding this spinlock, with the reference held until the last close for the device in question. Fixes: 51cdc8bc120e ("kvm/vfio: Fix potential deadlock on vfio group_lock") Reported-by: Anthony Krowiak Suggested-by: Jason Gunthorpe Signed-off-by: Matthew Rosato Tested-by: Tony Krowiak Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Link: https://lore.kernel.org/r/20230203215027.151988-2-mjrosato@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/vfio/group.c | 44 +++++++++++++++++++++++++++------ drivers/vfio/vfio.h | 15 ++++++++++++ drivers/vfio/vfio_main.c | 63 ++++++++++++++++++++++++++++++++++++++++++------ include/linux/vfio.h | 2 +- 4 files changed, 109 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c index bb24b2f0271e..98621ac082f0 100644 --- a/drivers/vfio/group.c +++ b/drivers/vfio/group.c @@ -154,6 +154,18 @@ out_unlock: return ret; } +static void vfio_device_group_get_kvm_safe(struct vfio_device *device) +{ + spin_lock(&device->group->kvm_ref_lock); + if (!device->group->kvm) + goto unlock; + + _vfio_device_get_kvm_safe(device, device->group->kvm); + +unlock: + spin_unlock(&device->group->kvm_ref_lock); +} + static int vfio_device_group_open(struct vfio_device *device) { int ret; @@ -164,13 +176,23 @@ static int vfio_device_group_open(struct vfio_device *device) goto out_unlock; } + mutex_lock(&device->dev_set->lock); + /* - * Here we pass the KVM pointer with the group under the lock. If the - * device driver will use it, it must obtain a reference and release it - * during close_device. + * Before the first device open, get the KVM pointer currently + * associated with the group (if there is one) and obtain a reference + * now that will be held until the open_count reaches 0 again. Save + * the pointer in the device for use by drivers. */ - ret = vfio_device_open(device, device->group->iommufd, - device->group->kvm); + if (device->open_count == 0) + vfio_device_group_get_kvm_safe(device); + + ret = vfio_device_open(device, device->group->iommufd, device->kvm); + + if (device->open_count == 0) + vfio_device_put_kvm(device); + + mutex_unlock(&device->dev_set->lock); out_unlock: mutex_unlock(&device->group->group_lock); @@ -180,7 +202,14 @@ out_unlock: void vfio_device_group_close(struct vfio_device *device) { mutex_lock(&device->group->group_lock); + mutex_lock(&device->dev_set->lock); + vfio_device_close(device, device->group->iommufd); + + if (device->open_count == 0) + vfio_device_put_kvm(device); + + mutex_unlock(&device->dev_set->lock); mutex_unlock(&device->group->group_lock); } @@ -450,6 +479,7 @@ static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group, refcount_set(&group->drivers, 1); mutex_init(&group->group_lock); + spin_lock_init(&group->kvm_ref_lock); INIT_LIST_HEAD(&group->device_list); mutex_init(&group->device_lock); group->iommu_group = iommu_group; @@ -803,9 +833,9 @@ void vfio_file_set_kvm(struct file *file, struct kvm *kvm) if (!vfio_file_is_group(file)) return; - mutex_lock(&group->group_lock); + spin_lock(&group->kvm_ref_lock); group->kvm = kvm; - mutex_unlock(&group->group_lock); + spin_unlock(&group->kvm_ref_lock); } EXPORT_SYMBOL_GPL(vfio_file_set_kvm); diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index d5fa896b5a85..8ec3a5db23f5 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -74,6 +74,7 @@ struct vfio_group { struct file *opened_file; struct blocking_notifier_head notifier; struct iommufd_ctx *iommufd; + spinlock_t kvm_ref_lock; }; int vfio_device_set_group(struct vfio_device *device, @@ -244,4 +245,18 @@ extern bool vfio_noiommu __read_mostly; enum { vfio_noiommu = false }; #endif +#ifdef CONFIG_HAVE_KVM +void _vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm); +void vfio_device_put_kvm(struct vfio_device *device); +#else +static inline void _vfio_device_get_kvm_safe(struct vfio_device *device, + struct kvm *kvm) +{ +} + +static inline void vfio_device_put_kvm(struct vfio_device *device) +{ +} +#endif + #endif diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 5177bb061b17..28c47cd6a6b5 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -16,6 +16,9 @@ #include #include #include +#ifdef CONFIG_HAVE_KVM +#include +#endif #include #include #include @@ -338,6 +341,55 @@ void vfio_unregister_group_dev(struct vfio_device *device) } EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); +#ifdef CONFIG_HAVE_KVM +void _vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm) +{ + void (*pfn)(struct kvm *kvm); + bool (*fn)(struct kvm *kvm); + bool ret; + + lockdep_assert_held(&device->dev_set->lock); + + pfn = symbol_get(kvm_put_kvm); + if (WARN_ON(!pfn)) + return; + + fn = symbol_get(kvm_get_kvm_safe); + if (WARN_ON(!fn)) { + symbol_put(kvm_put_kvm); + return; + } + + ret = fn(kvm); + symbol_put(kvm_get_kvm_safe); + if (!ret) { + symbol_put(kvm_put_kvm); + return; + } + + device->put_kvm = pfn; + device->kvm = kvm; +} + +void vfio_device_put_kvm(struct vfio_device *device) +{ + lockdep_assert_held(&device->dev_set->lock); + + if (!device->kvm) + return; + + if (WARN_ON(!device->put_kvm)) + goto clear; + + device->put_kvm(device->kvm); + device->put_kvm = NULL; + symbol_put(kvm_put_kvm); + +clear: + device->kvm = NULL; +} +#endif + /* true if the vfio_device has open_device() called but not close_device() */ static bool vfio_assert_device_open(struct vfio_device *device) { @@ -361,7 +413,6 @@ static int vfio_device_first_open(struct vfio_device *device, if (ret) goto err_module_put; - device->kvm = kvm; if (device->ops->open_device) { ret = device->ops->open_device(device); if (ret) @@ -370,7 +421,6 @@ static int vfio_device_first_open(struct vfio_device *device, return 0; err_unuse_iommu: - device->kvm = NULL; if (iommufd) vfio_iommufd_unbind(device); else @@ -387,7 +437,6 @@ static void vfio_device_last_close(struct vfio_device *device, if (device->ops->close_device) device->ops->close_device(device); - device->kvm = NULL; if (iommufd) vfio_iommufd_unbind(device); else @@ -400,14 +449,14 @@ int vfio_device_open(struct vfio_device *device, { int ret = 0; - mutex_lock(&device->dev_set->lock); + lockdep_assert_held(&device->dev_set->lock); + device->open_count++; if (device->open_count == 1) { ret = vfio_device_first_open(device, iommufd, kvm); if (ret) device->open_count--; } - mutex_unlock(&device->dev_set->lock); return ret; } @@ -415,12 +464,12 @@ int vfio_device_open(struct vfio_device *device, void vfio_device_close(struct vfio_device *device, struct iommufd_ctx *iommufd) { - mutex_lock(&device->dev_set->lock); + lockdep_assert_held(&device->dev_set->lock); + vfio_assert_device_open(device); if (device->open_count == 1) vfio_device_last_close(device, iommufd); device->open_count--; - mutex_unlock(&device->dev_set->lock); } /* diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 35be78e9ae57..87ff862ff555 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -46,7 +46,6 @@ struct vfio_device { struct vfio_device_set *dev_set; struct list_head dev_set_list; unsigned int migration_flags; - /* Driver must reference the kvm during open_device or never touch it */ struct kvm *kvm; /* Members below here are private, not for driver use */ @@ -58,6 +57,7 @@ struct vfio_device { struct list_head group_next; struct list_head iommu_entry; struct iommufd_access *iommufd_access; + void (*put_kvm)(struct kvm *kvm); #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; struct iommufd_ctx *iommufd_ictx; -- cgit v1.2.3 From fae9068022186b832534ea8b325d5242586c4303 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Thu, 9 Feb 2023 00:12:09 -0800 Subject: vfio: Update the kdoc for vfio_device_ops this is missed when adding bind_iommufd/unbind_iommufd and attach_ioas. Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20230209081210.141372-2-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 87ff862ff555..93134b023968 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -70,6 +70,10 @@ struct vfio_device { * * @init: initialize private fields in device structure * @release: Reclaim private fields in device structure + * @bind_iommufd: Called when binding the device to an iommufd + * @unbind_iommufd: Opposite of bind_iommufd + * @attach_ioas: Called when attaching device to an IOAS/HWPT managed by the + * bound iommufd. Undo in unbind_iommufd. * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor -- cgit v1.2.3