From 309bba39c945ac8ab8083ac05cd6cfe5822968e0 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 24 Jun 2022 09:56:56 +0200 Subject: vringh: iterate on iotlb_translate to handle large translations iotlb_translate() can return -ENOBUFS if the bio_vec is not big enough to contain all the ranges for translation. This can happen for example if the VMM maps a large bounce buffer, without using hugepages, that requires more than 16 ranges to translate the addresses. To handle this case, let's extend iotlb_translate() to also return the number of bytes successfully translated. In copy_from_iotlb()/copy_to_iotlb() loops by calling iotlb_translate() several times until we complete the translation. Signed-off-by: Stefano Garzarella Message-Id: <20220624075656.13997-1-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vringh.c | 78 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 22 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index eab55accf381..11f59dd06a74 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -1095,7 +1095,8 @@ EXPORT_SYMBOL(vringh_need_notify_kern); #if IS_REACHABLE(CONFIG_VHOST_IOTLB) static int iotlb_translate(const struct vringh *vrh, - u64 addr, u64 len, struct bio_vec iov[], + u64 addr, u64 len, u64 *translated, + struct bio_vec iov[], int iov_size, u32 perm) { struct vhost_iotlb_map *map; @@ -1136,43 +1137,76 @@ static int iotlb_translate(const struct vringh *vrh, spin_unlock(vrh->iotlb_lock); + if (translated) + *translated = min(len, s); + return ret; } static inline int copy_from_iotlb(const struct vringh *vrh, void *dst, void *src, size_t len) { - struct iov_iter iter; - struct bio_vec iov[16]; - int ret; + u64 total_translated = 0; - ret = iotlb_translate(vrh, (u64)(uintptr_t)src, - len, iov, 16, VHOST_MAP_RO); - if (ret < 0) - return ret; + while (total_translated < len) { + struct bio_vec iov[16]; + struct iov_iter iter; + u64 translated; + int ret; - iov_iter_bvec(&iter, READ, iov, ret, len); + ret = iotlb_translate(vrh, (u64)(uintptr_t)src, + len - total_translated, &translated, + iov, ARRAY_SIZE(iov), VHOST_MAP_RO); + if (ret == -ENOBUFS) + ret = ARRAY_SIZE(iov); + else if (ret < 0) + return ret; - ret = copy_from_iter(dst, len, &iter); + iov_iter_bvec(&iter, READ, iov, ret, translated); - return ret; + ret = copy_from_iter(dst, translated, &iter); + if (ret < 0) + return ret; + + src += translated; + dst += translated; + total_translated += translated; + } + + return total_translated; } static inline int copy_to_iotlb(const struct vringh *vrh, void *dst, void *src, size_t len) { - struct iov_iter iter; - struct bio_vec iov[16]; - int ret; + u64 total_translated = 0; - ret = iotlb_translate(vrh, (u64)(uintptr_t)dst, - len, iov, 16, VHOST_MAP_WO); - if (ret < 0) - return ret; + while (total_translated < len) { + struct bio_vec iov[16]; + struct iov_iter iter; + u64 translated; + int ret; + + ret = iotlb_translate(vrh, (u64)(uintptr_t)dst, + len - total_translated, &translated, + iov, ARRAY_SIZE(iov), VHOST_MAP_WO); + if (ret == -ENOBUFS) + ret = ARRAY_SIZE(iov); + else if (ret < 0) + return ret; - iov_iter_bvec(&iter, WRITE, iov, ret, len); + iov_iter_bvec(&iter, WRITE, iov, ret, translated); + + ret = copy_to_iter(src, translated, &iter); + if (ret < 0) + return ret; + + src += translated; + dst += translated; + total_translated += translated; + } - return copy_to_iter(src, len, &iter); + return total_translated; } static inline int getu16_iotlb(const struct vringh *vrh, @@ -1183,7 +1217,7 @@ static inline int getu16_iotlb(const struct vringh *vrh, int ret; /* Atomic read is needed for getu16 */ - ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), + ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL, &iov, 1, VHOST_MAP_RO); if (ret < 0) return ret; @@ -1204,7 +1238,7 @@ static inline int putu16_iotlb(const struct vringh *vrh, int ret; /* Atomic write is needed for putu16 */ - ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), + ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL, &iov, 1, VHOST_MAP_WO); if (ret < 0) return ret; -- cgit v1.2.3 From 366958a7fd7b541cba478e2866ef08f34e20471b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 8 Jun 2022 12:48:26 +0100 Subject: vdpa: Use device_iommu_capable() Use the new interface to check the capability for our device specifically. Signed-off-by: Robin Murphy Message-Id: <548e316fa282ce513fabb991a4c4d92258062eb5.1654688822.git.robin.murphy@arm.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 23dcbfdfa13b..250ad4160ccb 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1076,7 +1076,7 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) if (!bus) return -EFAULT; - if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY)) + if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) return -ENOTSUPP; v->domain = iommu_domain_alloc(bus); -- cgit v1.2.3 From 5a4b0420b28fed7e8df81ac7e3b53834142053dd Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 7 Jul 2022 22:05:24 -0500 Subject: vhost-scsi: Fix max number of virtqueues Qemu takes it's num_queues limit then adds the fixed queues (control and event) to the total it will request from the kernel. So when a user requests 128 (or qemu does it's num_queues calculation based on vCPUS and other system limits), we hit errors due to userspace trying to setup 130 queues when vhost-scsi has a hard coded limit of 128. This has vhost-scsi adjust it's max so we can do a total of 130 virtqueues (128 IO and 2 fixed). For the case where the user has 128 vCPUs the guest OS can then nicely map each IO virtqueue to a vCPU and not have the odd case where 2 vCPUs share a virtqueue. Signed-off-by: Mike Christie Message-Id: <20220708030525.5065-2-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index ffd9e6c2ffc1..8d6b4eef554d 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -159,7 +159,7 @@ enum { }; #define VHOST_SCSI_MAX_TARGET 256 -#define VHOST_SCSI_MAX_VQ 128 +#define VHOST_SCSI_MAX_VQ 128 + VHOST_SCSI_VQ_IO #define VHOST_SCSI_MAX_EVENT 128 struct vhost_scsi_virtqueue { -- cgit v1.2.3 From f49c2226af8444563897e25bf293ea29e377995a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 7 Jul 2022 22:05:25 -0500 Subject: vhost scsi: Allow user to control num virtqueues We are currently hard coded to always create 128 IO virtqueues, so this adds a modparam to control it. For large systems where we are ok with using memory for virtqueues it allows us to add up to 1024. This limit was just selected becuase that's qemu's limit. Signed-off-by: Mike Christie Message-Id: <20220708030525.5065-3-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/scsi.c | 85 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 24 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 8d6b4eef554d..d9861ab2c300 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -159,9 +159,13 @@ enum { }; #define VHOST_SCSI_MAX_TARGET 256 -#define VHOST_SCSI_MAX_VQ 128 + VHOST_SCSI_VQ_IO +#define VHOST_SCSI_MAX_IO_VQ 1024 #define VHOST_SCSI_MAX_EVENT 128 +static unsigned vhost_scsi_max_io_vqs = 128; +module_param_named(max_io_vqs, vhost_scsi_max_io_vqs, uint, 0644); +MODULE_PARM_DESC(max_io_vqs, "Set the max number of IO virtqueues a vhost scsi device can support. The default is 128. The max is 1024."); + struct vhost_scsi_virtqueue { struct vhost_virtqueue vq; /* @@ -186,7 +190,9 @@ struct vhost_scsi { char vs_vhost_wwpn[TRANSPORT_IQN_LEN]; struct vhost_dev dev; - struct vhost_scsi_virtqueue vqs[VHOST_SCSI_MAX_VQ]; + struct vhost_scsi_virtqueue *vqs; + unsigned long *compl_bitmap; + struct vhost_scsi_inflight **old_inflight; struct vhost_work vs_completion_work; /* cmd completion work item */ struct llist_head vs_completion_list; /* cmd completion queue */ @@ -245,7 +251,7 @@ static void vhost_scsi_init_inflight(struct vhost_scsi *vs, struct vhost_virtqueue *vq; int idx, i; - for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { + for (i = 0; i < vs->dev.nvqs; i++) { vq = &vs->vqs[i].vq; mutex_lock(&vq->mutex); @@ -533,7 +539,6 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) { struct vhost_scsi *vs = container_of(work, struct vhost_scsi, vs_completion_work); - DECLARE_BITMAP(signal, VHOST_SCSI_MAX_VQ); struct virtio_scsi_cmd_resp v_rsp; struct vhost_scsi_cmd *cmd, *t; struct llist_node *llnode; @@ -541,7 +546,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) struct iov_iter iov_iter; int ret, vq; - bitmap_zero(signal, VHOST_SCSI_MAX_VQ); + bitmap_zero(vs->compl_bitmap, vs->dev.nvqs); llnode = llist_del_all(&vs->vs_completion_list); llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) { se_cmd = &cmd->tvc_se_cmd; @@ -566,7 +571,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0); q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq); vq = q - vs->vqs; - __set_bit(vq, signal); + __set_bit(vq, vs->compl_bitmap); } else pr_err("Faulted on virtio_scsi_cmd_resp\n"); @@ -574,8 +579,8 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) } vq = -1; - while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1)) - < VHOST_SCSI_MAX_VQ) + while ((vq = find_next_bit(vs->compl_bitmap, vs->dev.nvqs, vq + 1)) + < vs->dev.nvqs) vhost_signal(&vs->dev, &vs->vqs[vq].vq); } @@ -1421,26 +1426,25 @@ static void vhost_scsi_handle_kick(struct vhost_work *work) /* Callers must hold dev mutex */ static void vhost_scsi_flush(struct vhost_scsi *vs) { - struct vhost_scsi_inflight *old_inflight[VHOST_SCSI_MAX_VQ]; int i; /* Init new inflight and remember the old inflight */ - vhost_scsi_init_inflight(vs, old_inflight); + vhost_scsi_init_inflight(vs, vs->old_inflight); /* * The inflight->kref was initialized to 1. We decrement it here to * indicate the start of the flush operation so that it will reach 0 * when all the reqs are finished. */ - for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) - kref_put(&old_inflight[i]->kref, vhost_scsi_done_inflight); + for (i = 0; i < vs->dev.nvqs; i++) + kref_put(&vs->old_inflight[i]->kref, vhost_scsi_done_inflight); /* Flush both the vhost poll and vhost work */ vhost_dev_flush(&vs->dev); /* Wait for all reqs issued before the flush to be finished */ - for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) - wait_for_completion(&old_inflight[i]->comp); + for (i = 0; i < vs->dev.nvqs; i++) + wait_for_completion(&vs->old_inflight[i]->comp); } static void vhost_scsi_destroy_vq_cmds(struct vhost_virtqueue *vq) @@ -1603,7 +1607,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn, sizeof(vs->vs_vhost_wwpn)); - for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) { + for (i = VHOST_SCSI_VQ_IO; i < vs->dev.nvqs; i++) { vq = &vs->vqs[i].vq; if (!vhost_vq_is_setup(vq)) continue; @@ -1613,7 +1617,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, goto destroy_vq_cmds; } - for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { + for (i = 0; i < vs->dev.nvqs; i++) { vq = &vs->vqs[i].vq; mutex_lock(&vq->mutex); vhost_vq_set_backend(vq, vs_tpg); @@ -1715,7 +1719,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, target_undepend_item(&se_tpg->tpg_group.cg_item); } if (match) { - for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { + for (i = 0; i < vs->dev.nvqs; i++) { vq = &vs->vqs[i].vq; mutex_lock(&vq->mutex); vhost_vq_set_backend(vq, NULL); @@ -1724,7 +1728,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, /* Make sure cmds are not running before tearing them down. */ vhost_scsi_flush(vs); - for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { + for (i = 0; i < vs->dev.nvqs; i++) { vq = &vs->vqs[i].vq; vhost_scsi_destroy_vq_cmds(vq); } @@ -1764,7 +1768,7 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) return -EFAULT; } - for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { + for (i = 0; i < vs->dev.nvqs; i++) { vq = &vs->vqs[i].vq; mutex_lock(&vq->mutex); vq->acked_features = features; @@ -1778,16 +1782,40 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) { struct vhost_scsi *vs; struct vhost_virtqueue **vqs; - int r = -ENOMEM, i; + int r = -ENOMEM, i, nvqs = vhost_scsi_max_io_vqs; vs = kvzalloc(sizeof(*vs), GFP_KERNEL); if (!vs) goto err_vs; - vqs = kmalloc_array(VHOST_SCSI_MAX_VQ, sizeof(*vqs), GFP_KERNEL); - if (!vqs) + if (nvqs > VHOST_SCSI_MAX_IO_VQ) { + pr_err("Invalid max_io_vqs of %d. Using %d.\n", nvqs, + VHOST_SCSI_MAX_IO_VQ); + nvqs = VHOST_SCSI_MAX_IO_VQ; + } else if (nvqs == 0) { + pr_err("Invalid max_io_vqs of %d. Using 1.\n", nvqs); + nvqs = 1; + } + nvqs += VHOST_SCSI_VQ_IO; + + vs->compl_bitmap = bitmap_alloc(nvqs, GFP_KERNEL); + if (!vs->compl_bitmap) + goto err_compl_bitmap; + + vs->old_inflight = kmalloc_array(nvqs, sizeof(*vs->old_inflight), + GFP_KERNEL | __GFP_ZERO); + if (!vs->old_inflight) + goto err_inflight; + + vs->vqs = kmalloc_array(nvqs, sizeof(*vs->vqs), + GFP_KERNEL | __GFP_ZERO); + if (!vs->vqs) goto err_vqs; + vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL); + if (!vqs) + goto err_local_vqs; + vhost_work_init(&vs->vs_completion_work, vhost_scsi_complete_cmd_work); vhost_work_init(&vs->vs_event_work, vhost_scsi_evt_work); @@ -1798,11 +1826,11 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) vqs[VHOST_SCSI_VQ_EVT] = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; vs->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick; vs->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick; - for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) { + for (i = VHOST_SCSI_VQ_IO; i < nvqs; i++) { vqs[i] = &vs->vqs[i].vq; vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; } - vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV, + vhost_dev_init(&vs->dev, vqs, nvqs, UIO_MAXIOV, VHOST_SCSI_WEIGHT, 0, true, NULL); vhost_scsi_init_inflight(vs, NULL); @@ -1810,7 +1838,13 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) f->private_data = vs; return 0; +err_local_vqs: + kfree(vs->vqs); err_vqs: + kfree(vs->old_inflight); +err_inflight: + bitmap_free(vs->compl_bitmap); +err_compl_bitmap: kvfree(vs); err_vs: return r; @@ -1828,6 +1862,9 @@ static int vhost_scsi_release(struct inode *inode, struct file *f) vhost_dev_stop(&vs->dev); vhost_dev_cleanup(&vs->dev); kfree(vs->dev.vqs); + kfree(vs->vqs); + kfree(vs->old_inflight); + bitmap_free(vs->compl_bitmap); kvfree(vs); return 0; } -- cgit v1.2.3 From ebe6a354fa7e0a7d5b581da31ad031b19d8693f9 Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Fri, 5 Aug 2022 05:12:54 -0400 Subject: vhost-vdpa: Call ida_simple_remove() when failed In function vhost_vdpa_probe(), when code execution fails, we should call ida_simple_remove() to free ida. Signed-off-by: Bo Liu Message-Id: <20220805091254.20026-1-liubo03@inspur.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 250ad4160ccb..2c997d77d266 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1363,6 +1363,7 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) err: put_device(&v->dev); + ida_simple_remove(&vhost_vdpa_ida, v->minor); return r; } -- cgit v1.2.3 From 0723f1df5c3ec8a1112d150dab98e149361ef488 Mon Sep 17 00:00:00 2001 From: Eugenio Pérez Date: Wed, 10 Aug 2022 19:15:10 +0200 Subject: vhost-vdpa: introduce SUSPEND backend feature bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userland knows if it can suspend the device or not by checking this feature bit. It's only offered if the vdpa driver backend implements the suspend() operation callback, and to offer it or userland to ack it if the backend does not offer that callback is an error. Signed-off-by: Eugenio Pérez Message-Id: <20220810171512.2343333-3-eperezma@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 16 +++++++++++++++- include/uapi/linux/vhost_types.h | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 2c997d77d266..092752fea8e1 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -347,6 +347,14 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, return 0; } +static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->suspend; +} + static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; @@ -577,7 +585,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, if (cmd == VHOST_SET_BACKEND_FEATURES) { if (copy_from_user(&features, featurep, sizeof(features))) return -EFAULT; - if (features & ~VHOST_VDPA_BACKEND_FEATURES) + if (features & ~(VHOST_VDPA_BACKEND_FEATURES | + BIT_ULL(VHOST_BACKEND_F_SUSPEND))) + return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) && + !vhost_vdpa_can_suspend(v)) return -EOPNOTSUPP; vhost_set_backend_features(&v->vdev, features); return 0; @@ -628,6 +640,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, break; case VHOST_GET_BACKEND_FEATURES: features = VHOST_VDPA_BACKEND_FEATURES; + if (vhost_vdpa_can_suspend(v)) + features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND); if (copy_to_user(featurep, &features, sizeof(features))) r = -EFAULT; break; diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 634cee485abb..1bdd6e363f4c 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -161,5 +161,7 @@ struct vhost_vdpa_iova_range { * message */ #define VHOST_BACKEND_F_IOTLB_ASID 0x3 +/* Device can be suspended */ +#define VHOST_BACKEND_F_SUSPEND 0x4 #endif -- cgit v1.2.3 From f345a0143b4dd1cfc850009c6979a3801b86a06f Mon Sep 17 00:00:00 2001 From: Eugenio Pérez Date: Wed, 10 Aug 2022 19:15:11 +0200 Subject: vhost-vdpa: uAPI to suspend the device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ioctl adds support for suspending the device from userspace. This is a must before getting virtqueue indexes (base) for live migration, since the device could modify them after userland gets them. There are individual ways to perform that action for some devices (VHOST_NET_SET_BACKEND, VHOST_VSOCK_SET_RUNNING, ...) but there was no way to perform it for any vhost device (and, in particular, vhost-vdpa). After a successful return of the ioctl call the device must not process more virtqueue descriptors. The device can answer to read or writes of config fields as if it were not suspended. In particular, writing to "queue_enable" with a value of 1 will not make the device start processing buffers of the virtqueue. Signed-off-by: Eugenio Pérez Message-Id: <20220810171512.2343333-4-eperezma@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 19 +++++++++++++++++++ include/uapi/linux/vhost.h | 9 +++++++++ 2 files changed, 28 insertions(+) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 092752fea8e1..166044642fd5 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -478,6 +478,22 @@ static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp) return 0; } +/* After a successful return of ioctl the device must not process more + * virtqueue descriptors. The device can answer to read or writes of config + * fields as if it were not suspended. In particular, writing to "queue_enable" + * with a value of 1 will not make the device start processing buffers. + */ +static long vhost_vdpa_suspend(struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (!ops->suspend) + return -EOPNOTSUPP; + + return ops->suspend(vdpa); +} + static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { @@ -654,6 +670,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_VDPA_GET_VQS_COUNT: r = vhost_vdpa_get_vqs_count(v, argp); break; + case VHOST_VDPA_SUSPEND: + r = vhost_vdpa_suspend(v); + break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index cab645d4a645..f9f115a7c75b 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -171,4 +171,13 @@ #define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \ struct vhost_vring_state) +/* Suspend a device so it does not process virtqueue requests anymore + * + * After the return of ioctl the device must preserve all the necessary state + * (the virtqueue vring base plus the possible device specific states) that is + * required for restoring in the future. The device must not change its + * configuration after that point. + */ +#define VHOST_VDPA_SUSPEND _IO(VHOST_VIRTIO, 0x7D) + #endif -- cgit v1.2.3