From c2a052a4a949df53f50a5024843432d2234cb824 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Fri, 24 Jun 2022 10:55:41 +0800 Subject: remoteproc: rename len of rpoc_vring to num Rename the member len in the structure rpoc_vring to num. And remove 'in bytes' from the comment of it. This is misleading. Because this actually refers to the size of the virtio vring to be created. The unit is not bytes. Signed-off-by: Xuan Zhuo Message-Id: <20220624025621.128843-2-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/remoteproc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 7c943f0a2fc4..aea79c77db0f 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -597,7 +597,7 @@ struct rproc_subdev { /** * struct rproc_vring - remoteproc vring state * @va: virtual address - * @len: length, in bytes + * @num: vring size * @da: device address * @align: vring alignment * @notifyid: rproc-specific unique vring index @@ -606,7 +606,7 @@ struct rproc_subdev { */ struct rproc_vring { void *va; - int len; + int num; u32 da; u32 align; int notifyid; -- cgit v1.2.3 From da802961832f9852886304290135457519815497 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:21 +0800 Subject: virtio: record the maximum queue num supported by the device. virtio-net can display the maximum (supported by hardware) ring size in ethtool -g eth0. When the subsequent patch implements vring reset, it can judge whether the ring size passed by the driver is legal based on this. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-2-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- arch/um/drivers/virtio_uml.c | 1 + drivers/platform/mellanox/mlxbf-tmfifo.c | 2 ++ drivers/remoteproc/remoteproc_virtio.c | 2 ++ drivers/s390/virtio/virtio_ccw.c | 3 +++ drivers/virtio/virtio_mmio.c | 2 ++ drivers/virtio/virtio_pci_legacy.c | 2 ++ drivers/virtio/virtio_pci_modern.c | 2 ++ drivers/virtio/virtio_vdpa.c | 2 ++ include/linux/virtio.h | 2 ++ 9 files changed, 18 insertions(+) (limited to 'include') diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 82ff3785bf69..e719af8bdf56 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -958,6 +958,7 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, goto error_create; } vq->priv = info; + vq->num_max = num; num = virtqueue_get_vring_size(vq); if (vu_dev->protocol_features & diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index 38800e86ed8a..1ae3c56b66b0 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -959,6 +959,8 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev, goto error; } + vq->num_max = vring->num; + vqs[i] = vq; vring->vq = vq; vq->priv = vring; diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index d43d74733f0a..0f7706e23eb9 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -125,6 +125,8 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev, return ERR_PTR(-ENOMEM); } + vq->num_max = num; + rvring->vq = vq; vq->priv = rvring; diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 161d3b141f0d..6b86d0280d6b 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -530,6 +530,9 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev, err = -ENOMEM; goto out_err; } + + vq->num_max = info->num; + /* it may have been reduced */ info->num = virtqueue_get_vring_size(vq); diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 945cb8fb60b6..3ff746e3f24a 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -403,6 +403,8 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int in goto error_new_virtqueue; } + vq->num_max = num; + /* Activate the queue */ writel(virtqueue_get_vring_size(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NUM); if (vm_dev->version == 1) { diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index a5e5721145c7..2257f1b3d8ae 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -135,6 +135,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (!vq) return ERR_PTR(-ENOMEM); + vq->num_max = num; + q_pfn = virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; if (q_pfn >> 32) { dev_err(&vp_dev->pci_dev->dev, diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 623906b4996c..e7e0b8c850f6 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -218,6 +218,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (!vq) return ERR_PTR(-ENOMEM); + vq->num_max = num; + /* activate the queue */ vp_modern_set_queue_size(mdev, index, virtqueue_get_vring_size(vq)); vp_modern_queue_address(mdev, index, virtqueue_get_desc_addr(vq), diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index c40f7deb6b5a..9670cc79371d 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -183,6 +183,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, goto error_new_virtqueue; } + vq->num_max = max_num; + /* Setup virtqueue callback */ cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL; cb.private = info; diff --git a/include/linux/virtio.h b/include/linux/virtio.h index d8fdf170637c..129bde7521e3 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -19,6 +19,7 @@ * @priv: a pointer for the virtqueue implementation to use. * @index: the zero-based ordinal number for this queue. * @num_free: number of elements we expect to be able to fit. + * @num_max: the maximum number of elements supported by the device. * * A note on @num_free: with indirect buffers, each buffer needs one * element in the queue, otherwise a buffer will need one element per @@ -31,6 +32,7 @@ struct virtqueue { struct virtio_device *vdev; unsigned int index; unsigned int num_free; + unsigned int num_max; void *priv; }; -- cgit v1.2.3 From 3086e9fc9173166774652a488467e4176ee1c81b Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:22 +0800 Subject: virtio: struct virtio_config_ops add callbacks for queue_reset reset can be divided into the following four steps (example): 1. transport: notify the device to reset the queue 2. vring: recycle the buffer submitted 3. vring: reset/resize the vring (may re-alloc) 4. transport: mmap vring to device, and enable the queue In order to support queue reset, add two callbacks in struct virtio_config_ops to implement steps 1 and 4. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-3-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index b47c2e7ed0ee..36ec7be1f480 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -78,6 +78,18 @@ struct virtio_shm_region { * @set_vq_affinity: set the affinity for a virtqueue (optional). * @get_vq_affinity: get the affinity for a virtqueue (optional). * @get_shm_region: get a shared memory region based on the index. + * @disable_vq_and_reset: reset a queue individually (optional). + * vq: the virtqueue + * Returns 0 on success or error status + * disable_vq_and_reset will guarantee that the callbacks are disabled and + * synchronized. + * Except for the callback, the caller should guarantee that the vring is + * not accessed by any functions of virtqueue. + * @enable_vq_after_reset: enable a reset queue + * vq: the virtqueue + * Returns 0 on success or error status + * If disable_vq_and_reset is set, then enable_vq_after_reset must also be + * set. */ typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { @@ -104,6 +116,8 @@ struct virtio_config_ops { int index); bool (*get_shm_region)(struct virtio_device *vdev, struct virtio_shm_region *region, u8 id); + int (*disable_vq_and_reset)(struct virtqueue *vq); + int (*enable_vq_after_reset)(struct virtqueue *vq); }; /* If driver didn't advertise the feature, it will never appear. */ -- cgit v1.2.3 From 07d9629d49584b6f79faa6158cd7aef7e6919703 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:27 +0800 Subject: virtio_ring: split: stop __vring_new_virtqueue as export symbol There is currently only one place to reference __vring_new_virtqueue() directly from the outside of virtio core. And here vring_new_virtqueue() can be used instead. Subsequent patches will modify __vring_new_virtqueue, so stop it as an export symbol for now. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-8-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 25 ++++++++++++++++--------- include/linux/virtio_ring.h | 10 ---------- tools/virtio/virtio_test.c | 4 ++-- 3 files changed, 18 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index a63ef2d99955..8ce6cc73d814 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -204,6 +204,14 @@ struct vring_virtqueue { #endif }; +static struct virtqueue *__vring_new_virtqueue(unsigned int index, + struct vring vring, + struct virtio_device *vdev, + bool weak_barriers, + bool context, + bool (*notify)(struct virtqueue *), + void (*callback)(struct virtqueue *), + const char *name); /* * Helpers. @@ -2195,14 +2203,14 @@ irqreturn_t vring_interrupt(int irq, void *_vq) EXPORT_SYMBOL_GPL(vring_interrupt); /* Only available for split ring */ -struct virtqueue *__vring_new_virtqueue(unsigned int index, - struct vring vring, - struct virtio_device *vdev, - bool weak_barriers, - bool context, - bool (*notify)(struct virtqueue *), - void (*callback)(struct virtqueue *), - const char *name) +static struct virtqueue *__vring_new_virtqueue(unsigned int index, + struct vring vring, + struct virtio_device *vdev, + bool weak_barriers, + bool context, + bool (*notify)(struct virtqueue *), + void (*callback)(struct virtqueue *), + const char *name) { struct vring_virtqueue *vq; @@ -2277,7 +2285,6 @@ err_state: kfree(vq); return NULL; } -EXPORT_SYMBOL_GPL(__vring_new_virtqueue); struct virtqueue *vring_create_virtqueue( unsigned int index, diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index b485b13fa50b..8b8af1a38991 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -76,16 +76,6 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, void (*callback)(struct virtqueue *vq), const char *name); -/* Creates a virtqueue with a custom layout. */ -struct virtqueue *__vring_new_virtqueue(unsigned int index, - struct vring vring, - struct virtio_device *vdev, - bool weak_barriers, - bool ctx, - bool (*notify)(struct virtqueue *), - void (*callback)(struct virtqueue *), - const char *name); - /* * Creates a virtqueue with a standard layout but a caller-allocated * ring. diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index 23f142af544a..86a410ddcedd 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -102,8 +102,8 @@ static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev) memset(info->ring, 0, vring_size(num, 4096)); vring_init(&info->vring, num, info->ring, 4096); - info->vq = __vring_new_virtqueue(info->idx, info->vring, vdev, true, - false, vq_notify, vq_callback, "test"); + info->vq = vring_new_virtqueue(info->idx, num, 4096, vdev, true, false, + info->ring, vq_notify, vq_callback, "test"); assert(info->vq); info->vq->priv = info; } -- cgit v1.2.3 From c790e8e1817f1a17c05e64f1c4f16f231b8529d5 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:44 +0800 Subject: virtio_ring: introduce virtqueue_resize() Introduce virtqueue_resize() to implement the resize of vring. Based on these, the driver can dynamically adjust the size of the vring. For example: ethtool -G. virtqueue_resize() implements resize based on the vq reset function. In case of failure to allocate a new vring, it will give up resize and use the original vring. During this process, if the re-enable reset vq fails, the vq can no longer be used. Although the probability of this situation is not high. The parameter recycle is used to recycle the buffer that is no longer used. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-25-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 69 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/virtio.h | 3 ++ 2 files changed, 72 insertions(+) (limited to 'include') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index bea5a3448217..6447a09e2e38 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2539,6 +2539,75 @@ struct virtqueue *vring_create_virtqueue( } EXPORT_SYMBOL_GPL(vring_create_virtqueue); +/** + * virtqueue_resize - resize the vring of vq + * @_vq: the struct virtqueue we're talking about. + * @num: new ring num + * @recycle: callback for recycle the useless buffer + * + * When it is really necessary to create a new vring, it will set the current vq + * into the reset state. Then call the passed callback to recycle the buffer + * that is no longer used. Only after the new vring is successfully created, the + * old vring will be released. + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error. + * 0: success. + * -ENOMEM: Failed to allocate a new ring, fall back to the original ring size. + * vq can still work normally + * -EBUSY: Failed to sync with device, vq may not work properly + * -ENOENT: Transport or device not supported + * -E2BIG/-EINVAL: num error + * -EPERM: Operation not permitted + * + */ +int virtqueue_resize(struct virtqueue *_vq, u32 num, + void (*recycle)(struct virtqueue *vq, void *buf)) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = vq->vq.vdev; + void *buf; + int err; + + if (!vq->we_own_ring) + return -EPERM; + + if (num > vq->vq.num_max) + return -E2BIG; + + if (!num) + return -EINVAL; + + if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num) + return 0; + + if (!vdev->config->disable_vq_and_reset) + return -ENOENT; + + if (!vdev->config->enable_vq_after_reset) + return -ENOENT; + + err = vdev->config->disable_vq_and_reset(_vq); + if (err) + return err; + + while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) + recycle(_vq, buf); + + if (vq->packed_ring) + err = virtqueue_resize_packed(_vq, num); + else + err = virtqueue_resize_split(_vq, num); + + if (vdev->config->enable_vq_after_reset(_vq)) + return -EBUSY; + + return err; +} +EXPORT_SYMBOL_GPL(virtqueue_resize); + /* Only available for split ring */ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int num, diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 129bde7521e3..62e31bca5602 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -91,6 +91,9 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue *vq); dma_addr_t virtqueue_get_avail_addr(struct virtqueue *vq); dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq); +int virtqueue_resize(struct virtqueue *vq, u32 num, + void (*recycle)(struct virtqueue *vq, void *buf)); + /** * virtio_device - representation of a device using virtio * @index: unique position on the virtio bus -- cgit v1.2.3 From ea024594b1dc5b6719c1400ae154690f5c203996 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:45 +0800 Subject: virtio_pci: struct virtio_pci_common_cfg add queue_notify_data Add queue_notify_data in struct virtio_pci_common_cfg, which comes from here https://github.com/oasis-tcs/virtio-spec/issues/89 In order not to affect the API, add a dedicated structure struct virtio_pci_modern_common_cfg to virtio_pci_modern.h. Since I want to add queue_reset after queue_notify_data, I submitted this patch first. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-26-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_pci_modern.h | 7 +++++++ include/uapi/linux/virtio_pci.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index eb2bd9b4077d..41f5a018bd94 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -5,6 +5,13 @@ #include #include +struct virtio_pci_modern_common_cfg { + struct virtio_pci_common_cfg cfg; + + __le16 queue_notify_data; /* read-write */ + __le16 padding; +}; + struct virtio_pci_modern_device { struct pci_dev *pci_dev; diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 3a86f36d7e3d..f5981a874481 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -202,6 +202,7 @@ struct virtio_pci_cfg_cap { #define VIRTIO_PCI_COMMON_Q_AVAILHI 44 #define VIRTIO_PCI_COMMON_Q_USEDLO 48 #define VIRTIO_PCI_COMMON_Q_USEDHI 52 +#define VIRTIO_PCI_COMMON_Q_NDATA 56 #endif /* VIRTIO_PCI_NO_MODERN */ -- cgit v1.2.3 From 3251063155032729b8793ac3957136ae25c0bafa Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:46 +0800 Subject: virtio: allow to unbreak/break virtqueue individually This patch allows the new introduced __virtqueue_break()/__virtqueue_unbreak() to break/unbreak the virtqueue. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-27-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 24 ++++++++++++++++++++++++ include/linux/virtio.h | 3 +++ 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 6447a09e2e38..accb3ae6cc95 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2724,6 +2724,30 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); +/* + * This function should only be called by the core, not directly by the driver. + */ +void __virtqueue_break(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ + WRITE_ONCE(vq->broken, true); +} +EXPORT_SYMBOL_GPL(__virtqueue_break); + +/* + * This function should only be called by the core, not directly by the driver. + */ +void __virtqueue_unbreak(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ + WRITE_ONCE(vq->broken, false); +} +EXPORT_SYMBOL_GPL(__virtqueue_unbreak); + bool virtqueue_is_broken(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 62e31bca5602..d45ee82a4470 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -138,6 +138,9 @@ bool is_virtio_device(struct device *dev); void virtio_break_device(struct virtio_device *dev); void __virtio_unbreak_device(struct virtio_device *dev); +void __virtqueue_break(struct virtqueue *_vq); +void __virtqueue_unbreak(struct virtqueue *_vq); + void virtio_config_changed(struct virtio_device *dev); #ifdef CONFIG_PM_SLEEP int virtio_device_freeze(struct virtio_device *dev); -- cgit v1.2.3 From d94587b5bb5c4bba32fbc2bd92c86cc6de25167f Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:47 +0800 Subject: virtio: queue_reset: add VIRTIO_F_RING_RESET Added VIRTIO_F_RING_RESET, it came from here https://github.com/oasis-tcs/virtio-spec/issues/124 https://github.com/oasis-tcs/virtio-spec/issues/139 This feature indicates that the driver can reset a queue individually. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-28-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_config.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index f0fb0ae021c0..3c05162bc988 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -52,7 +52,7 @@ * rest are per-device feature bits. */ #define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 38 +#define VIRTIO_TRANSPORT_F_END 41 #ifndef VIRTIO_CONFIG_NO_LEGACY /* Do we get callbacks when the ring is completely used, even if we've @@ -98,4 +98,9 @@ * Does the device support Single Root I/O Virtualization? */ #define VIRTIO_F_SR_IOV 37 + +/* + * This feature indicates that the driver can reset a queue individually. + */ +#define VIRTIO_F_RING_RESET 40 #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */ -- cgit v1.2.3 From 4913e85441b40386c4bb093f188b955d8165f1b7 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:48 +0800 Subject: virtio_ring: struct virtqueue introduce reset Introduce a new member reset to the structure virtqueue to determine whether the current vq is in the reset state. Subsequent patches will use it. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-29-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 2 ++ include/linux/virtio.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index accb3ae6cc95..d66c8e6d0ef3 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1996,6 +1996,7 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->vq.vdev = vdev; vq->vq.name = name; vq->vq.index = index; + vq->vq.reset = false; vq->we_own_ring = true; vq->notify = notify; vq->weak_barriers = weak_barriers; @@ -2481,6 +2482,7 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->vq.vdev = vdev; vq->vq.name = name; vq->vq.index = index; + vq->vq.reset = false; vq->we_own_ring = false; vq->notify = notify; vq->weak_barriers = weak_barriers; diff --git a/include/linux/virtio.h b/include/linux/virtio.h index d45ee82a4470..a3f73bb6733e 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -20,6 +20,7 @@ * @index: the zero-based ordinal number for this queue. * @num_free: number of elements we expect to be able to fit. * @num_max: the maximum number of elements supported by the device. + * @reset: vq is in reset state or not. * * A note on @num_free: with indirect buffers, each buffer needs one * element in the queue, otherwise a buffer will need one element per @@ -34,6 +35,7 @@ struct virtqueue { unsigned int num_free; unsigned int num_max; void *priv; + bool reset; }; int virtqueue_add_outbuf(struct virtqueue *vq, -- cgit v1.2.3 From 0cdd450e70510c9e13af8099e9f6c1467e6a0b91 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:49 +0800 Subject: virtio_pci: struct virtio_pci_common_cfg add queue_reset Add queue_reset in virtio_pci_modern_common_cfg. https://github.com/oasis-tcs/virtio-spec/issues/124 https://github.com/oasis-tcs/virtio-spec/issues/139 Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-30-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_pci_modern.h | 2 +- include/uapi/linux/virtio_pci.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index 41f5a018bd94..05123b9a606f 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -9,7 +9,7 @@ struct virtio_pci_modern_common_cfg { struct virtio_pci_common_cfg cfg; __le16 queue_notify_data; /* read-write */ - __le16 padding; + __le16 queue_reset; /* read-write */ }; struct virtio_pci_modern_device { diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index f5981a874481..f703afc7ad31 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -203,6 +203,7 @@ struct virtio_pci_cfg_cap { #define VIRTIO_PCI_COMMON_Q_USEDLO 48 #define VIRTIO_PCI_COMMON_Q_USEDHI 52 #define VIRTIO_PCI_COMMON_Q_NDATA 56 +#define VIRTIO_PCI_COMMON_Q_RESET 58 #endif /* VIRTIO_PCI_NO_MODERN */ -- cgit v1.2.3 From 0b50cece0b7857732d2055f2c77f8730c10f9196 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:50 +0800 Subject: virtio_pci: introduce helper to get/set queue reset Introduce new helpers to implement queue reset and get queue reset status. https://github.com/oasis-tcs/virtio-spec/issues/124 https://github.com/oasis-tcs/virtio-spec/issues/139 Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-31-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_modern_dev.c | 39 ++++++++++++++++++++++++++++++++++ include/linux/virtio_pci_modern.h | 2 ++ 2 files changed, 41 insertions(+) (limited to 'include') diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index fa2a9445bb18..869cb46bef96 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -3,6 +3,7 @@ #include #include #include +#include /* * vp_modern_map_capability - map a part of virtio pci capability @@ -474,6 +475,44 @@ void vp_modern_set_status(struct virtio_pci_modern_device *mdev, } EXPORT_SYMBOL_GPL(vp_modern_set_status); +/* + * vp_modern_get_queue_reset - get the queue reset status + * @mdev: the modern virtio-pci device + * @index: queue index + */ +int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index) +{ + struct virtio_pci_modern_common_cfg __iomem *cfg; + + cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev->common; + + vp_iowrite16(index, &cfg->cfg.queue_select); + return vp_ioread16(&cfg->queue_reset); +} +EXPORT_SYMBOL_GPL(vp_modern_get_queue_reset); + +/* + * vp_modern_set_queue_reset - reset the queue + * @mdev: the modern virtio-pci device + * @index: queue index + */ +void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index) +{ + struct virtio_pci_modern_common_cfg __iomem *cfg; + + cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev->common; + + vp_iowrite16(index, &cfg->cfg.queue_select); + vp_iowrite16(1, &cfg->queue_reset); + + while (vp_ioread16(&cfg->queue_reset)) + msleep(1); + + while (vp_ioread16(&cfg->cfg.queue_enable)) + msleep(1); +} +EXPORT_SYMBOL_GPL(vp_modern_set_queue_reset); + /* * vp_modern_queue_vector - set the MSIX vector for a specific virtqueue * @mdev: the modern virtio-pci device diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index 05123b9a606f..c4eeb79b0139 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -113,4 +113,6 @@ void __iomem * vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev, u16 index, resource_size_t *pa); int vp_modern_probe(struct virtio_pci_modern_device *mdev); void vp_modern_remove(struct virtio_pci_modern_device *mdev); +int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index); +void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index); #endif -- cgit v1.2.3 From a10fba0377145fccefea4dc4dd5915b7ed87e546 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:53 +0800 Subject: virtio: find_vqs() add arg sizes find_vqs() adds a new parameter sizes to specify the size of each vq vring. NULL as sizes means that all queues in find_vqs() use the maximum size. A value in the array is 0, which means that the corresponding queue uses the maximum size. In the split scenario, the meaning of size is the largest size, because it may be limited by memory, the virtio core will try a smaller size. And the size is power of 2. Signed-off-by: Xuan Zhuo Acked-by: Hans de Goede Reviewed-by: Mathieu Poirier Acked-by: Jason Wang Message-Id: <20220801063902.129329-34-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- arch/um/drivers/virtio_uml.c | 2 +- drivers/platform/mellanox/mlxbf-tmfifo.c | 1 + drivers/remoteproc/remoteproc_virtio.c | 1 + drivers/s390/virtio/virtio_ccw.c | 1 + drivers/virtio/virtio_mmio.c | 1 + drivers/virtio/virtio_pci_common.c | 2 +- drivers/virtio/virtio_pci_common.h | 2 +- drivers/virtio/virtio_pci_modern.c | 7 +++++-- drivers/virtio/virtio_vdpa.c | 1 + include/linux/virtio_config.h | 14 +++++++++----- 10 files changed, 22 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index e719af8bdf56..79e38afd4b91 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -1011,7 +1011,7 @@ error_kzalloc: static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], const bool *ctx, + const char * const names[], u32 sizes[], const bool *ctx, struct irq_affinity *desc) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index 1ae3c56b66b0..8be13d416f48 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -928,6 +928,7 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], + u32 sizes[], const bool *ctx, struct irq_affinity *desc) { diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index 0f7706e23eb9..81c4f5776109 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -158,6 +158,7 @@ static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], + u32 sizes[], const bool * ctx, struct irq_affinity *desc) { diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 6b86d0280d6b..72500cd2dbf5 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -635,6 +635,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], + u32 sizes[], const bool *ctx, struct irq_affinity *desc) { diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 3ff746e3f24a..dfcecfd7aba1 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -474,6 +474,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], + u32 sizes[], const bool *ctx, struct irq_affinity *desc) { diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index ad258a9d3b9f..7ad734584823 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -396,7 +396,7 @@ out_del_vqs: /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], const bool *ctx, + const char * const names[], u32 sizes[], const bool *ctx, struct irq_affinity *desc) { int err; diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 23112d84218f..a5ff838b85a5 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -110,7 +110,7 @@ void vp_del_vqs(struct virtio_device *vdev); /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], const bool *ctx, + const char * const names[], u32 sizes[], const bool *ctx, struct irq_affinity *desc); const char *vp_bus_name(struct virtio_device *vdev); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index c3b9f2761849..be51ec849252 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -347,12 +347,15 @@ err: static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], const bool *ctx, + const char * const names[], + u32 sizes[], + const bool *ctx, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq; - int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc); + int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, sizes, ctx, + desc); if (rc) return rc; diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 9670cc79371d..832d2c5b1b19 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -269,6 +269,7 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], + u32 sizes[], const bool *ctx, struct irq_affinity *desc) { diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 36ec7be1f480..888f7e96f0c7 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -55,6 +55,7 @@ struct virtio_shm_region { * include a NULL entry for vqs that do not need a callback * names: array of virtqueue names (mainly for debugging) * include a NULL entry for vqs unused by driver + * sizes: array of virtqueue sizes * Returns 0 on success or error status * @del_vqs: free virtqueues found by find_vqs(). * @synchronize_cbs: synchronize with the virtqueue callbacks (optional) @@ -103,7 +104,9 @@ struct virtio_config_ops { void (*reset)(struct virtio_device *vdev); int (*find_vqs)(struct virtio_device *, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], const bool *ctx, + const char * const names[], + u32 sizes[], + const bool *ctx, struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); void (*synchronize_cbs)(struct virtio_device *); @@ -212,7 +215,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, const char *names[] = { n }; struct virtqueue *vq; int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL, - NULL); + NULL, NULL); if (err < 0) return ERR_PTR(err); return vq; @@ -224,7 +227,8 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, const char * const names[], struct irq_affinity *desc) { - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc); + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, + NULL, desc); } static inline @@ -233,8 +237,8 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, const char * const names[], const bool *ctx, struct irq_affinity *desc) { - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, - desc); + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, + ctx, desc); } /** -- cgit v1.2.3 From fe3dc04e31aa51f91dc7f741a5f76cc4817eb5b4 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:56 +0800 Subject: virtio: add helper virtio_find_vqs_ctx_size() Introduce helper virtio_find_vqs_ctx_size() to call find_vqs and specify the maximum size of each vq ring. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-37-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 888f7e96f0c7..6adff09f7170 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -241,6 +241,18 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, ctx, desc); } +static inline +int virtio_find_vqs_ctx_size(struct virtio_device *vdev, u32 nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char * const names[], + u32 sizes[], + const bool *ctx, struct irq_affinity *desc) +{ + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, sizes, + ctx, desc); +} + /** * virtio_synchronize_cbs - synchronize with virtqueue callbacks * @vdev: the device -- cgit v1.2.3 From 699b045a8e43bd1063db4795be685bfd659649dc Mon Sep 17 00:00:00 2001 From: Alvaro Karsz Date: Mon, 18 Jul 2022 12:11:02 +0300 Subject: net: virtio_net: notifications coalescing support New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL. Control a Virtio network device notifications coalescing parameters using the control virtqueue. A device that supports this fetature can receive VIRTIO_NET_CTRL_NOTF_COAL control commands. - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET: Ask the network device to change the following parameters: - tx_usecs: Maximum number of usecs to delay a TX notification. - tx_max_packets: Maximum number of packets to send before a TX notification. - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET: Ask the network device to change the following parameters: - rx_usecs: Maximum number of usecs to delay a RX notification. - rx_max_packets: Maximum number of packets to receive before a RX notification. VirtIO spec. patch: https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html Signed-off-by: Alvaro Karsz Message-Id: <20220718091102.498774-1-alvaro.karsz@solid-run.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Jakub Kicinski Acked-by: Jason Wang --- drivers/net/virtio_net.c | 111 ++++++++++++++++++++++++++++++++++------ include/uapi/linux/virtio_net.h | 34 +++++++++++- 2 files changed, 129 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 16783ed782c5..d9c434b00e9b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -270,6 +270,12 @@ struct virtnet_info { u8 duplex; u32 speed; + /* Interrupt coalescing settings */ + u32 tx_usecs; + u32 rx_usecs; + u32 tx_max_packets; + u32 rx_max_packets; + unsigned long guest_offloads; unsigned long guest_offloads_capable; @@ -2737,27 +2743,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev, return 0; } +static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, + struct ethtool_coalesce *ec) +{ + struct scatterlist sgs_tx, sgs_rx; + struct virtio_net_ctrl_coal_tx coal_tx; + struct virtio_net_ctrl_coal_rx coal_rx; + + coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); + coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); + sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx)); + + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, + VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, + &sgs_tx)) + return -EINVAL; + + /* Save parameters */ + vi->tx_usecs = ec->tx_coalesce_usecs; + vi->tx_max_packets = ec->tx_max_coalesced_frames; + + coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); + coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); + sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx)); + + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, + VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, + &sgs_rx)) + return -EINVAL; + + /* Save parameters */ + vi->rx_usecs = ec->rx_coalesce_usecs; + vi->rx_max_packets = ec->rx_max_coalesced_frames; + + return 0; +} + +static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) +{ + /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL + * feature is negotiated. + */ + if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) + return -EOPNOTSUPP; + + if (ec->tx_max_coalesced_frames > 1 || + ec->rx_max_coalesced_frames != 1) + return -EINVAL; + + return 0; +} + static int virtnet_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { struct virtnet_info *vi = netdev_priv(dev); - int i, napi_weight; - - if (ec->tx_max_coalesced_frames > 1 || - ec->rx_max_coalesced_frames != 1) - return -EINVAL; + int ret, i, napi_weight; + bool update_napi = false; + /* Can't change NAPI weight if the link is up */ napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; if (napi_weight ^ vi->sq[0].napi.weight) { if (dev->flags & IFF_UP) return -EBUSY; + else + update_napi = true; + } + + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) + ret = virtnet_send_notf_coal_cmds(vi, ec); + else + ret = virtnet_coal_params_supported(ec); + + if (ret) + return ret; + + if (update_napi) { for (i = 0; i < vi->max_queue_pairs; i++) vi->sq[i].napi.weight = napi_weight; } - return 0; + return ret; } static int virtnet_get_coalesce(struct net_device *dev, @@ -2765,16 +2833,19 @@ static int virtnet_get_coalesce(struct net_device *dev, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { - struct ethtool_coalesce ec_default = { - .cmd = ETHTOOL_GCOALESCE, - .rx_max_coalesced_frames = 1, - }; struct virtnet_info *vi = netdev_priv(dev); - memcpy(ec, &ec_default, sizeof(ec_default)); + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { + ec->rx_coalesce_usecs = vi->rx_usecs; + ec->tx_coalesce_usecs = vi->tx_usecs; + ec->tx_max_coalesced_frames = vi->tx_max_packets; + ec->rx_max_coalesced_frames = vi->rx_max_packets; + } else { + ec->rx_max_coalesced_frames = 1; - if (vi->sq[0].napi.weight) - ec->tx_max_coalesced_frames = 1; + if (vi->sq[0].napi.weight) + ec->tx_max_coalesced_frames = 1; + } return 0; } @@ -2893,7 +2964,8 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) } static const struct ethtool_ops virtnet_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, + .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USECS, .get_drvinfo = virtnet_get_drvinfo, .get_link = ethtool_op_get_link, .get_ringparam = virtnet_get_ringparam, @@ -3606,6 +3678,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev) VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, + "VIRTIO_NET_F_CTRL_VQ") || + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, "VIRTIO_NET_F_CTRL_VQ"))) { return false; } @@ -3742,6 +3816,13 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) vi->mergeable_rx_bufs = true; + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { + vi->rx_usecs = 0; + vi->tx_usecs = 0; + vi->tx_max_packets = 0; + vi->rx_max_packets = 0; + } + if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) vi->has_rss_hash_report = true; @@ -3977,7 +4058,7 @@ static struct virtio_device_id id_table[] = { VIRTIO_NET_F_CTRL_MAC_ADDR, \ VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ - VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL static unsigned int features[] = { VIRTNET_FEATURES, diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 3f55a4215f11..29ced55514d4 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -56,7 +56,7 @@ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ - +#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ @@ -355,4 +355,36 @@ struct virtio_net_hash_config { #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 +/* + * Control notifications coalescing. + * + * Request the device to change the notifications coalescing parameters. + * + * Available with the VIRTIO_NET_F_NOTF_COAL feature bit. + */ +#define VIRTIO_NET_CTRL_NOTF_COAL 6 +/* + * Set the tx-usecs/tx-max-packets patameters. + * tx-usecs - Maximum number of usecs to delay a TX notification. + * tx-max-packets - Maximum number of packets to send before a TX notification. + */ +struct virtio_net_ctrl_coal_tx { + __le32 tx_max_packets; + __le32 tx_usecs; +}; + +#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0 + +/* + * Set the rx-usecs/rx-max-packets patameters. + * rx-usecs - Maximum number of usecs to delay a RX notification. + * rx-max-frames - Maximum number of packets to receive before a RX notification. + */ +struct virtio_net_ctrl_coal_rx { + __le32 rx_max_packets; + __le32 rx_usecs; +}; + +#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET 1 + #endif /* _UAPI_LINUX_VIRTIO_NET_H */ -- cgit v1.2.3 From 79a463be9e0051997508d52cf411ed5e91d657f6 Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Wed, 3 Aug 2022 12:55:22 +0800 Subject: vduse: Support registering userspace memory for IOVA regions Introduce two ioctls: VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM to support registering and de-registering userspace memory for IOVA regions. Now it only supports registering userspace memory for bounce buffer region in virtio-vdpa case. Signed-off-by: Xie Yongji Acked-by: Jason Wang Message-Id: <20220803045523.23851-5-xieyongji@bytedance.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_user/vduse_dev.c | 141 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/vduse.h | 23 ++++++ 2 files changed, 164 insertions(+) (limited to 'include') diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 3bc27de58f46..eedff0a3885a 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include #include @@ -64,6 +66,13 @@ struct vduse_vdpa { struct vduse_dev *dev; }; +struct vduse_umem { + unsigned long iova; + unsigned long npages; + struct page **pages; + struct mm_struct *mm; +}; + struct vduse_dev { struct vduse_vdpa *vdev; struct device *dev; @@ -95,6 +104,8 @@ struct vduse_dev { u8 status; u32 vq_num; u32 vq_align; + struct vduse_umem *umem; + struct mutex mem_lock; }; struct vduse_dev_msg { @@ -917,6 +928,102 @@ unlock: return ret; } +static int vduse_dev_dereg_umem(struct vduse_dev *dev, + u64 iova, u64 size) +{ + int ret; + + mutex_lock(&dev->mem_lock); + ret = -ENOENT; + if (!dev->umem) + goto unlock; + + ret = -EINVAL; + if (dev->umem->iova != iova || size != dev->domain->bounce_size) + goto unlock; + + vduse_domain_remove_user_bounce_pages(dev->domain); + unpin_user_pages_dirty_lock(dev->umem->pages, + dev->umem->npages, true); + atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm); + mmdrop(dev->umem->mm); + vfree(dev->umem->pages); + kfree(dev->umem); + dev->umem = NULL; + ret = 0; +unlock: + mutex_unlock(&dev->mem_lock); + return ret; +} + +static int vduse_dev_reg_umem(struct vduse_dev *dev, + u64 iova, u64 uaddr, u64 size) +{ + struct page **page_list = NULL; + struct vduse_umem *umem = NULL; + long pinned = 0; + unsigned long npages, lock_limit; + int ret; + + if (!dev->domain->bounce_map || + size != dev->domain->bounce_size || + iova != 0 || uaddr & ~PAGE_MASK) + return -EINVAL; + + mutex_lock(&dev->mem_lock); + ret = -EEXIST; + if (dev->umem) + goto unlock; + + ret = -ENOMEM; + npages = size >> PAGE_SHIFT; + page_list = __vmalloc(array_size(npages, sizeof(struct page *)), + GFP_KERNEL_ACCOUNT); + umem = kzalloc(sizeof(*umem), GFP_KERNEL); + if (!page_list || !umem) + goto unlock; + + mmap_read_lock(current->mm); + + lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK)); + if (npages + atomic64_read(¤t->mm->pinned_vm) > lock_limit) + goto out; + + pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE, + page_list, NULL); + if (pinned != npages) { + ret = pinned < 0 ? pinned : -ENOMEM; + goto out; + } + + ret = vduse_domain_add_user_bounce_pages(dev->domain, + page_list, pinned); + if (ret) + goto out; + + atomic64_add(npages, ¤t->mm->pinned_vm); + + umem->pages = page_list; + umem->npages = pinned; + umem->iova = iova; + umem->mm = current->mm; + mmgrab(current->mm); + + dev->umem = umem; +out: + if (ret && pinned > 0) + unpin_user_pages(page_list, pinned); + + mmap_read_unlock(current->mm); +unlock: + if (ret) { + vfree(page_list); + kfree(umem); + } + mutex_unlock(&dev->mem_lock); + return ret; +} + static long vduse_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -1089,6 +1196,38 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject); break; } + case VDUSE_IOTLB_REG_UMEM: { + struct vduse_iova_umem umem; + + ret = -EFAULT; + if (copy_from_user(&umem, argp, sizeof(umem))) + break; + + ret = -EINVAL; + if (!is_mem_zero((const char *)umem.reserved, + sizeof(umem.reserved))) + break; + + ret = vduse_dev_reg_umem(dev, umem.iova, + umem.uaddr, umem.size); + break; + } + case VDUSE_IOTLB_DEREG_UMEM: { + struct vduse_iova_umem umem; + + ret = -EFAULT; + if (copy_from_user(&umem, argp, sizeof(umem))) + break; + + ret = -EINVAL; + if (!is_mem_zero((const char *)umem.reserved, + sizeof(umem.reserved))) + break; + + ret = vduse_dev_dereg_umem(dev, umem.iova, + umem.size); + break; + } default: ret = -ENOIOCTLCMD; break; @@ -1101,6 +1240,7 @@ static int vduse_dev_release(struct inode *inode, struct file *file) { struct vduse_dev *dev = file->private_data; + vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size); spin_lock(&dev->msg_lock); /* Make sure the inflight messages can processed after reconncection */ list_splice_init(&dev->recv_list, &dev->send_list); @@ -1163,6 +1303,7 @@ static struct vduse_dev *vduse_dev_create(void) return NULL; mutex_init(&dev->lock); + mutex_init(&dev->mem_lock); spin_lock_init(&dev->msg_lock); INIT_LIST_HEAD(&dev->send_list); INIT_LIST_HEAD(&dev->recv_list); diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index 7cfe1c1280c0..9885e0571f09 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -210,6 +210,29 @@ struct vduse_vq_eventfd { */ #define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32) +/** + * struct vduse_iova_umem - userspace memory configuration for one IOVA region + * @uaddr: start address of userspace memory, it must be aligned to page size + * @iova: start of the IOVA region + * @size: size of the IOVA region + * @reserved: for future use, needs to be initialized to zero + * + * Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM + * ioctls to register/de-register userspace memory for IOVA regions + */ +struct vduse_iova_umem { + __u64 uaddr; + __u64 iova; + __u64 size; + __u64 reserved[3]; +}; + +/* Register userspace memory for IOVA regions */ +#define VDUSE_IOTLB_REG_UMEM _IOW(VDUSE_BASE, 0x18, struct vduse_iova_umem) + +/* De-register the userspace memory. Caller should set iova and size field. */ +#define VDUSE_IOTLB_DEREG_UMEM _IOW(VDUSE_BASE, 0x19, struct vduse_iova_umem) + /* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */ /** -- cgit v1.2.3 From ad146355bfad627bd0717ece73997c6c93b1b82e Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Wed, 3 Aug 2022 12:55:23 +0800 Subject: vduse: Support querying information of IOVA regions This introduces a new ioctl: VDUSE_IOTLB_GET_INFO to support querying some information of IOVA regions. Now it can be used to query whether the IOVA region supports userspace memory registration. Signed-off-by: Xie Yongji Message-Id: <20220803045523.23851-6-xieyongji@bytedance.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/vdpa_user/vduse_dev.c | 39 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/vduse.h | 24 +++++++++++++++++++++++ 2 files changed, 63 insertions(+) (limited to 'include') diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index eedff0a3885a..41c0b29739f1 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1228,6 +1228,45 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, umem.size); break; } + case VDUSE_IOTLB_GET_INFO: { + struct vduse_iova_info info; + struct vhost_iotlb_map *map; + struct vduse_iova_domain *domain = dev->domain; + + ret = -EFAULT; + if (copy_from_user(&info, argp, sizeof(info))) + break; + + ret = -EINVAL; + if (info.start > info.last) + break; + + if (!is_mem_zero((const char *)info.reserved, + sizeof(info.reserved))) + break; + + spin_lock(&domain->iotlb_lock); + map = vhost_iotlb_itree_first(domain->iotlb, + info.start, info.last); + if (map) { + info.start = map->start; + info.last = map->last; + info.capability = 0; + if (domain->bounce_map && map->start == 0 && + map->last == domain->bounce_size - 1) + info.capability |= VDUSE_IOVA_CAP_UMEM; + } + spin_unlock(&domain->iotlb_lock); + if (!map) + break; + + ret = -EFAULT; + if (copy_to_user(argp, &info, sizeof(info))) + break; + + ret = 0; + break; + } default: ret = -ENOIOCTLCMD; break; diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index 9885e0571f09..11bd48c72c6c 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -233,6 +233,30 @@ struct vduse_iova_umem { /* De-register the userspace memory. Caller should set iova and size field. */ #define VDUSE_IOTLB_DEREG_UMEM _IOW(VDUSE_BASE, 0x19, struct vduse_iova_umem) +/** + * struct vduse_iova_info - information of one IOVA region + * @start: start of the IOVA region + * @last: last of the IOVA region + * @capability: capability of the IOVA regsion + * @reserved: for future use, needs to be initialized to zero + * + * Structure used by VDUSE_IOTLB_GET_INFO ioctl to get information of + * one IOVA region. + */ +struct vduse_iova_info { + __u64 start; + __u64 last; +#define VDUSE_IOVA_CAP_UMEM (1 << 0) + __u64 capability; + __u64 reserved[3]; +}; + +/* + * Find the first IOVA region that overlaps with the range [start, last] + * and return some information on it. Caller should set start and last fields. + */ +#define VDUSE_IOTLB_GET_INFO _IOWR(VDUSE_BASE, 0x1a, struct vduse_iova_info) + /* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */ /** -- cgit v1.2.3 From cae15c2ed8e6e058bd5e32de292ab7982640161e Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 14 Jul 2022 14:39:26 +0300 Subject: vdpa/mlx5: Implement susupend virtqueue callback Implement the suspend callback allowing to suspend the virtqueues so they stop processing descriptors. This is required to allow to query a consistent state of the virtqueue while live migration is taking place. Signed-off-by: Eli Cohen Message-Id: <20220714113927.85729-2-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 83 ++++++++++++++++++++++++++++++++++++-- include/linux/mlx5/mlx5_ifc_vdpa.h | 8 ++++ 2 files changed, 88 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 99bbbf38c8b1..a476e06476f6 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -164,6 +164,7 @@ struct mlx5_vdpa_net { bool setup; u32 cur_num_vqs; u32 rqt_size; + bool nb_registered; struct notifier_block nb; struct vdpa_callback config_cb; struct mlx5_vdpa_wq_ent cvq_ent; @@ -895,6 +896,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque if (err) goto err_cmd; + mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT; kfree(in); mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); @@ -922,6 +924,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id); return; } + mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; umems_destroy(ndev, mvq); } @@ -1121,6 +1124,20 @@ err_cmd: return err; } +static bool is_valid_state_change(int oldstate, int newstate) +{ + switch (oldstate) { + case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT: + return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY; + case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY: + return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; + case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: + case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR: + default: + return false; + } +} + static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state) { int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); @@ -1130,6 +1147,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque void *in; int err; + if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE) + return 0; + + if (!is_valid_state_change(mvq->fw_state, state)) + return -EINVAL; + in = kzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1992,6 +2015,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct mlx5_vdpa_virtqueue *mvq; + int err; if (!mvdev->actual_features) return; @@ -2005,8 +2029,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready } mvq = &ndev->vqs[idx]; - if (!ready) + if (!ready) { suspend_vq(ndev, mvq); + } else { + err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); + if (err) { + mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err); + ready = false; + } + } + mvq->ready = ready; } @@ -2733,6 +2765,37 @@ out_err: return err; } +static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_control_vq *cvq; + + if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) + return; + + cvq = &mvdev->cvq; + cvq->ready = false; +} + +static int mlx5_vdpa_suspend(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq; + int i; + + down_write(&ndev->reslock); + mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); + ndev->nb_registered = false; + flush_workqueue(ndev->mvdev.wq); + for (i = 0; i < ndev->cur_num_vqs; i++) { + mvq = &ndev->vqs[i]; + suspend_vq(ndev, mvq); + } + mlx5_vdpa_cvq_suspend(mvdev); + up_write(&ndev->reslock); + return 0; +} + static const struct vdpa_config_ops mlx5_vdpa_ops = { .set_vq_address = mlx5_vdpa_set_vq_address, .set_vq_num = mlx5_vdpa_set_vq_num, @@ -2763,6 +2826,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = { .get_generation = mlx5_vdpa_get_generation, .set_map = mlx5_vdpa_set_map, .free = mlx5_vdpa_free, + .suspend = mlx5_vdpa_suspend, }; static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) @@ -2828,6 +2892,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev) mvq->index = i; mvq->ndev = ndev; mvq->fwqp.fw = true; + mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; } for (; i < ndev->mvdev.max_vqs; i++) { mvq = &ndev->vqs[i]; @@ -2902,13 +2967,21 @@ static int event_handler(struct notifier_block *nb, unsigned long event, void *p switch (eqe->sub_type) { case MLX5_PORT_CHANGE_SUBTYPE_DOWN: case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + down_read(&ndev->reslock); + if (!ndev->nb_registered) { + up_read(&ndev->reslock); + return NOTIFY_DONE; + } wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); - if (!wqent) + if (!wqent) { + up_read(&ndev->reslock); return NOTIFY_DONE; + } wqent->mvdev = &ndev->mvdev; INIT_WORK(&wqent->work, update_carrier); queue_work(ndev->mvdev.wq, &wqent->work); + up_read(&ndev->reslock); ret = NOTIFY_OK; break; default: @@ -3062,6 +3135,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, ndev->nb.notifier_call = event_handler; mlx5_notifier_register(mdev, &ndev->nb); + ndev->nb_registered = true; mvdev->vdev.mdev = &mgtdev->mgtdev; err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1); if (err) @@ -3093,7 +3167,10 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct workqueue_struct *wq; - mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); + if (ndev->nb_registered) { + mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); + ndev->nb_registered = false; + } wq = mvdev->wq; mvdev->wq = NULL; destroy_workqueue(wq); diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h index 4414ed5b6ed2..9becdc3fa503 100644 --- a/include/linux/mlx5/mlx5_ifc_vdpa.h +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h @@ -150,6 +150,14 @@ enum { MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR = 0x3, }; +/* This indicates that the object was not created or has already + * been desroyed. It is very safe to assume that this object will never + * have so many states + */ +enum { + MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff +}; + enum { MLX5_RQTC_LIST_Q_TYPE_RQ = 0x0, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q = 0x1, -- cgit v1.2.3 From 848ecea184e1253758423b37cbfc1ed732ccf5b4 Mon Sep 17 00:00:00 2001 From: Eugenio Pérez Date: Wed, 10 Aug 2022 19:15:09 +0200 Subject: vdpa: Add suspend operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This operation is optional: It it's not implemented, backend feature bit will not be exposed. Signed-off-by: Eugenio Pérez Message-Id: <20220810171512.2343333-2-eperezma@redhat.com> Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 7b4a13d3bd91..d282f464d2f1 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -218,6 +218,9 @@ struct vdpa_map_file { * @reset: Reset device * @vdev: vdpa device * Returns integer: success (0) or error (< 0) + * @suspend: Suspend or resume the device (optional) + * @vdev: vdpa device + * Returns integer: success (0) or error (< 0) * @get_config_size: Get the size of the configuration space includes * fields that are conditional on feature bits. * @vdev: vdpa device @@ -319,6 +322,7 @@ struct vdpa_config_ops { u8 (*get_status)(struct vdpa_device *vdev); void (*set_status)(struct vdpa_device *vdev, u8 status); int (*reset)(struct vdpa_device *vdev); + int (*suspend)(struct vdpa_device *vdev); size_t (*get_config_size)(struct vdpa_device *vdev); void (*get_config)(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len); -- cgit v1.2.3 From 0723f1df5c3ec8a1112d150dab98e149361ef488 Mon Sep 17 00:00:00 2001 From: Eugenio Pérez Date: Wed, 10 Aug 2022 19:15:10 +0200 Subject: vhost-vdpa: introduce SUSPEND backend feature bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userland knows if it can suspend the device or not by checking this feature bit. It's only offered if the vdpa driver backend implements the suspend() operation callback, and to offer it or userland to ack it if the backend does not offer that callback is an error. Signed-off-by: Eugenio Pérez Message-Id: <20220810171512.2343333-3-eperezma@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 16 +++++++++++++++- include/uapi/linux/vhost_types.h | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 2c997d77d266..092752fea8e1 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -347,6 +347,14 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, return 0; } +static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->suspend; +} + static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; @@ -577,7 +585,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, if (cmd == VHOST_SET_BACKEND_FEATURES) { if (copy_from_user(&features, featurep, sizeof(features))) return -EFAULT; - if (features & ~VHOST_VDPA_BACKEND_FEATURES) + if (features & ~(VHOST_VDPA_BACKEND_FEATURES | + BIT_ULL(VHOST_BACKEND_F_SUSPEND))) + return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) && + !vhost_vdpa_can_suspend(v)) return -EOPNOTSUPP; vhost_set_backend_features(&v->vdev, features); return 0; @@ -628,6 +640,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, break; case VHOST_GET_BACKEND_FEATURES: features = VHOST_VDPA_BACKEND_FEATURES; + if (vhost_vdpa_can_suspend(v)) + features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND); if (copy_to_user(featurep, &features, sizeof(features))) r = -EFAULT; break; diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 634cee485abb..1bdd6e363f4c 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -161,5 +161,7 @@ struct vhost_vdpa_iova_range { * message */ #define VHOST_BACKEND_F_IOTLB_ASID 0x3 +/* Device can be suspended */ +#define VHOST_BACKEND_F_SUSPEND 0x4 #endif -- cgit v1.2.3 From f345a0143b4dd1cfc850009c6979a3801b86a06f Mon Sep 17 00:00:00 2001 From: Eugenio Pérez Date: Wed, 10 Aug 2022 19:15:11 +0200 Subject: vhost-vdpa: uAPI to suspend the device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ioctl adds support for suspending the device from userspace. This is a must before getting virtqueue indexes (base) for live migration, since the device could modify them after userland gets them. There are individual ways to perform that action for some devices (VHOST_NET_SET_BACKEND, VHOST_VSOCK_SET_RUNNING, ...) but there was no way to perform it for any vhost device (and, in particular, vhost-vdpa). After a successful return of the ioctl call the device must not process more virtqueue descriptors. The device can answer to read or writes of config fields as if it were not suspended. In particular, writing to "queue_enable" with a value of 1 will not make the device start processing buffers of the virtqueue. Signed-off-by: Eugenio Pérez Message-Id: <20220810171512.2343333-4-eperezma@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 19 +++++++++++++++++++ include/uapi/linux/vhost.h | 9 +++++++++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 092752fea8e1..166044642fd5 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -478,6 +478,22 @@ static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp) return 0; } +/* After a successful return of ioctl the device must not process more + * virtqueue descriptors. The device can answer to read or writes of config + * fields as if it were not suspended. In particular, writing to "queue_enable" + * with a value of 1 will not make the device start processing buffers. + */ +static long vhost_vdpa_suspend(struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (!ops->suspend) + return -EOPNOTSUPP; + + return ops->suspend(vdpa); +} + static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { @@ -654,6 +670,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_VDPA_GET_VQS_COUNT: r = vhost_vdpa_get_vqs_count(v, argp); break; + case VHOST_VDPA_SUSPEND: + r = vhost_vdpa_suspend(v); + break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index cab645d4a645..f9f115a7c75b 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -171,4 +171,13 @@ #define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \ struct vhost_vring_state) +/* Suspend a device so it does not process virtqueue requests anymore + * + * After the return of ioctl the device must preserve all the necessary state + * (the virtqueue vring base plus the possible device specific states) that is + * required for restoring in the future. The device must not change its + * configuration after that point. + */ +#define VHOST_VDPA_SUSPEND _IO(VHOST_VIRTIO, 0x7D) + #endif -- cgit v1.2.3