From d16c0cd27331179daa86a3a489f50ce409121c80 Mon Sep 17 00:00:00 2001 From: Ricardo Cañuelo Date: Mon, 10 Oct 2022 08:43:59 +0200 Subject: docs: driver-api: virtio: virtio on Linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Basic doc about Virtio on Linux and a short tutorial on Virtio drivers. includes the following fixup: virtio: fix virtio_config_ops kerneldocs Fixes two warning messages when building htmldocs: warning: duplicate section name 'Note' warning: expecting prototype for virtio_config_ops(). Prototype was for vq_callback_t() instead Message-Id: <20221010064359.1324353-2-ricardo.canuelo@collabora.com> Signed-off-by: Ricardo Cañuelo Reviewed-by: Cornelia Huck Message-Id: <20221220100035.2712449-1-ricardo.canuelo@collabora.com> Reported-by: Stephen Rothwell Reviewed-by: Bagas Sanjaya Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 4b517649cfe8..2b3438de2c4d 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -16,8 +16,10 @@ struct virtio_shm_region { u64 len; }; +typedef void vq_callback_t(struct virtqueue *); + /** - * virtio_config_ops - operations for configuring a virtio device + * struct virtio_config_ops - operations for configuring a virtio device * Note: Do not assume that a transport implements all of the operations * getting/setting a value as a simple read/write! Generally speaking, * any of @get/@set, @get_status/@set_status, or @get_features/ @@ -69,7 +71,8 @@ struct virtio_shm_region { * vdev: the virtio_device * This sends the driver feature bits to the device: it can change * the dev->feature bits if it wants. - * Note: despite the name this can be called any number of times. + * Note that despite the name this can be called any number of + * times. * Returns 0 on success or error status * @bus_name: return the bus name associated with the device (optional) * vdev: the virtio_device @@ -91,7 +94,6 @@ struct virtio_shm_region { * If disable_vq_and_reset is set, then enable_vq_after_reset must also be * set. */ -typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { void (*get)(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len); -- cgit v1.2.3 From 95bfec41bd3d39b7659cba65b72080420bf5691e Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Sat, 15 Oct 2022 23:41:27 -0400 Subject: virtio-blk: add support for zoned block devices This patch adds support for Zoned Block Devices (ZBDs) to the kernel virtio-blk driver. The patch accompanies the virtio-blk ZBD support draft that is now being proposed for standardization. The latest version of the draft is linked at https://github.com/oasis-tcs/virtio-spec/issues/143 . The QEMU zoned device code that implements these protocol extensions has been developed by Sam Li and it is currently in review at the QEMU mailing list. A number of virtblk request structure changes has been introduced to accommodate the functionality that is specific to zoned block devices and, most importantly, make room for carrying the Zoned Append sector value from the device back to the driver along with the request status. The zone-specific code in the patch is heavily influenced by NVMe ZNS code in drivers/nvme/host/zns.c, but it is simpler because the proposed virtio ZBD draft only covers the zoned device features that are relevant to the zoned functionality provided by Linux block layer. includes the following fixup: virtio-blk: fix probe without CONFIG_BLK_DEV_ZONED When building without CONFIG_BLK_DEV_ZONED, VIRTIO_BLK_F_ZONED is excluded from array of driver features. As a result virtio_has_feature panics in virtio_check_driver_offered_feature since that by design verifies that a feature we are checking for is listed in the feature array. To fix, replace the call to virtio_has_feature with a stub. Message-Id: <20221016034127.330942-3-dmitry.fomichev@wdc.com> Co-developed-by: Stefan Hajnoczi Signed-off-by: Stefan Hajnoczi Signed-off-by: Dmitry Fomichev Message-Id: <20221220112340.518841-1-mst@redhat.com> Reported-by: Linux Kernel Functional Testing Tested-by: Linux Kernel Functional Testing Reported-by: Xuan Zhuo Debugged-by: Xuan Zhuo Tested-by: Anders Roxell Tested-by: Marek Szyprowski Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/block/virtio_blk.c | 387 ++++++++++++++++++++++++++++++++++++++-- include/uapi/linux/virtio_blk.h | 105 +++++++++++ 2 files changed, 474 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6a77fa917428..71e1e4bd8439 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #define PART_BITS 4 @@ -80,22 +81,51 @@ struct virtio_blk { int num_vqs; int io_queues[HCTX_MAX_TYPES]; struct virtio_blk_vq *vqs; + + /* For zoned device */ + unsigned int zone_sectors; }; struct virtblk_req { + /* Out header */ struct virtio_blk_outhdr out_hdr; - u8 status; + + /* In header */ + union { + u8 status; + + /* + * The zone append command has an extended in header. + * The status field in zone_append_in_hdr must have + * the same offset in virtblk_req as the non-zoned + * status field above. + */ + struct { + u8 status; + u8 reserved[7]; + u64 append_sector; + } zone_append_in_hdr; + }; + + size_t in_hdr_len; + struct sg_table sg_table; struct scatterlist sg[]; }; -static inline blk_status_t virtblk_result(struct virtblk_req *vbr) +static inline blk_status_t virtblk_result(u8 status) { - switch (vbr->status) { + switch (status) { case VIRTIO_BLK_S_OK: return BLK_STS_OK; case VIRTIO_BLK_S_UNSUPP: return BLK_STS_NOTSUPP; + case VIRTIO_BLK_S_ZONE_OPEN_RESOURCE: + return BLK_STS_ZONE_OPEN_RESOURCE; + case VIRTIO_BLK_S_ZONE_ACTIVE_RESOURCE: + return BLK_STS_ZONE_ACTIVE_RESOURCE; + case VIRTIO_BLK_S_IOERR: + case VIRTIO_BLK_S_ZONE_UNALIGNED_WP: default: return BLK_STS_IOERR; } @@ -111,11 +141,11 @@ static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr) { - struct scatterlist hdr, status, *sgs[3]; + struct scatterlist out_hdr, in_hdr, *sgs[3]; unsigned int num_out = 0, num_in = 0; - sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); - sgs[num_out++] = &hdr; + sg_init_one(&out_hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); + sgs[num_out++] = &out_hdr; if (vbr->sg_table.nents) { if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) @@ -124,8 +154,8 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr) sgs[num_out + num_in++] = vbr->sg_table.sgl; } - sg_init_one(&status, &vbr->status, sizeof(vbr->status)); - sgs[num_out + num_in++] = &status; + sg_init_one(&in_hdr, &vbr->status, vbr->in_hdr_len); + sgs[num_out + num_in++] = &in_hdr; return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); } @@ -214,21 +244,22 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, struct request *req, struct virtblk_req *vbr) { + size_t in_hdr_len = sizeof(vbr->status); bool unmap = false; u32 type; + u64 sector = 0; - vbr->out_hdr.sector = 0; + /* Set fields for all request types */ + vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); switch (req_op(req)) { case REQ_OP_READ: type = VIRTIO_BLK_T_IN; - vbr->out_hdr.sector = cpu_to_virtio64(vdev, - blk_rq_pos(req)); + sector = blk_rq_pos(req); break; case REQ_OP_WRITE: type = VIRTIO_BLK_T_OUT; - vbr->out_hdr.sector = cpu_to_virtio64(vdev, - blk_rq_pos(req)); + sector = blk_rq_pos(req); break; case REQ_OP_FLUSH: type = VIRTIO_BLK_T_FLUSH; @@ -243,16 +274,42 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, case REQ_OP_SECURE_ERASE: type = VIRTIO_BLK_T_SECURE_ERASE; break; - case REQ_OP_DRV_IN: - type = VIRTIO_BLK_T_GET_ID; + case REQ_OP_ZONE_OPEN: + type = VIRTIO_BLK_T_ZONE_OPEN; + sector = blk_rq_pos(req); + break; + case REQ_OP_ZONE_CLOSE: + type = VIRTIO_BLK_T_ZONE_CLOSE; + sector = blk_rq_pos(req); + break; + case REQ_OP_ZONE_FINISH: + type = VIRTIO_BLK_T_ZONE_FINISH; + sector = blk_rq_pos(req); + break; + case REQ_OP_ZONE_APPEND: + type = VIRTIO_BLK_T_ZONE_APPEND; + sector = blk_rq_pos(req); + in_hdr_len = sizeof(vbr->zone_append_in_hdr); break; + case REQ_OP_ZONE_RESET: + type = VIRTIO_BLK_T_ZONE_RESET; + sector = blk_rq_pos(req); + break; + case REQ_OP_ZONE_RESET_ALL: + type = VIRTIO_BLK_T_ZONE_RESET_ALL; + break; + case REQ_OP_DRV_IN: + /* Out header already filled in, nothing to do */ + return 0; default: WARN_ON_ONCE(1); return BLK_STS_IOERR; } + /* Set fields for non-REQ_OP_DRV_IN request types */ + vbr->in_hdr_len = in_hdr_len; vbr->out_hdr.type = cpu_to_virtio32(vdev, type); - vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); + vbr->out_hdr.sector = cpu_to_virtio64(vdev, sector); if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES || type == VIRTIO_BLK_T_SECURE_ERASE) { @@ -266,10 +323,15 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, static inline void virtblk_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); + int status = virtblk_result(vbr->status); virtblk_unmap_data(req, vbr); virtblk_cleanup_cmd(req); - blk_mq_end_request(req, virtblk_result(vbr)); + + if (req_op(req) == REQ_OP_ZONE_APPEND) + req->__sector = le64_to_cpu(vbr->zone_append_in_hdr.append_sector); + + blk_mq_end_request(req, status); } static void virtblk_done(struct virtqueue *vq) @@ -457,6 +519,275 @@ static void virtio_queue_rqs(struct request **rqlist) *rqlist = requeue_list; } +#ifdef CONFIG_BLK_DEV_ZONED +static void *virtblk_alloc_report_buffer(struct virtio_blk *vblk, + unsigned int nr_zones, + unsigned int zone_sectors, + size_t *buflen) +{ + struct request_queue *q = vblk->disk->queue; + size_t bufsize; + void *buf; + + nr_zones = min_t(unsigned int, nr_zones, + get_capacity(vblk->disk) >> ilog2(zone_sectors)); + + bufsize = sizeof(struct virtio_blk_zone_report) + + nr_zones * sizeof(struct virtio_blk_zone_descriptor); + bufsize = min_t(size_t, bufsize, + queue_max_hw_sectors(q) << SECTOR_SHIFT); + bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT); + + while (bufsize >= sizeof(struct virtio_blk_zone_report)) { + buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY); + if (buf) { + *buflen = bufsize; + return buf; + } + bufsize >>= 1; + } + + return NULL; +} + +static int virtblk_submit_zone_report(struct virtio_blk *vblk, + char *report_buf, size_t report_len, + sector_t sector) +{ + struct request_queue *q = vblk->disk->queue; + struct request *req; + struct virtblk_req *vbr; + int err; + + req = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0); + if (IS_ERR(req)) + return PTR_ERR(req); + + vbr = blk_mq_rq_to_pdu(req); + vbr->in_hdr_len = sizeof(vbr->status); + vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_ZONE_REPORT); + vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, sector); + + err = blk_rq_map_kern(q, req, report_buf, report_len, GFP_KERNEL); + if (err) + goto out; + + blk_execute_rq(req, false); + err = blk_status_to_errno(virtblk_result(vbr->status)); +out: + blk_mq_free_request(req); + return err; +} + +static int virtblk_parse_zone(struct virtio_blk *vblk, + struct virtio_blk_zone_descriptor *entry, + unsigned int idx, unsigned int zone_sectors, + report_zones_cb cb, void *data) +{ + struct blk_zone zone = { }; + + if (entry->z_type != VIRTIO_BLK_ZT_SWR && + entry->z_type != VIRTIO_BLK_ZT_SWP && + entry->z_type != VIRTIO_BLK_ZT_CONV) { + dev_err(&vblk->vdev->dev, "invalid zone type %#x\n", + entry->z_type); + return -EINVAL; + } + + zone.type = entry->z_type; + zone.cond = entry->z_state; + zone.len = zone_sectors; + zone.capacity = le64_to_cpu(entry->z_cap); + zone.start = le64_to_cpu(entry->z_start); + if (zone.cond == BLK_ZONE_COND_FULL) + zone.wp = zone.start + zone.len; + else + zone.wp = le64_to_cpu(entry->z_wp); + + return cb(&zone, idx, data); +} + +static int virtblk_report_zones(struct gendisk *disk, sector_t sector, + unsigned int nr_zones, report_zones_cb cb, + void *data) +{ + struct virtio_blk *vblk = disk->private_data; + struct virtio_blk_zone_report *report; + unsigned int zone_sectors = vblk->zone_sectors; + unsigned int nz, i; + int ret, zone_idx = 0; + size_t buflen; + + if (WARN_ON_ONCE(!vblk->zone_sectors)) + return -EOPNOTSUPP; + + report = virtblk_alloc_report_buffer(vblk, nr_zones, + zone_sectors, &buflen); + if (!report) + return -ENOMEM; + + while (zone_idx < nr_zones && sector < get_capacity(vblk->disk)) { + memset(report, 0, buflen); + + ret = virtblk_submit_zone_report(vblk, (char *)report, + buflen, sector); + if (ret) { + if (ret > 0) + ret = -EIO; + goto out_free; + } + nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones); + if (!nz) + break; + + for (i = 0; i < nz && zone_idx < nr_zones; i++) { + ret = virtblk_parse_zone(vblk, &report->zones[i], + zone_idx, zone_sectors, cb, data); + if (ret) + goto out_free; + sector = le64_to_cpu(report->zones[i].z_start) + zone_sectors; + zone_idx++; + } + } + + if (zone_idx > 0) + ret = zone_idx; + else + ret = -EINVAL; +out_free: + kvfree(report); + return ret; +} + +static void virtblk_revalidate_zones(struct virtio_blk *vblk) +{ + u8 model; + + if (!vblk->zone_sectors) + return; + + virtio_cread(vblk->vdev, struct virtio_blk_config, + zoned.model, &model); + if (!blk_revalidate_disk_zones(vblk->disk, NULL)) + set_capacity_and_notify(vblk->disk, 0); +} + +static int virtblk_probe_zoned_device(struct virtio_device *vdev, + struct virtio_blk *vblk, + struct request_queue *q) +{ + u32 v; + u8 model; + int ret; + + virtio_cread(vdev, struct virtio_blk_config, + zoned.model, &model); + + switch (model) { + case VIRTIO_BLK_Z_NONE: + return 0; + case VIRTIO_BLK_Z_HM: + break; + case VIRTIO_BLK_Z_HA: + /* + * Present the host-aware device as a regular drive. + * TODO It is possible to add an option to make it appear + * in the system as a zoned drive. + */ + return 0; + default: + dev_err(&vdev->dev, "unsupported zone model %d\n", model); + return -EINVAL; + } + + dev_dbg(&vdev->dev, "probing host-managed zoned device\n"); + + disk_set_zoned(vblk->disk, BLK_ZONED_HM); + blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); + + virtio_cread(vdev, struct virtio_blk_config, + zoned.max_open_zones, &v); + disk_set_max_open_zones(vblk->disk, le32_to_cpu(v)); + + dev_dbg(&vdev->dev, "max open zones = %u\n", le32_to_cpu(v)); + + virtio_cread(vdev, struct virtio_blk_config, + zoned.max_active_zones, &v); + disk_set_max_active_zones(vblk->disk, le32_to_cpu(v)); + dev_dbg(&vdev->dev, "max active zones = %u\n", le32_to_cpu(v)); + + virtio_cread(vdev, struct virtio_blk_config, + zoned.write_granularity, &v); + if (!v) { + dev_warn(&vdev->dev, "zero write granularity reported\n"); + return -ENODEV; + } + blk_queue_physical_block_size(q, le32_to_cpu(v)); + blk_queue_io_min(q, le32_to_cpu(v)); + + dev_dbg(&vdev->dev, "write granularity = %u\n", le32_to_cpu(v)); + + /* + * virtio ZBD specification doesn't require zones to be a power of + * two sectors in size, but the code in this driver expects that. + */ + virtio_cread(vdev, struct virtio_blk_config, zoned.zone_sectors, &v); + vblk->zone_sectors = le32_to_cpu(v); + if (vblk->zone_sectors == 0 || !is_power_of_2(vblk->zone_sectors)) { + dev_err(&vdev->dev, + "zoned device with non power of two zone size %u\n", + vblk->zone_sectors); + return -ENODEV; + } + dev_dbg(&vdev->dev, "zone sectors = %u\n", vblk->zone_sectors); + + if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { + dev_warn(&vblk->vdev->dev, + "ignoring negotiated F_DISCARD for zoned device\n"); + blk_queue_max_discard_sectors(q, 0); + } + + ret = blk_revalidate_disk_zones(vblk->disk, NULL); + if (!ret) { + virtio_cread(vdev, struct virtio_blk_config, + zoned.max_append_sectors, &v); + if (!v) { + dev_warn(&vdev->dev, "zero max_append_sectors reported\n"); + return -ENODEV; + } + blk_queue_max_zone_append_sectors(q, le32_to_cpu(v)); + dev_dbg(&vdev->dev, "max append sectors = %u\n", le32_to_cpu(v)); + } + + return ret; +} + +static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev) +{ + return virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED); +} +#else + +/* + * Zoned block device support is not configured in this kernel. + * We only need to define a few symbols to avoid compilation errors. + */ +#define virtblk_report_zones NULL +static inline void virtblk_revalidate_zones(struct virtio_blk *vblk) +{ +} +static inline int virtblk_probe_zoned_device(struct virtio_device *vdev, + struct virtio_blk *vblk, struct request_queue *q) +{ + return -EOPNOTSUPP; +} + +static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev) +{ + return false; +} +#endif /* CONFIG_BLK_DEV_ZONED */ + /* return id (s/n) string for *disk to *id_str */ static int virtblk_get_id(struct gendisk *disk, char *id_str) @@ -464,18 +795,24 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) struct virtio_blk *vblk = disk->private_data; struct request_queue *q = vblk->disk->queue; struct request *req; + struct virtblk_req *vbr; int err; req = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0); if (IS_ERR(req)) return PTR_ERR(req); + vbr = blk_mq_rq_to_pdu(req); + vbr->in_hdr_len = sizeof(vbr->status); + vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID); + vbr->out_hdr.sector = 0; + err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); if (err) goto out; blk_execute_rq(req, false); - err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req))); + err = blk_status_to_errno(virtblk_result(vbr->status)); out: blk_mq_free_request(req); return err; @@ -526,6 +863,7 @@ static const struct block_device_operations virtblk_fops = { .owner = THIS_MODULE, .getgeo = virtblk_getgeo, .free_disk = virtblk_free_disk, + .report_zones = virtblk_report_zones, }; static int index_to_minor(int index) @@ -596,6 +934,7 @@ static void virtblk_config_changed_work(struct work_struct *work) struct virtio_blk *vblk = container_of(work, struct virtio_blk, config_work); + virtblk_revalidate_zones(vblk); virtblk_update_capacity(vblk, true); } @@ -1152,6 +1491,15 @@ static int virtblk_probe(struct virtio_device *vdev) virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); + if (virtblk_has_zoned_feature(vdev)) { + err = virtblk_probe_zoned_device(vdev, vblk, q); + if (err) + goto out_cleanup_disk; + } + + dev_info(&vdev->dev, "blk config size: %zu\n", + sizeof(struct virtio_blk_config)); + err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); if (err) goto out_cleanup_disk; @@ -1253,6 +1601,9 @@ static unsigned int features[] = { VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, VIRTIO_BLK_F_SECURE_ERASE, +#ifdef CONFIG_BLK_DEV_ZONED + VIRTIO_BLK_F_ZONED, +#endif /* CONFIG_BLK_DEV_ZONED */ }; static struct virtio_driver virtio_blk = { diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index 58e70b24b504..3744e4da1b2a 100644 --- a/include/uapi/linux/virtio_blk.h +++ b/include/uapi/linux/virtio_blk.h @@ -41,6 +41,7 @@ #define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ #define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ #define VIRTIO_BLK_F_SECURE_ERASE 16 /* Secure Erase is supported */ +#define VIRTIO_BLK_F_ZONED 17 /* Zoned block device */ /* Legacy feature bits */ #ifndef VIRTIO_BLK_NO_LEGACY @@ -137,6 +138,16 @@ struct virtio_blk_config { /* Secure erase commands must be aligned to this number of sectors. */ __virtio32 secure_erase_sector_alignment; + /* Zoned block device characteristics (if VIRTIO_BLK_F_ZONED) */ + struct virtio_blk_zoned_characteristics { + __virtio32 zone_sectors; + __virtio32 max_open_zones; + __virtio32 max_active_zones; + __virtio32 max_append_sectors; + __virtio32 write_granularity; + __u8 model; + __u8 unused2[3]; + } zoned; } __attribute__((packed)); /* @@ -174,6 +185,27 @@ struct virtio_blk_config { /* Secure erase command */ #define VIRTIO_BLK_T_SECURE_ERASE 14 +/* Zone append command */ +#define VIRTIO_BLK_T_ZONE_APPEND 15 + +/* Report zones command */ +#define VIRTIO_BLK_T_ZONE_REPORT 16 + +/* Open zone command */ +#define VIRTIO_BLK_T_ZONE_OPEN 18 + +/* Close zone command */ +#define VIRTIO_BLK_T_ZONE_CLOSE 20 + +/* Finish zone command */ +#define VIRTIO_BLK_T_ZONE_FINISH 22 + +/* Reset zone command */ +#define VIRTIO_BLK_T_ZONE_RESET 24 + +/* Reset All zones command */ +#define VIRTIO_BLK_T_ZONE_RESET_ALL 26 + #ifndef VIRTIO_BLK_NO_LEGACY /* Barrier before this op. */ #define VIRTIO_BLK_T_BARRIER 0x80000000 @@ -193,6 +225,72 @@ struct virtio_blk_outhdr { __virtio64 sector; }; +/* + * Supported zoned device models. + */ + +/* Regular block device */ +#define VIRTIO_BLK_Z_NONE 0 +/* Host-managed zoned device */ +#define VIRTIO_BLK_Z_HM 1 +/* Host-aware zoned device */ +#define VIRTIO_BLK_Z_HA 2 + +/* + * Zone descriptor. A part of VIRTIO_BLK_T_ZONE_REPORT command reply. + */ +struct virtio_blk_zone_descriptor { + /* Zone capacity */ + __virtio64 z_cap; + /* The starting sector of the zone */ + __virtio64 z_start; + /* Zone write pointer position in sectors */ + __virtio64 z_wp; + /* Zone type */ + __u8 z_type; + /* Zone state */ + __u8 z_state; + __u8 reserved[38]; +}; + +struct virtio_blk_zone_report { + __virtio64 nr_zones; + __u8 reserved[56]; + struct virtio_blk_zone_descriptor zones[]; +}; + +/* + * Supported zone types. + */ + +/* Conventional zone */ +#define VIRTIO_BLK_ZT_CONV 1 +/* Sequential Write Required zone */ +#define VIRTIO_BLK_ZT_SWR 2 +/* Sequential Write Preferred zone */ +#define VIRTIO_BLK_ZT_SWP 3 + +/* + * Zone states that are available for zones of all types. + */ + +/* Not a write pointer (conventional zones only) */ +#define VIRTIO_BLK_ZS_NOT_WP 0 +/* Empty */ +#define VIRTIO_BLK_ZS_EMPTY 1 +/* Implicitly Open */ +#define VIRTIO_BLK_ZS_IOPEN 2 +/* Explicitly Open */ +#define VIRTIO_BLK_ZS_EOPEN 3 +/* Closed */ +#define VIRTIO_BLK_ZS_CLOSED 4 +/* Read-Only */ +#define VIRTIO_BLK_ZS_RDONLY 13 +/* Full */ +#define VIRTIO_BLK_ZS_FULL 14 +/* Offline */ +#define VIRTIO_BLK_ZS_OFFLINE 15 + /* Unmap this range (only valid for write zeroes command) */ #define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP 0x00000001 @@ -219,4 +317,11 @@ struct virtio_scsi_inhdr { #define VIRTIO_BLK_S_OK 0 #define VIRTIO_BLK_S_IOERR 1 #define VIRTIO_BLK_S_UNSUPP 2 + +/* Error codes that are specific to zoned block devices */ +#define VIRTIO_BLK_S_ZONE_INVALID_CMD 3 +#define VIRTIO_BLK_S_ZONE_UNALIGNED_WP 4 +#define VIRTIO_BLK_S_ZONE_OPEN_RESOURCE 5 +#define VIRTIO_BLK_S_ZONE_ACTIVE_RESOURCE 6 + #endif /* _LINUX_VIRTIO_BLK_H */ -- cgit v1.2.3 From b16a1756c716235891e298beabd68f3cd6bb5952 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 22 Dec 2022 14:30:13 -0500 Subject: virtio_blk: mark all zone fields LE zone is a virtio 1.x feature so all fields are LE, they are handled as such, but have mistakenly been labeled __virtioXX in the header. This results in a bunch of sparse warnings. Use the __leXX tags to make sparse happy. Message-Id: <20221222193214.55146-1-mst@redhat.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_blk.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index 3744e4da1b2a..5af2a0300bb9 100644 --- a/include/uapi/linux/virtio_blk.h +++ b/include/uapi/linux/virtio_blk.h @@ -140,11 +140,11 @@ struct virtio_blk_config { /* Zoned block device characteristics (if VIRTIO_BLK_F_ZONED) */ struct virtio_blk_zoned_characteristics { - __virtio32 zone_sectors; - __virtio32 max_open_zones; - __virtio32 max_active_zones; - __virtio32 max_append_sectors; - __virtio32 write_granularity; + __le32 zone_sectors; + __le32 max_open_zones; + __le32 max_active_zones; + __le32 max_append_sectors; + __le32 write_granularity; __u8 model; __u8 unused2[3]; } zoned; @@ -241,11 +241,11 @@ struct virtio_blk_outhdr { */ struct virtio_blk_zone_descriptor { /* Zone capacity */ - __virtio64 z_cap; + __le64 z_cap; /* The starting sector of the zone */ - __virtio64 z_start; + __le64 z_start; /* Zone write pointer position in sectors */ - __virtio64 z_wp; + __le64 z_wp; /* Zone type */ __u8 z_type; /* Zone state */ @@ -254,7 +254,7 @@ struct virtio_blk_zone_descriptor { }; struct virtio_blk_zone_report { - __virtio64 nr_zones; + __le64 nr_zones; __u8 reserved[56]; struct virtio_blk_zone_descriptor zones[]; }; -- cgit v1.2.3 From db6c4dee4c104f50ed163af71c53bfdb878a8318 Mon Sep 17 00:00:00 2001 From: Alvaro Karsz Date: Tue, 10 Jan 2023 18:56:36 +0200 Subject: PCI: Add SolidRun vendor ID Add SolidRun vendor ID to pci_ids.h The vendor ID is used in 2 different source files, the SNET vDPA driver and PCI quirks. Signed-off-by: Alvaro Karsz Acked-by: Bjorn Helgaas Message-Id: <20230110165638.123745-2-alvaro.karsz@solid-run.com> Signed-off-by: Michael S. Tsirkin --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b362d90eb9b0..6716e6371a18 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3092,6 +3092,8 @@ #define PCI_VENDOR_ID_3COM_2 0xa727 +#define PCI_VENDOR_ID_SOLIDRUN 0xd063 + #define PCI_VENDOR_ID_DIGIUM 0xd161 #define PCI_DEVICE_ID_DIGIUM_HFC4S 0xb410 -- cgit v1.2.3 From 1538a8a49ecbe6d3302cd7f347632338e56857f8 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Tue, 3 Jan 2023 11:51:05 +0100 Subject: vdpa: Add resume operation Add a new operation to allow a vDPA device to be resumed after it has been suspended. Trying to resume a device that wasn't suspended will result in a no-op. This operation is optional. If it's not implemented, the associated backend feature bit will not be exposed. And if the feature bit is not exposed, invoking this operation will return an error. Acked-by: Jason Wang Signed-off-by: Sebastien Boeuf Message-Id: <6e05c4b31b47f3e29cb2bd7ebd56c81f84b8f48a.1672742878.git.sebastien.boeuf@intel.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- include/linux/vdpa.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 6d0f5e4e82c2..96d308cbf97b 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -219,7 +219,10 @@ struct vdpa_map_file { * @reset: Reset device * @vdev: vdpa device * Returns integer: success (0) or error (< 0) - * @suspend: Suspend or resume the device (optional) + * @suspend: Suspend the device (optional) + * @vdev: vdpa device + * Returns integer: success (0) or error (< 0) + * @resume: Resume the device (optional) * @vdev: vdpa device * Returns integer: success (0) or error (< 0) * @get_config_size: Get the size of the configuration space includes @@ -324,6 +327,7 @@ struct vdpa_config_ops { void (*set_status)(struct vdpa_device *vdev, u8 status); int (*reset)(struct vdpa_device *vdev); int (*suspend)(struct vdpa_device *vdev); + int (*resume)(struct vdpa_device *vdev); size_t (*get_config_size)(struct vdpa_device *vdev); void (*get_config)(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len); -- cgit v1.2.3 From 69106b6fb3d73bd4252daa48ae96e600c9701147 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Tue, 3 Jan 2023 11:51:06 +0100 Subject: vhost-vdpa: Introduce RESUME backend feature bit Userspace knows if the device can be resumed or not by checking this feature bit. It's only exposed if the vdpa driver backend implements the resume() operation callback. Userspace trying to negotiate this feature when it hasn't been exposed will result in an error. Acked-by: Jason Wang Signed-off-by: Sebastien Boeuf Message-Id: Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- drivers/vhost/vdpa.c | 16 +++++++++++++++- include/uapi/linux/vhost_types.h | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index ec32f785dfde..abbe397b8509 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -359,6 +359,14 @@ static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v) return ops->suspend; } +static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->resume; +} + static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; @@ -606,11 +614,15 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, if (copy_from_user(&features, featurep, sizeof(features))) return -EFAULT; if (features & ~(VHOST_VDPA_BACKEND_FEATURES | - BIT_ULL(VHOST_BACKEND_F_SUSPEND))) + BIT_ULL(VHOST_BACKEND_F_SUSPEND) | + BIT_ULL(VHOST_BACKEND_F_RESUME))) return -EOPNOTSUPP; if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) && !vhost_vdpa_can_suspend(v)) return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) && + !vhost_vdpa_can_resume(v)) + return -EOPNOTSUPP; vhost_set_backend_features(&v->vdev, features); return 0; } @@ -662,6 +674,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, features = VHOST_VDPA_BACKEND_FEATURES; if (vhost_vdpa_can_suspend(v)) features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND); + if (vhost_vdpa_can_resume(v)) + features |= BIT_ULL(VHOST_BACKEND_F_RESUME); if (copy_to_user(featurep, &features, sizeof(features))) r = -EFAULT; break; diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 53601ce2c20a..c5690a8992d8 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -163,5 +163,7 @@ struct vhost_vdpa_iova_range { #define VHOST_BACKEND_F_IOTLB_ASID 0x3 /* Device can be suspended */ #define VHOST_BACKEND_F_SUSPEND 0x4 +/* Device can be resumed */ +#define VHOST_BACKEND_F_RESUME 0x5 #endif -- cgit v1.2.3 From 3b688d7a086d0438649ea37990c6e811954fc780 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Tue, 3 Jan 2023 11:51:07 +0100 Subject: vhost-vdpa: uAPI to resume the device This new ioctl adds support for resuming the device from userspace. This is required when trying to restore the device in a functioning state after it's been suspended. It is already possible to reset a suspended device, but that means the device must be reconfigured and all the IOMMU/IOTLB mappings must be recreated. This new operation allows the device to be resumed without going through a full reset. This is particularly useful when trying to perform offline migration of a virtual machine (also known as snapshot/restore) as it allows the VMM to resume the virtual machine back to a running state after the snapshot is performed. Acked-by: Jason Wang Signed-off-by: Sebastien Boeuf Message-Id: <73b75fb87d25cff59768b4955a81fe7ffe5b4770.1672742878.git.sebastien.boeuf@intel.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- drivers/vhost/vdpa.c | 18 ++++++++++++++++++ include/uapi/linux/vhost.h | 8 ++++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index abbe397b8509..23db92388393 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -506,6 +506,21 @@ static long vhost_vdpa_suspend(struct vhost_vdpa *v) return ops->suspend(vdpa); } +/* After a successful return of this ioctl the device resumes processing + * virtqueue descriptors. The device becomes fully operational the same way it + * was before it was suspended. + */ +static long vhost_vdpa_resume(struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (!ops->resume) + return -EOPNOTSUPP; + + return ops->resume(vdpa); +} + static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { @@ -691,6 +706,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_VDPA_SUSPEND: r = vhost_vdpa_suspend(v); break; + case VHOST_VDPA_RESUME: + r = vhost_vdpa_resume(v); + break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index f9f115a7c75b..92e1b700b51c 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -180,4 +180,12 @@ */ #define VHOST_VDPA_SUSPEND _IO(VHOST_VIRTIO, 0x7D) +/* Resume a device so it can resume processing virtqueue requests + * + * After the return of this ioctl the device will have restored all the + * necessary states and it is fully operational to continue processing the + * virtqueue descriptors. + */ +#define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E) + #endif -- cgit v1.2.3 From 2713ea3c7d930aa1ff5ef7d4a57a1e4fcc3b9985 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 19 Jan 2023 14:15:21 +0800 Subject: virtio_ring: per virtqueue dma device This patch introduces a per virtqueue dma device. This will be used for virtio devices whose virtqueue are backed by different underlayer devices. One example is the vDPA that where the control virtqueue could be implemented through software mediation. Some of the work are actually done before since the helper like vring_dma_device(). This work left are: - Let vring_dma_device() return the per virtqueue dma device instead of the vdev's parent. - Allow passing a dma_device when creating the virtqueue through a new helper, old vring creation helper will keep using vdev's parent. Reviewed-by: Eli Cohen Tested-by: Eli Cohen Signed-off-by: Jason Wang Message-Id: <20230119061525.75068-2-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 133 ++++++++++++++++++++++++++++++------------- include/linux/virtio_ring.h | 16 ++++++ 2 files changed, 109 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 723c4e29e1d3..41144b5246a8 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -202,6 +202,9 @@ struct vring_virtqueue { /* DMA, allocation, and size information */ bool we_own_ring; + /* Device used for doing DMA */ + struct device *dma_dev; + #ifdef DEBUG /* They're supposed to lock for us. */ unsigned int in_use; @@ -219,7 +222,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index, bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), - const char *name); + const char *name, + struct device *dma_dev); static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num); static void vring_free(struct virtqueue *_vq); @@ -297,10 +301,11 @@ size_t virtio_max_dma_size(struct virtio_device *vdev) EXPORT_SYMBOL_GPL(virtio_max_dma_size); static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) + dma_addr_t *dma_handle, gfp_t flag, + struct device *dma_dev) { if (vring_use_dma_api(vdev)) { - return dma_alloc_coherent(vdev->dev.parent, size, + return dma_alloc_coherent(dma_dev, size, dma_handle, flag); } else { void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); @@ -330,10 +335,11 @@ static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, } static void vring_free_queue(struct virtio_device *vdev, size_t size, - void *queue, dma_addr_t dma_handle) + void *queue, dma_addr_t dma_handle, + struct device *dma_dev) { if (vring_use_dma_api(vdev)) - dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); + dma_free_coherent(dma_dev, size, queue, dma_handle); else free_pages_exact(queue, PAGE_ALIGN(size)); } @@ -341,11 +347,11 @@ static void vring_free_queue(struct virtio_device *vdev, size_t size, /* * The DMA ops on various arches are rather gnarly right now, and * making all of the arch DMA ops work on the vring device itself - * is a mess. For now, we use the parent device for DMA ops. + * is a mess. */ static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq) { - return vq->vq.vdev->dev.parent; + return vq->dma_dev; } /* Map one sg entry. */ @@ -1032,11 +1038,12 @@ err_state: } static void vring_free_split(struct vring_virtqueue_split *vring_split, - struct virtio_device *vdev) + struct virtio_device *vdev, struct device *dma_dev) { vring_free_queue(vdev, vring_split->queue_size_in_bytes, vring_split->vring.desc, - vring_split->queue_dma_addr); + vring_split->queue_dma_addr, + dma_dev); kfree(vring_split->desc_state); kfree(vring_split->desc_extra); @@ -1046,7 +1053,8 @@ static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, struct virtio_device *vdev, u32 num, unsigned int vring_align, - bool may_reduce_num) + bool may_reduce_num, + struct device *dma_dev) { void *queue = NULL; dma_addr_t dma_addr; @@ -1061,7 +1069,8 @@ static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { queue = vring_alloc_queue(vdev, vring_size(num, vring_align), &dma_addr, - GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, + dma_dev); if (queue) break; if (!may_reduce_num) @@ -1074,7 +1083,8 @@ static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, if (!queue) { /* Try to get a single page. You are my only hope! */ queue = vring_alloc_queue(vdev, vring_size(num, vring_align), - &dma_addr, GFP_KERNEL | __GFP_ZERO); + &dma_addr, GFP_KERNEL | __GFP_ZERO, + dma_dev); } if (!queue) return -ENOMEM; @@ -1100,21 +1110,22 @@ static struct virtqueue *vring_create_virtqueue_split( bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), - const char *name) + const char *name, + struct device *dma_dev) { struct vring_virtqueue_split vring_split = {}; struct virtqueue *vq; int err; err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align, - may_reduce_num); + may_reduce_num, dma_dev); if (err) return NULL; vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers, - context, notify, callback, name); + context, notify, callback, name, dma_dev); if (!vq) { - vring_free_split(&vring_split, vdev); + vring_free_split(&vring_split, vdev, dma_dev); return NULL; } @@ -1132,7 +1143,8 @@ static int virtqueue_resize_split(struct virtqueue *_vq, u32 num) err = vring_alloc_queue_split(&vring_split, vdev, num, vq->split.vring_align, - vq->split.may_reduce_num); + vq->split.may_reduce_num, + vring_dma_dev(vq)); if (err) goto err; @@ -1150,7 +1162,7 @@ static int virtqueue_resize_split(struct virtqueue *_vq, u32 num) return 0; err_state_extra: - vring_free_split(&vring_split, vdev); + vring_free_split(&vring_split, vdev, vring_dma_dev(vq)); err: virtqueue_reinit_split(vq); return -ENOMEM; @@ -1841,22 +1853,26 @@ static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num) } static void vring_free_packed(struct vring_virtqueue_packed *vring_packed, - struct virtio_device *vdev) + struct virtio_device *vdev, + struct device *dma_dev) { if (vring_packed->vring.desc) vring_free_queue(vdev, vring_packed->ring_size_in_bytes, vring_packed->vring.desc, - vring_packed->ring_dma_addr); + vring_packed->ring_dma_addr, + dma_dev); if (vring_packed->vring.driver) vring_free_queue(vdev, vring_packed->event_size_in_bytes, vring_packed->vring.driver, - vring_packed->driver_event_dma_addr); + vring_packed->driver_event_dma_addr, + dma_dev); if (vring_packed->vring.device) vring_free_queue(vdev, vring_packed->event_size_in_bytes, vring_packed->vring.device, - vring_packed->device_event_dma_addr); + vring_packed->device_event_dma_addr, + dma_dev); kfree(vring_packed->desc_state); kfree(vring_packed->desc_extra); @@ -1864,7 +1880,7 @@ static void vring_free_packed(struct vring_virtqueue_packed *vring_packed, static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, struct virtio_device *vdev, - u32 num) + u32 num, struct device *dma_dev) { struct vring_packed_desc *ring; struct vring_packed_desc_event *driver, *device; @@ -1875,7 +1891,8 @@ static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, ring = vring_alloc_queue(vdev, ring_size_in_bytes, &ring_dma_addr, - GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, + dma_dev); if (!ring) goto err; @@ -1887,7 +1904,8 @@ static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, driver = vring_alloc_queue(vdev, event_size_in_bytes, &driver_event_dma_addr, - GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, + dma_dev); if (!driver) goto err; @@ -1897,7 +1915,8 @@ static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, device = vring_alloc_queue(vdev, event_size_in_bytes, &device_event_dma_addr, - GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, + dma_dev); if (!device) goto err; @@ -1909,7 +1928,7 @@ static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, return 0; err: - vring_free_packed(vring_packed, vdev); + vring_free_packed(vring_packed, vdev, dma_dev); return -ENOMEM; } @@ -1987,13 +2006,14 @@ static struct virtqueue *vring_create_virtqueue_packed( bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), - const char *name) + const char *name, + struct device *dma_dev) { struct vring_virtqueue_packed vring_packed = {}; struct vring_virtqueue *vq; int err; - if (vring_alloc_queue_packed(&vring_packed, vdev, num)) + if (vring_alloc_queue_packed(&vring_packed, vdev, num, dma_dev)) goto err_ring; vq = kmalloc(sizeof(*vq), GFP_KERNEL); @@ -2014,6 +2034,7 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->broken = false; #endif vq->packed_ring = true; + vq->dma_dev = dma_dev; vq->use_dma_api = vring_use_dma_api(vdev); vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && @@ -2040,7 +2061,7 @@ static struct virtqueue *vring_create_virtqueue_packed( err_state_extra: kfree(vq); err_vq: - vring_free_packed(&vring_packed, vdev); + vring_free_packed(&vring_packed, vdev, dma_dev); err_ring: return NULL; } @@ -2052,7 +2073,7 @@ static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num) struct virtio_device *vdev = _vq->vdev; int err; - if (vring_alloc_queue_packed(&vring_packed, vdev, num)) + if (vring_alloc_queue_packed(&vring_packed, vdev, num, vring_dma_dev(vq))) goto err_ring; err = vring_alloc_state_extra_packed(&vring_packed); @@ -2069,7 +2090,7 @@ static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num) return 0; err_state_extra: - vring_free_packed(&vring_packed, vdev); + vring_free_packed(&vring_packed, vdev, vring_dma_dev(vq)); err_ring: virtqueue_reinit_packed(vq); return -ENOMEM; @@ -2481,7 +2502,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index, bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), - const char *name) + const char *name, + struct device *dma_dev) { struct vring_virtqueue *vq; int err; @@ -2507,6 +2529,7 @@ static struct virtqueue *__vring_new_virtqueue(unsigned int index, #else vq->broken = false; #endif + vq->dma_dev = dma_dev; vq->use_dma_api = vring_use_dma_api(vdev); vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && @@ -2549,14 +2572,39 @@ struct virtqueue *vring_create_virtqueue( if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) return vring_create_virtqueue_packed(index, num, vring_align, vdev, weak_barriers, may_reduce_num, - context, notify, callback, name); + context, notify, callback, name, vdev->dev.parent); return vring_create_virtqueue_split(index, num, vring_align, vdev, weak_barriers, may_reduce_num, - context, notify, callback, name); + context, notify, callback, name, vdev->dev.parent); } EXPORT_SYMBOL_GPL(vring_create_virtqueue); +struct virtqueue *vring_create_virtqueue_dma( + unsigned int index, + unsigned int num, + unsigned int vring_align, + struct virtio_device *vdev, + bool weak_barriers, + bool may_reduce_num, + bool context, + bool (*notify)(struct virtqueue *), + void (*callback)(struct virtqueue *), + const char *name, + struct device *dma_dev) +{ + + if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) + return vring_create_virtqueue_packed(index, num, vring_align, + vdev, weak_barriers, may_reduce_num, + context, notify, callback, name, dma_dev); + + return vring_create_virtqueue_split(index, num, vring_align, + vdev, weak_barriers, may_reduce_num, + context, notify, callback, name, dma_dev); +} +EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma); + /** * virtqueue_resize - resize the vring of vq * @_vq: the struct virtqueue we're talking about. @@ -2645,7 +2693,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, vring_init(&vring_split.vring, num, pages, vring_align); return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers, - context, notify, callback, name); + context, notify, callback, name, + vdev->dev.parent); } EXPORT_SYMBOL_GPL(vring_new_virtqueue); @@ -2658,17 +2707,20 @@ static void vring_free(struct virtqueue *_vq) vring_free_queue(vq->vq.vdev, vq->packed.ring_size_in_bytes, vq->packed.vring.desc, - vq->packed.ring_dma_addr); + vq->packed.ring_dma_addr, + vring_dma_dev(vq)); vring_free_queue(vq->vq.vdev, vq->packed.event_size_in_bytes, vq->packed.vring.driver, - vq->packed.driver_event_dma_addr); + vq->packed.driver_event_dma_addr, + vring_dma_dev(vq)); vring_free_queue(vq->vq.vdev, vq->packed.event_size_in_bytes, vq->packed.vring.device, - vq->packed.device_event_dma_addr); + vq->packed.device_event_dma_addr, + vring_dma_dev(vq)); kfree(vq->packed.desc_state); kfree(vq->packed.desc_extra); @@ -2676,7 +2728,8 @@ static void vring_free(struct virtqueue *_vq) vring_free_queue(vq->vq.vdev, vq->split.queue_size_in_bytes, vq->split.vring.desc, - vq->split.queue_dma_addr); + vq->split.queue_dma_addr, + vring_dma_dev(vq)); } } if (!vq->packed_ring) { diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 8b8af1a38991..8b95b69ef694 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -76,6 +76,22 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, void (*callback)(struct virtqueue *vq), const char *name); +/* + * Creates a virtqueue and allocates the descriptor ring with per + * virtqueue DMA device. + */ +struct virtqueue *vring_create_virtqueue_dma(unsigned int index, + unsigned int num, + unsigned int vring_align, + struct virtio_device *vdev, + bool weak_barriers, + bool may_reduce_num, + bool ctx, + bool (*notify)(struct virtqueue *vq), + void (*callback)(struct virtqueue *vq), + const char *name, + struct device *dma_dev); + /* * Creates a virtqueue with a standard layout but a caller-allocated * ring. -- cgit v1.2.3 From 25da258fa61b1a902c781406a4e24a8284fc3d8a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 19 Jan 2023 14:15:22 +0800 Subject: vdpa: introduce get_vq_dma_device() This patch introduces a new method to query the dma device that is use for a specific virtqueue. Reviewed-by: Eli Cohen Tested-by: Eli Cohen Signed-off-by: Jason Wang Message-Id: <20230119061525.75068-3-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 96d308cbf97b..43f59ef10cc9 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -285,6 +285,11 @@ struct vdpa_map_file { * @iova: iova to be unmapped * @size: size of the area * Returns integer: success (0) or error (< 0) + * @get_vq_dma_dev: Get the dma device for a specific + * virtqueue (optional) + * @vdev: vdpa device + * @idx: virtqueue index + * Returns pointer to structure device or error (NULL) * @free: Free resources that belongs to vDPA (optional) * @vdev: vdpa device */ @@ -345,6 +350,7 @@ struct vdpa_config_ops { u64 iova, u64 size); int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group, unsigned int asid); + struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx); /* Free device resources */ void (*free)(struct vdpa_device *vdev); -- cgit v1.2.3 From 2e44ca3f1f0ba2022f949003f02cc091144e54a3 Mon Sep 17 00:00:00 2001 From: Shunsuke Mie Date: Thu, 2 Feb 2023 19:42:48 +0900 Subject: vringh: fix a typo in comments for vringh_kiov Probably it is a simple copy error from struct vring_iov. Signed-off-by: Shunsuke Mie Message-Id: <20230202104248.2040652-1-mie@igel.co.jp> Signed-off-by: Michael S. Tsirkin --- include/linux/vringh.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 212892cf9822..1991a02c6431 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -92,7 +92,7 @@ struct vringh_iov { }; /** - * struct vringh_iov - kvec mangler. + * struct vringh_kiov - kvec mangler. * * Mangles kvec in place, and restores it. * Remaining data is iov + i, of used - i elements. -- cgit v1.2.3