From 95bfec41bd3d39b7659cba65b72080420bf5691e Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Sat, 15 Oct 2022 23:41:27 -0400 Subject: virtio-blk: add support for zoned block devices This patch adds support for Zoned Block Devices (ZBDs) to the kernel virtio-blk driver. The patch accompanies the virtio-blk ZBD support draft that is now being proposed for standardization. The latest version of the draft is linked at https://github.com/oasis-tcs/virtio-spec/issues/143 . The QEMU zoned device code that implements these protocol extensions has been developed by Sam Li and it is currently in review at the QEMU mailing list. A number of virtblk request structure changes has been introduced to accommodate the functionality that is specific to zoned block devices and, most importantly, make room for carrying the Zoned Append sector value from the device back to the driver along with the request status. The zone-specific code in the patch is heavily influenced by NVMe ZNS code in drivers/nvme/host/zns.c, but it is simpler because the proposed virtio ZBD draft only covers the zoned device features that are relevant to the zoned functionality provided by Linux block layer. includes the following fixup: virtio-blk: fix probe without CONFIG_BLK_DEV_ZONED When building without CONFIG_BLK_DEV_ZONED, VIRTIO_BLK_F_ZONED is excluded from array of driver features. As a result virtio_has_feature panics in virtio_check_driver_offered_feature since that by design verifies that a feature we are checking for is listed in the feature array. To fix, replace the call to virtio_has_feature with a stub. Message-Id: <20221016034127.330942-3-dmitry.fomichev@wdc.com> Co-developed-by: Stefan Hajnoczi Signed-off-by: Stefan Hajnoczi Signed-off-by: Dmitry Fomichev Message-Id: <20221220112340.518841-1-mst@redhat.com> Reported-by: Linux Kernel Functional Testing Tested-by: Linux Kernel Functional Testing Reported-by: Xuan Zhuo Debugged-by: Xuan Zhuo Tested-by: Anders Roxell Tested-by: Marek Szyprowski Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/block/virtio_blk.c | 387 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 369 insertions(+), 18 deletions(-) (limited to 'drivers/block/virtio_blk.c') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6a77fa917428..71e1e4bd8439 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #define PART_BITS 4 @@ -80,22 +81,51 @@ struct virtio_blk { int num_vqs; int io_queues[HCTX_MAX_TYPES]; struct virtio_blk_vq *vqs; + + /* For zoned device */ + unsigned int zone_sectors; }; struct virtblk_req { + /* Out header */ struct virtio_blk_outhdr out_hdr; - u8 status; + + /* In header */ + union { + u8 status; + + /* + * The zone append command has an extended in header. + * The status field in zone_append_in_hdr must have + * the same offset in virtblk_req as the non-zoned + * status field above. + */ + struct { + u8 status; + u8 reserved[7]; + u64 append_sector; + } zone_append_in_hdr; + }; + + size_t in_hdr_len; + struct sg_table sg_table; struct scatterlist sg[]; }; -static inline blk_status_t virtblk_result(struct virtblk_req *vbr) +static inline blk_status_t virtblk_result(u8 status) { - switch (vbr->status) { + switch (status) { case VIRTIO_BLK_S_OK: return BLK_STS_OK; case VIRTIO_BLK_S_UNSUPP: return BLK_STS_NOTSUPP; + case VIRTIO_BLK_S_ZONE_OPEN_RESOURCE: + return BLK_STS_ZONE_OPEN_RESOURCE; + case VIRTIO_BLK_S_ZONE_ACTIVE_RESOURCE: + return BLK_STS_ZONE_ACTIVE_RESOURCE; + case VIRTIO_BLK_S_IOERR: + case VIRTIO_BLK_S_ZONE_UNALIGNED_WP: default: return BLK_STS_IOERR; } @@ -111,11 +141,11 @@ static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr) { - struct scatterlist hdr, status, *sgs[3]; + struct scatterlist out_hdr, in_hdr, *sgs[3]; unsigned int num_out = 0, num_in = 0; - sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); - sgs[num_out++] = &hdr; + sg_init_one(&out_hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); + sgs[num_out++] = &out_hdr; if (vbr->sg_table.nents) { if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) @@ -124,8 +154,8 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr) sgs[num_out + num_in++] = vbr->sg_table.sgl; } - sg_init_one(&status, &vbr->status, sizeof(vbr->status)); - sgs[num_out + num_in++] = &status; + sg_init_one(&in_hdr, &vbr->status, vbr->in_hdr_len); + sgs[num_out + num_in++] = &in_hdr; return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); } @@ -214,21 +244,22 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, struct request *req, struct virtblk_req *vbr) { + size_t in_hdr_len = sizeof(vbr->status); bool unmap = false; u32 type; + u64 sector = 0; - vbr->out_hdr.sector = 0; + /* Set fields for all request types */ + vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); switch (req_op(req)) { case REQ_OP_READ: type = VIRTIO_BLK_T_IN; - vbr->out_hdr.sector = cpu_to_virtio64(vdev, - blk_rq_pos(req)); + sector = blk_rq_pos(req); break; case REQ_OP_WRITE: type = VIRTIO_BLK_T_OUT; - vbr->out_hdr.sector = cpu_to_virtio64(vdev, - blk_rq_pos(req)); + sector = blk_rq_pos(req); break; case REQ_OP_FLUSH: type = VIRTIO_BLK_T_FLUSH; @@ -243,16 +274,42 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, case REQ_OP_SECURE_ERASE: type = VIRTIO_BLK_T_SECURE_ERASE; break; - case REQ_OP_DRV_IN: - type = VIRTIO_BLK_T_GET_ID; + case REQ_OP_ZONE_OPEN: + type = VIRTIO_BLK_T_ZONE_OPEN; + sector = blk_rq_pos(req); + break; + case REQ_OP_ZONE_CLOSE: + type = VIRTIO_BLK_T_ZONE_CLOSE; + sector = blk_rq_pos(req); + break; + case REQ_OP_ZONE_FINISH: + type = VIRTIO_BLK_T_ZONE_FINISH; + sector = blk_rq_pos(req); + break; + case REQ_OP_ZONE_APPEND: + type = VIRTIO_BLK_T_ZONE_APPEND; + sector = blk_rq_pos(req); + in_hdr_len = sizeof(vbr->zone_append_in_hdr); break; + case REQ_OP_ZONE_RESET: + type = VIRTIO_BLK_T_ZONE_RESET; + sector = blk_rq_pos(req); + break; + case REQ_OP_ZONE_RESET_ALL: + type = VIRTIO_BLK_T_ZONE_RESET_ALL; + break; + case REQ_OP_DRV_IN: + /* Out header already filled in, nothing to do */ + return 0; default: WARN_ON_ONCE(1); return BLK_STS_IOERR; } + /* Set fields for non-REQ_OP_DRV_IN request types */ + vbr->in_hdr_len = in_hdr_len; vbr->out_hdr.type = cpu_to_virtio32(vdev, type); - vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); + vbr->out_hdr.sector = cpu_to_virtio64(vdev, sector); if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES || type == VIRTIO_BLK_T_SECURE_ERASE) { @@ -266,10 +323,15 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, static inline void virtblk_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); + int status = virtblk_result(vbr->status); virtblk_unmap_data(req, vbr); virtblk_cleanup_cmd(req); - blk_mq_end_request(req, virtblk_result(vbr)); + + if (req_op(req) == REQ_OP_ZONE_APPEND) + req->__sector = le64_to_cpu(vbr->zone_append_in_hdr.append_sector); + + blk_mq_end_request(req, status); } static void virtblk_done(struct virtqueue *vq) @@ -457,6 +519,275 @@ static void virtio_queue_rqs(struct request **rqlist) *rqlist = requeue_list; } +#ifdef CONFIG_BLK_DEV_ZONED +static void *virtblk_alloc_report_buffer(struct virtio_blk *vblk, + unsigned int nr_zones, + unsigned int zone_sectors, + size_t *buflen) +{ + struct request_queue *q = vblk->disk->queue; + size_t bufsize; + void *buf; + + nr_zones = min_t(unsigned int, nr_zones, + get_capacity(vblk->disk) >> ilog2(zone_sectors)); + + bufsize = sizeof(struct virtio_blk_zone_report) + + nr_zones * sizeof(struct virtio_blk_zone_descriptor); + bufsize = min_t(size_t, bufsize, + queue_max_hw_sectors(q) << SECTOR_SHIFT); + bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT); + + while (bufsize >= sizeof(struct virtio_blk_zone_report)) { + buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY); + if (buf) { + *buflen = bufsize; + return buf; + } + bufsize >>= 1; + } + + return NULL; +} + +static int virtblk_submit_zone_report(struct virtio_blk *vblk, + char *report_buf, size_t report_len, + sector_t sector) +{ + struct request_queue *q = vblk->disk->queue; + struct request *req; + struct virtblk_req *vbr; + int err; + + req = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0); + if (IS_ERR(req)) + return PTR_ERR(req); + + vbr = blk_mq_rq_to_pdu(req); + vbr->in_hdr_len = sizeof(vbr->status); + vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_ZONE_REPORT); + vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, sector); + + err = blk_rq_map_kern(q, req, report_buf, report_len, GFP_KERNEL); + if (err) + goto out; + + blk_execute_rq(req, false); + err = blk_status_to_errno(virtblk_result(vbr->status)); +out: + blk_mq_free_request(req); + return err; +} + +static int virtblk_parse_zone(struct virtio_blk *vblk, + struct virtio_blk_zone_descriptor *entry, + unsigned int idx, unsigned int zone_sectors, + report_zones_cb cb, void *data) +{ + struct blk_zone zone = { }; + + if (entry->z_type != VIRTIO_BLK_ZT_SWR && + entry->z_type != VIRTIO_BLK_ZT_SWP && + entry->z_type != VIRTIO_BLK_ZT_CONV) { + dev_err(&vblk->vdev->dev, "invalid zone type %#x\n", + entry->z_type); + return -EINVAL; + } + + zone.type = entry->z_type; + zone.cond = entry->z_state; + zone.len = zone_sectors; + zone.capacity = le64_to_cpu(entry->z_cap); + zone.start = le64_to_cpu(entry->z_start); + if (zone.cond == BLK_ZONE_COND_FULL) + zone.wp = zone.start + zone.len; + else + zone.wp = le64_to_cpu(entry->z_wp); + + return cb(&zone, idx, data); +} + +static int virtblk_report_zones(struct gendisk *disk, sector_t sector, + unsigned int nr_zones, report_zones_cb cb, + void *data) +{ + struct virtio_blk *vblk = disk->private_data; + struct virtio_blk_zone_report *report; + unsigned int zone_sectors = vblk->zone_sectors; + unsigned int nz, i; + int ret, zone_idx = 0; + size_t buflen; + + if (WARN_ON_ONCE(!vblk->zone_sectors)) + return -EOPNOTSUPP; + + report = virtblk_alloc_report_buffer(vblk, nr_zones, + zone_sectors, &buflen); + if (!report) + return -ENOMEM; + + while (zone_idx < nr_zones && sector < get_capacity(vblk->disk)) { + memset(report, 0, buflen); + + ret = virtblk_submit_zone_report(vblk, (char *)report, + buflen, sector); + if (ret) { + if (ret > 0) + ret = -EIO; + goto out_free; + } + nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones); + if (!nz) + break; + + for (i = 0; i < nz && zone_idx < nr_zones; i++) { + ret = virtblk_parse_zone(vblk, &report->zones[i], + zone_idx, zone_sectors, cb, data); + if (ret) + goto out_free; + sector = le64_to_cpu(report->zones[i].z_start) + zone_sectors; + zone_idx++; + } + } + + if (zone_idx > 0) + ret = zone_idx; + else + ret = -EINVAL; +out_free: + kvfree(report); + return ret; +} + +static void virtblk_revalidate_zones(struct virtio_blk *vblk) +{ + u8 model; + + if (!vblk->zone_sectors) + return; + + virtio_cread(vblk->vdev, struct virtio_blk_config, + zoned.model, &model); + if (!blk_revalidate_disk_zones(vblk->disk, NULL)) + set_capacity_and_notify(vblk->disk, 0); +} + +static int virtblk_probe_zoned_device(struct virtio_device *vdev, + struct virtio_blk *vblk, + struct request_queue *q) +{ + u32 v; + u8 model; + int ret; + + virtio_cread(vdev, struct virtio_blk_config, + zoned.model, &model); + + switch (model) { + case VIRTIO_BLK_Z_NONE: + return 0; + case VIRTIO_BLK_Z_HM: + break; + case VIRTIO_BLK_Z_HA: + /* + * Present the host-aware device as a regular drive. + * TODO It is possible to add an option to make it appear + * in the system as a zoned drive. + */ + return 0; + default: + dev_err(&vdev->dev, "unsupported zone model %d\n", model); + return -EINVAL; + } + + dev_dbg(&vdev->dev, "probing host-managed zoned device\n"); + + disk_set_zoned(vblk->disk, BLK_ZONED_HM); + blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); + + virtio_cread(vdev, struct virtio_blk_config, + zoned.max_open_zones, &v); + disk_set_max_open_zones(vblk->disk, le32_to_cpu(v)); + + dev_dbg(&vdev->dev, "max open zones = %u\n", le32_to_cpu(v)); + + virtio_cread(vdev, struct virtio_blk_config, + zoned.max_active_zones, &v); + disk_set_max_active_zones(vblk->disk, le32_to_cpu(v)); + dev_dbg(&vdev->dev, "max active zones = %u\n", le32_to_cpu(v)); + + virtio_cread(vdev, struct virtio_blk_config, + zoned.write_granularity, &v); + if (!v) { + dev_warn(&vdev->dev, "zero write granularity reported\n"); + return -ENODEV; + } + blk_queue_physical_block_size(q, le32_to_cpu(v)); + blk_queue_io_min(q, le32_to_cpu(v)); + + dev_dbg(&vdev->dev, "write granularity = %u\n", le32_to_cpu(v)); + + /* + * virtio ZBD specification doesn't require zones to be a power of + * two sectors in size, but the code in this driver expects that. + */ + virtio_cread(vdev, struct virtio_blk_config, zoned.zone_sectors, &v); + vblk->zone_sectors = le32_to_cpu(v); + if (vblk->zone_sectors == 0 || !is_power_of_2(vblk->zone_sectors)) { + dev_err(&vdev->dev, + "zoned device with non power of two zone size %u\n", + vblk->zone_sectors); + return -ENODEV; + } + dev_dbg(&vdev->dev, "zone sectors = %u\n", vblk->zone_sectors); + + if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { + dev_warn(&vblk->vdev->dev, + "ignoring negotiated F_DISCARD for zoned device\n"); + blk_queue_max_discard_sectors(q, 0); + } + + ret = blk_revalidate_disk_zones(vblk->disk, NULL); + if (!ret) { + virtio_cread(vdev, struct virtio_blk_config, + zoned.max_append_sectors, &v); + if (!v) { + dev_warn(&vdev->dev, "zero max_append_sectors reported\n"); + return -ENODEV; + } + blk_queue_max_zone_append_sectors(q, le32_to_cpu(v)); + dev_dbg(&vdev->dev, "max append sectors = %u\n", le32_to_cpu(v)); + } + + return ret; +} + +static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev) +{ + return virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED); +} +#else + +/* + * Zoned block device support is not configured in this kernel. + * We only need to define a few symbols to avoid compilation errors. + */ +#define virtblk_report_zones NULL +static inline void virtblk_revalidate_zones(struct virtio_blk *vblk) +{ +} +static inline int virtblk_probe_zoned_device(struct virtio_device *vdev, + struct virtio_blk *vblk, struct request_queue *q) +{ + return -EOPNOTSUPP; +} + +static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev) +{ + return false; +} +#endif /* CONFIG_BLK_DEV_ZONED */ + /* return id (s/n) string for *disk to *id_str */ static int virtblk_get_id(struct gendisk *disk, char *id_str) @@ -464,18 +795,24 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) struct virtio_blk *vblk = disk->private_data; struct request_queue *q = vblk->disk->queue; struct request *req; + struct virtblk_req *vbr; int err; req = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0); if (IS_ERR(req)) return PTR_ERR(req); + vbr = blk_mq_rq_to_pdu(req); + vbr->in_hdr_len = sizeof(vbr->status); + vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID); + vbr->out_hdr.sector = 0; + err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); if (err) goto out; blk_execute_rq(req, false); - err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req))); + err = blk_status_to_errno(virtblk_result(vbr->status)); out: blk_mq_free_request(req); return err; @@ -526,6 +863,7 @@ static const struct block_device_operations virtblk_fops = { .owner = THIS_MODULE, .getgeo = virtblk_getgeo, .free_disk = virtblk_free_disk, + .report_zones = virtblk_report_zones, }; static int index_to_minor(int index) @@ -596,6 +934,7 @@ static void virtblk_config_changed_work(struct work_struct *work) struct virtio_blk *vblk = container_of(work, struct virtio_blk, config_work); + virtblk_revalidate_zones(vblk); virtblk_update_capacity(vblk, true); } @@ -1152,6 +1491,15 @@ static int virtblk_probe(struct virtio_device *vdev) virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); + if (virtblk_has_zoned_feature(vdev)) { + err = virtblk_probe_zoned_device(vdev, vblk, q); + if (err) + goto out_cleanup_disk; + } + + dev_info(&vdev->dev, "blk config size: %zu\n", + sizeof(struct virtio_blk_config)); + err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); if (err) goto out_cleanup_disk; @@ -1253,6 +1601,9 @@ static unsigned int features[] = { VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, VIRTIO_BLK_F_SECURE_ERASE, +#ifdef CONFIG_BLK_DEV_ZONED + VIRTIO_BLK_F_ZONED, +#endif /* CONFIG_BLK_DEV_ZONED */ }; static struct virtio_driver virtio_blk = { -- cgit v1.2.3 From 04e5421e6f61538f510aae9329e77d010159863f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 20 Dec 2022 07:37:49 -0500 Subject: virtio_blk: temporary variable type tweak virtblk_result returns blk_status_t which is a bitwise restricted type, so we are not supposed to stuff it in a plain int temporary variable. All we do with it is pass it on to a function expecting blk_status_t so the generated code is ok, but we get warnings from sparse: drivers/block/virtio_blk.c:326:36: sparse: sparse: incorrect type in initializer (different base types) @@ expected int status @@ +got restricted blk_status_t @@ drivers/block/virtio_blk.c:334:33: sparse: sparse: incorrect type in argument 2 (different base types) @@ expected restricted +blk_status_t [usertype] error @@ got int status @@ Make sparse happy by using the correct type. Message-Id: <20221220124152.523531-1-mst@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Reviewed-by: Chaitanya Kulkarni --- drivers/block/virtio_blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/virtio_blk.c') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 71e1e4bd8439..abc6db9e15b7 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -323,7 +323,7 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, static inline void virtblk_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); - int status = virtblk_result(vbr->status); + blk_status_t status = virtblk_result(vbr->status); virtblk_unmap_data(req, vbr); virtblk_cleanup_cmd(req); -- cgit v1.2.3 From 2a9c844e896b2895cde9fc0276f6243c68d82e70 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 20 Dec 2022 07:37:49 -0500 Subject: virtio_blk: zone append in header type tweak virtio blk returns a 64 bit append_sector in an input buffer, in LE format. This field is not tagged as LE correctly, so even though the generated code is ok, we get warnings from sparse: drivers/block/virtio_blk.c:332:33: sparse: sparse: cast to restricted __le64 Make sparse happy by using the correct type. Message-Id: <20221220125154.564265-1-mst@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/virtio_blk.c') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index abc6db9e15b7..386d7c1ca021 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -103,7 +103,7 @@ struct virtblk_req { struct { u8 status; u8 reserved[7]; - u64 append_sector; + __le64 append_sector; } zone_append_in_hdr; }; -- cgit v1.2.3 From 489e18f3d73282f6bf6203324b3b17d459e2e750 Mon Sep 17 00:00:00 2001 From: Suwan Kim Date: Wed, 21 Dec 2022 23:54:55 +0900 Subject: virtio-blk: set req->state to MQ_RQ_COMPLETE after polling I/O is finished Driver should set req->state to MQ_RQ_COMPLETE after it finishes to process req. But virtio-blk doesn't set MQ_RQ_COMPLETE after virtblk_poll() handles req and req->state still remains MQ_RQ_IN_FLIGHT. Fortunately so far there is no issue about it because blk_mq_end_request_batch() sets req->state to MQ_RQ_IDLE. In this patch, virblk_poll() calls blk_mq_complete_request_remote() to set req->state to MQ_RQ_COMPLETE before it adds req to a batch completion list. So it properly sets req->state after polling I/O is finished. Fixes: 4e0400525691 ("virtio-blk: support polling I/O") Signed-off-by: Suwan Kim Reviewed-by: Christoph Hellwig Message-Id: <20221221145456.281218-2-suwan.kim027@gmail.com> Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/block/virtio_blk.c') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 386d7c1ca021..f2fb49aadb1e 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1202,9 +1202,10 @@ static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) struct request *req = blk_mq_rq_from_pdu(vbr); found++; - if (!blk_mq_add_to_batch(req, iob, vbr->status, + if (!blk_mq_complete_request_remote(req) && + !blk_mq_add_to_batch(req, iob, vbr->status, virtblk_complete_batch)) - blk_mq_complete_request(req); + virtblk_request_done(req); } if (found) -- cgit v1.2.3 From 07b679f70d73483930e8d3c293942416d9cd5c13 Mon Sep 17 00:00:00 2001 From: Suwan Kim Date: Wed, 21 Dec 2022 23:54:56 +0900 Subject: virtio-blk: support completion batching for the IRQ path This patch adds completion batching to the IRQ path. It reuses batch completion code of virtblk_poll(). It collects requests to io_comp_batch and processes them all at once. It can boost up the performance by 2%. To validate the performance improvement and stabilty, I did fio test with 4 vCPU VM and 12 vCPU VM respectively. Both VMs have 8GB ram and the same number of HW queues as vCPU. The fio cammad is as follows and I ran the fio 5 times and got IOPS average. (io_uring, randread, direct=1, bs=512, iodepth=64 numjobs=2,4) Test result shows about 2% improvement. 4 vcpu VM | numjobs=2 | numjobs=4 ----------------------------------------------------------- fio without patch | 367.2K IOPS | 397.6K IOPS ----------------------------------------------------------- fio with patch | 372.8K IOPS | 407.7K IOPS 12 vcpu VM | numjobs=2 | numjobs=4 ----------------------------------------------------------- fio without patch | 363.6K IOPS | 374.8K IOPS ----------------------------------------------------------- fio with patch | 373.8K IOPS | 385.3K IOPS Signed-off-by: Suwan Kim Reviewed-by: Christoph Hellwig Message-Id: <20221221145456.281218-3-suwan.kim027@gmail.com> Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 82 +++++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 37 deletions(-) (limited to 'drivers/block/virtio_blk.c') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index f2fb49aadb1e..23d4fa3252a4 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -334,33 +334,63 @@ static inline void virtblk_request_done(struct request *req) blk_mq_end_request(req, status); } +static void virtblk_complete_batch(struct io_comp_batch *iob) +{ + struct request *req; + + rq_list_for_each(&iob->req_list, req) { + virtblk_unmap_data(req, blk_mq_rq_to_pdu(req)); + virtblk_cleanup_cmd(req); + } + blk_mq_end_request_batch(iob); +} + +static int virtblk_handle_req(struct virtio_blk_vq *vq, + struct io_comp_batch *iob) +{ + struct virtblk_req *vbr; + int req_done = 0; + unsigned int len; + + while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) { + struct request *req = blk_mq_rq_from_pdu(vbr); + + if (likely(!blk_should_fake_timeout(req->q)) && + !blk_mq_complete_request_remote(req) && + !blk_mq_add_to_batch(req, iob, vbr->status, + virtblk_complete_batch)) + virtblk_request_done(req); + req_done++; + } + + return req_done; +} + static void virtblk_done(struct virtqueue *vq) { struct virtio_blk *vblk = vq->vdev->priv; - bool req_done = false; - int qid = vq->index; - struct virtblk_req *vbr; + struct virtio_blk_vq *vblk_vq = &vblk->vqs[vq->index]; + int req_done = 0; unsigned long flags; - unsigned int len; + DEFINE_IO_COMP_BATCH(iob); - spin_lock_irqsave(&vblk->vqs[qid].lock, flags); + spin_lock_irqsave(&vblk_vq->lock, flags); do { virtqueue_disable_cb(vq); - while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { - struct request *req = blk_mq_rq_from_pdu(vbr); + req_done += virtblk_handle_req(vblk_vq, &iob); - if (likely(!blk_should_fake_timeout(req->q))) - blk_mq_complete_request(req); - req_done = true; - } if (unlikely(virtqueue_is_broken(vq))) break; } while (!virtqueue_enable_cb(vq)); - /* In case queue is stopped waiting for more buffers. */ - if (req_done) + if (req_done) { + if (!rq_list_empty(iob.req_list)) + iob.complete(&iob); + + /* In case queue is stopped waiting for more buffers. */ blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); - spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); + } + spin_unlock_irqrestore(&vblk_vq->lock, flags); } static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx) @@ -1176,37 +1206,15 @@ static void virtblk_map_queues(struct blk_mq_tag_set *set) } } -static void virtblk_complete_batch(struct io_comp_batch *iob) -{ - struct request *req; - - rq_list_for_each(&iob->req_list, req) { - virtblk_unmap_data(req, blk_mq_rq_to_pdu(req)); - virtblk_cleanup_cmd(req); - } - blk_mq_end_request_batch(iob); -} - static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct virtio_blk *vblk = hctx->queue->queuedata; struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx); - struct virtblk_req *vbr; unsigned long flags; - unsigned int len; int found = 0; spin_lock_irqsave(&vq->lock, flags); - - while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) { - struct request *req = blk_mq_rq_from_pdu(vbr); - - found++; - if (!blk_mq_complete_request_remote(req) && - !blk_mq_add_to_batch(req, iob, vbr->status, - virtblk_complete_batch)) - virtblk_request_done(req); - } + found = virtblk_handle_req(vq, iob); if (found) blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); -- cgit v1.2.3