From b66f79b706f0cfc09bde8465668428eef188a94c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 8 Apr 2024 10:41:15 +0900 Subject: null_blk: Do not request ELEVATOR_F_ZBD_SEQ_WRITE elevator feature With zone write plugging enabled at the block layer level, a zoned device can only ever see at most a single write operation per zone. There is thus no need to request a block scheduler with strick per-zone sequential write ordering control through the ELEVATOR_F_ZBD_SEQ_WRITE feature. Removing this allows using a zoned null_blk device with any scheduler, including "none". Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Tested-by: Hans Holmberg Tested-by: Dennis Maisenbacher Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240408014128.205141-16-dlemoal@kernel.org Signed-off-by: Jens Axboe --- drivers/block/null_blk/zoned.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/block/null_blk') diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 1689e2584104..8e217f8fadcd 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -165,7 +165,6 @@ int null_register_zoned_dev(struct nullb *nullb) struct request_queue *q = nullb->q; blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); - blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0); return blk_revalidate_disk_zones(nullb->disk, NULL); } -- cgit v1.2.3 From 997a1f08b4d4283687477470bcc256dfd33ba9d0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 8 Apr 2024 10:41:16 +0900 Subject: null_blk: Introduce zone_append_max_sectors attribute Add the zone_append_max_sectors configfs attribute and module parameter to allow configuring the maximum number of 512B sectors of zone append operations. This attribute is meaningful only for zoned null block devices. If not specified, the default is unchanged and the zoned device max append sectors limit is set to the device max sectors limit. If a non 0 value is used for this attribute, which is the default, then native support for zone append operations is enabled. Setting a 0 value disables native zone append operations support to instead use the block layer emulation. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Bart Van Assche Tested-by: Hans Holmberg Tested-by: Dennis Maisenbacher Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240408014128.205141-17-dlemoal@kernel.org Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 10 +++++++++- drivers/block/null_blk/null_blk.h | 1 + drivers/block/null_blk/zoned.c | 20 +++++++++++++++++--- 3 files changed, 27 insertions(+), 4 deletions(-) (limited to 'drivers/block/null_blk') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 71c39bcd872c..a5a50ba6ad9f 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -253,6 +253,11 @@ static unsigned int g_zone_max_active; module_param_named(zone_max_active, g_zone_max_active, uint, 0444); MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)"); +static int g_zone_append_max_sectors = INT_MAX; +module_param_named(zone_append_max_sectors, g_zone_append_max_sectors, int, 0444); +MODULE_PARM_DESC(zone_append_max_sectors, + "Maximum size of a zone append command (in 512B sectors). Specify 0 for zone append emulation"); + static struct nullb_device *null_alloc_dev(void); static void null_free_dev(struct nullb_device *dev); static void null_del_dev(struct nullb *nullb); @@ -436,6 +441,7 @@ NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL); NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL); NULLB_DEVICE_ATTR(zone_max_open, uint, NULL); NULLB_DEVICE_ATTR(zone_max_active, uint, NULL); +NULLB_DEVICE_ATTR(zone_append_max_sectors, uint, NULL); NULLB_DEVICE_ATTR(virt_boundary, bool, NULL); NULLB_DEVICE_ATTR(no_sched, bool, NULL); NULLB_DEVICE_ATTR(shared_tags, bool, NULL); @@ -580,6 +586,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_zone_nr_conv, &nullb_device_attr_zone_max_open, &nullb_device_attr_zone_max_active, + &nullb_device_attr_zone_append_max_sectors, &nullb_device_attr_zone_readonly, &nullb_device_attr_zone_offline, &nullb_device_attr_virt_boundary, @@ -671,7 +678,7 @@ static ssize_t memb_group_features_show(struct config_item *item, char *page) "shared_tags,size,submit_queues,use_per_node_hctx," "virt_boundary,zoned,zone_capacity,zone_max_active," "zone_max_open,zone_nr_conv,zone_offline,zone_readonly," - "zone_size\n"); + "zone_size,zone_append_max_sectors\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -751,6 +758,7 @@ static struct nullb_device *null_alloc_dev(void) dev->zone_nr_conv = g_zone_nr_conv; dev->zone_max_open = g_zone_max_open; dev->zone_max_active = g_zone_max_active; + dev->zone_append_max_sectors = g_zone_append_max_sectors; dev->virt_boundary = g_virt_boundary; dev->no_sched = g_no_sched; dev->shared_tags = g_shared_tags; diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 477b97746823..a9c5df650ddb 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -82,6 +82,7 @@ struct nullb_device { unsigned int zone_nr_conv; /* number of conventional zones */ unsigned int zone_max_open; /* max number of open zones */ unsigned int zone_max_active; /* max number of active zones */ + unsigned int zone_append_max_sectors; /* Max sectors per zone append command */ unsigned int submit_queues; /* number of submission queues */ unsigned int prev_submit_queues; /* number of submission queues before change */ unsigned int poll_queues; /* number of IOPOLL submission queues */ diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 8e217f8fadcd..0b2af273adaf 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -103,6 +103,11 @@ int null_init_zoned_dev(struct nullb_device *dev, dev->zone_nr_conv); } + dev->zone_append_max_sectors = + min(ALIGN_DOWN(dev->zone_append_max_sectors, + dev->blocksize >> SECTOR_SHIFT), + zone_capacity_sects); + /* Max active zones has to be < nbr of seq zones in order to be enforceable */ if (dev->zone_max_active >= dev->nr_zones - dev->zone_nr_conv) { dev->zone_max_active = 0; @@ -154,7 +159,7 @@ int null_init_zoned_dev(struct nullb_device *dev, lim->zoned = true; lim->chunk_sectors = dev->zone_size_sects; - lim->max_zone_append_sectors = dev->zone_size_sects; + lim->max_zone_append_sectors = dev->zone_append_max_sectors; lim->max_open_zones = dev->zone_max_open; lim->max_active_zones = dev->zone_max_active; return 0; @@ -163,10 +168,16 @@ int null_init_zoned_dev(struct nullb_device *dev, int null_register_zoned_dev(struct nullb *nullb) { struct request_queue *q = nullb->q; + struct gendisk *disk = nullb->disk; blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); - nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0); - return blk_revalidate_disk_zones(nullb->disk, NULL); + disk->nr_zones = bdev_nr_zones(disk->part0); + + pr_info("%s: using %s zone append\n", + disk->disk_name, + queue_emulates_zone_append(q) ? "emulated" : "native"); + + return blk_revalidate_disk_zones(disk, NULL); } void null_free_zoned_dev(struct nullb_device *dev) @@ -365,6 +376,9 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, trace_nullb_zone_op(cmd, zno, zone->cond); + if (WARN_ON_ONCE(append && !dev->zone_append_max_sectors)) + return BLK_STS_IOERR; + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) { if (append) return BLK_STS_IOERR; -- cgit v1.2.3 From f4f84586c8b9c7d5312d6f8fb4db1e12f2b83c27 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 8 Apr 2024 10:41:17 +0900 Subject: null_blk: Introduce fua attribute Add the fua configfs attribute and module parameter to allow configuring if the device supports FUA or not. Using this attribute has an effect on the null_blk device only if memory backing is enabled together with a write cache (cache_size option). This new attribute allows configuring a null_blk device with a write cache but without FUA support. This is convenient to test the block layer flush machinery. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Tested-by: Hans Holmberg Tested-by: Dennis Maisenbacher Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240408014128.205141-18-dlemoal@kernel.org Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 12 ++++++++++-- drivers/block/null_blk/null_blk.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/block/null_blk') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index a5a50ba6ad9f..578660b1ad01 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -225,6 +225,10 @@ static unsigned long g_cache_size; module_param_named(cache_size, g_cache_size, ulong, 0444); MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (none)"); +static bool g_fua = true; +module_param_named(fua, g_fua, bool, 0444); +MODULE_PARM_DESC(zoned, "Enable/disable FUA support when cache_size is used. Default: true"); + static unsigned int g_mbps; module_param_named(mbps, g_mbps, uint, 0444); MODULE_PARM_DESC(mbps, "Limit maximum bandwidth (in MiB/s). Default: 0 (no limit)"); @@ -446,6 +450,7 @@ NULLB_DEVICE_ATTR(virt_boundary, bool, NULL); NULLB_DEVICE_ATTR(no_sched, bool, NULL); NULLB_DEVICE_ATTR(shared_tags, bool, NULL); NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL); +NULLB_DEVICE_ATTR(fua, bool, NULL); static ssize_t nullb_device_power_show(struct config_item *item, char *page) { @@ -593,6 +598,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_no_sched, &nullb_device_attr_shared_tags, &nullb_device_attr_shared_tag_bitmap, + &nullb_device_attr_fua, NULL, }; @@ -671,7 +677,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) static ssize_t memb_group_features_show(struct config_item *item, char *page) { return snprintf(page, PAGE_SIZE, - "badblocks,blocking,blocksize,cache_size," + "badblocks,blocking,blocksize,cache_size,fua," "completion_nsec,discard,home_node,hw_queue_depth," "irqmode,max_sectors,mbps,memory_backed,no_sched," "poll_queues,power,queue_mode,shared_tag_bitmap," @@ -763,6 +769,8 @@ static struct nullb_device *null_alloc_dev(void) dev->no_sched = g_no_sched; dev->shared_tags = g_shared_tags; dev->shared_tag_bitmap = g_shared_tag_bitmap; + dev->fua = g_fua; + return dev; } @@ -1920,7 +1928,7 @@ static int null_add_dev(struct nullb_device *dev) if (dev->cache_size > 0) { set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags); - blk_queue_write_cache(nullb->q, true, true); + blk_queue_write_cache(nullb->q, true, dev->fua); } nullb->q->queuedata = nullb; diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index a9c5df650ddb..3234e6c85eed 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -105,6 +105,7 @@ struct nullb_device { bool no_sched; /* no IO scheduler for the device */ bool shared_tags; /* share tag set between devices for blk-mq */ bool shared_tag_bitmap; /* use hostwide shared tags */ + bool fua; /* Support FUA */ }; struct nullb { -- cgit v1.2.3 From 9b3c08b90fc212de58c34621d83e74977170b2cd Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 8 Apr 2024 10:41:20 +0900 Subject: block: Simplify blk_revalidate_disk_zones() interface The only user of blk_revalidate_disk_zones() second argument was the SCSI disk driver (sd). Now that this driver does not require this update_driver_data argument, remove it to simplify the interface of blk_revalidate_disk_zones(). Also update the function kdoc comment to be more accurate (i.e. there is no gendisk ->revalidate method). Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Tested-by: Hans Holmberg Tested-by: Dennis Maisenbacher Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240408014128.205141-21-dlemoal@kernel.org Signed-off-by: Jens Axboe --- block/blk-zoned.c | 21 +++++++-------------- drivers/block/null_blk/zoned.c | 2 +- drivers/block/ublk_drv.c | 2 +- drivers/block/virtio_blk.c | 2 +- drivers/md/dm-zone.c | 2 +- drivers/nvme/host/core.c | 2 +- drivers/scsi/sd_zbc.c | 2 +- include/linux/blkdev.h | 3 +-- 8 files changed, 14 insertions(+), 22 deletions(-) (limited to 'drivers/block/null_blk') diff --git a/block/blk-zoned.c b/block/blk-zoned.c index da0fc7e2d00a..e46d23ad2fa9 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -1713,21 +1713,17 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx, /** * blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps * @disk: Target disk - * @update_driver_data: Callback to update driver data on the frozen disk * - * Helper function for low-level device drivers to check and (re) allocate and - * initialize a disk request queue zone bitmaps. This functions should normally - * be called within the disk ->revalidate method for blk-mq based drivers. + * Helper function for low-level device drivers to check, (re) allocate and + * initialize resources used for managing zoned disks. This function should + * normally be called by blk-mq based drivers when a zoned gendisk is probed + * and when the zone configuration of the gendisk changes (e.g. after a format). * Before calling this function, the device driver must already have set the * device zone size (chunk_sector limit) and the max zone append limit. * BIO based drivers can also use this function as long as the device queue * can be safely frozen. - * If the @update_driver_data callback function is not NULL, the callback is - * executed with the device request queue frozen after all zones have been - * checked. */ -int blk_revalidate_disk_zones(struct gendisk *disk, - void (*update_driver_data)(struct gendisk *disk)) +int blk_revalidate_disk_zones(struct gendisk *disk) { struct request_queue *q = disk->queue; sector_t zone_sectors = q->limits.chunk_sectors; @@ -1794,13 +1790,10 @@ int blk_revalidate_disk_zones(struct gendisk *disk, * referencing the bitmaps). */ blk_mq_freeze_queue(q); - if (ret > 0) { + if (ret > 0) ret = disk_update_zone_resources(disk, &args); - if (update_driver_data) - update_driver_data(disk); - } else { + else pr_warn("%s: failed to revalidate zones\n", disk->disk_name); - } if (ret) disk_free_zone_resources(disk); blk_mq_unfreeze_queue(q); diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 0b2af273adaf..4ddd84752557 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -177,7 +177,7 @@ int null_register_zoned_dev(struct nullb *nullb) disk->disk_name, queue_emulates_zone_append(q) ? "emulated" : "native"); - return blk_revalidate_disk_zones(disk, NULL); + return blk_revalidate_disk_zones(disk); } void null_free_zoned_dev(struct nullb_device *dev) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index ab6af84e327c..851c78913de2 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -221,7 +221,7 @@ static int ublk_get_nr_zones(const struct ublk_device *ub) static int ublk_revalidate_disk_zones(struct ublk_device *ub) { - return blk_revalidate_disk_zones(ub->ub_disk, NULL); + return blk_revalidate_disk_zones(ub->ub_disk); } static int ublk_dev_param_zoned_validate(const struct ublk_device *ub) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 42dea7601d87..c1af0a7d56c8 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1543,7 +1543,7 @@ static int virtblk_probe(struct virtio_device *vdev) */ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && lim.zoned) { blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue); - err = blk_revalidate_disk_zones(vblk->disk, NULL); + err = blk_revalidate_disk_zones(vblk->disk); if (err) goto out_cleanup_disk; } diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index 34769f3e3175..d17ae4486a6a 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -169,7 +169,7 @@ static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t) * our table for dm_blk_report_zones() to use directly. */ md->zone_revalidate_map = t; - ret = blk_revalidate_disk_zones(disk, NULL); + ret = blk_revalidate_disk_zones(disk); md->zone_revalidate_map = NULL; if (ret) { diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 943d72bdd794..c9955ecd1790 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2150,7 +2150,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, blk_mq_unfreeze_queue(ns->disk->queue); if (blk_queue_is_zoned(ns->queue)) { - ret = blk_revalidate_disk_zones(ns->disk, NULL); + ret = blk_revalidate_disk_zones(ns->disk); if (ret && !nvme_first_scan(ns->disk)) goto out; } diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index d0ead9858954..806036e48abe 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -572,7 +572,7 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) blk_queue_max_zone_append_sectors(q, 0); flags = memalloc_noio_save(); - ret = blk_revalidate_disk_zones(disk, NULL); + ret = blk_revalidate_disk_zones(disk); memalloc_noio_restore(flags); if (ret) { sdkp->zone_info = (struct zoned_disk_info){ }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 46613bf6a402..fbc6860b3622 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -336,8 +336,7 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, sector_t sectors, sector_t nr_sectors); -int blk_revalidate_disk_zones(struct gendisk *disk, - void (*update_driver_data)(struct gendisk *disk)); +int blk_revalidate_disk_zones(struct gendisk *disk); /* * Independent access ranges: struct blk_independent_access_range describes -- cgit v1.2.3 From cb9e5273f6d97830c44aeecd28cf5f5723ef2ba3 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 Apr 2024 17:55:00 +0900 Subject: null_blk: Have all null_handle_xxx() return a blk_status_t Modify the null_handle_flush() and null_handle_rq() functions to return a blk_status_t instead of an errno to simplify the call sites of these functions and to be consistant with other null_handle_xxx() functions. Signed-off-by: Damien Le Moal Reviewed-by: Nitesh Shetty Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240411085502.728558-2-dlemoal@kernel.org Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'drivers/block/null_blk') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 578660b1ad01..bccee438b4b7 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1167,7 +1167,7 @@ blk_status_t null_handle_discard(struct nullb_device *dev, return BLK_STS_OK; } -static int null_handle_flush(struct nullb *nullb) +static blk_status_t null_handle_flush(struct nullb *nullb) { int err; @@ -1184,7 +1184,7 @@ static int null_handle_flush(struct nullb *nullb) WARN_ON(!radix_tree_empty(&nullb->dev->cache)); spin_unlock_irq(&nullb->lock); - return err; + return errno_to_blk_status(err); } static int null_transfer(struct nullb *nullb, struct page *page, @@ -1222,7 +1222,7 @@ static int null_handle_rq(struct nullb_cmd *cmd) { struct request *rq = blk_mq_rq_from_pdu(cmd); struct nullb *nullb = cmd->nq->dev->nullb; - int err; + int err = 0; unsigned int len; sector_t sector = blk_rq_pos(rq); struct req_iterator iter; @@ -1234,15 +1234,13 @@ static int null_handle_rq(struct nullb_cmd *cmd) err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset, op_is_write(req_op(rq)), sector, rq->cmd_flags & REQ_FUA); - if (err) { - spin_unlock_irq(&nullb->lock); - return err; - } + if (err) + break; sector += len >> SECTOR_SHIFT; } spin_unlock_irq(&nullb->lock); - return 0; + return errno_to_blk_status(err); } static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd) @@ -1289,8 +1287,8 @@ static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, if (op == REQ_OP_DISCARD) return null_handle_discard(dev, sector, nr_sectors); - return errno_to_blk_status(null_handle_rq(cmd)); + return null_handle_rq(cmd); } static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd) @@ -1359,7 +1357,7 @@ static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector, blk_status_t sts; if (op == REQ_OP_FLUSH) { - cmd->error = errno_to_blk_status(null_handle_flush(nullb)); + cmd->error = null_handle_flush(nullb); goto out; } -- cgit v1.2.3 From 3bdde0701e5f819df0fa0e32fa8d5df7dc184cb5 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 Apr 2024 17:55:01 +0900 Subject: null_blk: Do zone resource management only if necessary For zoned null_blk devices setup without any limit on the maximum number of open and active zones, there is no need to count the number of zones that are implicitly open, explicitly open and closed. This is indicated by the boolean field need_zone_res_mgmt of sturct nullb_device. Modify the zone management functions null_reset_zone(), null_finish_zone(), null_open_zone() and null_close_zone() to manage the zone condition counters only if the device need_zone_res_mgmt field is true. With this change, the function __null_close_zone() is removed and integrated into the 2 caller sites directly, with the null_close_imp_open_zone() call site greatly simplified as this function closes zones that are known to be in the implicit open condition. null_zone_write() is modified in a similar manner to do zone condition accouting only when the device need_zone_res_mgmt field is true. With these changes, the inline helpers null_lock_zone_res() and null_unlock_zone_res() are removed and replaced with direct calls to spin_lock()/spin_unlock(). Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20240411085502.728558-3-dlemoal@kernel.org Signed-off-by: Jens Axboe --- drivers/block/null_blk/zoned.c | 311 ++++++++++++++++++++++------------------- 1 file changed, 165 insertions(+), 146 deletions(-) (limited to 'drivers/block/null_blk') diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 4ddd84752557..c31ad2620eb0 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -19,18 +19,6 @@ static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect) return sect >> ilog2(dev->zone_size_sects); } -static inline void null_lock_zone_res(struct nullb_device *dev) -{ - if (dev->need_zone_res_mgmt) - spin_lock_irq(&dev->zone_res_lock); -} - -static inline void null_unlock_zone_res(struct nullb_device *dev) -{ - if (dev->need_zone_res_mgmt) - spin_unlock_irq(&dev->zone_res_lock); -} - static inline void null_init_zone_lock(struct nullb_device *dev, struct nullb_zone *zone) { @@ -251,35 +239,6 @@ size_t null_zone_valid_read_len(struct nullb *nullb, return (zone->wp - sector) << SECTOR_SHIFT; } -static blk_status_t __null_close_zone(struct nullb_device *dev, - struct nullb_zone *zone) -{ - switch (zone->cond) { - case BLK_ZONE_COND_CLOSED: - /* close operation on closed is not an error */ - return BLK_STS_OK; - case BLK_ZONE_COND_IMP_OPEN: - dev->nr_zones_imp_open--; - break; - case BLK_ZONE_COND_EXP_OPEN: - dev->nr_zones_exp_open--; - break; - case BLK_ZONE_COND_EMPTY: - case BLK_ZONE_COND_FULL: - default: - return BLK_STS_IOERR; - } - - if (zone->wp == zone->start) { - zone->cond = BLK_ZONE_COND_EMPTY; - } else { - zone->cond = BLK_ZONE_COND_CLOSED; - dev->nr_zones_closed++; - } - - return BLK_STS_OK; -} - static void null_close_imp_open_zone(struct nullb_device *dev) { struct nullb_zone *zone; @@ -296,7 +255,13 @@ static void null_close_imp_open_zone(struct nullb_device *dev) zno = dev->zone_nr_conv; if (zone->cond == BLK_ZONE_COND_IMP_OPEN) { - __null_close_zone(dev, zone); + dev->nr_zones_imp_open--; + if (zone->wp == zone->start) { + zone->cond = BLK_ZONE_COND_EMPTY; + } else { + zone->cond = BLK_ZONE_COND_CLOSED; + dev->nr_zones_closed++; + } dev->imp_close_zone_no = zno; return; } @@ -392,7 +357,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, zone->cond == BLK_ZONE_COND_OFFLINE) { /* Cannot write to the zone */ ret = BLK_STS_IOERR; - goto unlock; + goto unlock_zone; } /* @@ -406,54 +371,57 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, blk_mq_rq_from_pdu(cmd)->__sector = sector; } else if (sector != zone->wp) { ret = BLK_STS_IOERR; - goto unlock; + goto unlock_zone; } if (zone->wp + nr_sectors > zone->start + zone->capacity) { ret = BLK_STS_IOERR; - goto unlock; + goto unlock_zone; } if (zone->cond == BLK_ZONE_COND_CLOSED || zone->cond == BLK_ZONE_COND_EMPTY) { - null_lock_zone_res(dev); + if (dev->need_zone_res_mgmt) { + spin_lock(&dev->zone_res_lock); - ret = null_check_zone_resources(dev, zone); - if (ret != BLK_STS_OK) { - null_unlock_zone_res(dev); - goto unlock; - } - if (zone->cond == BLK_ZONE_COND_CLOSED) { - dev->nr_zones_closed--; - dev->nr_zones_imp_open++; - } else if (zone->cond == BLK_ZONE_COND_EMPTY) { - dev->nr_zones_imp_open++; - } + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) { + spin_unlock(&dev->zone_res_lock); + goto unlock_zone; + } + if (zone->cond == BLK_ZONE_COND_CLOSED) { + dev->nr_zones_closed--; + dev->nr_zones_imp_open++; + } else if (zone->cond == BLK_ZONE_COND_EMPTY) { + dev->nr_zones_imp_open++; + } - if (zone->cond != BLK_ZONE_COND_EXP_OPEN) - zone->cond = BLK_ZONE_COND_IMP_OPEN; + spin_unlock(&dev->zone_res_lock); + } - null_unlock_zone_res(dev); + zone->cond = BLK_ZONE_COND_IMP_OPEN; } ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); if (ret != BLK_STS_OK) - goto unlock; + goto unlock_zone; zone->wp += nr_sectors; if (zone->wp == zone->start + zone->capacity) { - null_lock_zone_res(dev); - if (zone->cond == BLK_ZONE_COND_EXP_OPEN) - dev->nr_zones_exp_open--; - else if (zone->cond == BLK_ZONE_COND_IMP_OPEN) - dev->nr_zones_imp_open--; + if (dev->need_zone_res_mgmt) { + spin_lock(&dev->zone_res_lock); + if (zone->cond == BLK_ZONE_COND_EXP_OPEN) + dev->nr_zones_exp_open--; + else if (zone->cond == BLK_ZONE_COND_IMP_OPEN) + dev->nr_zones_imp_open--; + spin_unlock(&dev->zone_res_lock); + } zone->cond = BLK_ZONE_COND_FULL; - null_unlock_zone_res(dev); } ret = BLK_STS_OK; -unlock: +unlock_zone: null_unlock_zone(dev, zone); return ret; @@ -467,54 +435,100 @@ static blk_status_t null_open_zone(struct nullb_device *dev, if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return BLK_STS_IOERR; - null_lock_zone_res(dev); - switch (zone->cond) { case BLK_ZONE_COND_EXP_OPEN: - /* open operation on exp open is not an error */ - goto unlock; + /* Open operation on exp open is not an error */ + return BLK_STS_OK; case BLK_ZONE_COND_EMPTY: - ret = null_check_zone_resources(dev, zone); - if (ret != BLK_STS_OK) - goto unlock; - break; case BLK_ZONE_COND_IMP_OPEN: - dev->nr_zones_imp_open--; - break; case BLK_ZONE_COND_CLOSED: - ret = null_check_zone_resources(dev, zone); - if (ret != BLK_STS_OK) - goto unlock; - dev->nr_zones_closed--; break; case BLK_ZONE_COND_FULL: default: - ret = BLK_STS_IOERR; - goto unlock; + return BLK_STS_IOERR; } - zone->cond = BLK_ZONE_COND_EXP_OPEN; - dev->nr_zones_exp_open++; + if (dev->need_zone_res_mgmt) { + spin_lock(&dev->zone_res_lock); -unlock: - null_unlock_zone_res(dev); + switch (zone->cond) { + case BLK_ZONE_COND_EMPTY: + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) { + spin_unlock(&dev->zone_res_lock); + return ret; + } + break; + case BLK_ZONE_COND_IMP_OPEN: + dev->nr_zones_imp_open--; + break; + case BLK_ZONE_COND_CLOSED: + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) { + spin_unlock(&dev->zone_res_lock); + return ret; + } + dev->nr_zones_closed--; + break; + default: + break; + } - return ret; + dev->nr_zones_exp_open++; + + spin_unlock(&dev->zone_res_lock); + } + + zone->cond = BLK_ZONE_COND_EXP_OPEN; + + return BLK_STS_OK; } static blk_status_t null_close_zone(struct nullb_device *dev, struct nullb_zone *zone) { - blk_status_t ret; - if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return BLK_STS_IOERR; - null_lock_zone_res(dev); - ret = __null_close_zone(dev, zone); - null_unlock_zone_res(dev); + switch (zone->cond) { + case BLK_ZONE_COND_CLOSED: + /* close operation on closed is not an error */ + return BLK_STS_OK; + case BLK_ZONE_COND_IMP_OPEN: + case BLK_ZONE_COND_EXP_OPEN: + break; + case BLK_ZONE_COND_EMPTY: + case BLK_ZONE_COND_FULL: + default: + return BLK_STS_IOERR; + } + + if (dev->need_zone_res_mgmt) { + spin_lock(&dev->zone_res_lock); - return ret; + switch (zone->cond) { + case BLK_ZONE_COND_IMP_OPEN: + dev->nr_zones_imp_open--; + break; + case BLK_ZONE_COND_EXP_OPEN: + dev->nr_zones_exp_open--; + break; + default: + break; + } + + if (zone->wp > zone->start) + dev->nr_zones_closed++; + + spin_unlock(&dev->zone_res_lock); + } + + if (zone->wp == zone->start) + zone->cond = BLK_ZONE_COND_EMPTY; + else + zone->cond = BLK_ZONE_COND_CLOSED; + + return BLK_STS_OK; } static blk_status_t null_finish_zone(struct nullb_device *dev, @@ -525,41 +539,47 @@ static blk_status_t null_finish_zone(struct nullb_device *dev, if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return BLK_STS_IOERR; - null_lock_zone_res(dev); + if (dev->need_zone_res_mgmt) { + spin_lock(&dev->zone_res_lock); - switch (zone->cond) { - case BLK_ZONE_COND_FULL: - /* finish operation on full is not an error */ - goto unlock; - case BLK_ZONE_COND_EMPTY: - ret = null_check_zone_resources(dev, zone); - if (ret != BLK_STS_OK) - goto unlock; - break; - case BLK_ZONE_COND_IMP_OPEN: - dev->nr_zones_imp_open--; - break; - case BLK_ZONE_COND_EXP_OPEN: - dev->nr_zones_exp_open--; - break; - case BLK_ZONE_COND_CLOSED: - ret = null_check_zone_resources(dev, zone); - if (ret != BLK_STS_OK) - goto unlock; - dev->nr_zones_closed--; - break; - default: - ret = BLK_STS_IOERR; - goto unlock; + switch (zone->cond) { + case BLK_ZONE_COND_FULL: + /* Finish operation on full is not an error */ + spin_unlock(&dev->zone_res_lock); + return BLK_STS_OK; + case BLK_ZONE_COND_EMPTY: + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) { + spin_unlock(&dev->zone_res_lock); + return ret; + } + break; + case BLK_ZONE_COND_IMP_OPEN: + dev->nr_zones_imp_open--; + break; + case BLK_ZONE_COND_EXP_OPEN: + dev->nr_zones_exp_open--; + break; + case BLK_ZONE_COND_CLOSED: + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) { + spin_unlock(&dev->zone_res_lock); + return ret; + } + dev->nr_zones_closed--; + break; + default: + spin_unlock(&dev->zone_res_lock); + return BLK_STS_IOERR; + } + + spin_unlock(&dev->zone_res_lock); } zone->cond = BLK_ZONE_COND_FULL; zone->wp = zone->start + zone->len; -unlock: - null_unlock_zone_res(dev); - - return ret; + return BLK_STS_OK; } static blk_status_t null_reset_zone(struct nullb_device *dev, @@ -568,34 +588,33 @@ static blk_status_t null_reset_zone(struct nullb_device *dev, if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return BLK_STS_IOERR; - null_lock_zone_res(dev); + if (dev->need_zone_res_mgmt) { + spin_lock(&dev->zone_res_lock); - switch (zone->cond) { - case BLK_ZONE_COND_EMPTY: - /* reset operation on empty is not an error */ - null_unlock_zone_res(dev); - return BLK_STS_OK; - case BLK_ZONE_COND_IMP_OPEN: - dev->nr_zones_imp_open--; - break; - case BLK_ZONE_COND_EXP_OPEN: - dev->nr_zones_exp_open--; - break; - case BLK_ZONE_COND_CLOSED: - dev->nr_zones_closed--; - break; - case BLK_ZONE_COND_FULL: - break; - default: - null_unlock_zone_res(dev); - return BLK_STS_IOERR; + switch (zone->cond) { + case BLK_ZONE_COND_IMP_OPEN: + dev->nr_zones_imp_open--; + break; + case BLK_ZONE_COND_EXP_OPEN: + dev->nr_zones_exp_open--; + break; + case BLK_ZONE_COND_CLOSED: + dev->nr_zones_closed--; + break; + case BLK_ZONE_COND_EMPTY: + case BLK_ZONE_COND_FULL: + break; + default: + spin_unlock(&dev->zone_res_lock); + return BLK_STS_IOERR; + } + + spin_unlock(&dev->zone_res_lock); } zone->cond = BLK_ZONE_COND_EMPTY; zone->wp = zone->start; - null_unlock_zone_res(dev); - if (dev->memory_backed) return null_handle_discard(dev, zone->start, zone->len); -- cgit v1.2.3 From e994ff5b55e3bf30ecb6ed354eaec77c989aa4ae Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 Apr 2024 17:55:02 +0900 Subject: null_blk: Simplify null_zone_write() In null_zone_write, we do not need to first check if the target zone condition is FULL, READONLY or OFFLINE: for theses conditions, the check of the command sector against the zone write pointer will always result in the command failing. Remove these checks. We still however need to check that the target zone write pointer is not invalid for zone append operations. To do so, add the macro NULL_ZONE_INVALID_WP and use it in null_set_zone_cond() when changing a zone to READONLY or OFFLINE condition. Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240411085502.728558-4-dlemoal@kernel.org Signed-off-by: Jens Axboe --- drivers/block/null_blk/zoned.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) (limited to 'drivers/block/null_blk') diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index c31ad2620eb0..5b5a63adacc1 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -9,6 +9,8 @@ #undef pr_fmt #define pr_fmt(fmt) "null_blk: " fmt +#define NULL_ZONE_INVALID_WP ((sector_t)-1) + static inline sector_t mb_to_sects(unsigned long mb) { return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT; @@ -341,9 +343,6 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, trace_nullb_zone_op(cmd, zno, zone->cond); - if (WARN_ON_ONCE(append && !dev->zone_append_max_sectors)) - return BLK_STS_IOERR; - if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) { if (append) return BLK_STS_IOERR; @@ -352,29 +351,26 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, null_lock_zone(dev, zone); - if (zone->cond == BLK_ZONE_COND_FULL || - zone->cond == BLK_ZONE_COND_READONLY || - zone->cond == BLK_ZONE_COND_OFFLINE) { - /* Cannot write to the zone */ - ret = BLK_STS_IOERR; - goto unlock_zone; - } - /* - * Regular writes must be at the write pointer position. - * Zone append writes are automatically issued at the write - * pointer and the position returned using the request or BIO - * sector. + * Regular writes must be at the write pointer position. Zone append + * writes are automatically issued at the write pointer and the position + * returned using the request sector. Note that we do not check the zone + * condition because for FULL, READONLY and OFFLINE zones, the sector + * check against the zone write pointer will always result in failing + * the command. */ if (append) { + if (WARN_ON_ONCE(!dev->zone_append_max_sectors) || + zone->wp == NULL_ZONE_INVALID_WP) { + ret = BLK_STS_IOERR; + goto unlock_zone; + } sector = zone->wp; blk_mq_rq_from_pdu(cmd)->__sector = sector; - } else if (sector != zone->wp) { - ret = BLK_STS_IOERR; - goto unlock_zone; } - if (zone->wp + nr_sectors > zone->start + zone->capacity) { + if (sector != zone->wp || + zone->wp + nr_sectors > zone->start + zone->capacity) { ret = BLK_STS_IOERR; goto unlock_zone; } @@ -743,7 +739,7 @@ static void null_set_zone_cond(struct nullb_device *dev, zone->cond != BLK_ZONE_COND_OFFLINE) null_finish_zone(dev, zone); zone->cond = cond; - zone->wp = (sector_t)-1; + zone->wp = NULL_ZONE_INVALID_WP; } null_unlock_zone(dev, zone); -- cgit v1.2.3 From 07d1b99825f40f9c0d93e6b99d79a08d0717bac1 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 25 Apr 2024 19:16:35 +0200 Subject: null_blk: Fix missing mutex_destroy() at module removal When a mutex lock is not used any more, the function mutex_destroy should be called to mark the mutex lock uninitialized. Fixes: f2298c0403b0 ("null_blk: multi queue aware block test driver") Signed-off-by: Zhu Yanjun Link: https://lore.kernel.org/r/20240425171635.4227-1-yanjun.zhu@linux.dev Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/block/null_blk') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index bccee438b4b7..8c0e5e3fd1ed 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -2127,6 +2127,8 @@ static void __exit null_exit(void) if (tag_set.ops) blk_mq_free_tag_set(&tag_set); + + mutex_destroy(&lock); } module_init(null_init); -- cgit v1.2.3 From 9e6727f824edcdb8fdd3e6e8a0862eb49546e1cd Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Mon, 6 May 2024 09:55:38 +0200 Subject: null_blk: Fix the WARNING: modpost: missing MODULE_DESCRIPTION() No functional changes intended. Fixes: f2298c0403b0 ("null_blk: multi queue aware block test driver") Signed-off-by: Zhu Yanjun Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20240506075538.6064-1-yanjun.zhu@linux.dev Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/block/null_blk') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 8c0e5e3fd1ed..f7b9078f6913 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -2135,4 +2135,5 @@ module_init(null_init); module_exit(null_exit); MODULE_AUTHOR("Jens Axboe "); +MODULE_DESCRIPTION("multi queue aware block test driver"); MODULE_LICENSE("GPL"); -- cgit v1.2.3