summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-11-25 22:22:37 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2019-11-25 22:22:37 +0300
commit464a47f45d2ae2db859f0e7c128b5f01aff19a53 (patch)
tree81862e9d67614d5283dfb3d45d86b74c548f0386 /block
parent323264eefba1ea288d5962c0a9e23ebd62107ca8 (diff)
parenta468168130ec1a3245812f2c713be97081149ca2 (diff)
downloadlinux-464a47f45d2ae2db859f0e7c128b5f01aff19a53.tar.xz
Merge tag 'for-5.5/zoned-20191122' of git://git.kernel.dk/linux-block
Pull zoned block device update from Jens Axboe: "Enhancements and improvements to the zoned device support" * tag 'for-5.5/zoned-20191122' of git://git.kernel.dk/linux-block: scsi: sd_zbc: Remove set but not used variable 'buflen' block: rework zone reporting scsi: sd_zbc: Cleanup sd_zbc_alloc_report_buffer() null_blk: Add zone_nr_conv to features null_blk: clean up report zones null_blk: clean up the block device operations block: Remove partition support for zoned block devices block: Simplify report zones execution block: cleanup the !zoned case in blk_revalidate_disk_zones block: Enhance blk_revalidate_disk_zones()
Diffstat (limited to 'block')
-rw-r--r--block/blk-core.c6
-rw-r--r--block/blk-zoned.c356
-rw-r--r--block/partition-generic.c74
3 files changed, 156 insertions, 280 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index f0d82227a2fc..a1e228752083 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -851,11 +851,7 @@ static inline int blk_partition_remap(struct bio *bio)
if (unlikely(bio_check_ro(bio, p)))
goto out;
- /*
- * Zone management bios do not have a sector count but they do have
- * a start sector filled out and need to be remapped.
- */
- if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio))) {
+ if (bio_sectors(bio)) {
if (bio_check_eod(bio, part_nr_sects_read(p)))
goto out;
bio->bi_iter.bi_sector += p->start_sect;
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 481eaf7d04d4..6fad6f3f6980 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -93,112 +93,43 @@ unsigned int blkdev_nr_zones(struct block_device *bdev)
if (!blk_queue_is_zoned(q))
return 0;
- return __blkdev_nr_zones(q, bdev->bd_part->nr_sects);
+ return __blkdev_nr_zones(q, get_capacity(bdev->bd_disk));
}
EXPORT_SYMBOL_GPL(blkdev_nr_zones);
-/*
- * Check that a zone report belongs to this partition, and if yes, fix its start
- * sector and write pointer and return true. Return false otherwise.
- */
-static bool blkdev_report_zone(struct block_device *bdev, struct blk_zone *rep)
-{
- sector_t offset = get_start_sect(bdev);
-
- if (rep->start < offset)
- return false;
-
- rep->start -= offset;
- if (rep->start + rep->len > bdev->bd_part->nr_sects)
- return false;
-
- if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
- rep->wp = rep->start + rep->len;
- else
- rep->wp -= offset;
- return true;
-}
-
-static int blk_report_zones(struct gendisk *disk, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones)
-{
- struct request_queue *q = disk->queue;
- unsigned int z = 0, n, nrz = *nr_zones;
- sector_t capacity = get_capacity(disk);
- int ret;
-
- while (z < nrz && sector < capacity) {
- n = nrz - z;
- ret = disk->fops->report_zones(disk, sector, &zones[z], &n);
- if (ret)
- return ret;
- if (!n)
- break;
- sector += blk_queue_zone_sectors(q) * n;
- z += n;
- }
-
- WARN_ON(z > *nr_zones);
- *nr_zones = z;
-
- return 0;
-}
-
/**
* blkdev_report_zones - Get zones information
* @bdev: Target block device
* @sector: Sector from which to report zones
- * @zones: Array of zone structures where to return the zones information
- * @nr_zones: Number of zone structures in the zone array
+ * @nr_zones: Maximum number of zones to report
+ * @cb: Callback function called for each reported zone
+ * @data: Private data for the callback
*
* Description:
- * Get zone information starting from the zone containing @sector.
- * The number of zone information reported may be less than the number
- * requested by @nr_zones. The number of zones actually reported is
- * returned in @nr_zones.
- * The caller must use memalloc_noXX_save/restore() calls to control
- * memory allocations done within this function (zone array and command
- * buffer allocation by the device driver).
+ * Get zone information starting from the zone containing @sector for at most
+ * @nr_zones, and call @cb for each zone reported by the device.
+ * To report all zones in a device starting from @sector, the BLK_ALL_ZONES
+ * constant can be passed to @nr_zones.
+ * Returns the number of zones reported by the device, or a negative errno
+ * value in case of failure.
+ *
+ * Note: The caller must use memalloc_noXX_save/restore() calls to control
+ * memory allocations done within this function.
*/
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones)
+ unsigned int nr_zones, report_zones_cb cb, void *data)
{
- struct request_queue *q = bdev_get_queue(bdev);
- unsigned int i, nrz;
- int ret;
+ struct gendisk *disk = bdev->bd_disk;
+ sector_t capacity = get_capacity(disk);
- if (!blk_queue_is_zoned(q))
+ if (!blk_queue_is_zoned(bdev_get_queue(bdev)) ||
+ WARN_ON_ONCE(!disk->fops->report_zones))
return -EOPNOTSUPP;
- /*
- * A block device that advertized itself as zoned must have a
- * report_zones method. If it does not have one defined, the device
- * driver has a bug. So warn about that.
- */
- if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zones))
- return -EOPNOTSUPP;
-
- if (!*nr_zones || sector >= bdev->bd_part->nr_sects) {
- *nr_zones = 0;
+ if (!nr_zones || sector >= capacity)
return 0;
- }
-
- nrz = min(*nr_zones,
- __blkdev_nr_zones(q, bdev->bd_part->nr_sects - sector));
- ret = blk_report_zones(bdev->bd_disk, get_start_sect(bdev) + sector,
- zones, &nrz);
- if (ret)
- return ret;
-
- for (i = 0; i < nrz; i++) {
- if (!blkdev_report_zone(bdev, zones))
- break;
- zones++;
- }
-
- *nr_zones = i;
- return 0;
+ return disk->fops->report_zones(disk, sector, nr_zones, cb, data);
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
@@ -209,15 +140,11 @@ static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
return false;
- if (sector || nr_sectors != part_nr_sects_read(bdev->bd_part))
- return false;
/*
- * REQ_OP_ZONE_RESET_ALL can be executed only if the block device is
- * the entire disk, that is, if the blocks device start offset is 0 and
- * its capacity is the same as the entire disk.
+ * REQ_OP_ZONE_RESET_ALL can be executed only if the number of sectors
+ * of the applicable zone range is the entire disk.
*/
- return get_start_sect(bdev) == 0 &&
- part_nr_sects_read(bdev->bd_part) == get_capacity(bdev->bd_disk);
+ return !sector && nr_sectors == get_capacity(bdev->bd_disk);
}
/**
@@ -242,6 +169,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
{
struct request_queue *q = bdev_get_queue(bdev);
sector_t zone_sectors = blk_queue_zone_sectors(q);
+ sector_t capacity = get_capacity(bdev->bd_disk);
sector_t end_sector = sector + nr_sectors;
struct bio *bio = NULL;
int ret;
@@ -255,7 +183,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
if (!op_is_zone_mgmt(op))
return -EOPNOTSUPP;
- if (!nr_sectors || end_sector > bdev->bd_part->nr_sects)
+ if (!nr_sectors || end_sector > capacity)
/* Out of range */
return -EINVAL;
@@ -263,8 +191,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
if (sector & (zone_sectors - 1))
return -EINVAL;
- if ((nr_sectors & (zone_sectors - 1)) &&
- end_sector != bdev->bd_part->nr_sects)
+ if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity)
return -EINVAL;
while (sector < end_sector) {
@@ -296,6 +223,20 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
}
EXPORT_SYMBOL_GPL(blkdev_zone_mgmt);
+struct zone_report_args {
+ struct blk_zone __user *zones;
+};
+
+static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx,
+ void *data)
+{
+ struct zone_report_args *args = data;
+
+ if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone)))
+ return -EFAULT;
+ return 0;
+}
+
/*
* BLKREPORTZONE ioctl processing.
* Called from blkdev_ioctl.
@@ -304,9 +245,9 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
+ struct zone_report_args args;
struct request_queue *q;
struct blk_zone_report rep;
- struct blk_zone *zones;
int ret;
if (!argp)
@@ -328,32 +269,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
if (!rep.nr_zones)
return -EINVAL;
- rep.nr_zones = min(blkdev_nr_zones(bdev), rep.nr_zones);
-
- zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
- GFP_KERNEL | __GFP_ZERO);
- if (!zones)
- return -ENOMEM;
-
- ret = blkdev_report_zones(bdev, rep.sector, zones, &rep.nr_zones);
- if (ret)
- goto out;
-
- if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
- ret = -EFAULT;
- goto out;
- }
-
- if (rep.nr_zones) {
- if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
- sizeof(struct blk_zone) * rep.nr_zones))
- ret = -EFAULT;
- }
-
- out:
- kvfree(zones);
+ args.zones = argp + sizeof(struct blk_zone_report);
+ ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones,
+ blkdev_copy_zone_to_user, &args);
+ if (ret < 0)
+ return ret;
- return ret;
+ rep.nr_zones = ret;
+ if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report)))
+ return -EFAULT;
+ return 0;
}
/*
@@ -415,37 +340,99 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node,
GFP_NOIO, node);
}
+void blk_queue_free_zone_bitmaps(struct request_queue *q)
+{
+ kfree(q->seq_zones_bitmap);
+ q->seq_zones_bitmap = NULL;
+ kfree(q->seq_zones_wlock);
+ q->seq_zones_wlock = NULL;
+}
+
+struct blk_revalidate_zone_args {
+ struct gendisk *disk;
+ unsigned long *seq_zones_bitmap;
+ unsigned long *seq_zones_wlock;
+ sector_t sector;
+};
+
/*
- * Allocate an array of struct blk_zone to get nr_zones zone information.
- * The allocated array may be smaller than nr_zones.
+ * Helper function to check the validity of zones of a zoned block device.
*/
-static struct blk_zone *blk_alloc_zones(unsigned int *nr_zones)
+static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
+ void *data)
{
- struct blk_zone *zones;
- size_t nrz = min(*nr_zones, BLK_ZONED_REPORT_MAX_ZONES);
+ struct blk_revalidate_zone_args *args = data;
+ struct gendisk *disk = args->disk;
+ struct request_queue *q = disk->queue;
+ sector_t zone_sectors = blk_queue_zone_sectors(q);
+ sector_t capacity = get_capacity(disk);
/*
- * GFP_KERNEL here is meaningless as the caller task context has
- * the PF_MEMALLOC_NOIO flag set in blk_revalidate_disk_zones()
- * with memalloc_noio_save().
+ * All zones must have the same size, with the exception on an eventual
+ * smaller last zone.
*/
- zones = kvcalloc(nrz, sizeof(struct blk_zone), GFP_KERNEL);
- if (!zones) {
- *nr_zones = 0;
- return NULL;
+ if (zone->start + zone_sectors < capacity &&
+ zone->len != zone_sectors) {
+ pr_warn("%s: Invalid zoned device with non constant zone size\n",
+ disk->disk_name);
+ return false;
+ }
+
+ if (zone->start + zone->len >= capacity &&
+ zone->len > zone_sectors) {
+ pr_warn("%s: Invalid zoned device with larger last zone size\n",
+ disk->disk_name);
+ return -ENODEV;
}
- *nr_zones = nrz;
+ /* Check for holes in the zone report */
+ if (zone->start != args->sector) {
+ pr_warn("%s: Zone gap at sectors %llu..%llu\n",
+ disk->disk_name, args->sector, zone->start);
+ return -ENODEV;
+ }
+
+ /* Check zone type */
+ switch (zone->type) {
+ case BLK_ZONE_TYPE_CONVENTIONAL:
+ case BLK_ZONE_TYPE_SEQWRITE_REQ:
+ case BLK_ZONE_TYPE_SEQWRITE_PREF:
+ break;
+ default:
+ pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
+ disk->disk_name, (int)zone->type, zone->start);
+ return -ENODEV;
+ }
+
+ if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL)
+ set_bit(idx, args->seq_zones_bitmap);
- return zones;
+ args->sector += zone->len;
+ return 0;
}
-void blk_queue_free_zone_bitmaps(struct request_queue *q)
+static int blk_update_zone_info(struct gendisk *disk, unsigned int nr_zones,
+ struct blk_revalidate_zone_args *args)
{
- kfree(q->seq_zones_bitmap);
- q->seq_zones_bitmap = NULL;
- kfree(q->seq_zones_wlock);
- q->seq_zones_wlock = NULL;
+ /*
+ * Ensure that all memory allocations in this context are done as
+ * if GFP_NOIO was specified.
+ */
+ unsigned int noio_flag = memalloc_noio_save();
+ struct request_queue *q = disk->queue;
+ int ret;
+
+ args->seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones);
+ if (!args->seq_zones_wlock)
+ return -ENOMEM;
+ args->seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones);
+ if (!args->seq_zones_bitmap)
+ return -ENOMEM;
+
+ ret = disk->fops->report_zones(disk, 0, nr_zones,
+ blk_revalidate_zone_cb, args);
+ memalloc_noio_restore(noio_flag);
+ return ret;
}
/**
@@ -461,13 +448,12 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk));
- unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
- unsigned int i, rep_nr_zones = 0, z = 0, nrz;
- struct blk_zone *zones = NULL;
- unsigned int noio_flag;
- sector_t sector = 0;
+ struct blk_revalidate_zone_args args = { .disk = disk };
int ret = 0;
+ if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
+ return -EIO;
+
/*
* BIO based queues do not use a scheduler so only q->nr_zones
* needs to be updated so that the sysfs exposed value is correct.
@@ -477,78 +463,28 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
return 0;
}
- /*
- * Ensure that all memory allocations in this context are done as
- * if GFP_NOIO was specified.
- */
- noio_flag = memalloc_noio_save();
+ if (nr_zones)
+ ret = blk_update_zone_info(disk, nr_zones, &args);
- if (!blk_queue_is_zoned(q) || !nr_zones) {
- nr_zones = 0;
- goto update;
- }
-
- /* Allocate bitmaps */
- ret = -ENOMEM;
- seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones);
- if (!seq_zones_wlock)
- goto out;
- seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones);
- if (!seq_zones_bitmap)
- goto out;
-
- /* Get zone information and initialize seq_zones_bitmap */
- rep_nr_zones = nr_zones;
- zones = blk_alloc_zones(&rep_nr_zones);
- if (!zones)
- goto out;
-
- while (z < nr_zones) {
- nrz = min(nr_zones - z, rep_nr_zones);
- ret = blk_report_zones(disk, sector, zones, &nrz);
- if (ret)
- goto out;
- if (!nrz)
- break;
- for (i = 0; i < nrz; i++) {
- if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
- set_bit(z, seq_zones_bitmap);
- z++;
- }
- sector += nrz * blk_queue_zone_sectors(q);
- }
-
- if (WARN_ON(z != nr_zones)) {
- ret = -EIO;
- goto out;
- }
-
-update:
/*
* Install the new bitmaps, making sure the queue is stopped and
* all I/Os are completed (i.e. a scheduler is not referencing the
* bitmaps).
*/
blk_mq_freeze_queue(q);
- q->nr_zones = nr_zones;
- swap(q->seq_zones_wlock, seq_zones_wlock);
- swap(q->seq_zones_bitmap, seq_zones_bitmap);
- blk_mq_unfreeze_queue(q);
-
-out:
- memalloc_noio_restore(noio_flag);
-
- kvfree(zones);
- kfree(seq_zones_wlock);
- kfree(seq_zones_bitmap);
-
- if (ret) {
+ if (ret >= 0) {
+ q->nr_zones = nr_zones;
+ swap(q->seq_zones_wlock, args.seq_zones_wlock);
+ swap(q->seq_zones_bitmap, args.seq_zones_bitmap);
+ ret = 0;
+ } else {
pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
- blk_mq_freeze_queue(q);
blk_queue_free_zone_bitmaps(q);
- blk_mq_unfreeze_queue(q);
}
+ blk_mq_unfreeze_queue(q);
+ kfree(args.seq_zones_wlock);
+ kfree(args.seq_zones_bitmap);
return ret;
}
EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 3db8b73a96b1..3084d4cba5c7 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -462,56 +462,6 @@ static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
return 0;
}
-static bool part_zone_aligned(struct gendisk *disk,
- struct block_device *bdev,
- sector_t from, sector_t size)
-{
- unsigned int zone_sectors = bdev_zone_sectors(bdev);
-
- /*
- * If this function is called, then the disk is a zoned block device
- * (host-aware or host-managed). This can be detected even if the
- * zoned block device support is disabled (CONFIG_BLK_DEV_ZONED not
- * set). In this case, however, only host-aware devices will be seen
- * as a block device is not created for host-managed devices. Without
- * zoned block device support, host-aware drives can still be used as
- * regular block devices (no zone operation) and their zone size will
- * be reported as 0. Allow this case.
- */
- if (!zone_sectors)
- return true;
-
- /*
- * Check partition start and size alignement. If the drive has a
- * smaller last runt zone, ignore it and allow the partition to
- * use it. Check the zone size too: it should be a power of 2 number
- * of sectors.
- */
- if (WARN_ON_ONCE(!is_power_of_2(zone_sectors))) {
- u32 rem;
-
- div_u64_rem(from, zone_sectors, &rem);
- if (rem)
- return false;
- if ((from + size) < get_capacity(disk)) {
- div_u64_rem(size, zone_sectors, &rem);
- if (rem)
- return false;
- }
-
- } else {
-
- if (from & (zone_sectors - 1))
- return false;
- if ((from + size) < get_capacity(disk) &&
- (size & (zone_sectors - 1)))
- return false;
-
- }
-
- return true;
-}
-
int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
{
struct parsed_partitions *state = NULL;
@@ -547,6 +497,14 @@ rescan:
}
return -EIO;
}
+
+ /* Partitions are not supported on zoned block devices */
+ if (bdev_is_zoned(bdev)) {
+ pr_warn("%s: ignoring partition table on zoned block device\n",
+ disk->disk_name);
+ goto out;
+ }
+
/*
* If any partition code tried to read beyond EOD, try
* unlocking native capacity even if partition table is
@@ -610,21 +568,6 @@ rescan:
}
}
- /*
- * On a zoned block device, partitions should be aligned on the
- * device zone size (i.e. zone boundary crossing not allowed).
- * Otherwise, resetting the write pointer of the last zone of
- * one partition may impact the following partition.
- */
- if (bdev_is_zoned(bdev) &&
- !part_zone_aligned(disk, bdev, from, size)) {
- printk(KERN_WARNING
- "%s: p%d start %llu+%llu is not zone aligned\n",
- disk->disk_name, p, (unsigned long long) from,
- (unsigned long long) size);
- continue;
- }
-
part = add_partition(disk, p, from, size,
state->parts[p].flags,
&state->parts[p].info);
@@ -638,6 +581,7 @@ rescan:
md_autodetect_dev(part_to_dev(part)->devt);
#endif
}
+out:
free_partitions(state);
return 0;
}