From 9964e674559b02619fee2012a56839624143d02e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Fri, 15 Apr 2022 06:52:41 +0200
Subject: mm: use bdev_is_zoned in claim_swapfile

Use the bdev based helper instead of poking into the queue.

Signed-off-by: Christoph Hellwig
Reviewed-by: Damien Le Moal
Reviewed-by: Chaitanya Kulkarni
Link: https://lore.kernel.org/r/20220415045258.199825-11-hch@lst.de
Signed-off-by: Jens Axboe
---
 mm/swapfile.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/swapfile.c b/mm/swapfile.c
index 63c61f8b2611..4c7537162af5 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2761,7 +2761,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
 		 * write only restriction.  Hence zoned block devices are not
 		 * suitable for swapping.  Disallow them here.
 		 */
-		if (blk_queue_is_zoned(p->bdev->bd_disk->queue))
+		if (bdev_is_zoned(p->bdev))
			return -EINVAL;
 		p->flags |= SWP_BLKDEV;
 	} else if (S_ISREG(inode->i_mode)) {
--
cgit v1.2.3
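The conversion pattern here is mechanical: the caller keeps its block_device and lets a bdev_* helper dereference the queue internally. A minimal sketch of a caller after the conversion, assuming the existing bdev_is_zoned() helper from <linux/blkdev.h>; the function name is hypothetical:

#include <linux/blkdev.h>

/* Hypothetical caller: reject zoned devices for swap, as the hunk above does. */
static int example_claim_swap_bdev(struct block_device *bdev)
{
	/* Zoned devices only allow sequential writes, unsuitable for swap. */
	if (bdev_is_zoned(bdev))
		return -EINVAL;
	return 0;
}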
From 10f0d2a517796b8f6dc04fb0cc3e49003ae6b0bc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Fri, 15 Apr 2022 06:52:42 +0200
Subject: block: add a bdev_nonrot helper

Add a helper to check the nonrot flag based on the block_device instead
of having to poke into the block layer internal request_queue.

Signed-off-by: Christoph Hellwig
Reviewed-by: Martin K. Petersen
Acked-by: David Sterba [btrfs]
Reviewed-by: Chaitanya Kulkarni
Link: https://lore.kernel.org/r/20220415045258.199825-12-hch@lst.de
Signed-off-by: Jens Axboe
---
 block/ioctl.c                       | 2 +-
 drivers/block/loop.c                | 2 +-
 drivers/md/dm-table.c               | 4 +---
 drivers/md/md.c                     | 3 +--
 drivers/md/raid1.c                  | 2 +-
 drivers/md/raid10.c                 | 2 +-
 drivers/md/raid5.c                  | 2 +-
 drivers/target/target_core_file.c   | 3 +--
 drivers/target/target_core_iblock.c | 2 +-
 fs/btrfs/volumes.c                  | 4 ++--
 fs/ext4/mballoc.c                   | 2 +-
 include/linux/blkdev.h              | 5 +++++
 mm/swapfile.c                       | 4 ++--
 13 files changed, 19 insertions(+), 18 deletions(-)

(limited to 'mm')

diff --git a/block/ioctl.c b/block/ioctl.c
index f8703db99c73..19c32b763451 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -489,7 +489,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
 				queue_max_sectors(bdev_get_queue(bdev)));
 		return put_ushort(argp, max_sectors);
 	case BLKROTATIONAL:
-		return put_ushort(argp, !blk_queue_nonrot(bdev_get_queue(bdev)));
+		return put_ushort(argp, !bdev_nonrot(bdev));
 	case BLKRASET:
 	case BLKFRASET:
 		if(!capable(CAP_SYS_ADMIN))
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index a58595f5ee2c..8d800d46e498 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -903,7 +903,7 @@ static void loop_update_rotational(struct loop_device *lo)
 	/* not all filesystems (e.g. tmpfs) have a sb->s_bdev */
 	if (file_bdev)
-		nonrot = blk_queue_nonrot(bdev_get_queue(file_bdev));
+		nonrot = bdev_nonrot(file_bdev);
 
 	if (nonrot)
 		blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 03541cfc2317..5e38d0dd009d 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1820,9 +1820,7 @@ static int device_dax_write_cache_enabled(struct dm_target *ti,
 static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev,
 				sector_t start, sector_t len, void *data)
 {
-	struct request_queue *q = bdev_get_queue(dev->bdev);
-
-	return !blk_queue_nonrot(q);
+	return !bdev_nonrot(dev->bdev);
 }
 
 static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 309b3af906ad..19636c2f2cda 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5991,8 +5991,7 @@ int md_run(struct mddev *mddev)
 		bool nonrot = true;
 
 		rdev_for_each(rdev, mddev) {
-			if (rdev->raid_disk >= 0 &&
-			    !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
+			if (rdev->raid_disk >= 0 && !bdev_nonrot(rdev->bdev)) {
 				nonrot = false;
 				break;
 			}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index d785c3169426..b7b21a4793d4 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -708,7 +708,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 			/* At least two disks to choose from so failfast is OK */
 			set_bit(R1BIO_FailFast, &r1_bio->state);
 
-		nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
+		nonrot = bdev_nonrot(rdev->bdev);
 		has_nonrot_disk |= nonrot;
 		pending = atomic_read(&rdev->nr_pending);
 		dist = abs(this_sector - conf->mirrors[disk].head_position);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ee2438c0e55e..0987916066f2 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -803,7 +803,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
 		if (!do_balance)
 			break;
 
-		nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
+		nonrot = bdev_nonrot(rdev->bdev);
 		has_nonrot_disk |= nonrot;
 		pending = atomic_read(&rdev->nr_pending);
 		if (min_pending > pending && nonrot) {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 351d341a1ffa..0bbae0e63866 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7242,7 +7242,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	rdev_for_each(rdev, mddev) {
 		if (test_bit(Journal, &rdev->flags))
 			continue;
-		if (blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
+		if (bdev_nonrot(rdev->bdev)) {
 			conf->batch_bio_dispatch = false;
 			break;
 		}
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 8d191fdc3321..b6ba582b0677 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -135,7 +135,6 @@ static int fd_configure_device(struct se_device *dev)
 	inode = file->f_mapping->host;
 	if (S_ISBLK(inode->i_mode)) {
 		struct block_device *bdev = I_BDEV(inode);
-		struct request_queue *q = bdev_get_queue(bdev);
 		unsigned long long dev_size;
 
 		fd_dev->fd_block_size = bdev_logical_block_size(bdev);
@@ -160,7 +159,7 @@ static int fd_configure_device(struct se_device *dev)
 		 */
 		dev->dev_attrib.max_write_same_len = 0xFFFF;
 
-		if (blk_queue_nonrot(q))
+		if (bdev_nonrot(bdev))
 			dev->dev_attrib.is_nonrot = 1;
 	} else {
 		if (!(fd_dev->fbd_flags & FBDF_HAS_SIZE)) {
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index b886ce1770bf..b41ee5c3b5b8 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -133,7 +133,7 @@ static int iblock_configure_device(struct se_device *dev)
 	else
 		dev->dev_attrib.max_write_same_len = 0xFFFF;
 
-	if (blk_queue_nonrot(q))
+	if (bdev_nonrot(bd))
 		dev->dev_attrib.is_nonrot = 1;
 
 	bi = bdev_get_integrity(bd);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 678504c9055a..b6b00338037c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -642,7 +642,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 		set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
 	}
 
-	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+	if (!bdev_nonrot(bdev))
 		fs_devices->rotating = true;
 
 	device->bdev = bdev;
@@ -2705,7 +2705,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
 	atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
 
-	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+	if (!bdev_nonrot(bdev))
 		fs_devices->rotating = true;
 
 	orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 252c168454c7..c3668c977cd9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3498,7 +3498,7 @@ int ext4_mb_init(struct super_block *sb)
 		spin_lock_init(&lg->lg_prealloc_lock);
 	}
 
-	if (blk_queue_nonrot(bdev_get_queue(sb->s_bdev)))
+	if (bdev_nonrot(sb->s_bdev))
 		sbi->s_mb_max_linear_groups = 0;
 	else
 		sbi->s_mb_max_linear_groups = MB_DEFAULT_LINEAR_LIMIT;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 60d016138997..3a9578e14a6b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1326,6 +1326,11 @@ static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
 	return 0;
 }
 
+static inline bool bdev_nonrot(struct block_device *bdev)
+{
+	return blk_queue_nonrot(bdev_get_queue(bdev));
+}
+
 static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4c7537162af5..d5ab7ec4d92c 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2466,7 +2466,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	if (p->flags & SWP_CONTINUED)
 		free_swap_count_continuations(p);
 
-	if (!p->bdev || !blk_queue_nonrot(bdev_get_queue(p->bdev)))
+	if (!p->bdev || !bdev_nonrot(p->bdev))
 		atomic_dec(&nr_rotate_swap);
 
 	mutex_lock(&swapon_mutex);
@@ -3071,7 +3071,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	if (p->bdev && p->bdev->bd_disk->fops->rw_page)
 		p->flags |= SWP_SYNCHRONOUS_IO;
 
-	if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
+	if (p->bdev && bdev_nonrot(p->bdev)) {
 		int cpu;
 		unsigned long ci, nr_cluster;
--
cgit v1.2.3
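A sketch of the calling convention this commit moves to, assuming the bdev_nonrot() helper added above; the policy function and the example limit value are hypothetical, not part of the series:

#include <linux/blkdev.h>

/* Hypothetical policy helper: disable linear-neighbour scans on SSDs. */
static unsigned int example_pick_linear_limit(struct block_device *bdev)
{
	/* Non-rotational media gain nothing from locality heuristics. */
	if (bdev_nonrot(bdev))
		return 0;
	return 16;	/* arbitrary example limit for rotational media */
}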
From 36d254893aa6a6e204075c3cce94bb572ac32c04 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Fri, 15 Apr 2022 06:52:45 +0200
Subject: block: add a bdev_stable_writes helper

Add a helper to check the stable writes flag based on the block_device
instead of having to poke into the block layer internal request_queue.

Signed-off-by: Christoph Hellwig
Reviewed-by: Martin K. Petersen
Reviewed-by: Chaitanya Kulkarni
Link: https://lore.kernel.org/r/20220415045258.199825-15-hch@lst.de
Signed-off-by: Jens Axboe
---
 drivers/md/dm-table.c  | 4 +---
 fs/super.c             | 2 +-
 include/linux/blkdev.h | 6 ++++++
 mm/swapfile.c          | 2 +-
 4 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'mm')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 5e38d0dd009d..d46839faa0ca 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1950,9 +1950,7 @@ static int device_requires_stable_pages(struct dm_target *ti,
 					struct dm_dev *dev, sector_t start,
 					sector_t len, void *data)
 {
-	struct request_queue *q = bdev_get_queue(dev->bdev);
-
-	return blk_queue_stable_writes(q);
+	return bdev_stable_writes(dev->bdev);
 }
 
 int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
diff --git a/fs/super.c b/fs/super.c
index f1d4a193602d..60f57c7bc0a6 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1204,7 +1204,7 @@ static int set_bdev_super(struct super_block *s, void *data)
 	s->s_dev = s->s_bdev->bd_dev;
 	s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi);
 
-	if (blk_queue_stable_writes(s->s_bdev->bd_disk->queue))
+	if (bdev_stable_writes(s->s_bdev))
 		s->s_iflags |= SB_I_STABLE_WRITES;
 	return 0;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 075b16d4560e..a433798c3343 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1330,6 +1330,12 @@ static inline bool bdev_nonrot(struct block_device *bdev)
 	return blk_queue_nonrot(bdev_get_queue(bdev));
 }
 
+static inline bool bdev_stable_writes(struct block_device *bdev)
+{
+	return test_bit(QUEUE_FLAG_STABLE_WRITES,
+			&bdev_get_queue(bdev)->queue_flags);
+}
+
 static inline bool bdev_write_cache(struct block_device *bdev)
 {
 	return test_bit(QUEUE_FLAG_WC, &bdev_get_queue(bdev)->queue_flags);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index d5ab7ec4d92c..4069f17a82c8 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3065,7 +3065,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		goto bad_swap_unlock_inode;
 	}
 
-	if (p->bdev && blk_queue_stable_writes(p->bdev->bd_disk->queue))
+	if (p->bdev && bdev_stable_writes(p->bdev))
 		p->flags |= SWP_STABLE_WRITES;
 
 	if (p->bdev && p->bdev->bd_disk->fops->rw_page)
--
cgit v1.2.3
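The stable-writes helper follows the same shape as bdev_nonrot(). A minimal sketch of a filesystem-side caller, mirroring the fs/super.c hunk above; the wrapper function name is hypothetical:

#include <linux/blkdev.h>
#include <linux/fs.h>

/* Hypothetical wrapper: propagate the device's stable-writes requirement. */
static void example_apply_stable_writes(struct super_block *s)
{
	/* Page contents must not change while the page is under writeback. */
	if (bdev_stable_writes(s->s_bdev))
		s->s_iflags |= SB_I_STABLE_WRITES;
}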
From 70200574cc229f6ba038259e8142af2aa09e6976 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Fri, 15 Apr 2022 06:52:55 +0200
Subject: block: remove QUEUE_FLAG_DISCARD
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Just use a non-zero max_discard_sectors as an indicator for discard
support, similar to what is done for write zeroes.

The only place that needs special attention is the RAID5 driver, which
must clear discard support for security reasons by default, even if the
default stacking rules would allow for it.

Signed-off-by: Christoph Hellwig
Reviewed-by: Martin K. Petersen
Acked-by: Christoph Böhmwalder [drbd]
Acked-by: Jan Höppner [s390]
Acked-by: Coly Li [bcache]
Acked-by: David Sterba [btrfs]
Reviewed-by: Chaitanya Kulkarni
Link: https://lore.kernel.org/r/20220415045258.199825-25-hch@lst.de
Signed-off-by: Jens Axboe
---
 arch/um/drivers/ubd_kern.c          |  2 --
 block/blk-core.c                    |  2 +-
 block/blk-lib.c                     |  2 +-
 block/blk-mq-debugfs.c              |  1 -
 block/ioctl.c                       |  3 +--
 drivers/block/drbd/drbd_main.c      |  2 +-
 drivers/block/drbd/drbd_nl.c        | 19 ++-----------------
 drivers/block/drbd/drbd_receiver.c  |  3 +--
 drivers/block/loop.c                | 11 +++--------
 drivers/block/nbd.c                 |  5 +----
 drivers/block/null_blk/main.c       |  1 -
 drivers/block/rbd.c                 |  1 -
 drivers/block/rnbd/rnbd-clt.c       |  2 --
 drivers/block/rnbd/rnbd-srv-dev.h   |  3 ---
 drivers/block/virtio_blk.c          |  2 --
 drivers/block/xen-blkback/xenbus.c  |  2 +-
 drivers/block/xen-blkfront.c        |  3 +--
 drivers/block/zram/zram_drv.c       |  1 -
 drivers/md/bcache/request.c         |  4 ++--
 drivers/md/bcache/super.c           |  3 +--
 drivers/md/bcache/sysfs.c           |  2 +-
 drivers/md/dm-cache-target.c        |  9 +--------
 drivers/md/dm-clone-target.c        |  9 +--------
 drivers/md/dm-log-writes.c          |  3 +--
 drivers/md/dm-raid.c                |  9 ++-------
 drivers/md/dm-table.c               |  9 ++-------
 drivers/md/dm-thin.c                | 11 +----------
 drivers/md/dm.c                     |  3 +--
 drivers/md/md-linear.c              | 11 +----------
 drivers/md/raid0.c                  |  7 -------
 drivers/md/raid1.c                  | 16 +---------------
 drivers/md/raid10.c                 | 18 ++----------------
 drivers/md/raid5-cache.c            |  2 +-
 drivers/md/raid5.c                  | 12 ++++--------
 drivers/mmc/core/queue.c            |  1 -
 drivers/mtd/mtd_blkdevs.c           |  1 -
 drivers/nvme/host/core.c            |  4 ++--
 drivers/s390/block/dasd_fba.c       |  1 -
 drivers/scsi/sd.c                   |  2 --
 drivers/target/target_core_device.c |  2 +-
 fs/btrfs/extent-tree.c              |  4 ++--
 fs/btrfs/ioctl.c                    |  2 +-
 fs/exfat/file.c                     |  2 +-
 fs/exfat/super.c                    | 10 +++-------
 fs/ext4/ioctl.c                     | 10 +++-------
 fs/ext4/super.c                     | 10 +++-------
 fs/f2fs/f2fs.h                      |  3 +--
 fs/fat/file.c                       |  2 +-
 fs/fat/inode.c                      | 10 +++-------
 fs/gfs2/rgrp.c                      |  2 +-
 fs/jbd2/journal.c                   |  7 ++-----
 fs/jfs/ioctl.c                      |  2 +-
 fs/jfs/super.c                      |  8 ++------
 fs/nilfs2/ioctl.c                   |  2 +-
 fs/ntfs3/file.c                     |  2 +-
 fs/ntfs3/super.c                    |  2 +-
 fs/ocfs2/ioctl.c                    |  2 +-
 fs/xfs/xfs_discard.c                |  2 +-
 fs/xfs/xfs_super.c                  | 12 ++++--------
 include/linux/blkdev.h              |  2 --
 mm/swapfile.c                       | 17 ++---------------
 61 files changed, 73 insertions(+), 244 deletions(-)

(limited to 'mm')

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index b03269faef71..085ffdf98e57 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -483,7 +483,6 @@ static void ubd_handler(void)
 		if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
 			blk_queue_max_discard_sectors(io_req->req->q, 0);
 			blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
-			blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
 		}
 		blk_mq_end_request(io_req->req, io_req->error);
 		kfree(io_req);
@@ -803,7 +802,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
 		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 		blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
-		blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
 	}
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 	return 0;
diff --git a/block/blk-core.c b/block/blk-core.c
index 937bb6b86331..b5c3a8049134 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -820,7 +820,7 @@ void submit_bio_noacct(struct bio *bio)
 
 	switch (bio_op(bio)) {
 	case REQ_OP_DISCARD:
-		if (!blk_queue_discard(q))
+		if (!bdev_max_discard_sectors(bdev))
 			goto not_supported;
 		break;
 	case REQ_OP_SECURE_ERASE:
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 2ae32a722851..8b4b66d3a9bf 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -53,7 +53,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 			return -EOPNOTSUPP;
 		op = REQ_OP_SECURE_ERASE;
 	} else {
-		if (!blk_queue_discard(q))
+		if (!bdev_max_discard_sectors(bdev))
 			return -EOPNOTSUPP;
 		op = REQ_OP_DISCARD;
 	}
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index aa0349e9f083..fd111c500125 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -113,7 +113,6 @@ static const char *const blk_queue_flag_name[] = {
 	QUEUE_FLAG_NAME(FAIL_IO),
 	QUEUE_FLAG_NAME(NONROT),
 	QUEUE_FLAG_NAME(IO_STAT),
-	QUEUE_FLAG_NAME(DISCARD),
 	QUEUE_FLAG_NAME(NOXMERGES),
 	QUEUE_FLAG_NAME(ADD_RANDOM),
 	QUEUE_FLAG_NAME(SECERASE),
diff --git a/block/ioctl.c b/block/ioctl.c
index 19c32b763451..eaee0efc0bea 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -87,14 +87,13 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
 {
 	uint64_t range[2];
 	uint64_t start, len;
-	struct request_queue *q = bdev_get_queue(bdev);
 	struct inode *inode = bdev->bd_inode;
 	int err;
 
 	if (!(mode & FMODE_WRITE))
 		return -EBADF;
 
-	if (!blk_queue_discard(q))
+	if (!bdev_max_discard_sectors(bdev))
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(range, (void __user *)arg, sizeof(range)))
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 7b501c8d5992..912560f611c3 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -942,7 +942,7 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
 			cpu_to_be32(bdev_alignment_offset(bdev));
 		p->qlim->io_min = cpu_to_be32(bdev_io_min(bdev));
 		p->qlim->io_opt = cpu_to_be32(bdev_io_opt(bdev));
-		p->qlim->discard_enabled = blk_queue_discard(q);
+		p->qlim->discard_enabled = !!bdev_max_discard_sectors(bdev);
 		put_ldev(device);
 	} else {
 		struct request_queue *q = device->rq_queue;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index a0a06e238e91..0678ceb50579 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1210,7 +1210,7 @@ static void decide_on_discard_support(struct drbd_device *device,
 		first_peer_device(device)->connection;
 	struct request_queue *q = device->rq_queue;
 
-	if (bdev && !blk_queue_discard(bdev->backing_bdev->bd_disk->queue))
+	if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev))
 		goto not_supported;
 
 	if (connection->cstate >= C_CONNECTED &&
@@ -1230,30 +1230,16 @@ static void decide_on_discard_support(struct drbd_device *device,
 	 */
 	blk_queue_discard_granularity(q, 512);
 	q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
-	blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
 	q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection);
 	return;
 
not_supported:
-	blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
 	blk_queue_discard_granularity(q, 0);
 	q->limits.max_discard_sectors = 0;
 	q->limits.max_write_zeroes_sectors = 0;
 }
 
-static void fixup_discard_if_not_supported(struct request_queue *q)
-{
-	/* To avoid confusion, if this queue does not support discard, clear
-	 * max_discard_sectors, which is what lsblk -D reports to the user.
-	 * Older kernels got this wrong in "stack limits".
-	 * */
-	if (!blk_queue_discard(q)) {
-		blk_queue_max_discard_sectors(q, 0);
-		blk_queue_discard_granularity(q, 0);
-	}
-}
-
 static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q)
 {
 	/* Fixup max_write_zeroes_sectors after blk_stack_limits():
@@ -1300,7 +1286,6 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
 		blk_stack_limits(&q->limits, &b->limits, 0);
 		disk_update_readahead(device->vdisk);
 	}
-	fixup_discard_if_not_supported(q);
 	fixup_write_zeroes(device, q);
 }
 
@@ -1447,7 +1432,7 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis
 	if (disk_conf->al_extents > drbd_al_extents_max(nbc))
 		disk_conf->al_extents = drbd_al_extents_max(nbc);
 
-	if (!blk_queue_discard(q)) {
+	if (!bdev_max_discard_sectors(bdev)) {
 		if (disk_conf->rs_discard_granularity) {
 			disk_conf->rs_discard_granularity = 0; /* disable feature */
 			drbd_info(device, "rs_discard_granularity feature disabled\n");
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 0b4c7de46398..8a4a47da56fe 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1575,11 +1575,10 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
 
 static bool can_do_reliable_discards(struct drbd_device *device)
 {
-	struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
 	struct disk_conf *dc;
 	bool can_do;
 
-	if (!blk_queue_discard(q))
+	if (!bdev_max_discard_sectors(device->ldev->backing_bdev))
 		return false;
 
 	rcu_read_lock();
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 8d800d46e498..0e061c9896ef 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -314,15 +314,12 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
 
 	mode |= FALLOC_FL_KEEP_SIZE;
 
-	if (!blk_queue_discard(lo->lo_queue)) {
-		ret = -EOPNOTSUPP;
-		goto out;
-	}
+	if (!bdev_max_discard_sectors(lo->lo_device))
+		return -EOPNOTSUPP;
 
 	ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq));
 	if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP))
-		ret = -EIO;
- out:
+		return -EIO;
 	return ret;
 }
 
@@ -787,12 +784,10 @@ static void loop_config_discard(struct loop_device *lo)
 		q->limits.discard_granularity = granularity;
 		blk_queue_max_discard_sectors(q, max_discard_sectors);
 		blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
-		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
 	} else {
 		q->limits.discard_granularity = 0;
 		blk_queue_max_discard_sectors(q, 0);
 		blk_queue_max_write_zeroes_sectors(q, 0);
-		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
 	}
 	q->limits.discard_alignment = 0;
 }
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5a1f98494ddd..4729aef8c646 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1231,8 +1231,6 @@ static void nbd_parse_flags(struct nbd_device *nbd)
 		set_disk_ro(nbd->disk, true);
 	else
 		set_disk_ro(nbd->disk, false);
-	if (config->flags & NBD_FLAG_SEND_TRIM)
-		blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 	if (config->flags & NBD_FLAG_SEND_FLUSH) {
 		if (config->flags & NBD_FLAG_SEND_FUA)
 			blk_queue_write_cache(nbd->disk->queue, true, true);
@@ -1319,8 +1317,7 @@ static void nbd_config_put(struct nbd_device *nbd)
 		nbd->tag_set.timeout = 0;
 		nbd->disk->queue->limits.discard_granularity = 0;
 		nbd->disk->queue->limits.discard_alignment = 0;
-		blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
-		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
+		blk_queue_max_discard_sectors(nbd->disk->queue, 0);
 
 		mutex_unlock(&nbd->config_lock);
 		nbd_put(nbd);
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index c441a4972064..5cb4c92cdffe 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1767,7 +1767,6 @@ static void null_config_discard(struct nullb *nullb)
 	nullb->q->limits.discard_granularity = nullb->dev->blocksize;
 	nullb->q->limits.discard_alignment = nullb->dev->blocksize;
 	blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
-	blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
 }
 
 static const struct block_device_operations null_bio_ops = {
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b844432bad20..2b21f717cce1 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4942,7 +4942,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 	blk_queue_io_opt(q, rbd_dev->opts->alloc_size);
 
 	if (rbd_dev->opts->trim) {
-		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
 		q->limits.discard_granularity = rbd_dev->opts->alloc_size;
 		blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT);
 		blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT);
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index b66e8840b94b..efa99a388450 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1364,8 +1364,6 @@ static void setup_request_queue(struct rnbd_clt_dev *dev)
 	blk_queue_max_discard_sectors(dev->queue, dev->max_discard_sectors);
 	dev->queue->limits.discard_granularity = dev->discard_granularity;
 	dev->queue->limits.discard_alignment = dev->discard_alignment;
-	if (dev->max_discard_sectors)
-		blk_queue_flag_set(QUEUE_FLAG_DISCARD, dev->queue);
 	if (dev->secure_discard)
 		blk_queue_flag_set(QUEUE_FLAG_SECERASE, dev->queue);
diff --git a/drivers/block/rnbd/rnbd-srv-dev.h b/drivers/block/rnbd/rnbd-srv-dev.h
index f82fbb4bbda8..1f7e1c8fd4d9 100644
--- a/drivers/block/rnbd/rnbd-srv-dev.h
+++ b/drivers/block/rnbd/rnbd-srv-dev.h
@@ -49,9 +49,6 @@ static inline int rnbd_dev_get_secure_discard(const struct rnbd_dev *dev)
 
 static inline int rnbd_dev_get_max_discard_sects(const struct rnbd_dev *dev)
 {
-	if (!blk_queue_discard(bdev_get_queue(dev->bdev)))
-		return 0;
-
 	return bdev_max_discard_sectors(dev->bdev);
 }
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index a8bcf3f664af..6ccf15253dee 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -888,8 +888,6 @@ static int virtblk_probe(struct virtio_device *vdev)
 			v = sg_elems;
 		blk_queue_max_discard_segments(q,
 					       min(v, MAX_DISCARD_SEGMENTS));
-
-		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
 	}
 
 	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 8b691fe50475..83cd08041e6b 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -583,7 +583,7 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info
 	if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1))
 		return;
 
-	if (blk_queue_discard(q)) {
+	if (bdev_max_discard_sectors(bdev)) {
 		err = xenbus_printf(xbt, dev->nodename,
 			"discard-granularity", "%u",
 			q->limits.discard_granularity);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 003056d4f7f5..e13cb4d48f1e 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -944,7 +944,6 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
 	blk_queue_flag_set(QUEUE_FLAG_VIRT, rq);
 
 	if (info->feature_discard) {
-		blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq);
 		blk_queue_max_discard_sectors(rq, get_capacity(gd));
 		rq->limits.discard_granularity = info->discard_granularity ?:
 						 info->physical_sector_size;
@@ -1606,7 +1605,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 				blkif_req(req)->error = BLK_STS_NOTSUPP;
 				info->feature_discard = 0;
 				info->feature_secdiscard = 0;
-				blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
+				blk_queue_max_discard_sectors(rq, 0);
 				blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
 			}
 			break;
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index e9474b02012d..59ff444bf6c7 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1952,7 +1952,6 @@ static int zram_add(void)
 	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
 	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
 	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
-	blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
 
 	/*
	 * zram_bio_discard() will clear all logical blocks if logical block
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index fdd0194f84dd..e27f67f06a42 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1005,7 +1005,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
 		bio_get(s->iop.bio);
 
 		if (bio_op(bio) == REQ_OP_DISCARD &&
-		    !blk_queue_discard(bdev_get_queue(dc->bdev)))
+		    !bdev_max_discard_sectors(dc->bdev))
 			goto insert_data;
 
 		/* I/O request sent to backing device */
@@ -1115,7 +1115,7 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio,
 	bio->bi_private = ddip;
 
 	if ((bio_op(bio) == REQ_OP_DISCARD) &&
-	    !blk_queue_discard(bdev_get_queue(dc->bdev)))
+	    !bdev_max_discard_sectors(dc->bdev))
 		bio->bi_end_io(bio);
 	else
 		submit_bio_noacct(bio);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index bf3de149d3c9..2f49e31142f6 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -973,7 +973,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
 
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
 	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue);
-	blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue);
 
 	blk_queue_write_cache(q, true, true);
 
@@ -2350,7 +2349,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
 	ca->bdev->bd_holder = ca;
 	ca->sb_disk = sb_disk;
 
-	if (blk_queue_discard(bdev_get_queue(bdev)))
+	if (bdev_max_discard_sectors((bdev)))
 		ca->discard = CACHE_DISCARD(&ca->sb);
 
 	ret = cache_alloc(ca);
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index d1029d71ff3b..c6f677059214 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -1151,7 +1151,7 @@ STORE(__bch_cache)
 	if (attr == &sysfs_discard) {
 		bool v = strtoul_or_return(buf);
 
-		if (blk_queue_discard(bdev_get_queue(ca->bdev)))
+		if (bdev_max_discard_sectors(ca->bdev))
 			ca->discard = v;
 
 		if (v != CACHE_DISCARD(&ca->sb)) {
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 780a61bc6cc0..28c5de8eca4a 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -3329,13 +3329,6 @@ static int cache_iterate_devices(struct dm_target *ti,
 	return r;
 }
 
-static bool origin_dev_supports_discard(struct block_device *origin_bdev)
-{
-	struct request_queue *q = bdev_get_queue(origin_bdev);
-
-	return blk_queue_discard(q);
-}
-
 /*
  * If discard_passdown was enabled verify that the origin device
  * supports discards.  Disable discard_passdown if not.
@@ -3349,7 +3342,7 @@ static void disable_passdown_if_not_supported(struct cache *cache)
 	if (!cache->features.discard_passdown)
 		return;
 
-	if (!origin_dev_supports_discard(origin_bdev))
+	if (!bdev_max_discard_sectors(origin_bdev))
 		reason = "discard unsupported";
 
 	else if (origin_limits->max_discard_sectors < cache->sectors_per_block)
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index 128316a73d01..811b0a5379d0 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -2016,13 +2016,6 @@ static void clone_resume(struct dm_target *ti)
 	do_waker(&clone->waker.work);
 }
 
-static bool bdev_supports_discards(struct block_device *bdev)
-{
-	struct request_queue *q = bdev_get_queue(bdev);
-
-	return (q && blk_queue_discard(q));
-}
-
 /*
  * If discard_passdown was enabled verify that the destination device supports
  * discards.  Disable discard_passdown if not.
@@ -2036,7 +2029,7 @@ static void disable_passdown_if_not_supported(struct clone *clone)
 	if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
 		return;
 
-	if (!bdev_supports_discards(dest_dev))
+	if (!bdev_max_discard_sectors(dest_dev))
 		reason = "discard unsupported";
 	else if (dest_limits->max_discard_sectors < clone->region_size)
 		reason = "max discard sectors smaller than a region";
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index c9d036d6bb2e..e194226c89e5 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -866,9 +866,8 @@ static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv,
 static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
 	struct log_writes_c *lc = ti->private;
-	struct request_queue *q = bdev_get_queue(lc->dev->bdev);
 
-	if (!q || !blk_queue_discard(q)) {
+	if (!bdev_max_discard_sectors(lc->dev->bdev)) {
 		lc->device_supports_discard = false;
 		limits->discard_granularity = lc->sectorsize;
 		limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT);
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 2b26435a6946..9526ccbedafb 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2963,13 +2963,8 @@ static void configure_discard_support(struct raid_set *rs)
 	raid456 = rs_is_raid456(rs);
 
 	for (i = 0; i < rs->raid_disks; i++) {
-		struct request_queue *q;
-
-		if (!rs->dev[i].rdev.bdev)
-			continue;
-
-		q = bdev_get_queue(rs->dev[i].rdev.bdev);
-		if (!q || !blk_queue_discard(q))
+		if (!rs->dev[i].rdev.bdev ||
+		    !bdev_max_discard_sectors(rs->dev[i].rdev.bdev))
 			return;
 
 		if (raid456) {
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index d46839faa0ca..0dff6907fd00 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1888,9 +1888,7 @@ static bool dm_table_supports_nowait(struct dm_table *t)
 static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev,
 				      sector_t start, sector_t len, void *data)
 {
-	struct request_queue *q = bdev_get_queue(dev->bdev);
-
-	return !blk_queue_discard(q);
+	return !bdev_max_discard_sectors(dev->bdev);
 }
 
 static bool dm_table_supports_discards(struct dm_table *t)
@@ -1970,15 +1968,12 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 		blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);
 
 	if (!dm_table_supports_discards(t)) {
-		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
-		/* Must also clear discard limits... */
 		q->limits.max_discard_sectors = 0;
 		q->limits.max_hw_discard_sectors = 0;
 		q->limits.discard_granularity = 0;
 		q->limits.discard_alignment = 0;
 		q->limits.discard_misaligned = 0;
-	} else
-		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+	}
 
 	if (dm_table_supports_secure_erase(t))
 		blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 4d25d0e27031..eded4bcc4545 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2802,13 +2802,6 @@ static void requeue_bios(struct pool *pool)
 /*----------------------------------------------------------------
  * Binding of control targets to a pool object
  *--------------------------------------------------------------*/
-static bool data_dev_supports_discard(struct pool_c *pt)
-{
-	struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
-
-	return blk_queue_discard(q);
-}
-
 static bool is_factor(sector_t block_size, uint32_t n)
 {
 	return !sector_div(block_size, n);
@@ -2828,7 +2821,7 @@ static void disable_passdown_if_not_supported(struct pool_c *pt)
 	if (!pt->adjusted_pf.discard_passdown)
 		return;
 
-	if (!data_dev_supports_discard(pt))
+	if (!bdev_max_discard_sectors(pt->data_dev->bdev))
 		reason = "discard unsupported";
 
 	else if (data_limits->max_discard_sectors < pool->sectors_per_block)
@@ -4057,8 +4050,6 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 		/*
		 * Must explicitly disallow stacking discard limits otherwise the
		 * block layer will stack them if pool's data device has support.
-		 * QUEUE_FLAG_DISCARD wouldn't be set but there is no way for the
-		 * user to see that, so make sure to set all discard limits to 0.
		 */
 		limits->discard_granularity = 0;
 		return;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 82957bd460e8..39081338ca61 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -955,7 +955,6 @@ void disable_discard(struct mapped_device *md)
 
 	/* device doesn't really support DISCARD, disable it */
 	limits->max_discard_sectors = 0;
-	blk_queue_flag_clear(QUEUE_FLAG_DISCARD, md->queue);
 }
 
 void disable_write_zeroes(struct mapped_device *md)
@@ -982,7 +981,7 @@ static void clone_endio(struct bio *bio)
 
 	if (unlikely(error == BLK_STS_TARGET)) {
 		if (bio_op(bio) == REQ_OP_DISCARD &&
-		    !q->limits.max_discard_sectors)
+		    !bdev_max_discard_sectors(bio->bi_bdev))
 			disable_discard(md);
 		else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
			 !q->limits.max_write_zeroes_sectors)
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 0f55b079371b..138a3b25c5c8 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -64,7 +64,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
 	struct linear_conf *conf;
 	struct md_rdev *rdev;
 	int i, cnt;
-	bool discard_supported = false;
 
 	conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL);
 	if (!conf)
@@ -96,9 +95,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
 
 		conf->array_sectors += rdev->sectors;
 		cnt++;
-
-		if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
-			discard_supported = true;
 	}
 	if (cnt != raid_disks) {
 		pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
@@ -106,11 +102,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
 		goto out;
 	}
 
-	if (!discard_supported)
-		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
-	else
-		blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
-
 	/*
	 * Here we calculate the device offsets.
	 */
@@ -252,7 +243,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
 		start_sector + data_offset;
 
 	if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
-		     !blk_queue_discard(bio->bi_bdev->bd_disk->queue))) {
+		     !bdev_max_discard_sectors(bio->bi_bdev))) {
 		/* Just ignore it */
 		bio_endio(bio);
 	} else {
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index b21e101183f4..7231f5e1eaa7 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -399,7 +399,6 @@ static int raid0_run(struct mddev *mddev)
 	conf = mddev->private;
 	if (mddev->queue) {
 		struct md_rdev *rdev;
-		bool discard_supported = false;
 
 		blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
 		blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
@@ -412,13 +411,7 @@ static int raid0_run(struct mddev *mddev)
 		rdev_for_each(rdev, mddev) {
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
					  rdev->data_offset << 9);
-			if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
-				discard_supported = true;
 		}
-		if (!discard_supported)
-			blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
-		else
-			blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
 	}
 
 	/* calculate array device size */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b7b21a4793d4..5aed2c8b746e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -806,7 +806,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
 		if (test_bit(Faulty, &rdev->flags)) {
 			bio_io_error(bio);
 		} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
-				    !blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
+				    !bdev_max_discard_sectors(bio->bi_bdev)))
 			/* Just ignore it */
 			bio_endio(bio);
 		else
@@ -1830,8 +1830,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			break;
 		}
 	}
-	if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
-		blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
 	print_conf(conf);
 	return err;
 }
@@ -3110,7 +3108,6 @@ static int raid1_run(struct mddev *mddev)
 	int i;
 	struct md_rdev *rdev;
 	int ret;
-	bool discard_supported = false;
 
 	if (mddev->level != 1) {
 		pr_warn("md/raid1:%s: raid level not set to mirroring (%d)\n",
@@ -3145,8 +3142,6 @@ static int raid1_run(struct mddev *mddev)
 			continue;
 		disk_stack_limits(mddev->gendisk, rdev->bdev,
				  rdev->data_offset << 9);
-		if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
-			discard_supported = true;
 	}
 
 	mddev->degraded = 0;
@@ -3183,15 +3178,6 @@ static int raid1_run(struct mddev *mddev)
 
 	md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
 
-	if (mddev->queue) {
-		if (discard_supported)
-			blk_queue_flag_set(QUEUE_FLAG_DISCARD,
-						mddev->queue);
-		else
-			blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
-						mddev->queue);
-	}
-
 	ret = md_integrity_register(mddev);
 	if (ret) {
 		md_unregister_thread(&mddev->thread);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0987916066f2..834eb3ba95a6 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -895,7 +895,7 @@ static void flush_pending_writes(struct r10conf *conf)
 			if (test_bit(Faulty, &rdev->flags)) {
 				bio_io_error(bio);
 			} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
-					    !blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
+					    !bdev_max_discard_sectors(bio->bi_bdev)))
 				/* Just ignore it */
 				bio_endio(bio);
 			else
@@ -1090,7 +1090,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 		if (test_bit(Faulty, &rdev->flags)) {
 			bio_io_error(bio);
 		} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
-				    !blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
+				    !bdev_max_discard_sectors(bio->bi_bdev)))
 			/* Just ignore it */
 			bio_endio(bio);
 		else
@@ -2151,8 +2151,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		rcu_assign_pointer(p->rdev, rdev);
 		break;
 	}
-	if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
-		blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
 
 	print_conf(conf);
 	return err;
@@ -4076,7 +4074,6 @@ static int raid10_run(struct mddev *mddev)
 	sector_t size;
 	sector_t min_offset_diff = 0;
 	int first = 1;
-	bool discard_supported = false;
 
 	if (mddev_init_writes_pending(mddev) < 0)
 		return -ENOMEM;
@@ -4147,20 +4144,9 @@ static int raid10_run(struct mddev *mddev)
					  rdev->data_offset << 9);
 
 		disk->head_position = 0;
-
-		if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
-			discard_supported = true;
 		first = 0;
 	}
 
-	if (mddev->queue) {
-		if (discard_supported)
-			blk_queue_flag_set(QUEUE_FLAG_DISCARD,
-						mddev->queue);
-		else
-			blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
-						mddev->queue);
-	}
 	/* need to check that every block has at least one working mirror */
 	if (!enough(conf, -1)) {
 		pr_err("md/raid10:%s: not enough operational mirrors.\n",
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index a7d50ff9020a..c3cbf9a574a3 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1318,7 +1318,7 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
 
 	r5l_write_super(log, end);
 
-	if (!blk_queue_discard(bdev_get_queue(bdev)))
+	if (!bdev_max_discard_sectors(bdev))
 		return;
 
 	mddev = log->rdev->mddev;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 0bbae0e63866..59f91e392a2a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7776,14 +7776,10 @@ static int raid5_run(struct mddev *mddev)
		 * A better idea might be to turn DISCARD into WRITE_ZEROES
		 * requests, as that is required to be safe.
		 */
-		if (devices_handle_discard_safely &&
-		    mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
-		    mddev->queue->limits.discard_granularity >= stripe)
-			blk_queue_flag_set(QUEUE_FLAG_DISCARD,
-						mddev->queue);
-		else
-			blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
-						mddev->queue);
+		if (!devices_handle_discard_safely ||
+		    mddev->queue->limits.max_discard_sectors < (stripe >> 9) ||
+		    mddev->queue->limits.discard_granularity < stripe)
+			blk_queue_max_discard_sectors(mddev->queue, 0);
 
 		blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
 	}
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index c69b2d9df6f1..cac6315010a3 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -183,7 +183,6 @@ static void mmc_queue_setup_discard(struct request_queue *q,
 	if (!max_discard)
 		return;
 
-	blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
 	blk_queue_max_discard_sectors(q, max_discard);
 	q->limits.discard_granularity = card->pref_erase << 9;
 	/* granularity must not be greater than max. discard */
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 64d2b093f114..f73172111465 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -377,7 +377,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq);
 
 	if (tr->discard) {
-		blk_queue_flag_set(QUEUE_FLAG_DISCARD, new->rq);
 		blk_queue_max_discard_sectors(new->rq, UINT_MAX);
 		new->rq->limits.discard_granularity = tr->blksize;
 	}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e1846d04817f..b9b0fbde97c8 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1621,7 +1621,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
 	u32 size = queue_logical_block_size(queue);
 
 	if (ctrl->max_discard_sectors == 0) {
-		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue);
+		blk_queue_max_discard_sectors(queue, 0);
 		return;
 	}
 
@@ -1632,7 +1632,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
 	queue->limits.discard_granularity = size;
 
 	/* If discard is already enabled, don't reset queue limits */
-	if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue))
+	if (queue->limits.max_discard_sectors)
 		return;
 
 	blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index e084f4dedddd..8bd5665db919 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -791,7 +791,6 @@ static void dasd_fba_setup_blk_queue(struct dasd_block *block)
 
 	blk_queue_max_discard_sectors(q, max_discard_sectors);
 	blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
-	blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
 }
 
 static int dasd_fba_pe_handler(struct dasd_device *device,
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index dc6e55761fd1..9694e2cfaf9a 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -797,7 +797,6 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 	case SD_LBP_FULL:
 	case SD_LBP_DISABLE:
 		blk_queue_max_discard_sectors(q, 0);
-		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
 		return;
 
 	case SD_LBP_UNMAP:
@@ -830,7 +829,6 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
 	}
 
 	blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9));
-	blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
 }
 
 static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index c3e25bac90d5..6cb9f8784327 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -838,7 +838,7 @@ bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
 	struct request_queue *q = bdev_get_queue(bdev);
 	int block_size = bdev_logical_block_size(bdev);
 
-	if (!blk_queue_discard(q))
+	if (!bdev_max_discard_sectors(bdev))
 		return false;
 
 	attrib->max_unmap_lba_count =
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6aa92f84f465..aba616ff953f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1291,7 +1291,7 @@ static int do_discard_extent(struct btrfs_io_stripe *stripe, u64 *bytes)
 		ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
					      &discarded);
 		discarded += src_disc;
-	} else if (blk_queue_discard(bdev_get_queue(stripe->dev->bdev))) {
+	} else if (bdev_max_discard_sectors(stripe->dev->bdev)) {
 		ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded);
 	} else {
 		ret = 0;
@@ -5987,7 +5987,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
 	*trimmed = 0;
 
 	/* Discard not supported = nothing to do. */
-	if (!blk_queue_discard(bdev_get_queue(device->bdev)))
+	if (!bdev_max_discard_sectors(device->bdev))
 		return 0;
 
 	/* Not writable = nothing to do. */
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index be6c24577dbe..db2b52c20ca8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -501,7 +501,7 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
 		if (!device->bdev)
 			continue;
 		q = bdev_get_queue(device->bdev);
-		if (blk_queue_discard(q)) {
+		if (bdev_max_discard_sectors(device->bdev)) {
 			num_devices++;
 			minlen = min_t(u64, q->limits.discard_granularity,
				     minlen);
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 2f5130059236..765e4f63dd18 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -358,7 +358,7 @@ static int exfat_ioctl_fitrim(struct inode *inode, unsigned long arg)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!blk_queue_discard(q))
+	if (!bdev_max_discard_sectors(inode->i_sb->s_bdev))
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range)))
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 8ca21e7917d1..be0788ecaf20 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -627,13 +627,9 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc)
 	if (opts->allow_utime == (unsigned short)-1)
 		opts->allow_utime = ~opts->fs_dmask & 0022;
 
-	if (opts->discard) {
-		struct request_queue *q = bdev_get_queue(sb->s_bdev);
-
-		if (!blk_queue_discard(q)) {
-			exfat_warn(sb, "mounting with \"discard\" option, but the device does not support discard");
-			opts->discard = 0;
-		}
+	if (opts->discard && !bdev_max_discard_sectors(sb->s_bdev)) {
+		exfat_warn(sb, "mounting with \"discard\" option, but the device does not support discard");
+		opts->discard = 0;
 	}
 
 	sb->s_flags |= SB_NODIRATIME;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 992229ca2d83..6e3b9eea126f 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -1044,7 +1044,6 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
 	__u32 flags = 0;
 	unsigned int flush_flags = 0;
 	struct super_block *sb = file_inode(filp)->i_sb;
-	struct request_queue *q;
 
 	if (copy_from_user(&flags, (__u32 __user *)arg,
				sizeof(__u32)))
@@ -1065,10 +1064,8 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
 	if (flags & ~EXT4_IOC_CHECKPOINT_FLAG_VALID)
 		return -EINVAL;
 
-	q = bdev_get_queue(EXT4_SB(sb)->s_journal->j_dev);
-	if (!q)
-		return -ENXIO;
-	if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q))
+	if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
+	    !bdev_max_discard_sectors(EXT4_SB(sb)->s_journal->j_dev))
 		return -EOPNOTSUPP;
 
 	if (flags & EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN)
@@ -1393,14 +1390,13 @@ resizefs_out:
 
 	case FITRIM:
 	{
-		struct request_queue *q = bdev_get_queue(sb->s_bdev);
 		struct fstrim_range range;
 		int ret = 0;
 
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 
-		if (!blk_queue_discard(q))
+		if (!bdev_max_discard_sectors(sb->s_bdev))
 			return -EOPNOTSUPP;
 
 		/*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 81749eaddf4c..93f4e4e9e263 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5458,13 +5458,9 @@ no_journal:
 		goto failed_mount9;
 	}
 
-	if (test_opt(sb, DISCARD)) {
-		struct request_queue *q = bdev_get_queue(sb->s_bdev);
-		if (!blk_queue_discard(q))
-			ext4_msg(sb, KERN_WARNING,
-				 "mounting with \"discard\" option, but "
-				 "the device does not support discard");
-	}
+	if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
+		ext4_msg(sb, KERN_WARNING,
+			 "mounting with \"discard\" option, but the device does not support discard");
 
 	if (es->s_error_count)
 		mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index cd1e65bcf0b0..0ea9a5fa7c1d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -4381,8 +4381,7 @@ static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi)
 
 static inline bool f2fs_bdev_support_discard(struct block_device *bdev)
 {
-	return blk_queue_discard(bdev_get_queue(bdev)) ||
-	       bdev_is_zoned(bdev);
+	return bdev_max_discard_sectors(bdev) || bdev_is_zoned(bdev);
 }
 
 static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi)
diff --git a/fs/fat/file.c b/fs/fat/file.c
index a5a309fcc7fa..e4c7d10e8012 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -133,7 +133,7 @@ static int fat_ioctl_fitrim(struct inode *inode, unsigned long arg)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!blk_queue_discard(q))
+	if (!bdev_max_discard_sectors(sb->s_bdev))
 		return -EOPNOTSUPP;
 
 	user_range = (struct fstrim_range __user *)arg;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index bf6051bdf1d1..3d1afb95a925 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1872,13 +1872,9 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
 		goto out_fail;
 	}
 
-	if (sbi->options.discard) {
-		struct request_queue *q = bdev_get_queue(sb->s_bdev);
-		if (!blk_queue_discard(q))
-			fat_msg(sb, KERN_WARNING,
-					"mounting with \"discard\" option, but "
-					"the device does not support discard");
-	}
+	if (sbi->options.discard && !bdev_max_discard_sectors(sb->s_bdev))
+		fat_msg(sb, KERN_WARNING,
+			"mounting with \"discard\" option, but the device does not support discard");
 
 	fat_set_state(sb, 1, 0);
 	return 0;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 801ad9f4f2be..7f20ac9133bc 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1405,7 +1405,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
 	if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
 		return -EROFS;
 
-	if (!blk_queue_discard(q))
+	if (!bdev_max_discard_sectors(sdp->sd_vfs->s_bdev))
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&r, argp, sizeof(r)))
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index fcacafa4510d..19d226cd4ff4 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1762,7 +1762,6 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
 	unsigned long block, log_offset; /* logical */
 	unsigned long long phys_block, block_start, block_stop; /* physical */
 	loff_t byte_start, byte_stop, byte_count;
-	struct request_queue *q = bdev_get_queue(journal->j_dev);
 
 	/* flags must be set to either discard or zeroout */
 	if ((flags & ~JBD2_JOURNAL_FLUSH_VALID) || !flags ||
@@ -1770,10 +1769,8 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
			(flags & JBD2_JOURNAL_FLUSH_ZEROOUT)))
 		return -EINVAL;
 
-	if (!q)
-		return -ENXIO;
-
-	if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q))
+	if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
+	    !bdev_max_discard_sectors(journal->j_dev))
 		return -EOPNOTSUPP;
 
 	/*
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 03a845ab4f00..357ae6e5c36e 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -117,7 +117,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 
-		if (!blk_queue_discard(q)) {
+		if (!bdev_max_discard_sectors(sb->s_bdev)) {
 			jfs_warn("FITRIM not supported on device");
 			return -EOPNOTSUPP;
 		}
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index f1a13a74cddf..85d4f44f2ac4 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -372,19 +372,16 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 		}
 
 		case Opt_discard:
-		{
-			struct request_queue *q = bdev_get_queue(sb->s_bdev);
 			/* if set to 1, even copying files will cause
			 * trimming :O
			 * -> user has more control over the online trimming
			 */
 			sbi->minblks_trim = 64;
-			if (blk_queue_discard(q))
+			if (bdev_max_discard_sectors(sb->s_bdev))
 				*flag |= JFS_DISCARD;
 			else
 				pr_err("JFS: discard option not supported on device\n");
 			break;
-		}
 
 		case Opt_nodiscard:
 			*flag &= ~JFS_DISCARD;
@@ -392,10 +389,9 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 
 		case Opt_discard_minblk:
 		{
-			struct request_queue *q = bdev_get_queue(sb->s_bdev);
 			char *minblks_trim = args[0].from;
 			int rc;
-			if (blk_queue_discard(q)) {
+			if (bdev_max_discard_sectors(sb->s_bdev)) {
 				*flag |= JFS_DISCARD;
 				rc = kstrtouint(minblks_trim, 0,
						&sbi->minblks_trim);
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index fec194a666f4..52b73f558fcb 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -1059,7 +1059,7 @@ static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!blk_queue_discard(q))
+	if (!bdev_max_discard_sectors(nilfs->ns_bdev))
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&range, argp, sizeof(range)))
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 787b53b984ee..e76323616933 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -28,7 +28,7 @@ static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!blk_queue_discard(q))
+	if (!bdev_max_discard_sectors(sbi->sb->s_bdev))
 		return -EOPNOTSUPP;
 
 	user_range = (struct fstrim_range __user *)arg;
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index cd30e81abbce..c734085bcce4 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -913,7 +913,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
 	}
 
 	rq = bdev_get_queue(bdev);
-	if (blk_queue_discard(rq) && rq->limits.discard_granularity) {
+	if (bdev_max_discard_sectors(bdev) && rq->limits.discard_granularity) {
 		sbi->discard_granularity = rq->limits.discard_granularity;
 		sbi->discard_granularity_mask_inv =
			~(u64)(sbi->discard_granularity - 1);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index f59461d85da4..9b78ef103ada 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -910,7 +910,7 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 
-		if (!blk_queue_discard(q))
+		if (!bdev_max_discard_sectors(sb->s_bdev))
 			return -EOPNOTSUPP;
 
 		if (copy_from_user(&range, argp, sizeof(range)))
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 0191de8ce9ce..a4e6609d616b 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -162,7 +162,7 @@ xfs_ioc_trim(
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	if (!blk_queue_discard(q))
+	if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev))
 		return -EOPNOTSUPP;
 
 	/*
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 54be9d64093e..a276b8111f63 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1608,14 +1608,10 @@ xfs_fs_fill_super(
 		goto out_filestream_unmount;
 	}
 
-	if (xfs_has_discard(mp)) {
-		struct request_queue *q = bdev_get_queue(sb->s_bdev);
-
-		if (!blk_queue_discard(q)) {
-			xfs_warn(mp, "mounting with \"discard\" option, but "
-					"the device does not support discard");
-			mp->m_features &= ~XFS_FEAT_DISCARD;
-		}
+	if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) {
+		xfs_warn(mp,
+			"mounting with \"discard\" option, but the device does not support discard");
+		mp->m_features &= ~XFS_FEAT_DISCARD;
 	}
 
 	if (xfs_has_reflink(mp)) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ce16247d3afa..767ab22e1052 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -540,7 +540,6 @@ struct request_queue {
 #define QUEUE_FLAG_NONROT	6	/* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT		QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT	7	/* do disk/partitions IO accounting */
-#define QUEUE_FLAG_DISCARD	8	/* supports DISCARD */
 #define QUEUE_FLAG_NOXMERGES	9	/* No extended merges */
 #define QUEUE_FLAG_ADD_RANDOM	10	/* Contributes to random pool */
 #define QUEUE_FLAG_SECERASE	11	/* supports secure erase */
@@ -582,7 +581,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
	test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags)
 #define blk_queue_io_stat(q)	test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
 #define blk_queue_add_random(q)	test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
-#define blk_queue_discard(q)	test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
 #define blk_queue_zone_resetall(q)	\
	test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)
 #define blk_queue_secure_erase(q)	\
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4069f17a82c8..5d9cedf9e7b8 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2957,20 +2957,6 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
 	return nr_extents;
 }
 
-/*
- * Helper to sys_swapon determining if a given swap
- * backing device queue supports DISCARD operations.
- */
-static bool swap_discardable(struct swap_info_struct *si)
-{
-	struct request_queue *q = bdev_get_queue(si->bdev);
-
-	if (!blk_queue_discard(q))
-		return false;
-
-	return true;
-}
-
 SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 {
 	struct swap_info_struct *p;
@@ -3132,7 +3118,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
							 sizeof(long),
							 GFP_KERNEL);
 
-	if (p->bdev && (swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
+	if ((swap_flags & SWAP_FLAG_DISCARD) &&
+	    p->bdev && bdev_max_discard_sectors(p->bdev)) {
		/*
		 * When discard is enabled for swap with no particular
		 * policy flagged, we set all swap discard flags here in
--
cgit v1.2.3
Petersen Acked-by: Christoph Böhmwalder [drbd] Acked-by: Ryusuke Konishi [nilfs2] Acked-by: Jaegeuk Kim [f2fs] Acked-by: Coly Li [bcache] Acked-by: David Sterba [btrfs] Acked-by: Chao Yu Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220415045258.199825-27-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-lib.c | 64 ++++++++++++++++++++++++++----------- block/blk-mq-debugfs.c | 1 - block/blk-settings.c | 16 +++++++++- block/fops.c | 2 +- block/ioctl.c | 43 ++++++++++++++++++++----- drivers/block/drbd/drbd_receiver.c | 5 +-- drivers/block/rnbd/rnbd-clt.c | 4 +-- drivers/block/rnbd/rnbd-srv-dev.h | 2 +- drivers/block/xen-blkback/blkback.c | 15 +++++---- drivers/block/xen-blkback/xenbus.c | 5 +-- drivers/block/xen-blkfront.c | 5 +-- drivers/md/bcache/alloc.c | 2 +- drivers/md/dm-table.c | 8 ++--- drivers/md/dm-thin.c | 4 +-- drivers/md/md.c | 2 +- drivers/md/raid5-cache.c | 6 ++-- drivers/mmc/core/queue.c | 2 +- drivers/nvme/target/io-cmd-bdev.c | 2 +- drivers/target/target_core_file.c | 2 +- drivers/target/target_core_iblock.c | 2 +- fs/btrfs/extent-tree.c | 4 +-- fs/ext4/mballoc.c | 2 +- fs/f2fs/file.c | 16 +++++----- fs/f2fs/segment.c | 2 +- fs/jbd2/journal.c | 2 +- fs/nilfs2/sufile.c | 4 +-- fs/nilfs2/the_nilfs.c | 4 +-- fs/ntfs3/super.c | 2 +- fs/xfs/xfs_discard.c | 2 +- fs/xfs/xfs_log_cil.c | 2 +- include/linux/blkdev.h | 27 +++++++++------- mm/swapfile.c | 6 ++-- 33 files changed, 168 insertions(+), 99 deletions(-) (limited to 'mm') diff --git a/block/blk-core.c b/block/blk-core.c index b5c3a8049134..ee18b6a699bd 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -824,7 +824,7 @@ void submit_bio_noacct(struct bio *bio) goto not_supported; break; case REQ_OP_SECURE_ERASE: - if (!blk_queue_secure_erase(q)) + if (!bdev_max_secure_erase_sectors(bdev)) goto not_supported; break; case REQ_OP_ZONE_APPEND: diff --git a/block/blk-lib.c b/block/blk-lib.c index 43aa4d7fe859..09b7e1200c0f 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -36,26 +36,15 @@ static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector) } int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, int flags, - struct bio **biop) + sector_t nr_sects, gfp_t gfp_mask, struct bio **biop) { - struct request_queue *q = bdev_get_queue(bdev); struct bio *bio = *biop; - unsigned int op; sector_t bs_mask; if (bdev_read_only(bdev)) return -EPERM; - - if (flags & BLKDEV_DISCARD_SECURE) { - if (!blk_queue_secure_erase(q)) - return -EOPNOTSUPP; - op = REQ_OP_SECURE_ERASE; - } else { - if (!bdev_max_discard_sectors(bdev)) - return -EOPNOTSUPP; - op = REQ_OP_DISCARD; - } + if (!bdev_max_discard_sectors(bdev)) + return -EOPNOTSUPP; /* In case the discard granularity isn't set by buggy device driver */ if (WARN_ON_ONCE(!bdev_discard_granularity(bdev))) { @@ -77,7 +66,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t req_sects = min(nr_sects, bio_discard_limit(bdev, sector)); - bio = blk_next_bio(bio, bdev, 0, op, gfp_mask); + bio = blk_next_bio(bio, bdev, 0, REQ_OP_DISCARD, gfp_mask); bio->bi_iter.bi_sector = sector; bio->bi_iter.bi_size = req_sects << 9; sector += req_sects; @@ -103,21 +92,19 @@ EXPORT_SYMBOL(__blkdev_issue_discard); * @sector: start sector * @nr_sects: number of sectors to discard * @gfp_mask: memory allocation flags (for bio_alloc) - * @flags: BLKDEV_DISCARD_* flags to control behaviour * * Description: * Issue a discard request for the sectors in question.
*/ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) + sector_t nr_sects, gfp_t gfp_mask) { struct bio *bio = NULL; struct blk_plug plug; int ret; blk_start_plug(&plug); - ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags, - &bio); + ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio); if (!ret && bio) { ret = submit_bio_wait(bio); if (ret == -EOPNOTSUPP) @@ -314,3 +301,42 @@ retry: return ret; } EXPORT_SYMBOL(blkdev_issue_zeroout); + +int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp) +{ + sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1; + unsigned int max_sectors = bdev_max_secure_erase_sectors(bdev); + struct bio *bio = NULL; + struct blk_plug plug; + int ret = 0; + + if (max_sectors == 0) + return -EOPNOTSUPP; + if ((sector | nr_sects) & bs_mask) + return -EINVAL; + if (bdev_read_only(bdev)) + return -EPERM; + + blk_start_plug(&plug); + for (;;) { + unsigned int len = min_t(sector_t, nr_sects, max_sectors); + + bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp); + bio->bi_iter.bi_sector = sector; + bio->bi_iter.bi_size = len << SECTOR_SHIFT; + + sector += len; + nr_sects -= len; + if (!nr_sects) { + ret = submit_bio_wait(bio); + bio_put(bio); + break; + } + cond_resched(); + } + blk_finish_plug(&plug); + + return ret; +} +EXPORT_SYMBOL(blkdev_issue_secure_erase); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index fd111c500125..7e4136a60e1c 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -115,7 +115,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(IO_STAT), QUEUE_FLAG_NAME(NOXMERGES), QUEUE_FLAG_NAME(ADD_RANDOM), - QUEUE_FLAG_NAME(SECERASE), QUEUE_FLAG_NAME(SAME_FORCE), QUEUE_FLAG_NAME(DEAD), QUEUE_FLAG_NAME(INIT_DONE), diff --git a/block/blk-settings.c b/block/blk-settings.c index fd83d674afd0..6ccceb421ed2 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -46,6 +46,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->max_zone_append_sectors = 0; lim->max_discard_sectors = 0; lim->max_hw_discard_sectors = 0; + lim->max_secure_erase_sectors = 0; lim->discard_granularity = 0; lim->discard_alignment = 0; lim->discard_misaligned = 0; @@ -176,6 +177,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q, } EXPORT_SYMBOL(blk_queue_max_discard_sectors); +/** + * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase + * @q: the request queue for the device + * @max_sectors: maximum number of sectors to secure_erase + **/ +void blk_queue_max_secure_erase_sectors(struct request_queue *q, + unsigned int max_sectors) +{ + q->limits.max_secure_erase_sectors = max_sectors; +} +EXPORT_SYMBOL(blk_queue_max_secure_erase_sectors); + /** * blk_queue_max_write_zeroes_sectors - set max sectors for a single * write zeroes @@ -661,7 +674,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) % t->discard_granularity; } - + t->max_secure_erase_sectors = min_not_zero(t->max_secure_erase_sectors, + b->max_secure_erase_sectors); t->zone_write_granularity = max(t->zone_write_granularity, b->zone_write_granularity); t->zoned = max(t->zoned, b->zoned); diff --git a/block/fops.c b/block/fops.c index ba5e7d5ff9a5..e3643362c244 100644 --- a/block/fops.c +++ b/block/fops.c @@ -677,7 +677,7 @@ static long
blkdev_fallocate(struct file *file, int mode, loff_t start, break; case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE: error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT, - len >> SECTOR_SHIFT, GFP_KERNEL, 0); + len >> SECTOR_SHIFT, GFP_KERNEL); break; default: error = -EOPNOTSUPP; diff --git a/block/ioctl.c b/block/ioctl.c index eaee0efc0bea..46949f1b0dba 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -83,7 +83,7 @@ static int compat_blkpg_ioctl(struct block_device *bdev, #endif static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, - unsigned long arg, unsigned long flags) + unsigned long arg) { uint64_t range[2]; uint64_t start, len; @@ -114,15 +114,43 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, err = truncate_bdev_range(bdev, mode, start, start + len - 1); if (err) goto fail; - - err = blkdev_issue_discard(bdev, start >> 9, len >> 9, - GFP_KERNEL, flags); - + err = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); fail: filemap_invalidate_unlock(inode->i_mapping); return err; } +static int blk_ioctl_secure_erase(struct block_device *bdev, fmode_t mode, + void __user *argp) +{ + uint64_t start, len; + uint64_t range[2]; + int err; + + if (!(mode & FMODE_WRITE)) + return -EBADF; + if (!bdev_max_secure_erase_sectors(bdev)) + return -EOPNOTSUPP; + if (copy_from_user(range, argp, sizeof(range))) + return -EFAULT; + + start = range[0]; + len = range[1]; + if ((start & 511) || (len & 511)) + return -EINVAL; + if (start + len > bdev_nr_bytes(bdev)) + return -EINVAL; + + filemap_invalidate_lock(bdev->bd_inode->i_mapping); + err = truncate_bdev_range(bdev, mode, start, start + len - 1); + if (!err) + err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, + GFP_KERNEL); + filemap_invalidate_unlock(bdev->bd_inode->i_mapping); + return err; +} + + static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode, unsigned long arg) { @@ -450,10 +478,9 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode, case BLKROSET: return blkdev_roset(bdev, mode, cmd, arg); case BLKDISCARD: - return blk_ioctl_discard(bdev, mode, arg, 0); + return blk_ioctl_discard(bdev, mode, arg); case BLKSECDISCARD: - return blk_ioctl_discard(bdev, mode, arg, - BLKDEV_DISCARD_SECURE); + return blk_ioctl_secure_erase(bdev, mode, argp); case BLKZEROOUT: return blk_ioctl_zeroout(bdev, mode, arg); case BLKGETDISKSEQ: diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 275c53c7b629..2957b0b68d60 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1547,7 +1547,8 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u start = tmp; } while (nr_sectors >= max_discard_sectors) { - err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0); + err |= blkdev_issue_discard(bdev, start, max_discard_sectors, + GFP_NOIO); nr_sectors -= max_discard_sectors; start += max_discard_sectors; } @@ -1559,7 +1560,7 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u nr = nr_sectors; nr -= (unsigned int)nr % granularity; if (nr) { - err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0); + err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO); nr_sectors -= nr; start += nr; } diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index efa99a388450..d178be175ad9 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1365,8 +1365,8 @@ 
static void setup_request_queue(struct rnbd_clt_dev *dev) dev->queue->limits.discard_granularity = dev->discard_granularity; dev->queue->limits.discard_alignment = dev->discard_alignment; if (dev->secure_discard) - blk_queue_flag_set(QUEUE_FLAG_SECERASE, dev->queue); - + blk_queue_max_secure_erase_sectors(dev->queue, + dev->max_discard_sectors); blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue); blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue); blk_queue_max_segments(dev->queue, dev->max_segments); diff --git a/drivers/block/rnbd/rnbd-srv-dev.h b/drivers/block/rnbd/rnbd-srv-dev.h index 1f7e1c8fd4d9..d080a0de5922 100644 --- a/drivers/block/rnbd/rnbd-srv-dev.h +++ b/drivers/block/rnbd/rnbd-srv-dev.h @@ -44,7 +44,7 @@ static inline int rnbd_dev_get_max_hw_sects(const struct rnbd_dev *dev) static inline int rnbd_dev_get_secure_discard(const struct rnbd_dev *dev) { - return blk_queue_secure_erase(bdev_get_queue(dev->bdev)); + return bdev_max_secure_erase_sectors(dev->bdev); } static inline int rnbd_dev_get_max_discard_sects(const struct rnbd_dev *dev) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index de42458195bc..a97f2bf5b01b 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -970,7 +970,6 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring, int status = BLKIF_RSP_OKAY; struct xen_blkif *blkif = ring->blkif; struct block_device *bdev = blkif->vbd.bdev; - unsigned long secure; struct phys_req preq; xen_blkif_get(blkif); @@ -987,13 +986,15 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring, } ring->st_ds_req++; - secure = (blkif->vbd.discard_secure && - (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? - BLKDEV_DISCARD_SECURE : 0; + if (blkif->vbd.discard_secure && + (req->u.discard.flag & BLKIF_DISCARD_SECURE)) + err = blkdev_issue_secure_erase(bdev, + req->u.discard.sector_number, + req->u.discard.nr_sectors, GFP_KERNEL); + else + err = blkdev_issue_discard(bdev, req->u.discard.sector_number, + req->u.discard.nr_sectors, GFP_KERNEL); - err = blkdev_issue_discard(bdev, req->u.discard.sector_number, - req->u.discard.nr_sectors, - GFP_KERNEL, secure); fail_response: if (err == -EOPNOTSUPP) { pr_debug("discard op failed, not supported\n"); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 83cd08041e6b..b21bffc9c50b 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -484,7 +484,6 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, { struct xen_vbd *vbd; struct block_device *bdev; - struct request_queue *q; vbd = &blkif->vbd; vbd->handle = handle; @@ -516,11 +515,9 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) vbd->type |= VDISK_REMOVABLE; - q = bdev_get_queue(bdev); if (bdev_write_cache(bdev)) vbd->flush_support = true; - - if (q && blk_queue_secure_erase(q)) + if (bdev_max_secure_erase_sectors(bdev)) vbd->discard_secure = true; vbd->feature_gnt_persistent = feature_persistent; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index e13cb4d48f1e..0f3f5238f7bc 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -949,7 +949,8 @@ static void blkif_set_queue_limits(struct blkfront_info *info) info->physical_sector_size; rq->limits.discard_alignment = info->discard_alignment; if (info->feature_secdiscard) - blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq); + 
blk_queue_max_secure_erase_sectors(rq, + get_capacity(gd)); } /* Hard sector size and max sectors impersonate the equiv. hardware. */ @@ -1606,7 +1607,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) info->feature_discard = 0; info->feature_secdiscard = 0; blk_queue_max_discard_sectors(rq, 0); - blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq); + blk_queue_max_secure_erase_sectors(rq, 0); } break; case BLKIF_OP_FLUSH_DISKCACHE: diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 097577ae3c47..ce13c272c387 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -336,7 +336,7 @@ static int bch_allocator_thread(void *arg) mutex_unlock(&ca->set->bucket_lock); blkdev_issue_discard(ca->bdev, bucket_to_sector(ca->set, bucket), - ca->sb.bucket_size, GFP_KERNEL, 0); + ca->sb.bucket_size, GFP_KERNEL); mutex_lock(&ca->set->bucket_lock); } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 0dff6907fd00..e7d42f6335a2 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1920,9 +1920,7 @@ static int device_not_secure_erase_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - struct request_queue *q = bdev_get_queue(dev->bdev); - - return !blk_queue_secure_erase(q); + return !bdev_max_secure_erase_sectors(dev->bdev); } static bool dm_table_supports_secure_erase(struct dm_table *t) @@ -1975,8 +1973,8 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, q->limits.discard_misaligned = 0; } - if (dm_table_supports_secure_erase(t)) - blk_queue_flag_set(QUEUE_FLAG_SECERASE, q); + if (!dm_table_supports_secure_erase(t)) + q->limits.max_secure_erase_sectors = 0; if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) { wc = true; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index eded4bcc4545..84c083f76673 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -398,8 +398,8 @@ static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t da sector_t s = block_to_sectors(tc->pool, data_b); sector_t len = block_to_sectors(tc->pool, data_e - data_b); - return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, - GFP_NOWAIT, 0, &op->bio); + return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOWAIT, + &op->bio); } static void end_discard(struct discard_op *op, int r) diff --git a/drivers/md/md.c b/drivers/md/md.c index 19636c2f2cda..2587f872c088 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8584,7 +8584,7 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, { struct bio *discard_bio = NULL; - if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO, 0, + if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO, &discard_bio) || !discard_bio) return; diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index c3cbf9a574a3..094a4042589e 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1344,14 +1344,14 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log, if (log->last_checkpoint < end) { blkdev_issue_discard(bdev, log->last_checkpoint + log->rdev->data_offset, - end - log->last_checkpoint, GFP_NOIO, 0); + end - log->last_checkpoint, GFP_NOIO); } else { blkdev_issue_discard(bdev, log->last_checkpoint + log->rdev->data_offset, log->device_size - log->last_checkpoint, - GFP_NOIO, 0); + GFP_NOIO); blkdev_issue_discard(bdev, log->rdev->data_offset, end, - GFP_NOIO, 0); + GFP_NOIO); } } diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c 
index cac6315010a3..a3d446005571 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -189,7 +189,7 @@ static void mmc_queue_setup_discard(struct request_queue *q, if (card->pref_erase > max_discard) q->limits.discard_granularity = SECTOR_SIZE; if (mmc_can_secure_erase_trim(card)) - blk_queue_flag_set(QUEUE_FLAG_SECERASE, q); + blk_queue_max_secure_erase_sectors(q, max_discard); } static unsigned short mmc_get_max_segments(struct mmc_host *host) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index d886c2c59554..27a72504d31c 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -360,7 +360,7 @@ static u16 nvmet_bdev_discard_range(struct nvmet_req *req, ret = __blkdev_issue_discard(ns->bdev, nvmet_lba_to_sect(ns, range->slba), le32_to_cpu(range->nlb) << (ns->blksize_shift - 9), - GFP_KERNEL, 0, bio); + GFP_KERNEL, bio); if (ret && ret != -EOPNOTSUPP) { req->error_slba = le64_to_cpu(range->slba); return errno_to_nvme_status(req, ret); diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index b6ba582b0677..e68f1cc8ef98 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -558,7 +558,7 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) ret = blkdev_issue_discard(bdev, target_to_linux_sector(dev, lba), target_to_linux_sector(dev, nolb), - GFP_KERNEL, 0); + GFP_KERNEL); if (ret < 0) { pr_warn("FILEIO: blkdev_issue_discard() failed: %d\n", ret); diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index c4a903b8a47f..378c80313a0f 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -434,7 +434,7 @@ iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) ret = blkdev_issue_discard(bdev, target_to_linux_sector(dev, lba), target_to_linux_sector(dev, nolb), - GFP_KERNEL, 0); + GFP_KERNEL); if (ret < 0) { pr_err("blkdev_issue_discard() failed: %d\n", ret); return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index aba616ff953f..6260784e74b5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1239,7 +1239,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, if (size) { ret = blkdev_issue_discard(bdev, start >> 9, size >> 9, - GFP_NOFS, 0); + GFP_NOFS); if (!ret) *discarded_bytes += size; else if (ret != -EOPNOTSUPP) @@ -1256,7 +1256,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, if (bytes_left) { ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9, - GFP_NOFS, 0); + GFP_NOFS); if (!ret) *discarded_bytes += bytes_left; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6d1820536d88..ea653d19f9ec 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3629,7 +3629,7 @@ static inline int ext4_issue_discard(struct super_block *sb, return __blkdev_issue_discard(sb->s_bdev, (sector_t)discard_block << (sb->s_blocksize_bits - 9), (sector_t)count << (sb->s_blocksize_bits - 9), - GFP_NOFS, 0, biop); + GFP_NOFS, biop); } else return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8053d99f3920..35b6c720c2bc 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3685,18 +3685,18 @@ out: static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode, pgoff_t off, block_t block, block_t len, u32 flags) { - struct request_queue *q 
= bdev_get_queue(bdev); sector_t sector = SECTOR_FROM_BLOCK(block); sector_t nr_sects = SECTOR_FROM_BLOCK(len); int ret = 0; - if (!q) - return -ENXIO; - - if (flags & F2FS_TRIM_FILE_DISCARD) - ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS, - blk_queue_secure_erase(q) ? - BLKDEV_DISCARD_SECURE : 0); + if (flags & F2FS_TRIM_FILE_DISCARD) { + if (bdev_max_secure_erase_sectors(bdev)) + ret = blkdev_issue_secure_erase(bdev, sector, nr_sects, + GFP_NOFS); + else + ret = blkdev_issue_discard(bdev, sector, nr_sects, + GFP_NOFS); + } if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) { if (IS_ENCRYPTED(inode)) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 71f09adbcba8..e433c61e64b9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1244,7 +1244,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, err = __blkdev_issue_discard(bdev, SECTOR_FROM_BLOCK(start), SECTOR_FROM_BLOCK(len), - GFP_NOFS, 0, &bio); + GFP_NOFS, &bio); submit: if (err) { spin_lock_irqsave(&dc->lock, flags); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 19d226cd4ff4..c0cbeeaec2d1 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1825,7 +1825,7 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags) err = blkdev_issue_discard(journal->j_dev, byte_start >> SECTOR_SHIFT, byte_count >> SECTOR_SHIFT, - GFP_NOFS, 0); + GFP_NOFS); } else if (flags & JBD2_JOURNAL_FLUSH_ZEROOUT) { err = blkdev_issue_zeroout(journal->j_dev, byte_start >> SECTOR_SHIFT, diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index e385cca2004a..77ff8e95421f 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -1100,7 +1100,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, 0); + GFP_NOFS); if (ret < 0) { put_bh(su_bh); goto out_sem; @@ -1134,7 +1134,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, 0); + GFP_NOFS); if (!ret) ndiscarded += nblocks; } diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index dd48a8f74d57..3b4a079c9617 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -672,7 +672,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, 0); + GFP_NOFS); if (ret < 0) return ret; nblocks = 0; @@ -682,7 +682,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, 0); + GFP_NOFS); return ret; } diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 5f2e414cfa79..5781b9e8e3d8 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -1333,7 +1333,7 @@ int ntfs_discard(struct ntfs_sb_info *sbi, CLST lcn, CLST len) return 0; err = blkdev_issue_discard(sb->s_bdev, start >> 9, (end - start) >> 9, - GFP_NOFS, 0); + GFP_NOFS); if (err == -EOPNOTSUPP) sbi->flags |= NTFS_FLAGS_NODISCARD; diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index e2ada115c23f..c6fe3f6ebb6b 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -114,7 +114,7 @@ xfs_trim_extents( } trace_xfs_discard_extent(mp, agno, fbno, flen); - error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0); + error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS); if 
(error) goto out_del_cursor; *blocks_trimmed += flen; diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index ba57323bfdce..c9f55e4f0957 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -605,7 +605,7 @@ xlog_discard_busy_extents( error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), XFS_FSB_TO_BB(mp, busyp->length), - GFP_NOFS, 0, &bio); + GFP_NOFS, &bio); if (error && error != -EOPNOTSUPP) { xfs_info(mp, "discard failed for extent [0x%llx,%u], error %d", diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f1cf557ea20e..c9b5925af5a3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -248,6 +248,7 @@ struct queue_limits { unsigned int io_opt; unsigned int max_discard_sectors; unsigned int max_hw_discard_sectors; + unsigned int max_secure_erase_sectors; unsigned int max_write_zeroes_sectors; unsigned int max_zone_append_sectors; unsigned int discard_granularity; @@ -542,7 +543,6 @@ struct request_queue { #define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ -#define QUEUE_FLAG_SECERASE 11 /* supports secure erase */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_DEAD 13 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ @@ -583,8 +583,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_zone_resetall(q) \ test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) -#define blk_queue_secure_erase(q) \ - (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags)) #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) #define blk_queue_pci_p2pdma(q) \ test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) @@ -947,6 +945,8 @@ extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_segments(struct request_queue *, unsigned short); extern void blk_queue_max_discard_segments(struct request_queue *, unsigned short); +void blk_queue_max_secure_erase_sectors(struct request_queue *q, + unsigned int max_sectors); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); @@ -1087,13 +1087,12 @@ static inline long nr_blockdev_pages(void) extern void blk_io_schedule(void); -#define BLKDEV_DISCARD_SECURE (1 << 0) /* issue a secure erase */ - -extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); -extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, int flags, - struct bio **biop); +int blkdev_issue_discard(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask); +int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask, struct bio **biop); +int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp); #define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */ #define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */ @@ -1112,7 +1111,7 @@ static inline int sb_issue_discard(struct super_block *sb, sector_t block, SECTOR_SHIFT), 
nr_blocks << (sb->s_blocksize_bits - SECTOR_SHIFT), - gfp_mask, flags); + gfp_mask); } static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, sector_t nr_blocks, gfp_t gfp_mask) @@ -1262,6 +1261,12 @@ static inline unsigned int bdev_discard_granularity(struct block_device *bdev) return bdev_get_queue(bdev)->limits.discard_granularity; } +static inline unsigned int +bdev_max_secure_erase_sectors(struct block_device *bdev) +{ + return bdev_get_queue(bdev)->limits.max_secure_erase_sectors; +} + static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); diff --git a/mm/swapfile.c b/mm/swapfile.c index 5d9cedf9e7b8..a2b31fea0c42 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -179,7 +179,7 @@ static int discard_swap(struct swap_info_struct *si) nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); if (nr_blocks) { err = blkdev_issue_discard(si->bdev, start_block, - nr_blocks, GFP_KERNEL, 0); + nr_blocks, GFP_KERNEL); if (err) return err; cond_resched(); @@ -190,7 +190,7 @@ static int discard_swap(struct swap_info_struct *si) nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); err = blkdev_issue_discard(si->bdev, start_block, - nr_blocks, GFP_KERNEL, 0); + nr_blocks, GFP_KERNEL); if (err) break; @@ -254,7 +254,7 @@ static void discard_swap_cluster(struct swap_info_struct *si, start_block <<= PAGE_SHIFT - 9; nr_blocks <<= PAGE_SHIFT - 9; if (blkdev_issue_discard(si->bdev, start_block, - nr_blocks, GFP_NOIO, 0)) + nr_blocks, GFP_NOIO)) break; se = next_se(se); -- cgit v1.2.3 From 9650b453a3d4b1b8ed4ea8bcb9b40109608d1faf Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 20 Apr 2022 22:31:10 +0800 Subject: block: ignore RWF_HIPRI hint for sync dio So far a bio is marked REQ_POLLED if RWF_HIPRI/IOCB_HIPRI is passed from the userspace sync io interface, and the block layer then tries to poll until the bio is completed. But the current implementation calls blk_io_schedule() if bio_poll() returns 0, and this way easily causes io hangs or timeouts. Yet it seems no one reports this kind of issue, even though it should have been triggered by a normal io poll sanity test or by blktests block/007, as observed by Changhui; that means it is very likely that no one uses it or cares about it. Also, now that io_uring exists, io poll for sync dio has become a legacy interface. So ignore the RWF_HIPRI hint for sync dio.
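[editor's note: a minimal userspace sketch of the now-ignored hint, added for illustration and not part of the commit; the device path is hypothetical, and the only observable change is that the read below completes by blocking instead of busy-polling:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/uio.h>

int main(void)
{
	struct iovec iov;
	void *buf;
	/* hypothetical device node; O_DIRECT requires an aligned buffer */
	int fd = open("/dev/nvme0n1", O_RDONLY | O_DIRECT);

	if (fd < 0 || posix_memalign(&buf, 4096, 4096))
		return 1;
	iov.iov_base = buf;
	iov.iov_len = 4096;
	/* RWF_HIPRI is still accepted, but for a sync O_DIRECT read the
	 * kernel now ignores the poll hint and waits in submit_bio_wait() */
	if (preadv2(fd, &iov, 1, 0, RWF_HIPRI) < 0)
		perror("preadv2");
	return 0;
}]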
CC: linux-mm@kvack.org Cc: linux-xfs@vger.kernel.org Reported-by: Changhui Zhong Suggested-by: Christoph Hellwig Signed-off-by: Ming Lei Tested-by: Changhui Zhong Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220420143110.2679002-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/fops.c | 22 +--------------------- fs/iomap/direct-io.c | 7 +++---- mm/page_io.c | 4 +--- 3 files changed, 5 insertions(+), 28 deletions(-) (limited to 'mm') diff --git a/block/fops.c b/block/fops.c index e3643362c244..b9b83030e0df 100644 --- a/block/fops.c +++ b/block/fops.c @@ -44,14 +44,6 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb) #define DIO_INLINE_BIO_VECS 4 -static void blkdev_bio_end_io_simple(struct bio *bio) -{ - struct task_struct *waiter = bio->bi_private; - - WRITE_ONCE(bio->bi_private, NULL); - blk_wake_io_task(waiter); -} - static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, unsigned int nr_pages) { @@ -83,8 +75,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb)); } bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT; - bio.bi_private = current; - bio.bi_end_io = blkdev_bio_end_io_simple; bio.bi_ioprio = iocb->ki_ioprio; ret = bio_iov_iter_get_pages(&bio, iter); @@ -97,18 +87,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, if (iocb->ki_flags & IOCB_NOWAIT) bio.bi_opf |= REQ_NOWAIT; - if (iocb->ki_flags & IOCB_HIPRI) - bio_set_polled(&bio, iocb); - submit_bio(&bio); - for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (!READ_ONCE(bio.bi_private)) - break; - if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, NULL, 0)) - blk_io_schedule(); - } - __set_current_state(TASK_RUNNING); + submit_bio_wait(&bio); bio_release_pages(&bio, should_dirty); if (unlikely(bio.bi_status)) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 62da020d02a1..80f9b047aa1b 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -56,7 +56,8 @@ static void iomap_dio_submit_bio(const struct iomap_iter *iter, { atomic_inc(&dio->ref); - if (dio->iocb->ki_flags & IOCB_HIPRI) { + /* Sync dio can't be polled reliably */ + if ((dio->iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(dio->iocb)) { bio_set_polled(bio, dio->iocb); dio->submit.poll_bio = bio; } @@ -653,9 +654,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (!READ_ONCE(dio->submit.waiter)) break; - if (!dio->submit.poll_bio || - !bio_poll(dio->submit.poll_bio, NULL, 0)) - blk_io_schedule(); + blk_io_schedule(); } __set_current_state(TASK_RUNNING); } diff --git a/mm/page_io.c b/mm/page_io.c index 89fbf3cae30f..3fbdab6a940e 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -360,7 +360,6 @@ int swap_readpage(struct page *page, bool synchronous) * attempt to access it in the page fault retry time check. */ if (synchronous) { - bio->bi_opf |= REQ_POLLED; get_task_struct(current); bio->bi_private = current; } @@ -372,8 +371,7 @@ int swap_readpage(struct page *page, bool synchronous) if (!READ_ONCE(bio->bi_private)) break; - if (!bio_poll(bio, NULL, 0)) - blk_io_schedule(); + blk_io_schedule(); } __set_current_state(TASK_RUNNING); bio_put(bio); -- cgit v1.2.3 From 397c9f46ee4d99024c64954b007c1b5762d01cb4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Apr 2022 06:27:14 +0200 Subject: blk-cgroup: move blkcg_{pin,unpin}_online out of line Move these two functions out of line as there is no good reason to inline them. 
Also switch to passing a cgroup_subsys_state instead of doing the conversion in the caller to prepare for making the blkcg structure private to blk-cgroup. Signed-off-by: Christoph Hellwig Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20220420042723.1010598-7-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 88 ++++++++++++++++++++++++++++++++++------------ include/linux/blk-cgroup.h | 46 ++---------------------- mm/backing-dev.c | 5 ++- 3 files changed, 69 insertions(+), 70 deletions(-) (limited to 'mm') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 97266ebde975..3af0d4a61955 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -155,6 +155,17 @@ static void blkg_async_bio_workfn(struct work_struct *work) blk_finish_plug(&plug); } +/** + * blkcg_parent - get the parent of a blkcg + * @blkcg: blkcg of interest + * + * Return the parent blkcg of @blkcg. Can be called anytime. + */ +static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) +{ + return css_to_blkcg(blkcg->css.parent); +} + /** * blkg_alloc - allocate a blkg * @blkcg: block cgroup the new blkg is associated with @@ -1015,25 +1026,6 @@ static struct cftype blkcg_legacy_files[] = { * This finally frees the blkcg. */ -/** - * blkcg_css_offline - cgroup css_offline callback - * @css: css of interest - * - * This function is called when @css is about to go away. Here the cgwbs are - * offlined first and only once writeback associated with the blkcg has - * finished do we start step 2 (see above). - */ -static void blkcg_css_offline(struct cgroup_subsys_state *css) -{ - struct blkcg *blkcg = css_to_blkcg(css); - - /* this prevents anyone from attaching or migrating to this blkcg */ - wb_blkcg_offline(blkcg); - - /* put the base online pin allowing step 2 to be triggered */ - blkcg_unpin_online(blkcg); -} - /** * blkcg_destroy_blkgs - responsible for shooting down blkgs * @blkcg: blkcg of interest @@ -1045,7 +1037,7 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css) * * This is the blkcg counterpart of ioc_release_fn(). */ -void blkcg_destroy_blkgs(struct blkcg *blkcg) +static void blkcg_destroy_blkgs(struct blkcg *blkcg) { might_sleep(); @@ -1075,6 +1067,57 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg) spin_unlock_irq(&blkcg->lock); } +/** + * blkcg_pin_online - pin online state + * @blkcg_css: blkcg of interest + * + * While pinned, a blkcg is kept online. This is primarily used to + * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline + * while an associated cgwb is still active. + */ +void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css) +{ + refcount_inc(&css_to_blkcg(blkcg_css)->online_pin); +} + +/** + * blkcg_unpin_online - unpin online state + * @blkcg_css: blkcg of interest + * + * This is primarily used to impedance-match blkg and cgwb lifetimes so + * that blkg doesn't go offline while an associated cgwb is still active. + * When this count goes to zero, all active cgwbs have finished so the + * blkcg can continue destruction by calling blkcg_destroy_blkgs(). + */ +void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css) +{ + struct blkcg *blkcg = css_to_blkcg(blkcg_css); + + do { + if (!refcount_dec_and_test(&blkcg->online_pin)) + break; + blkcg_destroy_blkgs(blkcg); + blkcg = blkcg_parent(blkcg); + } while (blkcg); +} + +/** + * blkcg_css_offline - cgroup css_offline callback + * @css: css of interest + * + * This function is called when @css is about to go away. 
Here the cgwbs are + * offlined first and only once writeback associated with the blkcg has + * finished do we start step 2 (see above). + */ +static void blkcg_css_offline(struct cgroup_subsys_state *css) +{ + /* this prevents anyone from attaching or migrating to this blkcg */ + wb_blkcg_offline(css_to_blkcg(css)); + + /* put the base online pin allowing step 2 to be triggered */ + blkcg_unpin_online(css); +} + static void blkcg_css_free(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); @@ -1163,8 +1206,7 @@ unlock: static int blkcg_css_online(struct cgroup_subsys_state *css) { - struct blkcg *blkcg = css_to_blkcg(css); - struct blkcg *parent = blkcg_parent(blkcg); + struct blkcg *parent = blkcg_parent(css_to_blkcg(css)); /* * blkcg_pin_online() is used to delay blkcg offline so that blkgs @@ -1172,7 +1214,7 @@ static int blkcg_css_online(struct cgroup_subsys_state *css) * parent so that offline always happens towards the root. */ if (parent) - blkcg_pin_online(parent); + blkcg_pin_online(css); return 0; } diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 988965c1c27b..0fb7459096e9 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -111,7 +111,6 @@ struct blkcg_gq { extern struct cgroup_subsys_state * const blkcg_root_css; -void blkcg_destroy_blkgs(struct blkcg *blkcg); void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); void blkcg_maybe_throttle_current(void); @@ -136,49 +135,8 @@ static inline struct blkcg *bio_blkcg(struct bio *bio) } bool blk_cgroup_congested(void); - -/** - * blkcg_parent - get the parent of a blkcg - * @blkcg: blkcg of interest - * - * Return the parent blkcg of @blkcg. Can be called anytime. - */ -static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) -{ - return css_to_blkcg(blkcg->css.parent); -} - -/** - * blkcg_pin_online - pin online state - * @blkcg: blkcg of interest - * - * While pinned, a blkcg is kept online. This is primarily used to - * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline - * while an associated cgwb is still active. - */ -static inline void blkcg_pin_online(struct blkcg *blkcg) -{ - refcount_inc(&blkcg->online_pin); -} - -/** - * blkcg_unpin_online - unpin online state - * @blkcg: blkcg of interest - * - * This is primarily used to impedance-match blkg and cgwb lifetimes so - * that blkg doesn't go offline while an associated cgwb is still active. - * When this count goes to zero, all active cgwbs have finished so the - * blkcg can continue destruction by calling blkcg_destroy_blkgs(). 
- */ -static inline void blkcg_unpin_online(struct blkcg *blkcg) -{ - do { - if (!refcount_dec_and_test(&blkcg->online_pin)) - break; - blkcg_destroy_blkgs(blkcg); - blkcg = blkcg_parent(blkcg); - } while (blkcg); -} +void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css); +void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css); #else /* CONFIG_BLK_CGROUP */ diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 7176af65b103..93cddbcd4eb8 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -390,7 +390,6 @@ static void cgwb_release_workfn(struct work_struct *work) { struct bdi_writeback *wb = container_of(work, struct bdi_writeback, release_work); - struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css); struct backing_dev_info *bdi = wb->bdi; mutex_lock(&wb->bdi->cgwb_release_mutex); @@ -401,7 +400,7 @@ static void cgwb_release_workfn(struct work_struct *work) mutex_unlock(&wb->bdi->cgwb_release_mutex); /* triggers blkg destruction if no online users left */ - blkcg_unpin_online(blkcg); + blkcg_unpin_online(wb->blkcg_css); fprop_local_destroy_percpu(&wb->memcg_completions); @@ -511,7 +510,7 @@ static int cgwb_create(struct backing_dev_info *bdi, list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list); list_add(&wb->memcg_node, memcg_cgwb_list); list_add(&wb->blkcg_node, blkcg_cgwb_list); - blkcg_pin_online(blkcg); + blkcg_pin_online(blkcg_css); css_get(memcg_css); css_get(blkcg_css); } -- cgit v1.2.3 From dec223c92a4688f6c9642d640cfe15a99d289dd4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Apr 2022 06:27:15 +0200 Subject: blk-cgroup: move struct blkcg to block/blk-cgroup.h There is no real need to expose the blkcg structure to the whole kernel. Move it to the private header and expose a helper to let the writeback code access the cgwb_list member. Signed-off-by: Christoph Hellwig Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20220420042723.1010598-8-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 9 ++++++++- block/blk-cgroup.h | 28 ++++++++++++++++++++++++++++ include/linux/backing-dev.h | 6 ++---- include/linux/blk-cgroup.h | 32 +------------------------------- mm/backing-dev.c | 13 ++++++------- 5 files changed, 45 insertions(+), 43 deletions(-) (limited to 'mm') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 3af0d4a61955..bb52797c02bd 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1005,6 +1005,13 @@ static struct cftype blkcg_legacy_files[] = { { } /* terminate */ }; +#ifdef CONFIG_CGROUP_WRITEBACK +struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css) +{ + return &css_to_blkcg(css)->cgwb_list; +} +#endif + /* * blkcg destruction is a three-stage process.
* @@ -1112,7 +1119,7 @@ void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css) static void blkcg_css_offline(struct cgroup_subsys_state *css) { /* this prevents anyone from attaching or migrating to this blkcg */ - wb_blkcg_offline(css_to_blkcg(css)); + wb_blkcg_offline(css); /* put the base online pin allowing step 2 to be triggered */ blkcg_unpin_online(css); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 49e88fc9cc39..b00fb1169e7c 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -17,10 +17,38 @@ #include #include +struct blkcg_gq; +struct blkg_policy_data; + + /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) #ifdef CONFIG_BLK_CGROUP +struct blkcg { + struct cgroup_subsys_state css; + spinlock_t lock; + refcount_t online_pin; + + struct radix_tree_root blkg_tree; + struct blkcg_gq __rcu *blkg_hint; + struct hlist_head blkg_list; + + struct blkcg_policy_data *cpd[BLKCG_MAX_POLS]; + + struct list_head all_blkcgs_node; +#ifdef CONFIG_BLK_CGROUP_FC_APPID + char fc_app_id[FC_APPID_LEN]; +#endif +#ifdef CONFIG_CGROUP_WRITEBACK + struct list_head cgwb_list; +#endif +}; + +static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) +{ + return css ? container_of(css, struct blkcg, css) : NULL; +} /* * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 87ce24d238f3..2bd073fa6bb5 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -17,8 +17,6 @@ #include #include -struct blkcg; - static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi) { kref_get(&bdi->refcnt); @@ -154,7 +152,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css, gfp_t gfp); void wb_memcg_offline(struct mem_cgroup *memcg); -void wb_blkcg_offline(struct blkcg *blkcg); +void wb_blkcg_offline(struct cgroup_subsys_state *css); /** * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode @@ -378,7 +376,7 @@ static inline void wb_memcg_offline(struct mem_cgroup *memcg) { } -static inline void wb_blkcg_offline(struct blkcg *blkcg) +static inline void wb_blkcg_offline(struct cgroup_subsys_state *css) { } diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 0fb7459096e9..d7b188095040 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -37,29 +37,6 @@ enum blkg_iostat_type { BLKG_IOSTAT_NR, }; -struct blkcg_gq; -struct blkg_policy_data; - -struct blkcg { - struct cgroup_subsys_state css; - spinlock_t lock; - refcount_t online_pin; - - struct radix_tree_root blkg_tree; - struct blkcg_gq __rcu *blkg_hint; - struct hlist_head blkg_list; - - struct blkcg_policy_data *cpd[BLKCG_MAX_POLS]; - - struct list_head all_blkcgs_node; -#ifdef CONFIG_BLK_CGROUP_FC_APPID - char fc_app_id[FC_APPID_LEN]; -#endif -#ifdef CONFIG_CGROUP_WRITEBACK - struct list_head cgwb_list; -#endif -}; - struct blkg_iostat { u64 bytes[BLKG_IOSTAT_NR]; u64 ios[BLKG_IOSTAT_NR]; @@ -114,11 +91,6 @@ extern struct cgroup_subsys_state * const blkcg_root_css; void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); void blkcg_maybe_throttle_current(void); -static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) -{ - return css ? 
container_of(css, struct blkcg, css) : NULL; -} - /** * bio_blkcg - grab the blkcg associated with a bio * @bio: target bio @@ -137,12 +109,10 @@ static inline struct blkcg *bio_blkcg(struct bio *bio) bool blk_cgroup_congested(void); void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css); void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css); +struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css); #else /* CONFIG_BLK_CGROUP */ -struct blkcg { -}; - struct blkcg_gq { }; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 93cddbcd4eb8..98f8f62e52ca 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -445,7 +445,6 @@ static int cgwb_create(struct backing_dev_info *bdi, { struct mem_cgroup *memcg; struct cgroup_subsys_state *blkcg_css; - struct blkcg *blkcg; struct list_head *memcg_cgwb_list, *blkcg_cgwb_list; struct bdi_writeback *wb; unsigned long flags; @@ -453,9 +452,8 @@ static int cgwb_create(struct backing_dev_info *bdi, memcg = mem_cgroup_from_css(memcg_css); blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys); - blkcg = css_to_blkcg(blkcg_css); memcg_cgwb_list = &memcg->cgwb_list; - blkcg_cgwb_list = &blkcg->cgwb_list; + blkcg_cgwb_list = blkcg_get_cgwb_list(blkcg_css); /* look up again under lock and discard on blkcg mismatch */ spin_lock_irqsave(&cgwb_lock, flags); @@ -723,18 +721,19 @@ void wb_memcg_offline(struct mem_cgroup *memcg) /** * wb_blkcg_offline - kill all wb's associated with a blkcg being offlined - * @blkcg: blkcg being offlined + * @css: blkcg being offlined * * Also prevents creation of any new wb's associated with @blkcg. */ -void wb_blkcg_offline(struct blkcg *blkcg) +void wb_blkcg_offline(struct cgroup_subsys_state *css) { struct bdi_writeback *wb, *next; + struct list_head *list = blkcg_get_cgwb_list(css); spin_lock_irq(&cgwb_lock); - list_for_each_entry_safe(wb, next, &blkcg->cgwb_list, blkcg_node) + list_for_each_entry_safe(wb, next, list, blkcg_node) cgwb_kill(wb); - blkcg->cgwb_list.next = NULL; /* prevent new wb's */ + list->next = NULL; /* prevent new wb's */ spin_unlock_irq(&cgwb_lock); } -- cgit v1.2.3 From c97ab271576dec2170e7b804cb05f7617b30fed9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Apr 2022 06:27:19 +0200 Subject: blk-cgroup: remove unneeded includes from <linux/blk-cgroup.h> Remove all the includes that aren't actually needed from <linux/blk-cgroup.h> and push them to the actual source files where needed.
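[editor's note: an illustration added here, not part of the commit; the concrete headers named are assumptions. The practical fallout of this kind of header diet is that code which leaned on <linux/blk-cgroup.h> to transitively pull in other headers must now include what it uses directly:

/* before: <linux/blk-cgroup.h> happened to drag in heavyweight headers */
#include <linux/blk-cgroup.h>

/* after: spell out the real dependency (assumed here to be blkdev.h) */
#include <linux/blk-cgroup.h>
#include <linux/blkdev.h>]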
Signed-off-by: Christoph Hellwig Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20220420042723.1010598-12-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-cgroup.h | 2 ++ include/linux/blk-cgroup.h | 15 +++++---------- mm/backing-dev.c | 1 + mm/readahead.c | 1 + mm/swapfile.c | 1 + 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'mm') diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index a948f4eb0bff..62ed8ed50b6e 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -15,6 +15,8 @@ */ #include +#include +#include #include struct blkcg_gq; diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index abbfa97d6d46..9f40dbc65f82 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -14,16 +14,11 @@ * Nauman Rafique */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include + +struct bio; +struct cgroup_subsys_state; +struct request_queue; #define FC_APPID_LEN 129 diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 98f8f62e52ca..ff60bd7d74e0 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include #include #include diff --git a/mm/readahead.c b/mm/readahead.c index 8e3775829513..c93671a2bf0f 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -113,6 +113,7 @@ * ->readpage() which may be less efficient. */ +#include #include #include #include diff --git a/mm/swapfile.c b/mm/swapfile.c index a2b31fea0c42..981a6e85c88e 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -6,6 +6,7 @@ * Swap reorganised 29.12.95, Stephen Tweedie */ +#include #include #include #include -- cgit v1.2.3
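[editor's note: a condensed sketch of how the writeback side looks after this blkcg series, modeled on cgwb_create()/cgwb_release_workfn() in mm/backing-dev.c above; attach_wb()/detach_wb() are hypothetical names, and locking and error handling are omitted:

/* pin the blkcg online state for the lifetime of a cgwb */
static int attach_wb(struct bdi_writeback *wb,
		     struct cgroup_subsys_state *memcg_css)
{
	struct cgroup_subsys_state *blkcg_css =
		cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);

	/* the cgwb_list is now reached through a helper, not struct blkcg */
	list_add(&wb->blkcg_node, blkcg_get_cgwb_list(blkcg_css));
	blkcg_pin_online(blkcg_css);
	wb->blkcg_css = blkcg_css;
	return 0;
}

static void detach_wb(struct bdi_writeback *wb)
{
	list_del(&wb->blkcg_node);
	/* dropping the last pin triggers blkcg_destroy_blkgs() */
	blkcg_unpin_online(wb->blkcg_css);
	css_put(wb->blkcg_css);
}]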