From 348332e000697b4ca82ef96719e02876434b8346 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Sep 2021 14:33:12 +0200 Subject: mm: don't include in blk-cgroup.h pulls in blkdev.h and thus pretty much all the block headers. Break this dependency chain by turning wbc_blkcg_css into a macro and dropping the blk-cgroup.h include. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20210920123328.1399408-2-hch@lst.de Signed-off-by: Jens Axboe --- fs/btrfs/inode.c | 1 + fs/quota/quota.c | 1 + 2 files changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 487533c35ddb..4a9077c52444 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 2bcc9a6f1bfc..052f143e2e0e 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From e41d12f539f7c8bac9b0897031fee6cc9158a6be Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Sep 2021 14:33:13 +0200 Subject: mm: don't include in There is no need to pull blk-cgroup.h and thus blkdev.h in here, so break the include chain. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20210920123328.1399408-3-hch@lst.de Signed-off-by: Jens Axboe --- arch/powerpc/platforms/cell/spufs/inode.c | 1 + block/blk-iolatency.c | 1 + block/blk-mq.c | 1 + block/bounce.c | 1 + drivers/md/dm-ima.c | 1 + fs/btrfs/compression.c | 1 + fs/btrfs/ctree.c | 1 + fs/f2fs/compress.c | 1 + fs/orangefs/super.c | 1 + fs/ramfs/inode.c | 1 + include/linux/backing-dev.h | 3 ++- mm/backing-dev.c | 3 ++- mm/swapfile.c | 2 +- 13 files changed, 15 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index bed05b644c2c..cb25acccd746 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index c0545f9da549..6593c7123b97 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -74,6 +74,7 @@ #include #include #include +#include #include "blk-rq-qos.h" #include "blk-stat.h" #include "blk.h" diff --git a/block/blk-mq.c b/block/blk-mq.c index bc026372de43..0fefe00cebd4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/block/bounce.c b/block/bounce.c index 05fc7148489d..7af1a72835b9 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c index 2c5edfbd7711..957999998d70 100644 --- a/drivers/md/dm-ima.c +++ b/drivers/md/dm-ima.c @@ -12,6 +12,7 @@ #include "dm-ima.h" #include +#include #include #include #include diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 7869ad12bc6e..ddc4f5436cc9 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 84627cbd5b5b..66290b214f2b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -7,6 +7,7 @@ #include #include #include +#include 
#include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index c1bf9ad4c220..20a083dc9042 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 2f2e430461b2..8bb0a53a658b 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -11,6 +11,7 @@ #include #include +#include /* a cache for orangefs-inode objects (i.e. orangefs inode private data) */ static struct kmem_cache *orangefs_inode_cache; diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 65e7e56005b8..e2302342a67f 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "internal.h" struct ramfs_mount_opts { diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index ac7f231b8825..843bf4be675f 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -15,10 +15,11 @@ #include #include #include -#include #include #include +struct blkcg; + static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi) { kref_get(&bdi->refcnt); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 4a9d4e27d0d9..5245744437dc 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -2,8 +2,9 @@ #include #include -#include #include +#include +#include #include #include #include diff --git a/mm/swapfile.c b/mm/swapfile.c index 22d10f713848..30db362749c0 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3 From ccdf774189b6466457ca9c7de1fe9ed18547d249 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Sep 2021 14:33:14 +0200 Subject: mm: don't include in Move inode_to_bdi out of line to avoid having to include blkdev.h. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20210920123328.1399408-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/mtd/mtdsuper.c | 1 + fs/ntfs/file.c | 1 + fs/ntfs3/file.c | 1 + fs/orangefs/inode.c | 2 +- include/linux/backing-dev.h | 16 +--------------- mm/backing-dev.c | 16 ++++++++++++++++ 6 files changed, 21 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c index 38b6aa849c63..5ff001140ef4 100644 --- a/drivers/mtd/mtdsuper.c +++ b/drivers/mtd/mtdsuper.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "mtdcore.h" diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index ab4f3362466d..373dbb627657 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -5,6 +5,7 @@ * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc. */ +#include #include #include #include diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 43b1451bff53..a3cd3c3f091e 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index c1bb4c4b5d67..e5e3e500ed46 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -10,7 +10,7 @@ * Linux VFS inode operations. 
*/ -#include +#include #include #include "protocol.h" #include "orangefs-kernel.h" diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 843bf4be675f..4ac7ce096013 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -134,20 +133,7 @@ static inline bool writeback_in_progress(struct bdi_writeback *wb) return test_bit(WB_writeback_running, &wb->state); } -static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) -{ - struct super_block *sb; - - if (!inode) - return &noop_backing_dev_info; - - sb = inode->i_sb; -#ifdef CONFIG_BLOCK - if (sb_is_blkdev_sb(sb)) - return I_BDEV(inode)->bd_disk->bdi; -#endif - return sb->s_bdi; -} +struct backing_dev_info *inode_to_bdi(struct inode *inode); static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) { diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 5245744437dc..c878d995af06 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -978,6 +978,22 @@ void bdi_put(struct backing_dev_info *bdi) } EXPORT_SYMBOL(bdi_put); +struct backing_dev_info *inode_to_bdi(struct inode *inode) +{ + struct super_block *sb; + + if (!inode) + return &noop_backing_dev_info; + + sb = inode->i_sb; +#ifdef CONFIG_BLOCK + if (sb_is_blkdev_sb(sb)) + return I_BDEV(inode)->bd_disk->bdi; +#endif + return sb->s_bdi; +} +EXPORT_SYMBOL(inode_to_bdi); + const char *bdi_dev_name(struct backing_dev_info *bdi) { if (!bdi || !bdi->dev) -- cgit v1.2.3 From 94c2ed58d0d856a35c04365bdb39fee6e77547de Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Oct 2021 13:12:11 +0200 Subject: direct-io: remove blk_poll support The polling support in the legacy direct-io support is a little crufty. It already doesn't support the asynchronous polling needed for io_uring polling, and is hard to adopt to upcoming changes in the polling interfaces. Given that all the major file systems already use the iomap direct I/O code, just drop the polling support. 
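As an illustration (not part of the commit itself), the practical effect on the completion wait in dio_await_one() is that the IOCB_HIPRI spin-poll branch goes away and the submitter always sleeps until bio completion wakes it; dio->bio_cookie, the cookie that branch consumed, is removed from struct dio. A minimal before/after sketch, simplified from the hunks below:

	/* Illustration only -- not part of the patch. */

	/* before: spin in blk_poll() when the iocb requested polled I/O */
	if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
	    !blk_poll(dio->bio_disk->queue, dio->bio_cookie, true))
		blk_io_schedule();

	/* after: always sleep; the bio completion handler wakes dio->waiter */
	blk_io_schedule();
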
Signed-off-by: Christoph Hellwig Tested-by: Mark Wunderlich Link: https://lore.kernel.org/r/20211012111226.760968-2-hch@lst.de Signed-off-by: Jens Axboe --- fs/direct-io.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/direct-io.c b/fs/direct-io.c index b2e86e739d7a..453dcff0e7f5 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -119,7 +119,6 @@ struct dio { int flags; /* doesn't change */ int op; int op_flags; - blk_qc_t bio_cookie; struct gendisk *bio_disk; struct inode *inode; loff_t i_size; /* i_size when submitted */ @@ -438,11 +437,10 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) dio->bio_disk = bio->bi_bdev->bd_disk; - if (sdio->submit_io) { + if (sdio->submit_io) sdio->submit_io(bio, dio->inode, sdio->logical_offset_in_bio); - dio->bio_cookie = BLK_QC_T_NONE; - } else - dio->bio_cookie = submit_bio(bio); + else + submit_bio(bio); sdio->bio = NULL; sdio->boundary = 0; @@ -481,9 +479,7 @@ static struct bio *dio_await_one(struct dio *dio) __set_current_state(TASK_UNINTERRUPTIBLE); dio->waiter = current; spin_unlock_irqrestore(&dio->bio_lock, flags); - if (!(dio->iocb->ki_flags & IOCB_HIPRI) || - !blk_poll(dio->bio_disk->queue, dio->bio_cookie, true)) - blk_io_schedule(); + blk_io_schedule(); /* wake up sets us TASK_RUNNING */ spin_lock_irqsave(&dio->bio_lock, flags); dio->waiter = NULL; @@ -1214,8 +1210,6 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, } else { dio->op = REQ_OP_READ; } - if (iocb->ki_flags & IOCB_HIPRI) - dio->op_flags |= REQ_HIPRI; /* * For AIO O_(D)SYNC writes we need to defer completions to a workqueue -- cgit v1.2.3 From f79d474905fec0bfae1244e75571b7916fe02ea2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Oct 2021 13:12:13 +0200 Subject: iomap: don't try to poll multi-bio I/Os in __iomap_dio_rw If an iocb is split into multiple bios we can't poll for both. So don't bother to even try to poll in that case. Signed-off-by: Christoph Hellwig Tested-by: Mark Wunderlich Link: https://lore.kernel.org/r/20211012111226.760968-4-hch@lst.de Signed-off-by: Jens Axboe --- fs/iomap/direct-io.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 4ecd255e0511..560ae967f70e 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -282,6 +282,13 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, if (!iov_iter_count(dio->submit.iter)) goto out; + /* + * We can only poll for single bio I/Os. + */ + if (need_zeroout || + ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) + dio->iocb->ki_flags &= ~IOCB_HIPRI; + if (need_zeroout) { /* zero out from the start of the block to the write offset */ pad = pos & (fs_block_size - 1); @@ -339,6 +346,11 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS); + /* + * We can only poll for single bio I/Os. + */ + if (nr_pages) + dio->iocb->ki_flags &= ~IOCB_HIPRI; iomap_dio_submit_bio(iter, dio, bio, pos); pos += n; } while (nr_pages); @@ -565,8 +577,15 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, inode_dio_begin(inode); blk_start_plug(&plug); - while ((ret = iomap_iter(&iomi, ops)) > 0) + while ((ret = iomap_iter(&iomi, ops)) > 0) { iomi.processed = iomap_dio_iter(&iomi, dio); + + /* + * We can only poll for single bio I/Os. 
+ */ + iocb->ki_flags &= ~IOCB_HIPRI; + } + blk_finish_plug(&plug); /* -- cgit v1.2.3 From 30da1b45b130c70945b033900f45c9d61d6f3b4a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Oct 2021 13:12:14 +0200 Subject: io_uring: fix a layering violation in io_iopoll_req_issued syscall-level code can't just poke into the details of the poll cookie, which is private information of the block layer. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211012111226.760968-5-hch@lst.de Signed-off-by: Jens Axboe --- fs/io_uring.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index e68d27829bb2..d2e86788c872 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2738,19 +2738,12 @@ static void io_iopoll_req_issued(struct io_kiocb *req) ctx->poll_multi_queue = false; } else if (!ctx->poll_multi_queue) { struct io_kiocb *list_req; - unsigned int queue_num0, queue_num1; list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb, inflight_entry); - if (list_req->file != req->file) { + if (list_req->file != req->file) ctx->poll_multi_queue = true; - } else { - queue_num0 = blk_qc_t_to_queue_num(list_req->rw.kiocb.ki_cookie); - queue_num1 = blk_qc_t_to_queue_num(req->rw.kiocb.ki_cookie); - if (queue_num0 != queue_num1) - ctx->poll_multi_queue = true; - } } /* -- cgit v1.2.3 From ef99b2d37666b7a600baab9e1c4944436652b0a2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Oct 2021 13:12:19 +0200 Subject: block: replace the spin argument to blk_iopoll with a flags argument Switch the boolean spin argument to blk_poll to passing a set of flags instead. This will allow to control polling behavior in a more fine grained way. Signed-off-by: Christoph Hellwig Tested-by: Mark Wunderlich Link: https://lore.kernel.org/r/20211012111226.760968-10-hch@lst.de [axboe: adapt to changed io_uring iopoll] Signed-off-by: Jens Axboe --- block/blk-exec.c | 2 +- block/blk-mq.c | 17 +++++++---------- block/fops.c | 8 ++++---- fs/io_uring.c | 9 +++++---- fs/iomap/direct-io.c | 6 +++--- include/linux/blkdev.h | 4 +++- include/linux/fs.h | 2 +- include/linux/iomap.h | 2 +- mm/page_io.c | 2 +- 9 files changed, 26 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/block/blk-exec.c b/block/blk-exec.c index d6cd501c0d34..1fa7f25e5726 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -71,7 +71,7 @@ static bool blk_rq_is_poll(struct request *rq) static void blk_rq_poll_completion(struct request *rq, struct completion *wait) { do { - blk_poll(rq->q, request_to_qc_t(rq->mq_hctx, rq), true); + blk_poll(rq->q, request_to_qc_t(rq->mq_hctx, rq), 0); cond_resched(); } while (!completion_done(wait)); } diff --git a/block/blk-mq.c b/block/blk-mq.c index 7d0d947921a6..6609e10657a8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4052,7 +4052,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, blk_qc_t qc) } static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, - bool spin) + unsigned int flags) { struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie); long state = get_current_state(); @@ -4075,7 +4075,7 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, if (task_is_running(current)) return 1; - if (ret < 0 || !spin) + if (ret < 0 || (flags & BLK_POLL_ONESHOT)) break; cpu_relax(); } while (!need_resched()); @@ -4088,15 +4088,13 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, * blk_poll - poll for IO completions * @q: the queue * @cookie: 
cookie passed back at IO submission time - * @spin: whether to spin for completions + * @flags: BLK_POLL_* flags that control the behavior * * Description: * Poll for completions on the passed in queue. Returns number of - * completed entries found. If @spin is true, then blk_poll will continue - * looping until at least one completion is found, unless the task is - * otherwise marked running (or we need to reschedule). + * completed entries found. */ -int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) +int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags) { if (cookie == BLK_QC_T_NONE || !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) @@ -4105,12 +4103,11 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) if (current->plug) blk_flush_plug_list(current->plug, false); - /* If specified not to spin, we also should not sleep. */ - if (spin && q->poll_nsec != BLK_MQ_POLL_CLASSIC) { + if (q->poll_nsec != BLK_MQ_POLL_CLASSIC) { if (blk_mq_poll_hybrid(q, cookie)) return 1; } - return blk_mq_poll_classic(q, cookie, spin); + return blk_mq_poll_classic(q, cookie, flags); } EXPORT_SYMBOL_GPL(blk_poll); diff --git a/block/fops.c b/block/fops.c index 15324f2e5a91..db8f2fe68dd2 100644 --- a/block/fops.c +++ b/block/fops.c @@ -108,7 +108,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, if (!READ_ONCE(bio.bi_private)) break; if (!(iocb->ki_flags & IOCB_HIPRI) || - !blk_poll(bdev_get_queue(bdev), qc, true)) + !blk_poll(bdev_get_queue(bdev), qc, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); @@ -141,12 +141,12 @@ struct blkdev_dio { static struct bio_set blkdev_dio_pool; -static int blkdev_iopoll(struct kiocb *kiocb, bool wait) +static int blkdev_iopoll(struct kiocb *kiocb, unsigned int flags) { struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host); struct request_queue *q = bdev_get_queue(bdev); - return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait); + return blk_poll(q, READ_ONCE(kiocb->ki_cookie), flags); } static void blkdev_bio_end_io(struct bio *bio) @@ -297,7 +297,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, if (!READ_ONCE(dio->waiter)) break; - if (!do_poll || !blk_poll(bdev_get_queue(bdev), qc, true)) + if (!do_poll || !blk_poll(bdev_get_queue(bdev), qc, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); diff --git a/fs/io_uring.c b/fs/io_uring.c index d2e86788c872..541fec2bd49a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2457,14 +2457,15 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, long min) { struct io_kiocb *req, *tmp; + unsigned int poll_flags = 0; LIST_HEAD(done); - bool spin; /* * Only spin for completions if we don't have multiple devices hanging * off our complete list, and we're under the requested amount. 
*/ - spin = !ctx->poll_multi_queue && *nr_events < min; + if (ctx->poll_multi_queue || *nr_events >= min) + poll_flags |= BLK_POLL_ONESHOT; list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) { struct kiocb *kiocb = &req->rw.kiocb; @@ -2482,11 +2483,11 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, if (!list_empty(&done)) break; - ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin); + ret = kiocb->ki_filp->f_op->iopoll(kiocb, poll_flags); if (unlikely(ret < 0)) return ret; else if (ret) - spin = false; + poll_flags |= BLK_POLL_ONESHOT; /* iopoll may have completed current req */ if (READ_ONCE(req->iopoll_completed)) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 560ae967f70e..236aba256cd1 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -49,13 +49,13 @@ struct iomap_dio { }; }; -int iomap_dio_iopoll(struct kiocb *kiocb, bool spin) +int iomap_dio_iopoll(struct kiocb *kiocb, unsigned int flags) { struct request_queue *q = READ_ONCE(kiocb->private); if (!q) return 0; - return blk_poll(q, READ_ONCE(kiocb->ki_cookie), spin); + return blk_poll(q, READ_ONCE(kiocb->ki_cookie), flags); } EXPORT_SYMBOL_GPL(iomap_dio_iopoll); @@ -642,7 +642,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (!(iocb->ki_flags & IOCB_HIPRI) || !dio->submit.last_queue || !blk_poll(dio->submit.last_queue, - dio->submit.cookie, true)) + dio->submit.cookie, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 17705c970d7e..e177346bc020 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -564,7 +564,9 @@ extern const char *blk_op_str(unsigned int op); int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); -int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin); +/* only poll the hardware once, don't continue until a completion was found */ +#define BLK_POLL_ONESHOT (1 << 0) +int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { diff --git a/include/linux/fs.h b/include/linux/fs.h index e7a633353fd2..c443cddf414f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2075,7 +2075,7 @@ struct file_operations { ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); - int (*iopoll)(struct kiocb *kiocb, bool spin); + int (*iopoll)(struct kiocb *kiocb, unsigned int flags); int (*iterate) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 24f8489583ca..1e86b65567c2 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -337,7 +337,7 @@ struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, unsigned int dio_flags); ssize_t iomap_dio_complete(struct iomap_dio *dio); -int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); +int iomap_dio_iopoll(struct kiocb *kiocb, unsigned int flags); #ifdef CONFIG_SWAP struct file; diff --git a/mm/page_io.c b/mm/page_io.c index c493ce9ebcf5..5d5543fcefa4 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -428,7 +428,7 @@ int swap_readpage(struct page *page, bool synchronous) 
if (!READ_ONCE(bio->bi_private)) break; - if (!blk_poll(disk->queue, qc, true)) + if (!blk_poll(disk->queue, qc, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); -- cgit v1.2.3 From d729cf9acb9311956c8a37113dcfa0160a2d9665 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Oct 2021 13:12:20 +0200 Subject: io_uring: don't sleep when polling for I/O There is no point in sleeping for the expected I/O completion timeout in the io_uring async polling model as we never poll for a specific I/O. Signed-off-by: Christoph Hellwig Tested-by: Mark Wunderlich Link: https://lore.kernel.org/r/20211012111226.760968-11-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 3 ++- fs/io_uring.c | 2 +- include/linux/blkdev.h | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/block/blk-mq.c b/block/blk-mq.c index 6609e10657a8..97c24e461d0a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4103,7 +4103,8 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags) if (current->plug) blk_flush_plug_list(current->plug, false); - if (q->poll_nsec != BLK_MQ_POLL_CLASSIC) { + if (!(flags & BLK_POLL_NOSLEEP) && + q->poll_nsec != BLK_MQ_POLL_CLASSIC) { if (blk_mq_poll_hybrid(q, cookie)) return 1; } diff --git a/fs/io_uring.c b/fs/io_uring.c index 541fec2bd49a..c5066146b8de 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2457,7 +2457,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, long min) { struct io_kiocb *req, *tmp; - unsigned int poll_flags = 0; + unsigned int poll_flags = BLK_POLL_NOSLEEP; LIST_HEAD(done); /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e177346bc020..2b80c98fc373 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -566,6 +566,8 @@ blk_status_t errno_to_blk_status(int errno); /* only poll the hardware once, don't continue until a completion was found */ #define BLK_POLL_ONESHOT (1 << 0) +/* do not sleep to wait for the expected completion time */ +#define BLK_POLL_NOSLEEP (1 << 1) int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) -- cgit v1.2.3 From 3e08773c3841e9db7a520908cc2b136a77d275ff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Oct 2021 13:12:24 +0200 Subject: block: switch polling to be bio based Replace the blk_poll interface that requires the caller to keep a queue and cookie from the submissions with polling based on the bio. Polling for the bio itself leads to a few advantages: - the cookie construction can made entirely private in blk-mq.c - the caller does not need to remember the request_queue and cookie separately and thus sidesteps their lifetime issues - keeping the device and the cookie inside the bio allows to trivially support polling BIOs remapping by stacking drivers - a lot of code to propagate the cookie back up the submission path can be removed entirely. 
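As an illustration (not part of the commit itself), here is the caller-visible shape of the change, sketched from the __blkdev_direct_IO_simple() hunks in this commit: before, a synchronous polled submitter had to hold on to the request_queue and the blk_qc_t cookie returned by submit_bio(); afterwards the cookie is stored in bio->bi_cookie at submission time and polling only needs the bio.

	/* Illustration only -- not part of the patch. */

	/* before: submit_bio() returns a cookie, blk_poll() needs queue + cookie */
	qc = submit_bio(&bio);
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(bio.bi_private))
			break;
		if (!(iocb->ki_flags & IOCB_HIPRI) ||
		    !blk_poll(bdev_get_queue(bdev), qc, 0))
			blk_io_schedule();
	}
	__set_current_state(TASK_RUNNING);

	/* after: submit_bio() returns void, bio_poll() takes just the bio */
	submit_bio(&bio);
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(bio.bi_private))
			break;
		if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, 0))
			blk_io_schedule();
	}
	__set_current_state(TASK_RUNNING);

Drivers and file systems that expose ->iopoll can then use the generic iocb_bio_iopoll() helper, which expects the submitter to have stored the polled bio in iocb->private (see the blk-core.c and block/fops.c hunks below).
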
Signed-off-by: Christoph Hellwig Tested-by: Mark Wunderlich Link: https://lore.kernel.org/r/20211012111226.760968-15-hch@lst.de Signed-off-by: Jens Axboe --- arch/m68k/emu/nfblock.c | 3 +- arch/xtensa/platforms/iss/simdisk.c | 3 +- block/bio.c | 1 + block/blk-core.c | 127 +++++++++++++++++++++++++++--------- block/blk-exec.c | 10 ++- block/blk-mq.c | 72 +++++++------------- block/blk-mq.h | 2 + block/fops.c | 25 +++---- drivers/block/brd.c | 12 ++-- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_req.c | 3 +- drivers/block/n64cart.c | 12 ++-- drivers/block/null_blk/main.c | 3 +- drivers/block/pktcdvd.c | 7 +- drivers/block/ps3vram.c | 6 +- drivers/block/rsxx/dev.c | 7 +- drivers/block/zram/zram_drv.c | 10 +-- drivers/md/bcache/request.c | 13 ++-- drivers/md/bcache/request.h | 4 +- drivers/md/dm.c | 28 +++----- drivers/md/md.c | 10 ++- drivers/nvdimm/blk.c | 5 +- drivers/nvdimm/btt.c | 5 +- drivers/nvdimm/pmem.c | 3 +- drivers/nvme/host/multipath.c | 6 +- drivers/s390/block/dcssblk.c | 7 +- fs/btrfs/inode.c | 8 +-- fs/ext4/file.c | 2 +- fs/gfs2/file.c | 4 +- fs/iomap/direct-io.c | 36 ++++------ fs/xfs/xfs_file.c | 2 +- fs/zonefs/super.c | 2 +- include/linux/bio.h | 2 +- include/linux/blk-mq.h | 15 +---- include/linux/blk_types.h | 12 ++-- include/linux/blkdev.h | 8 ++- include/linux/fs.h | 6 +- include/linux/iomap.h | 5 +- mm/page_io.c | 8 +-- 39 files changed, 232 insertions(+), 264 deletions(-) (limited to 'fs') diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index 9a8394e96388..4ef457ba5220 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -58,7 +58,7 @@ struct nfhd_device { struct gendisk *disk; }; -static blk_qc_t nfhd_submit_bio(struct bio *bio) +static void nfhd_submit_bio(struct bio *bio) { struct nfhd_device *dev = bio->bi_bdev->bd_disk->private_data; struct bio_vec bvec; @@ -76,7 +76,6 @@ static blk_qc_t nfhd_submit_bio(struct bio *bio) sec += len; } bio_endio(bio); - return BLK_QC_T_NONE; } static int nfhd_getgeo(struct block_device *bdev, struct hd_geometry *geo) diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index 3cdfa00738e0..ddd1fe3db474 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -100,7 +100,7 @@ static void simdisk_transfer(struct simdisk *dev, unsigned long sector, spin_unlock(&dev->lock); } -static blk_qc_t simdisk_submit_bio(struct bio *bio) +static void simdisk_submit_bio(struct bio *bio) { struct simdisk *dev = bio->bi_bdev->bd_disk->private_data; struct bio_vec bvec; @@ -118,7 +118,6 @@ static blk_qc_t simdisk_submit_bio(struct bio *bio) } bio_endio(bio); - return BLK_QC_T_NONE; } static int simdisk_open(struct block_device *bdev, fmode_t mode) diff --git a/block/bio.c b/block/bio.c index df45f4b996ac..a3c9ff23a036 100644 --- a/block/bio.c +++ b/block/bio.c @@ -282,6 +282,7 @@ void bio_init(struct bio *bio, struct bio_vec *table, atomic_set(&bio->__bi_remaining, 1); atomic_set(&bio->__bi_cnt, 1); + bio->bi_cookie = BLK_QC_T_NONE; bio->bi_max_vecs = max_vecs; bio->bi_io_vec = table; diff --git a/block/blk-core.c b/block/blk-core.c index 8eb0e08d5395..f008c38ae967 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -915,25 +915,22 @@ end_io: return false; } -static blk_qc_t __submit_bio(struct bio *bio) +static void __submit_bio(struct bio *bio) { struct gendisk *disk = bio->bi_bdev->bd_disk; - blk_qc_t ret = BLK_QC_T_NONE; if (unlikely(bio_queue_enter(bio) != 0)) - return BLK_QC_T_NONE; + return; if (!submit_bio_checks(bio) || 
!blk_crypto_bio_prep(&bio)) goto queue_exit; - if (disk->fops->submit_bio) { - ret = disk->fops->submit_bio(bio); - goto queue_exit; + if (!disk->fops->submit_bio) { + blk_mq_submit_bio(bio); + return; } - return blk_mq_submit_bio(bio); - + disk->fops->submit_bio(bio); queue_exit: blk_queue_exit(disk->queue); - return ret; } /* @@ -955,10 +952,9 @@ queue_exit: * bio_list_on_stack[1] contains bios that were submitted before the current * ->submit_bio_bio, but that haven't been processed yet. */ -static blk_qc_t __submit_bio_noacct(struct bio *bio) +static void __submit_bio_noacct(struct bio *bio) { struct bio_list bio_list_on_stack[2]; - blk_qc_t ret = BLK_QC_T_NONE; BUG_ON(bio->bi_next); @@ -975,7 +971,7 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio) bio_list_on_stack[1] = bio_list_on_stack[0]; bio_list_init(&bio_list_on_stack[0]); - ret = __submit_bio(bio); + __submit_bio(bio); /* * Sort new bios into those for a lower level and those for the @@ -998,22 +994,19 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio) } while ((bio = bio_list_pop(&bio_list_on_stack[0]))); current->bio_list = NULL; - return ret; } -static blk_qc_t __submit_bio_noacct_mq(struct bio *bio) +static void __submit_bio_noacct_mq(struct bio *bio) { struct bio_list bio_list[2] = { }; - blk_qc_t ret; current->bio_list = bio_list; do { - ret = __submit_bio(bio); + __submit_bio(bio); } while ((bio = bio_list_pop(&bio_list[0]))); current->bio_list = NULL; - return ret; } /** @@ -1025,7 +1018,7 @@ static blk_qc_t __submit_bio_noacct_mq(struct bio *bio) * systems and other upper level users of the block layer should use * submit_bio() instead. */ -blk_qc_t submit_bio_noacct(struct bio *bio) +void submit_bio_noacct(struct bio *bio) { /* * We only want one ->submit_bio to be active at a time, else stack @@ -1033,14 +1026,12 @@ blk_qc_t submit_bio_noacct(struct bio *bio) * to collect a list of requests submited by a ->submit_bio method while * it is active, and then process them after it returned. */ - if (current->bio_list) { + if (current->bio_list) bio_list_add(¤t->bio_list[0], bio); - return BLK_QC_T_NONE; - } - - if (!bio->bi_bdev->bd_disk->fops->submit_bio) - return __submit_bio_noacct_mq(bio); - return __submit_bio_noacct(bio); + else if (!bio->bi_bdev->bd_disk->fops->submit_bio) + __submit_bio_noacct_mq(bio); + else + __submit_bio_noacct(bio); } EXPORT_SYMBOL(submit_bio_noacct); @@ -1057,10 +1048,10 @@ EXPORT_SYMBOL(submit_bio_noacct); * in @bio. The bio must NOT be touched by thecaller until ->bi_end_io() has * been called. */ -blk_qc_t submit_bio(struct bio *bio) +void submit_bio(struct bio *bio) { if (blkcg_punt_bio_submit(bio)) - return BLK_QC_T_NONE; + return; /* * If it's a regular read/write or a barrier with data attached, @@ -1092,19 +1083,91 @@ blk_qc_t submit_bio(struct bio *bio) if (unlikely(bio_op(bio) == REQ_OP_READ && bio_flagged(bio, BIO_WORKINGSET))) { unsigned long pflags; - blk_qc_t ret; psi_memstall_enter(&pflags); - ret = submit_bio_noacct(bio); + submit_bio_noacct(bio); psi_memstall_leave(&pflags); - - return ret; + return; } - return submit_bio_noacct(bio); + submit_bio_noacct(bio); } EXPORT_SYMBOL(submit_bio); +/** + * bio_poll - poll for BIO completions + * @bio: bio to poll for + * @flags: BLK_POLL_* flags that control the behavior + * + * Poll for completions on queue associated with the bio. Returns number of + * completed entries found. + * + * Note: the caller must either be the context that submitted @bio, or + * be in a RCU critical section to prevent freeing of @bio. 
+ */ +int bio_poll(struct bio *bio, unsigned int flags) +{ + struct request_queue *q = bio->bi_bdev->bd_disk->queue; + blk_qc_t cookie = READ_ONCE(bio->bi_cookie); + int ret; + + if (cookie == BLK_QC_T_NONE || + !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + return 0; + + if (current->plug) + blk_flush_plug_list(current->plug, false); + + if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT)) + return 0; + if (WARN_ON_ONCE(!queue_is_mq(q))) + ret = 0; /* not yet implemented, should not happen */ + else + ret = blk_mq_poll(q, cookie, flags); + blk_queue_exit(q); + return ret; +} +EXPORT_SYMBOL_GPL(bio_poll); + +/* + * Helper to implement file_operations.iopoll. Requires the bio to be stored + * in iocb->private, and cleared before freeing the bio. + */ +int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags) +{ + struct bio *bio; + int ret = 0; + + /* + * Note: the bio cache only uses SLAB_TYPESAFE_BY_RCU, so bio can + * point to a freshly allocated bio at this point. If that happens + * we have a few cases to consider: + * + * 1) the bio is beeing initialized and bi_bdev is NULL. We can just + * simply nothing in this case + * 2) the bio points to a not poll enabled device. bio_poll will catch + * this and return 0 + * 3) the bio points to a poll capable device, including but not + * limited to the one that the original bio pointed to. In this + * case we will call into the actual poll method and poll for I/O, + * even if we don't need to, but it won't cause harm either. + * + * For cases 2) and 3) above the RCU grace period ensures that bi_bdev + * is still allocated. Because partitions hold a reference to the whole + * device bdev and thus disk, the disk is also still valid. Grabbing + * a reference to the queue in bio_poll() ensures the hctxs and requests + * are still valid as well. + */ + rcu_read_lock(); + bio = READ_ONCE(kiocb->private); + if (bio && bio->bi_bdev) + ret = bio_poll(bio, flags); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL_GPL(iocb_bio_iopoll); + /** * blk_cloned_rq_check_limits - Helper function to check a cloned request * for the new queue limits diff --git a/block/blk-exec.c b/block/blk-exec.c index 1fa7f25e5726..55f0cd34b37b 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -65,13 +65,19 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); static bool blk_rq_is_poll(struct request *rq) { - return rq->mq_hctx && rq->mq_hctx->type == HCTX_TYPE_POLL; + if (!rq->mq_hctx) + return false; + if (rq->mq_hctx->type != HCTX_TYPE_POLL) + return false; + if (WARN_ON_ONCE(!rq->bio)) + return false; + return true; } static void blk_rq_poll_completion(struct request *rq, struct completion *wait) { do { - blk_poll(rq->q, request_to_qc_t(rq->mq_hctx, rq), 0); + bio_poll(rq->bio, 0); cond_resched(); } while (!completion_done(wait)); } diff --git a/block/blk-mq.c b/block/blk-mq.c index a34ffcf861c3..0860f622099f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -65,6 +65,9 @@ static int blk_mq_poll_stats_bkt(const struct request *rq) return bucket; } +#define BLK_QC_T_SHIFT 16 +#define BLK_QC_T_INTERNAL (1U << 31) + static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q, blk_qc_t qc) { @@ -81,6 +84,13 @@ static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx, return blk_mq_tag_to_rq(hctx->tags, tag); } +static inline blk_qc_t blk_rq_to_qc(struct request *rq) +{ + return (rq->mq_hctx->queue_num << BLK_QC_T_SHIFT) | + (rq->tag != -1 ? 
+ rq->tag : (rq->internal_tag | BLK_QC_T_INTERNAL)); +} + /* * Check if any of the ctx, dispatch list or elevator * have pending work in this hardware queue. @@ -819,6 +829,8 @@ void blk_mq_start_request(struct request *rq) if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE) q->integrity.profile->prepare_fn(rq); #endif + if (rq->bio && rq->bio->bi_opf & REQ_POLLED) + WRITE_ONCE(rq->bio->bi_cookie, blk_rq_to_qc(rq)); } EXPORT_SYMBOL(blk_mq_start_request); @@ -2045,19 +2057,15 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, } static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, - struct request *rq, - blk_qc_t *cookie, bool last) + struct request *rq, bool last) { struct request_queue *q = rq->q; struct blk_mq_queue_data bd = { .rq = rq, .last = last, }; - blk_qc_t new_cookie; blk_status_t ret; - new_cookie = request_to_qc_t(hctx, rq); - /* * For OK queue, we are done. For error, caller may kill it. * Any other error (busy), just add it to our list as we @@ -2067,7 +2075,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, switch (ret) { case BLK_STS_OK: blk_mq_update_dispatch_busy(hctx, false); - *cookie = new_cookie; break; case BLK_STS_RESOURCE: case BLK_STS_DEV_RESOURCE: @@ -2076,7 +2083,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, break; default: blk_mq_update_dispatch_busy(hctx, false); - *cookie = BLK_QC_T_NONE; break; } @@ -2085,7 +2091,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, - blk_qc_t *cookie, bool bypass_insert, bool last) { struct request_queue *q = rq->q; @@ -2119,7 +2124,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, goto insert; } - return __blk_mq_issue_directly(hctx, rq, cookie, last); + return __blk_mq_issue_directly(hctx, rq, last); insert: if (bypass_insert) return BLK_STS_RESOURCE; @@ -2133,7 +2138,6 @@ insert: * blk_mq_try_issue_directly - Try to send a request directly to device driver. * @hctx: Pointer of the associated hardware queue. * @rq: Pointer to request to be sent. - * @cookie: Request queue cookie. * * If the device has enough resources to accept a new request now, send the * request directly to device driver. Else, insert at hctx->dispatch queue, so @@ -2141,7 +2145,7 @@ insert: * queue have higher priority. 
*/ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, - struct request *rq, blk_qc_t *cookie) + struct request *rq) { blk_status_t ret; int srcu_idx; @@ -2150,7 +2154,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, hctx_lock(hctx, &srcu_idx); - ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true); + ret = __blk_mq_try_issue_directly(hctx, rq, false, true); if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) blk_mq_request_bypass_insert(rq, false, true); else if (ret != BLK_STS_OK) @@ -2163,11 +2167,10 @@ blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last) { blk_status_t ret; int srcu_idx; - blk_qc_t unused_cookie; struct blk_mq_hw_ctx *hctx = rq->mq_hctx; hctx_lock(hctx, &srcu_idx); - ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last); + ret = __blk_mq_try_issue_directly(hctx, rq, true, last); hctx_unlock(hctx, srcu_idx); return ret; @@ -2247,10 +2250,8 @@ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) * * It will not queue the request if there is an error with the bio, or at the * request creation. - * - * Returns: Request queue cookie. */ -blk_qc_t blk_mq_submit_bio(struct bio *bio) +void blk_mq_submit_bio(struct bio *bio) { struct request_queue *q = bio->bi_bdev->bd_disk->queue; const int is_sync = op_is_sync(bio->bi_opf); @@ -2259,9 +2260,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) struct blk_plug *plug; struct request *same_queue_rq = NULL; unsigned int nr_segs; - blk_qc_t cookie; blk_status_t ret; - bool hipri; blk_queue_bounce(q, &bio); __blk_queue_split(&bio, &nr_segs); @@ -2278,8 +2277,6 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) rq_qos_throttle(q, bio); - hipri = bio->bi_opf & REQ_POLLED; - plug = blk_mq_plug(q, bio); if (plug && plug->cached_rq) { rq = plug->cached_rq; @@ -2310,8 +2307,6 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) rq_qos_track(q, rq, bio); - cookie = request_to_qc_t(rq->mq_hctx, rq); - blk_mq_bio_to_request(rq, bio, nr_segs); ret = blk_crypto_init_request(rq); @@ -2319,7 +2314,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) bio->bi_status = ret; bio_endio(bio); blk_mq_free_request(rq); - return BLK_QC_T_NONE; + return; } if (unlikely(is_flush_fua)) { @@ -2375,7 +2370,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) if (same_queue_rq) { trace_block_unplug(q, 1, true); blk_mq_try_issue_directly(same_queue_rq->mq_hctx, - same_queue_rq, &cookie); + same_queue_rq); } } else if ((q->nr_hw_queues > 1 && is_sync) || !rq->mq_hctx->dispatch_busy) { @@ -2383,18 +2378,15 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) * There is no scheduler and we can try to send directly * to the hardware. */ - blk_mq_try_issue_directly(rq->mq_hctx, rq, &cookie); + blk_mq_try_issue_directly(rq->mq_hctx, rq); } else { /* Default case. */ blk_mq_sched_insert_request(rq, false, true, true); } - if (!hipri) - return BLK_QC_T_NONE; - return cookie; + return; queue_exit: blk_queue_exit(q); - return BLK_QC_T_NONE; } static size_t order_to_size(unsigned int order) @@ -4084,25 +4076,8 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, return 0; } -/** - * blk_poll - poll for IO completions - * @q: the queue - * @cookie: cookie passed back at IO submission time - * @flags: BLK_POLL_* flags that control the behavior - * - * Description: - * Poll for completions on the passed in queue. Returns number of - * completed entries found. 
- */ -int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags) +int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags) { - if (cookie == BLK_QC_T_NONE || - !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) - return 0; - - if (current->plug) - blk_flush_plug_list(current->plug, false); - if (!(flags & BLK_POLL_NOSLEEP) && q->poll_nsec != BLK_MQ_POLL_CLASSIC) { if (blk_mq_poll_hybrid(q, cookie)) @@ -4110,7 +4085,6 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags) } return blk_mq_poll_classic(q, cookie, flags); } -EXPORT_SYMBOL_GPL(blk_poll); unsigned int blk_mq_rq_cpu(struct request *rq) { diff --git a/block/blk-mq.h b/block/blk-mq.h index a9fe01e14951..8be447995106 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -37,6 +37,8 @@ struct blk_mq_ctx { struct kobject kobj; } ____cacheline_aligned_in_smp; +void blk_mq_submit_bio(struct bio *bio); +int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags); void blk_mq_exit_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); diff --git a/block/fops.c b/block/fops.c index db8f2fe68dd2..ce1255529ba2 100644 --- a/block/fops.c +++ b/block/fops.c @@ -61,7 +61,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, bool should_dirty = false; struct bio bio; ssize_t ret; - blk_qc_t qc; if ((pos | iov_iter_alignment(iter)) & (bdev_logical_block_size(bdev) - 1)) @@ -102,13 +101,12 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, if (iocb->ki_flags & IOCB_HIPRI) bio_set_polled(&bio, iocb); - qc = submit_bio(&bio); + submit_bio(&bio); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (!READ_ONCE(bio.bi_private)) break; - if (!(iocb->ki_flags & IOCB_HIPRI) || - !blk_poll(bdev_get_queue(bdev), qc, 0)) + if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); @@ -141,14 +139,6 @@ struct blkdev_dio { static struct bio_set blkdev_dio_pool; -static int blkdev_iopoll(struct kiocb *kiocb, unsigned int flags) -{ - struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host); - struct request_queue *q = bdev_get_queue(bdev); - - return blk_poll(q, READ_ONCE(kiocb->ki_cookie), flags); -} - static void blkdev_bio_end_io(struct bio *bio) { struct blkdev_dio *dio = bio->bi_private; @@ -162,6 +152,8 @@ static void blkdev_bio_end_io(struct bio *bio) struct kiocb *iocb = dio->iocb; ssize_t ret; + WRITE_ONCE(iocb->private, NULL); + if (likely(!dio->bio.bi_status)) { ret = dio->size; iocb->ki_pos += ret; @@ -200,7 +192,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, bool do_poll = (iocb->ki_flags & IOCB_HIPRI); bool is_read = (iov_iter_rw(iter) == READ), is_sync; loff_t pos = iocb->ki_pos; - blk_qc_t qc = BLK_QC_T_NONE; int ret = 0; if ((pos | iov_iter_alignment(iter)) & @@ -262,9 +253,9 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, if (!nr_pages) { if (do_poll) bio_set_polled(bio, iocb); - qc = submit_bio(bio); + submit_bio(bio); if (do_poll) - WRITE_ONCE(iocb->ki_cookie, qc); + WRITE_ONCE(iocb->private, bio); break; } if (!dio->multi_bio) { @@ -297,7 +288,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, if (!READ_ONCE(dio->waiter)) break; - if (!do_poll || !blk_poll(bdev_get_queue(bdev), qc, 0)) + if (!do_poll || !bio_poll(bio, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); @@ -594,7 
+585,7 @@ const struct file_operations def_blk_fops = { .llseek = blkdev_llseek, .read_iter = blkdev_read_iter, .write_iter = blkdev_write_iter, - .iopoll = blkdev_iopoll, + .iopoll = iocb_bio_iopoll, .mmap = generic_file_mmap, .fsync = blkdev_fsync, .unlocked_ioctl = blkdev_ioctl, diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 530b31240203..aa0472718dce 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -282,7 +282,7 @@ out: return err; } -static blk_qc_t brd_submit_bio(struct bio *bio) +static void brd_submit_bio(struct bio *bio) { struct brd_device *brd = bio->bi_bdev->bd_disk->private_data; sector_t sector = bio->bi_iter.bi_sector; @@ -299,16 +299,14 @@ static blk_qc_t brd_submit_bio(struct bio *bio) err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, bio_op(bio), sector); - if (err) - goto io_error; + if (err) { + bio_io_error(bio); + return; + } sector += len >> SECTOR_SHIFT; } bio_endio(bio); - return BLK_QC_T_NONE; -io_error: - bio_io_error(bio); - return BLK_QC_T_NONE; } static int brd_rw_page(struct block_device *bdev, sector_t sector, diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 5d9181382ce1..6674a0b88341 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1448,7 +1448,7 @@ extern void conn_free_crypto(struct drbd_connection *connection); /* drbd_req */ extern void do_submit(struct work_struct *ws); extern void __drbd_make_request(struct drbd_device *, struct bio *); -extern blk_qc_t drbd_submit_bio(struct bio *bio); +void drbd_submit_bio(struct bio *bio); extern int drbd_read_remote(struct drbd_device *device, struct drbd_request *req); extern int is_valid_ar_handle(struct drbd_request *, sector_t); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 5ca233644d70..3235532ae077 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1596,7 +1596,7 @@ void do_submit(struct work_struct *ws) } } -blk_qc_t drbd_submit_bio(struct bio *bio) +void drbd_submit_bio(struct bio *bio) { struct drbd_device *device = bio->bi_bdev->bd_disk->private_data; @@ -1609,7 +1609,6 @@ blk_qc_t drbd_submit_bio(struct bio *bio) inc_ap_bio(device); __drbd_make_request(device, bio); - return BLK_QC_T_NONE; } static bool net_timeout_reached(struct drbd_request *net_req, diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index 26798da661bd..b168ca25b6c9 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -84,7 +84,7 @@ static bool n64cart_do_bvec(struct device *dev, struct bio_vec *bv, u32 pos) return true; } -static blk_qc_t n64cart_submit_bio(struct bio *bio) +static void n64cart_submit_bio(struct bio *bio) { struct bio_vec bvec; struct bvec_iter iter; @@ -92,16 +92,14 @@ static blk_qc_t n64cart_submit_bio(struct bio *bio) u32 pos = bio->bi_iter.bi_sector << SECTOR_SHIFT; bio_for_each_segment(bvec, bio, iter) { - if (!n64cart_do_bvec(dev, &bvec, pos)) - goto io_error; + if (!n64cart_do_bvec(dev, &bvec, pos)) { + bio_io_error(bio); + return; + } pos += bvec.bv_len; } bio_endio(bio); - return BLK_QC_T_NONE; -io_error: - bio_io_error(bio); - return BLK_QC_T_NONE; } static const struct block_device_operations n64cart_fops = { diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 187d779c8ca0..e5cbcf582233 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1422,7 +1422,7 @@ static struct nullb_queue *nullb_to_queue(struct nullb *nullb) return &nullb->queues[index]; } 
-static blk_qc_t null_submit_bio(struct bio *bio) +static void null_submit_bio(struct bio *bio) { sector_t sector = bio->bi_iter.bi_sector; sector_t nr_sectors = bio_sectors(bio); @@ -1434,7 +1434,6 @@ static blk_qc_t null_submit_bio(struct bio *bio) cmd->bio = bio; null_handle_cmd(cmd, sector, nr_sectors, bio_op(bio)); - return BLK_QC_T_NONE; } static bool should_timeout_request(struct request *rq) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 0f26b2510a75..cb52cce6fb03 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2400,7 +2400,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio) } } -static blk_qc_t pkt_submit_bio(struct bio *bio) +static void pkt_submit_bio(struct bio *bio) { struct pktcdvd_device *pd; char b[BDEVNAME_SIZE]; @@ -2423,7 +2423,7 @@ static blk_qc_t pkt_submit_bio(struct bio *bio) */ if (bio_data_dir(bio) == READ) { pkt_make_request_read(pd, bio); - return BLK_QC_T_NONE; + return; } if (!test_bit(PACKET_WRITABLE, &pd->flags)) { @@ -2455,10 +2455,9 @@ static blk_qc_t pkt_submit_bio(struct bio *bio) pkt_make_request_write(bio->bi_bdev->bd_disk->queue, split); } while (split != bio); - return BLK_QC_T_NONE; + return; end_io: bio_io_error(bio); - return BLK_QC_T_NONE; } static void pkt_init_queue(struct pktcdvd_device *pd) diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index c7b19e128b03..d1ebf193cb9a 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -578,7 +578,7 @@ out: return next; } -static blk_qc_t ps3vram_submit_bio(struct bio *bio) +static void ps3vram_submit_bio(struct bio *bio) { struct ps3_system_bus_device *dev = bio->bi_bdev->bd_disk->private_data; struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); @@ -594,13 +594,11 @@ static blk_qc_t ps3vram_submit_bio(struct bio *bio) spin_unlock_irq(&priv->lock); if (busy) - return BLK_QC_T_NONE; + return; do { bio = ps3vram_do_bio(dev, bio); } while (bio); - - return BLK_QC_T_NONE; } static const struct block_device_operations ps3vram_fops = { diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 1cc40b0ea761..268252380e88 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -50,7 +50,7 @@ struct rsxx_bio_meta { static struct kmem_cache *bio_meta_pool; -static blk_qc_t rsxx_submit_bio(struct bio *bio); +static void rsxx_submit_bio(struct bio *bio); /*----------------- Block Device Operations -----------------*/ static int rsxx_blkdev_ioctl(struct block_device *bdev, @@ -120,7 +120,7 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card, } } -static blk_qc_t rsxx_submit_bio(struct bio *bio) +static void rsxx_submit_bio(struct bio *bio) { struct rsxx_cardinfo *card = bio->bi_bdev->bd_disk->private_data; struct rsxx_bio_meta *bio_meta; @@ -169,7 +169,7 @@ static blk_qc_t rsxx_submit_bio(struct bio *bio) if (st) goto queue_err; - return BLK_QC_T_NONE; + return; queue_err: kmem_cache_free(bio_meta_pool, bio_meta); @@ -177,7 +177,6 @@ req_err: if (st) bio->bi_status = st; bio_endio(bio); - return BLK_QC_T_NONE; } /*----------------- Device Setup -------------------*/ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index fcaf2750f68f..a68297fb51a2 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1598,22 +1598,18 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) /* * Handler function for all zram I/O requests. 
*/ -static blk_qc_t zram_submit_bio(struct bio *bio) +static void zram_submit_bio(struct bio *bio) { struct zram *zram = bio->bi_bdev->bd_disk->private_data; if (!valid_io_request(zram, bio->bi_iter.bi_sector, bio->bi_iter.bi_size)) { atomic64_inc(&zram->stats.invalid_io); - goto error; + bio_io_error(bio); + return; } __zram_make_request(zram, bio); - return BLK_QC_T_NONE; - -error: - bio_io_error(bio); - return BLK_QC_T_NONE; } static void zram_slot_free_notify(struct block_device *bdev, diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 6d1de889baeb..23b28edae90f 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1163,7 +1163,7 @@ static void quit_max_writeback_rate(struct cache_set *c, /* Cached devices - read & write stuff */ -blk_qc_t cached_dev_submit_bio(struct bio *bio) +void cached_dev_submit_bio(struct bio *bio) { struct search *s; struct block_device *orig_bdev = bio->bi_bdev; @@ -1176,7 +1176,7 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio) dc->io_disable)) { bio->bi_status = BLK_STS_IOERR; bio_endio(bio); - return BLK_QC_T_NONE; + return; } if (likely(d->c)) { @@ -1222,8 +1222,6 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio) } else /* I/O request sent to backing device */ detached_dev_do_request(d, bio, orig_bdev, start_time); - - return BLK_QC_T_NONE; } static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode, @@ -1273,7 +1271,7 @@ static void flash_dev_nodata(struct closure *cl) continue_at(cl, search_free, NULL); } -blk_qc_t flash_dev_submit_bio(struct bio *bio) +void flash_dev_submit_bio(struct bio *bio) { struct search *s; struct closure *cl; @@ -1282,7 +1280,7 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio) if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) { bio->bi_status = BLK_STS_IOERR; bio_endio(bio); - return BLK_QC_T_NONE; + return; } s = search_alloc(bio, d, bio->bi_bdev, bio_start_io_acct(bio)); @@ -1298,7 +1296,7 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio) continue_at_nobarrier(&s->cl, flash_dev_nodata, bcache_wq); - return BLK_QC_T_NONE; + return; } else if (bio_data_dir(bio)) { bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &KEY(d->id, bio->bi_iter.bi_sector, 0), @@ -1314,7 +1312,6 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio) } continue_at(cl, search_free, NULL); - return BLK_QC_T_NONE; } static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode, diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index 82b38366a95d..38ab4856eaab 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -37,10 +37,10 @@ unsigned int bch_get_congested(const struct cache_set *c); void bch_data_insert(struct closure *cl); void bch_cached_dev_request_init(struct cached_dev *dc); -blk_qc_t cached_dev_submit_bio(struct bio *bio); +void cached_dev_submit_bio(struct bio *bio); void bch_flash_dev_request_init(struct bcache_device *d); -blk_qc_t flash_dev_submit_bio(struct bio *bio); +void flash_dev_submit_bio(struct bio *bio); extern struct kmem_cache *bch_search_cache; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 76d9da49fda7..7870e6460633 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1183,14 +1183,13 @@ static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch) mutex_unlock(&md->swap_bios_lock); } -static blk_qc_t __map_bio(struct dm_target_io *tio) +static void __map_bio(struct dm_target_io *tio) { int r; sector_t sector; struct bio *clone = &tio->clone; struct dm_io *io = tio->io; struct 
dm_target *ti = tio->ti; - blk_qc_t ret = BLK_QC_T_NONE; clone->bi_end_io = clone_endio; @@ -1226,7 +1225,7 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) case DM_MAPIO_REMAPPED: /* the bio has been remapped so dispatch it */ trace_block_bio_remap(clone, bio_dev(io->orig_bio), sector); - ret = submit_bio_noacct(clone); + submit_bio_noacct(clone); break; case DM_MAPIO_KILL: if (unlikely(swap_bios_limit(ti, clone))) { @@ -1248,8 +1247,6 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) DMWARN("unimplemented target map return value: %d", r); BUG(); } - - return ret; } static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len) @@ -1336,7 +1333,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, } } -static blk_qc_t __clone_and_map_simple_bio(struct clone_info *ci, +static void __clone_and_map_simple_bio(struct clone_info *ci, struct dm_target_io *tio, unsigned *len) { struct bio *clone = &tio->clone; @@ -1346,8 +1343,7 @@ static blk_qc_t __clone_and_map_simple_bio(struct clone_info *ci, __bio_clone_fast(clone, ci->bio); if (len) bio_setup_sector(clone, ci->sector, *len); - - return __map_bio(tio); + __map_bio(tio); } static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, @@ -1361,7 +1357,7 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, while ((bio = bio_list_pop(&blist))) { tio = container_of(bio, struct dm_target_io, clone); - (void) __clone_and_map_simple_bio(ci, tio, len); + __clone_and_map_simple_bio(ci, tio, len); } } @@ -1405,7 +1401,7 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, free_tio(tio); return r; } - (void) __map_bio(tio); + __map_bio(tio); return 0; } @@ -1520,11 +1516,10 @@ static void init_clone_info(struct clone_info *ci, struct mapped_device *md, /* * Entry point to split a bio into clones and submit them to the targets. 
*/ -static blk_qc_t __split_and_process_bio(struct mapped_device *md, +static void __split_and_process_bio(struct mapped_device *md, struct dm_table *map, struct bio *bio) { struct clone_info ci; - blk_qc_t ret = BLK_QC_T_NONE; int error = 0; init_clone_info(&ci, md, map, bio); @@ -1567,19 +1562,17 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, bio_chain(b, bio); trace_block_split(b, bio->bi_iter.bi_sector); - ret = submit_bio_noacct(bio); + submit_bio_noacct(bio); } } /* drop the extra reference count */ dm_io_dec_pending(ci.io, errno_to_blk_status(error)); - return ret; } -static blk_qc_t dm_submit_bio(struct bio *bio) +static void dm_submit_bio(struct bio *bio) { struct mapped_device *md = bio->bi_bdev->bd_disk->private_data; - blk_qc_t ret = BLK_QC_T_NONE; int srcu_idx; struct dm_table *map; @@ -1609,10 +1602,9 @@ static blk_qc_t dm_submit_bio(struct bio *bio) if (is_abnormal_io(bio)) blk_queue_split(&bio); - ret = __split_and_process_bio(md, map, bio); + __split_and_process_bio(md, map, bio); out: dm_put_live_table(md, srcu_idx); - return ret; } /*----------------------------------------------------------------- diff --git a/drivers/md/md.c b/drivers/md/md.c index ec09083ff0ef..22310d5d8d41 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -443,19 +443,19 @@ check_suspended: } EXPORT_SYMBOL(md_handle_request); -static blk_qc_t md_submit_bio(struct bio *bio) +static void md_submit_bio(struct bio *bio) { const int rw = bio_data_dir(bio); struct mddev *mddev = bio->bi_bdev->bd_disk->private_data; if (mddev == NULL || mddev->pers == NULL) { bio_io_error(bio); - return BLK_QC_T_NONE; + return; } if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) { bio_io_error(bio); - return BLK_QC_T_NONE; + return; } blk_queue_split(&bio); @@ -464,15 +464,13 @@ static blk_qc_t md_submit_bio(struct bio *bio) if (bio_sectors(bio) != 0) bio->bi_status = BLK_STS_IOERR; bio_endio(bio); - return BLK_QC_T_NONE; + return; } /* bio could be mergeable after passing to underlayer */ bio->bi_opf &= ~REQ_NOMERGE; md_handle_request(mddev, bio); - - return BLK_QC_T_NONE; } /* mddev_suspend makes sure no new requests are submitted diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 088d3dd6f6fa..b6c6866f9259 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -162,7 +162,7 @@ static int nsblk_do_bvec(struct nd_namespace_blk *nsblk, return err; } -static blk_qc_t nd_blk_submit_bio(struct bio *bio) +static void nd_blk_submit_bio(struct bio *bio) { struct bio_integrity_payload *bip; struct nd_namespace_blk *nsblk = bio->bi_bdev->bd_disk->private_data; @@ -173,7 +173,7 @@ static blk_qc_t nd_blk_submit_bio(struct bio *bio) bool do_acct; if (!bio_integrity_prep(bio)) - return BLK_QC_T_NONE; + return; bip = bio_integrity(bio); rw = bio_data_dir(bio); @@ -199,7 +199,6 @@ static blk_qc_t nd_blk_submit_bio(struct bio *bio) bio_end_io_acct(bio, start); bio_endio(bio); - return BLK_QC_T_NONE; } static int nsblk_rw_bytes(struct nd_namespace_common *ndns, diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 92dec4952297..4295fa809420 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1440,7 +1440,7 @@ static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, return ret; } -static blk_qc_t btt_submit_bio(struct bio *bio) +static void btt_submit_bio(struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); struct btt *btt = bio->bi_bdev->bd_disk->private_data; @@ -1451,7 +1451,7 @@ static blk_qc_t 
btt_submit_bio(struct bio *bio) bool do_acct; if (!bio_integrity_prep(bio)) - return BLK_QC_T_NONE; + return; do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue); if (do_acct) @@ -1483,7 +1483,6 @@ static blk_qc_t btt_submit_bio(struct bio *bio) bio_end_io_acct(bio, start); bio_endio(bio); - return BLK_QC_T_NONE; } static int btt_rw_page(struct block_device *bdev, sector_t sector, diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index ef4950f80832..a67a3ad1d413 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -190,7 +190,7 @@ static blk_status_t pmem_do_write(struct pmem_device *pmem, return rc; } -static blk_qc_t pmem_submit_bio(struct bio *bio) +static void pmem_submit_bio(struct bio *bio) { int ret = 0; blk_status_t rc = 0; @@ -229,7 +229,6 @@ static blk_qc_t pmem_submit_bio(struct bio *bio) bio->bi_status = errno_to_blk_status(ret); bio_endio(bio); - return BLK_QC_T_NONE; } static int pmem_rw_page(struct block_device *bdev, sector_t sector, diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index fba06618c6c2..ab78aa5d28c6 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -312,12 +312,11 @@ static bool nvme_available_path(struct nvme_ns_head *head) return false; } -static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) +static void nvme_ns_head_submit_bio(struct bio *bio) { struct nvme_ns_head *head = bio->bi_bdev->bd_disk->private_data; struct device *dev = disk_to_dev(head->disk); struct nvme_ns *ns; - blk_qc_t ret = BLK_QC_T_NONE; int srcu_idx; /* @@ -334,7 +333,7 @@ static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) bio->bi_opf |= REQ_NVME_MPATH; trace_block_bio_remap(bio, disk_devt(ns->head->disk), bio->bi_iter.bi_sector); - ret = submit_bio_noacct(bio); + submit_bio_noacct(bio); } else if (nvme_available_path(head)) { dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n"); @@ -349,7 +348,6 @@ static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) } srcu_read_unlock(&head->srcu, srcu_idx); - return ret; } static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode) diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 5be3d1c39a78..59e513d34b0f 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -30,7 +30,7 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode); static void dcssblk_release(struct gendisk *disk, fmode_t mode); -static blk_qc_t dcssblk_submit_bio(struct bio *bio); +static void dcssblk_submit_bio(struct bio *bio); static long dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); @@ -854,7 +854,7 @@ dcssblk_release(struct gendisk *disk, fmode_t mode) up_write(&dcssblk_devices_sem); } -static blk_qc_t +static void dcssblk_submit_bio(struct bio *bio) { struct dcssblk_dev_info *dev_info; @@ -907,10 +907,9 @@ dcssblk_submit_bio(struct bio *bio) bytes_done += bvec.bv_len; } bio_endio(bio); - return BLK_QC_T_NONE; + return; fail: bio_io_error(bio); - return BLK_QC_T_NONE; } static long diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4a9077c52444..04090ba0ef73 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8248,7 +8248,7 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio, return dip; } -static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter, +static void btrfs_submit_direct(const struct iomap_iter *iter, struct bio *dio_bio, loff_t file_offset) { struct inode *inode = iter->inode; @@ 
-8278,7 +8278,7 @@ static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter, } dio_bio->bi_status = BLK_STS_RESOURCE; bio_endio(dio_bio); - return BLK_QC_T_NONE; + return; } if (!write) { @@ -8372,15 +8372,13 @@ static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter, free_extent_map(em); } while (submit_len > 0); - return BLK_QC_T_NONE; + return; out_err_em: free_extent_map(em); out_err: dip->dio_bio->bi_status = status; btrfs_dio_private_put(dip); - - return BLK_QC_T_NONE; } const struct iomap_ops btrfs_dio_iomap_ops = { diff --git a/fs/ext4/file.c b/fs/ext4/file.c index ac0e11bbb445..9c5559faacda 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -915,7 +915,7 @@ const struct file_operations ext4_file_operations = { .llseek = ext4_llseek, .read_iter = ext4_file_read_iter, .write_iter = ext4_file_write_iter, - .iopoll = iomap_dio_iopoll, + .iopoll = iocb_bio_iopoll, .unlocked_ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index c559827cb6f9..635f0e3f10ec 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1353,7 +1353,7 @@ const struct file_operations gfs2_file_fops = { .llseek = gfs2_llseek, .read_iter = gfs2_file_read_iter, .write_iter = gfs2_file_write_iter, - .iopoll = iomap_dio_iopoll, + .iopoll = iocb_bio_iopoll, .unlocked_ioctl = gfs2_ioctl, .compat_ioctl = gfs2_compat_ioctl, .mmap = gfs2_mmap, @@ -1386,7 +1386,7 @@ const struct file_operations gfs2_file_fops_nolock = { .llseek = gfs2_llseek, .read_iter = gfs2_file_read_iter, .write_iter = gfs2_file_write_iter, - .iopoll = iomap_dio_iopoll, + .iopoll = iocb_bio_iopoll, .unlocked_ioctl = gfs2_ioctl, .compat_ioctl = gfs2_compat_ioctl, .mmap = gfs2_mmap, diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 236aba256cd1..8efab177011d 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -38,8 +38,7 @@ struct iomap_dio { struct { struct iov_iter *iter; struct task_struct *waiter; - struct request_queue *last_queue; - blk_qc_t cookie; + struct bio *poll_bio; } submit; /* used for aio completion: */ @@ -49,29 +48,20 @@ struct iomap_dio { }; }; -int iomap_dio_iopoll(struct kiocb *kiocb, unsigned int flags) -{ - struct request_queue *q = READ_ONCE(kiocb->private); - - if (!q) - return 0; - return blk_poll(q, READ_ONCE(kiocb->ki_cookie), flags); -} -EXPORT_SYMBOL_GPL(iomap_dio_iopoll); - static void iomap_dio_submit_bio(const struct iomap_iter *iter, struct iomap_dio *dio, struct bio *bio, loff_t pos) { atomic_inc(&dio->ref); - if (dio->iocb->ki_flags & IOCB_HIPRI) + if (dio->iocb->ki_flags & IOCB_HIPRI) { bio_set_polled(bio, dio->iocb); + dio->submit.poll_bio = bio; + } - dio->submit.last_queue = bdev_get_queue(iter->iomap.bdev); if (dio->dops && dio->dops->submit_io) - dio->submit.cookie = dio->dops->submit_io(iter, bio, pos); + dio->dops->submit_io(iter, bio, pos); else - dio->submit.cookie = submit_bio(bio); + submit_bio(bio); } ssize_t iomap_dio_complete(struct iomap_dio *dio) @@ -164,9 +154,11 @@ static void iomap_dio_bio_end_io(struct bio *bio) } else if (dio->flags & IOMAP_DIO_WRITE) { struct inode *inode = file_inode(dio->iocb->ki_filp); + WRITE_ONCE(dio->iocb->private, NULL); INIT_WORK(&dio->aio.work, iomap_dio_complete_work); queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work); } else { + WRITE_ONCE(dio->iocb->private, NULL); iomap_dio_complete_work(&dio->aio.work); } } @@ -497,8 +489,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio->submit.iter = iter; dio->submit.waiter = current; - 
dio->submit.cookie = BLK_QC_T_NONE; - dio->submit.last_queue = NULL; + dio->submit.poll_bio = NULL; if (iov_iter_rw(iter) == READ) { if (iomi.pos >= dio->i_size) @@ -611,8 +602,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (dio->flags & IOMAP_DIO_WRITE_FUA) dio->flags &= ~IOMAP_DIO_NEED_SYNC; - WRITE_ONCE(iocb->ki_cookie, dio->submit.cookie); - WRITE_ONCE(iocb->private, dio->submit.last_queue); + WRITE_ONCE(iocb->private, dio->submit.poll_bio); /* * We are about to drop our additional submission reference, which @@ -639,10 +629,8 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (!READ_ONCE(dio->submit.waiter)) break; - if (!(iocb->ki_flags & IOCB_HIPRI) || - !dio->submit.last_queue || - !blk_poll(dio->submit.last_queue, - dio->submit.cookie, 0)) + if (!dio->submit.poll_bio || + !bio_poll(dio->submit.poll_bio, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 7aa943edfc02..62e7fbe4e54c 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1452,7 +1452,7 @@ const struct file_operations xfs_file_operations = { .write_iter = xfs_file_write_iter, .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, - .iopoll = iomap_dio_iopoll, + .iopoll = iocb_bio_iopoll, .unlocked_ioctl = xfs_file_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = xfs_file_compat_ioctl, diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index ddc346a9df9b..3ce5f47338cb 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1128,7 +1128,7 @@ static const struct file_operations zonefs_file_operations = { .write_iter = zonefs_file_write_iter, .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, - .iopoll = iomap_dio_iopoll, + .iopoll = iocb_bio_iopoll, }; static struct kmem_cache *zonefs_inode_cachep; diff --git a/include/linux/bio.h b/include/linux/bio.h index c7a2d880e927..62d684b7dd4c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -349,7 +349,7 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs) return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set); } -extern blk_qc_t submit_bio(struct bio *); +void submit_bio(struct bio *bio); extern void bio_endio(struct bio *); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2219e9277118..a9c1d0882550 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -359,9 +359,9 @@ struct blk_mq_hw_ctx { /** @kobj: Kernel object for sysfs. */ struct kobject kobj; - /** @poll_considered: Count times blk_poll() was called. */ + /** @poll_considered: Count times blk_mq_poll() was called. */ unsigned long poll_considered; - /** @poll_invoked: Count how many requests blk_poll() polled. */ + /** @poll_invoked: Count how many requests blk_mq_poll() polled. */ unsigned long poll_invoked; /** @poll_success: Count how many polled requests were completed. 
*/ unsigned long poll_success; @@ -815,16 +815,6 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) for ((i) = 0; (i) < (hctx)->nr_ctx && \ ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) -static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, - struct request *rq) -{ - if (rq->tag != -1) - return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT); - - return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) | - BLK_QC_T_INTERNAL; -} - static inline void blk_mq_cleanup_rq(struct request *rq) { if (rq->q->mq_ops->cleanup_rq) @@ -843,7 +833,6 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, rq->rq_disk = bio->bi_bdev->bd_disk; } -blk_qc_t blk_mq_submit_bio(struct bio *bio); void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, struct lock_class_key *key); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index f8b9fce68834..72736b4c057c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -208,6 +208,9 @@ static inline void bio_issue_init(struct bio_issue *issue, ((u64)size << BIO_ISSUE_SIZE_SHIFT)); } +typedef unsigned int blk_qc_t; +#define BLK_QC_T_NONE -1U + /* * main unit of I/O for the block layer and lower layers (ie drivers and * stacking drivers) @@ -227,8 +230,8 @@ struct bio { struct bvec_iter bi_iter; + blk_qc_t bi_cookie; bio_end_io_t *bi_end_io; - void *bi_private; #ifdef CONFIG_BLK_CGROUP /* @@ -384,7 +387,7 @@ enum req_flag_bits { /* command specific flags for REQ_OP_WRITE_ZEROES: */ __REQ_NOUNMAP, /* do not free blocks when zeroing */ - __REQ_POLLED, /* caller polls for completion using blk_poll */ + __REQ_POLLED, /* caller polls for completion using bio_poll */ /* for driver use */ __REQ_DRV, @@ -495,11 +498,6 @@ static inline int op_stat_group(unsigned int op) return op_is_write(op); } -typedef unsigned int blk_qc_t; -#define BLK_QC_T_NONE -1U -#define BLK_QC_T_SHIFT 16 -#define BLK_QC_T_INTERNAL (1U << 31) - struct blk_rq_stat { u64 mean; u64 min; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2b80c98fc373..2a8689e949b4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -25,6 +25,7 @@ struct request; struct sg_io_hdr; struct blkcg_gq; struct blk_flush_queue; +struct kiocb; struct pr_ops; struct rq_qos; struct blk_queue_stats; @@ -550,7 +551,7 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); -blk_qc_t submit_bio_noacct(struct bio *bio); +void submit_bio_noacct(struct bio *bio); extern int blk_lld_busy(struct request_queue *q); extern void blk_queue_split(struct bio **); @@ -568,7 +569,8 @@ blk_status_t errno_to_blk_status(int errno); #define BLK_POLL_ONESHOT (1 << 0) /* do not sleep to wait for the expected completion time */ #define BLK_POLL_NOSLEEP (1 << 1) -int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags); +int bio_poll(struct bio *bio, unsigned int flags); +int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { @@ -1176,7 +1178,7 @@ static inline void blk_ksm_unregister(struct request_queue *q) { } struct block_device_operations { - blk_qc_t (*submit_bio) (struct bio *bio); + void (*submit_bio)(struct bio *bio); int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); diff --git 
a/include/linux/fs.h b/include/linux/fs.h index c443cddf414f..f595f4097cb7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -334,11 +334,7 @@ struct kiocb { int ki_flags; u16 ki_hint; u16 ki_ioprio; /* See linux/ioprio.h */ - union { - unsigned int ki_cookie; /* for ->iopoll */ - struct wait_page_queue *ki_waitq; /* for async buffered IO */ - }; - + struct wait_page_queue *ki_waitq; /* for async buffered IO */ randomized_struct_fields_end }; diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 1e86b65567c2..63f4ea4dac9b 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -313,8 +313,8 @@ int iomap_writepages(struct address_space *mapping, struct iomap_dio_ops { int (*end_io)(struct kiocb *iocb, ssize_t size, int error, unsigned flags); - blk_qc_t (*submit_io)(const struct iomap_iter *iter, struct bio *bio, - loff_t file_offset); + void (*submit_io)(const struct iomap_iter *iter, struct bio *bio, + loff_t file_offset); }; /* @@ -337,7 +337,6 @@ struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, unsigned int dio_flags); ssize_t iomap_dio_complete(struct iomap_dio *dio); -int iomap_dio_iopoll(struct kiocb *kiocb, unsigned int flags); #ifdef CONFIG_SWAP struct file; diff --git a/mm/page_io.c b/mm/page_io.c index ed2eded74f3a..a68faab5b310 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -358,8 +358,6 @@ int swap_readpage(struct page *page, bool synchronous) struct bio *bio; int ret = 0; struct swap_info_struct *sis = page_swap_info(page); - blk_qc_t qc; - struct gendisk *disk; unsigned long pflags; VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page); @@ -409,8 +407,6 @@ int swap_readpage(struct page *page, bool synchronous) bio->bi_iter.bi_sector = swap_page_sector(page); bio->bi_end_io = end_swap_bio_read; bio_add_page(bio, page, thp_size(page), 0); - - disk = bio->bi_bdev->bd_disk; /* * Keep this task valid during swap readpage because the oom killer may * attempt to access it in the page fault retry time check. @@ -422,13 +418,13 @@ int swap_readpage(struct page *page, bool synchronous) } count_vm_event(PSWPIN); bio_get(bio); - qc = submit_bio(bio); + submit_bio(bio); while (synchronous) { set_current_state(TASK_UNINTERRUPTIBLE); if (!READ_ONCE(bio->bi_private)) break; - if (!blk_poll(disk->queue, qc, 0)) + if (!bio_poll(bio, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); -- cgit v1.2.3 From 5a72e899ceb465d731c413d57c6c12cdbf88303c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Oct 2021 09:24:29 -0600 Subject: block: add a struct io_comp_batch argument to fops->iopoll() struct io_comp_batch contains a list head and a completion handler, which will allow batches of IO completions to be handled more efficiently. For now there are no functional changes in this patch; we just define the io_comp_batch structure and add the argument to the file_operations iopoll handler.
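A minimal sketch of the resulting calling convention (the example_* names are illustrative only and not part of this series; only the block-layer helpers shown are real): a filesystem keeps pointing ->iopoll at iocb_bio_iopoll, and a call site that does not batch completions simply passes a NULL io_comp_batch.

#include <linux/fs.h>
#include <linux/blkdev.h>

/* illustrative only: ->iopoll now takes (kiocb, io_comp_batch, flags) */
static const struct file_operations example_file_operations = {
        .read_iter      = generic_file_read_iter,
        .iopoll         = iocb_bio_iopoll,
};

/* a caller that does not want batching just passes NULL for the batch */
static int example_poll_once(struct kiocb *iocb)
{
        return iocb_bio_iopoll(iocb, NULL, BLK_POLL_NOSLEEP);
}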
Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 9 +++++---- block/blk-exec.c | 2 +- block/blk-mq.c | 9 +++++---- block/blk-mq.h | 3 ++- block/fops.c | 4 ++-- drivers/block/rnbd/rnbd-clt.c | 2 +- drivers/nvme/host/pci.c | 4 ++-- drivers/nvme/host/rdma.c | 2 +- drivers/nvme/host/tcp.c | 2 +- drivers/scsi/scsi_lib.c | 2 +- fs/io_uring.c | 2 +- fs/iomap/direct-io.c | 2 +- include/linux/blk-mq.h | 2 +- include/linux/blkdev.h | 13 +++++++++++-- include/linux/fs.h | 4 +++- mm/page_io.c | 2 +- 16 files changed, 39 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/block/blk-core.c b/block/blk-core.c index 20b6cc06461a..d0c2e11411d0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1078,7 +1078,7 @@ EXPORT_SYMBOL(submit_bio); * Note: the caller must either be the context that submitted @bio, or * be in a RCU critical section to prevent freeing of @bio. */ -int bio_poll(struct bio *bio, unsigned int flags) +int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) { struct request_queue *q = bio->bi_bdev->bd_disk->queue; blk_qc_t cookie = READ_ONCE(bio->bi_cookie); @@ -1096,7 +1096,7 @@ int bio_poll(struct bio *bio, unsigned int flags) if (WARN_ON_ONCE(!queue_is_mq(q))) ret = 0; /* not yet implemented, should not happen */ else - ret = blk_mq_poll(q, cookie, flags); + ret = blk_mq_poll(q, cookie, iob, flags); blk_queue_exit(q); return ret; } @@ -1106,7 +1106,8 @@ EXPORT_SYMBOL_GPL(bio_poll); * Helper to implement file_operations.iopoll. Requires the bio to be stored * in iocb->private, and cleared before freeing the bio. */ -int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags) +int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob, + unsigned int flags) { struct bio *bio; int ret = 0; @@ -1134,7 +1135,7 @@ int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags) rcu_read_lock(); bio = READ_ONCE(kiocb->private); if (bio && bio->bi_bdev) - ret = bio_poll(bio, flags); + ret = bio_poll(bio, iob, flags); rcu_read_unlock(); return ret; diff --git a/block/blk-exec.c b/block/blk-exec.c index 55f0cd34b37b..1b8b47f6e79b 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -77,7 +77,7 @@ static bool blk_rq_is_poll(struct request *rq) static void blk_rq_poll_completion(struct request *rq, struct completion *wait) { do { - bio_poll(rq->bio, 0); + bio_poll(rq->bio, NULL, 0); cond_resched(); } while (!completion_done(wait)); } diff --git a/block/blk-mq.c b/block/blk-mq.c index 74505b545dd3..79c25b64e8b0 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4174,14 +4174,14 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, blk_qc_t qc) } static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, - unsigned int flags) + struct io_comp_batch *iob, unsigned int flags) { struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie); long state = get_current_state(); int ret; do { - ret = q->mq_ops->poll(hctx); + ret = q->mq_ops->poll(hctx, iob); if (ret > 0) { __set_current_state(TASK_RUNNING); return ret; @@ -4201,14 +4201,15 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, return 0; } -int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags) +int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob, + unsigned int flags) { if (!(flags & BLK_POLL_NOSLEEP) && q->poll_nsec != BLK_MQ_POLL_CLASSIC) { if (blk_mq_poll_hybrid(q, cookie)) return 1; } - return blk_mq_poll_classic(q, cookie, flags); + return blk_mq_poll_classic(q, cookie, iob, 
flags); } unsigned int blk_mq_rq_cpu(struct request *rq) diff --git a/block/blk-mq.h b/block/blk-mq.h index 1b91a3fdaa01..ebf67f4d4f2e 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -31,7 +31,8 @@ struct blk_mq_ctx { } ____cacheline_aligned_in_smp; void blk_mq_submit_bio(struct bio *bio); -int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags); +int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob, + unsigned int flags); void blk_mq_exit_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); diff --git a/block/fops.c b/block/fops.c index 1d4f862950bb..2c43e493e37c 100644 --- a/block/fops.c +++ b/block/fops.c @@ -105,7 +105,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, set_current_state(TASK_UNINTERRUPTIBLE); if (!READ_ONCE(bio.bi_private)) break; - if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, 0)) + if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, NULL, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); @@ -291,7 +291,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, if (!READ_ONCE(dio->waiter)) break; - if (!do_poll || !bio_poll(bio, 0)) + if (!do_poll || !bio_poll(bio, NULL, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index bd4a41afbbfc..0ec0191d4196 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1176,7 +1176,7 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx, return ret; } -static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx) +static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct rnbd_queue *q = hctx->driver_data; struct rnbd_clt_dev *dev = q->dev; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 896328271471..bb0482dfab3c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1092,7 +1092,7 @@ static void nvme_poll_irqdisable(struct nvme_queue *nvmeq) enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); } -static int nvme_poll(struct blk_mq_hw_ctx *hctx) +static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct nvme_queue *nvmeq = hctx->driver_data; bool found; @@ -1274,7 +1274,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) * Did we miss an interrupt? 
*/ if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) - nvme_poll(req->mq_hctx); + nvme_poll(req->mq_hctx, NULL); else nvme_poll_irqdisable(nvmeq); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 40317e1b9183..1624da3702d4 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2106,7 +2106,7 @@ unmap_qe: return ret; } -static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx) +static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct nvme_rdma_queue *queue = hctx->driver_data; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 3c1c29dd3020..9ce3458ee1dd 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2429,7 +2429,7 @@ static int nvme_tcp_map_queues(struct blk_mq_tag_set *set) return 0; } -static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx) +static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct nvme_tcp_queue *queue = hctx->driver_data; struct sock *sk = queue->sock->sk; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 33fd9a01330c..30f7d0b4eb73 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1784,7 +1784,7 @@ static void scsi_mq_exit_request(struct blk_mq_tag_set *set, struct request *rq, } -static int scsi_mq_poll(struct blk_mq_hw_ctx *hctx) +static int scsi_mq_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct Scsi_Host *shost = hctx->driver_data; diff --git a/fs/io_uring.c b/fs/io_uring.c index c5066146b8de..cd77a137f2d8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2483,7 +2483,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, if (!list_empty(&done)) break; - ret = kiocb->ki_filp->f_op->iopoll(kiocb, poll_flags); + ret = kiocb->ki_filp->f_op->iopoll(kiocb, NULL, poll_flags); if (unlikely(ret < 0)) return ret; else if (ret) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 8efab177011d..83ecfba53abe 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -630,7 +630,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, break; if (!dio->submit.poll_bio || - !bio_poll(dio->submit.poll_bio, 0)) + !bio_poll(dio->submit.poll_bio, NULL, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 9fb8618fb957..4c79439af2f2 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -532,7 +532,7 @@ struct blk_mq_ops { /** * @poll: Called to poll for completion of a specific tag. */ - int (*poll)(struct blk_mq_hw_ctx *); + int (*poll)(struct blk_mq_hw_ctx *, struct io_comp_batch *); /** * @complete: Mark the request as complete. 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b0a322172965..fd9771a1da09 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -569,8 +569,9 @@ blk_status_t errno_to_blk_status(int errno); #define BLK_POLL_ONESHOT (1 << 0) /* do not sleep to wait for the expected completion time */ #define BLK_POLL_NOSLEEP (1 << 1) -int bio_poll(struct bio *bio, unsigned int flags); -int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags); +int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags); +int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob, + unsigned int flags); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { @@ -1298,6 +1299,14 @@ int fsync_bdev(struct block_device *bdev); int freeze_bdev(struct block_device *bdev); int thaw_bdev(struct block_device *bdev); +struct io_comp_batch { + struct request *req_list; + bool need_ts; + void (*complete)(struct io_comp_batch *); +}; + +#define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } + #define rq_list_add(listptr, rq) do { \ (rq)->rq_next = *(listptr); \ *(listptr) = rq; \ diff --git a/include/linux/fs.h b/include/linux/fs.h index f595f4097cb7..31029a91f440 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -48,6 +48,7 @@ struct backing_dev_info; struct bdi_writeback; struct bio; +struct io_comp_batch; struct export_operations; struct fiemap_extent_info; struct hd_geometry; @@ -2071,7 +2072,8 @@ struct file_operations { ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); - int (*iopoll)(struct kiocb *kiocb, unsigned int flags); + int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *, + unsigned int flags); int (*iterate) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); diff --git a/mm/page_io.c b/mm/page_io.c index a68faab5b310..6010fb07f231 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -424,7 +424,7 @@ int swap_readpage(struct page *page, bool synchronous) if (!READ_ONCE(bio->bi_private)) break; - if (!bio_poll(bio, 0)) + if (!bio_poll(bio, NULL, 0)) blk_io_schedule(); } __set_current_state(TASK_RUNNING); -- cgit v1.2.3 From b688f11e86c9a22169a0e522530982735d2db19b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Oct 2021 09:28:46 -0600 Subject: io_uring: utilize the io batching infrastructure for more efficient polled IO Wire up using an io_comp_batch for f_op->iopoll(). If the lower stack supports it, we can handle high rates of polled IO more efficiently. This raises the single core efficiency on my system from ~6.1M IOPS to ~6.6M IOPS running a random read workload at depth 128 on two gen2 Optane drives. 
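A condensed sketch of the pattern the hunk below wires into io_uring (the function name is illustrative; DEFINE_IO_COMP_BATCH, rq_list_empty and the ->iopoll signature are the interfaces introduced earlier in this series): declare an empty batch on the stack, let ->iopoll() fill it, then run the batch's completion handler once over everything it gathered.

static int example_reap_polled_completions(struct kiocb *iocb)
{
        DEFINE_IO_COMP_BATCH(iob);      /* empty batch on the stack */
        int found;

        found = iocb->ki_filp->f_op->iopoll(iocb, &iob, BLK_POLL_NOSLEEP);
        if (found < 0)
                return found;

        /* complete everything the driver queued up in a single pass */
        if (!rq_list_empty(iob.req_list))
                iob.complete(&iob);
        return found;
}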
Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index cd77a137f2d8..d4631a55a692 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2458,6 +2458,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, { struct io_kiocb *req, *tmp; unsigned int poll_flags = BLK_POLL_NOSLEEP; + DEFINE_IO_COMP_BATCH(iob); LIST_HEAD(done); /* @@ -2483,17 +2484,20 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, if (!list_empty(&done)) break; - ret = kiocb->ki_filp->f_op->iopoll(kiocb, NULL, poll_flags); + ret = kiocb->ki_filp->f_op->iopoll(kiocb, &iob, poll_flags); if (unlikely(ret < 0)) return ret; else if (ret) poll_flags |= BLK_POLL_ONESHOT; /* iopoll may have completed current req */ - if (READ_ONCE(req->iopoll_completed)) + if (!rq_list_empty(iob.req_list) || + READ_ONCE(req->iopoll_completed)) list_move_tail(&req->inflight_entry, &done); } + if (!rq_list_empty(iob.req_list)) + iob.complete(&iob); if (!list_empty(&done)) io_iopoll_complete(ctx, nr_events, &done); -- cgit v1.2.3 From 008f75a20e7072d0840ec323c39b42206f3fa8a0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Oct 2021 16:41:19 +0200 Subject: block: cleanup the flush plug helpers Consolidate the various helpers into a single blk_flush_plug helper that takes a blk_plug and the from_schedule bool, and switch all callsites to call it directly. Checks that the plug is non-NULL must be performed by the caller, something most callers already do anyway. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211020144119.142582-5-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-core.c | 13 ++++++------- fs/fs-writeback.c | 5 +++-- include/linux/blkdev.h | 29 ++++------------------------- kernel/sched/core.c | 5 +++-- 4 files changed, 16 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/block/blk-core.c b/block/blk-core.c index db8b2fe0ceaf..dfa199312c2f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1089,7 +1089,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) return 0; if (current->plug) - blk_flush_plug_list(current->plug, false); + blk_flush_plug(current->plug, false); if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT)) return 0; @@ -1637,7 +1637,7 @@ struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data, } EXPORT_SYMBOL(blk_check_plugged); -void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) +void blk_flush_plug(struct blk_plug *plug, bool from_schedule) { if (!list_empty(&plug->cb_list)) flush_plug_callbacks(plug, from_schedule); @@ -1659,11 +1659,10 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) */ void blk_finish_plug(struct blk_plug *plug) { - if (plug != current->plug) - return; - blk_flush_plug_list(plug, false); - - current->plug = NULL; + if (plug == current->plug) { + blk_flush_plug(plug, false); + current->plug = NULL; + } } EXPORT_SYMBOL(blk_finish_plug); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 81ec192ce067..4124a89a1a5d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1893,7 +1893,8 @@ static long writeback_sb_inodes(struct super_block *sb, * unplug, so get our IOs out the door before we * give up the CPU. 
*/ - blk_flush_plug(current); + if (current->plug) + blk_flush_plug(current->plug, false); cond_resched(); } @@ -2291,7 +2292,7 @@ void wakeup_flusher_threads(enum wb_reason reason) * If we are expecting writeback progress we must submit plugged IO. */ if (blk_needs_flush_plug(current)) - blk_schedule_flush_plug(current); + blk_flush_plug(current->plug, true); rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2b22fa36e568..c7b1e9355123 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -725,9 +725,8 @@ extern void blk_set_queue_dying(struct request_queue *); * as the lock contention for request_queue lock is reduced. * * It is ok not to disable preemption when adding the request to the plug list - * or when attempting a merge, because blk_schedule_flush_list() will only flush - * the plug list when the task sleeps by itself. For details, please see - * schedule() where blk_schedule_flush_plug() is called. + * or when attempting a merge. For details, please see schedule() where + * blk_flush_plug() is called. */ struct blk_plug { struct request *mq_list; /* blk-mq requests */ @@ -757,23 +756,8 @@ extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, extern void blk_start_plug(struct blk_plug *); extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short); extern void blk_finish_plug(struct blk_plug *); -extern void blk_flush_plug_list(struct blk_plug *, bool); -static inline void blk_flush_plug(struct task_struct *tsk) -{ - struct blk_plug *plug = tsk->plug; - - if (plug) - blk_flush_plug_list(plug, false); -} - -static inline void blk_schedule_flush_plug(struct task_struct *tsk) -{ - struct blk_plug *plug = tsk->plug; - - if (plug) - blk_flush_plug_list(plug, true); -} +void blk_flush_plug(struct blk_plug *plug, bool from_schedule); static inline bool blk_needs_flush_plug(struct task_struct *tsk) { @@ -802,15 +786,10 @@ static inline void blk_finish_plug(struct blk_plug *plug) { } -static inline void blk_flush_plug(struct task_struct *task) -{ -} - -static inline void blk_schedule_flush_plug(struct task_struct *task) +static inline void blk_flush_plug(struct blk_plug *plug, bool async) { } - static inline bool blk_needs_flush_plug(struct task_struct *tsk) { return false; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 92ef7b68198c..34f37502c27e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6343,7 +6343,7 @@ static inline void sched_submit_work(struct task_struct *tsk) * make sure to submit it to avoid deadlocks. */ if (blk_needs_flush_plug(tsk)) - blk_schedule_flush_plug(tsk); + blk_flush_plug(tsk->plug, true); } static void sched_update_worker(struct task_struct *tsk) @@ -8354,7 +8354,8 @@ int io_schedule_prepare(void) int old_iowait = current->in_iowait; current->in_iowait = 1; - blk_schedule_flush_plug(current); + if (current->plug) + blk_flush_plug(current->plug, true); return old_iowait; } -- cgit v1.2.3
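To summarize the convention after this last patch (a sketch under the assumption of a caller with block support enabled; the function name is illustrative, blk_flush_plug and current->plug are the real interfaces): there is only one flush helper left, and checking for a NULL plug is now the caller's job.

#include <linux/blkdev.h>
#include <linux/sched.h>

/* illustrative helper: flush the current task's plugged IO, if any */
static void example_flush_pending_io(bool from_schedule)
{
        if (current->plug)
                blk_flush_plug(current->plug, from_schedule);
}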