From 9cd1e566676bbcb8a126acd921e4e194e6339603 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 15 Mar 2023 11:39:02 -0700 Subject: blk-mq: release crypto keyslot before reporting I/O complete Once all I/O using a blk_crypto_key has completed, filesystems can call blk_crypto_evict_key(). However, the block layer currently doesn't call blk_crypto_put_keyslot() until the request is being freed, which happens after upper layers have been told (via bio_endio()) the I/O has completed. This causes a race condition where blk_crypto_evict_key() can see 'slot_refs != 0' without there being an actual bug. This makes __blk_crypto_evict_key() hit the 'WARN_ON_ONCE(atomic_read(&slot->slot_refs) != 0)' and return without doing anything, eventually causing a use-after-free in blk_crypto_reprogram_all_keys(). (This is a very rare bug and has only been seen when per-file keys are being used with fscrypt.) There are two options to fix this: either release the keyslot before bio_endio() is called on the request's last bio, or make __blk_crypto_evict_key() ignore slot_refs. Let's go with the first solution, since it preserves the ability to report bugs (via WARN_ON_ONCE) where a key is evicted while still in-use. Fixes: a892c8d52c02 ("block: Inline encryption support for blk-mq") Cc: stable@vger.kernel.org Reviewed-by: Nathan Huckleberry Reviewed-by: Christoph Hellwig Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20230315183907.53675-2-ebiggers@kernel.org Signed-off-by: Jens Axboe --- block/blk-mq.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index d0cb2ef18fe2..49825538d932 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -840,6 +840,12 @@ static void blk_complete_request(struct request *req) req->q->integrity.profile->complete_fn(req, total_bytes); #endif + /* + * Upper layers may call blk_crypto_evict_key() anytime after the last + * bio_endio(). Therefore, the keyslot must be released before that. + */ + blk_crypto_rq_put_keyslot(req); + blk_account_io_completion(req, total_bytes); do { @@ -905,6 +911,13 @@ bool blk_update_request(struct request *req, blk_status_t error, req->q->integrity.profile->complete_fn(req, nr_bytes); #endif + /* + * Upper layers may call blk_crypto_evict_key() anytime after the last + * bio_endio(). Therefore, the keyslot must be released before that. + */ + if (blk_crypto_rq_has_keyslot(req) && nr_bytes >= blk_rq_bytes(req)) + __blk_crypto_rq_put_keyslot(req); + if (unlikely(error && !blk_rq_is_passthrough(req) && !(req->rq_flags & RQF_QUIET)) && !test_bit(GD_DEAD, &req->q->disk->state)) { @@ -2967,7 +2980,7 @@ void blk_mq_submit_bio(struct bio *bio) blk_mq_bio_to_request(rq, bio, nr_segs); - ret = blk_crypto_init_request(rq); + ret = blk_crypto_rq_get_keyslot(rq); if (ret != BLK_STS_OK) { bio->bi_status = ret; bio_endio(bio); -- cgit v1.2.3 From 435c0e999689b7f383d0a27978cdaa08669cf134 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 15 Mar 2023 11:39:05 -0700 Subject: blk-crypto: remove blk_crypto_insert_cloned_request() blk_crypto_insert_cloned_request() is the same as blk_crypto_rq_get_keyslot(), so just use that directly. 
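(Illustration only, not part of either patch: the ordering rule established by "blk-mq: release crypto keyslot before reporting I/O complete" above can be modelled in a few lines. The types and helpers below are hypothetical stand-ins rather than the real blk-crypto API; the point is only that the keyslot reference taken when a request is set up must be dropped before bio_endio() tells the upper layer the I/O is done, because the key may be evicted at any time after that notification.)

/*
 * Hypothetical model, not kernel code: a keyslot reference is taken at
 * submission time and must be dropped before the completion callback runs,
 * because the owner of the key may evict it immediately afterwards.
 */
#include <assert.h>
#include <stdatomic.h>

struct keyslot {
	atomic_int slot_refs;
};

static void keyslot_get(struct keyslot *s) { atomic_fetch_add(&s->slot_refs, 1); }
static void keyslot_put(struct keyslot *s) { atomic_fetch_sub(&s->slot_refs, 1); }

/* What the key's owner does once it believes all I/O on the key is done. */
static void evict_key(struct keyslot *s)
{
	assert(atomic_load(&s->slot_refs) == 0);	/* the WARN_ON_ONCE in the patch */
}

/* Completion path after the patch: drop the reference, then notify. */
static void complete_request(struct keyslot *s, void (*endio)(struct keyslot *))
{
	keyslot_put(s);		/* released before ... */
	endio(s);		/* ... the upper layer learns the I/O finished */
}

int main(void)
{
	struct keyslot slot = { 0 };

	keyslot_get(&slot);			/* taken when the request is set up */
	complete_request(&slot, evict_key);	/* eviction right after endio is now safe */
	return 0;
}

(Keeping the put ahead of the completion notification is what lets the eviction path keep its WARN_ON_ONCE as a genuine bug check instead of relaxing it.)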
Signed-off-by: Eric Biggers Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230315183907.53675-2-ebiggers@kernel.org Signed-off-by: Jens Axboe --- Documentation/block/inline-encryption.rst | 3 +-- block/blk-crypto-internal.h | 15 --------------- block/blk-mq.c | 2 +- 3 files changed, 2 insertions(+), 18 deletions(-) (limited to 'block/blk-mq.c') diff --git a/Documentation/block/inline-encryption.rst b/Documentation/block/inline-encryption.rst index f9bf18ea6509..90b733422ed4 100644 --- a/Documentation/block/inline-encryption.rst +++ b/Documentation/block/inline-encryption.rst @@ -270,8 +270,7 @@ Request queue based layered devices like dm-rq that wish to support inline encryption need to create their own blk_crypto_profile for their request_queue, and expose whatever functionality they choose. When a layered device wants to pass a clone of that request to another request_queue, blk-crypto will -initialize and prepare the clone as necessary; see -``blk_crypto_insert_cloned_request()``. +initialize and prepare the clone as necessary. Interaction between inline encryption and blk integrity ======================================================= diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h index 4f1de2495f0c..93a141979694 100644 --- a/block/blk-crypto-internal.h +++ b/block/blk-crypto-internal.h @@ -205,21 +205,6 @@ static inline int blk_crypto_rq_bio_prep(struct request *rq, struct bio *bio, return 0; } -/** - * blk_crypto_insert_cloned_request - Prepare a cloned request to be inserted - * into a request queue. - * @rq: the request being queued - * - * Return: BLK_STS_OK on success, nonzero on error. - */ -static inline blk_status_t blk_crypto_insert_cloned_request(struct request *rq) -{ - - if (blk_crypto_rq_is_encrypted(rq)) - return blk_crypto_rq_get_keyslot(rq); - return BLK_STS_OK; -} - #ifdef CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK int blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num); diff --git a/block/blk-mq.c b/block/blk-mq.c index 49825538d932..5e819de2f5e7 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3049,7 +3049,7 @@ blk_status_t blk_insert_cloned_request(struct request *rq) if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq))) return BLK_STS_IOERR; - if (blk_crypto_insert_cloned_request(rq)) + if (blk_crypto_rq_get_keyslot(rq)) return BLK_STS_IOERR; blk_account_io_start(rq); -- cgit v1.2.3 From 5b8562f0e87b9bc7e581fc482e5a242885f79b88 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 15 Mar 2023 11:39:06 -0700 Subject: blk-mq: return actual keyslot error in blk_insert_cloned_request() To avoid hiding information, pass on the error code from blk_crypto_rq_get_keyslot() instead of always using BLK_STS_IOERR. 
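(A generic sketch of the point being made, using hypothetical status values and helpers rather than kernel APIs: returning one catch-all code on every failure discards the reason for the failure, while passing the callee's status through preserves it for the caller, for example to distinguish a retryable resource shortage from a hard I/O error.)

/*
 * Hypothetical illustration, not kernel code: "collapsed" hides the cause
 * of a failure behind a generic error, "propagated" hands it through.
 */
enum status { STS_OK, STS_IOERR, STS_RESOURCE };

static enum status get_keyslot(void)
{
	return STS_RESOURCE;		/* e.g. a transient, retryable condition */
}

enum status prepare_collapsed(void)
{
	if (get_keyslot() != STS_OK)
		return STS_IOERR;	/* caller only ever sees a hard error */
	return STS_OK;
}

enum status prepare_propagated(void)
{
	enum status ret = get_keyslot();

	if (ret != STS_OK)
		return ret;		/* caller sees the real reason */
	return STS_OK;
}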
Signed-off-by: Eric Biggers Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230315183907.53675-2-ebiggers@kernel.org Signed-off-by: Jens Axboe --- block/blk-mq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index 5e819de2f5e7..a875b1cdff9b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3049,8 +3049,9 @@ blk_status_t blk_insert_cloned_request(struct request *rq) if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq))) return BLK_STS_IOERR; - if (blk_crypto_rq_get_keyslot(rq)) - return BLK_STS_IOERR; + ret = blk_crypto_rq_get_keyslot(rq); + if (ret != BLK_STS_OK) + return ret; blk_account_io_start(rq); -- cgit v1.2.3 From 54bdd67d0f88489ac88f7664b56cb7c93799d84d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 20 Mar 2023 12:49:26 -0700 Subject: blk-mq: remove hybrid polling io_uring provides the only way user space can poll completions, and that always sets BLK_POLL_NOSLEEP. This effectively makes hybrid polling dead code, so remove it and everything supporting it. Hybrid polling was effectively killed off with 9650b453a3d4b1, "block: ignore RWF_HIPRI hint for sync dio", but still potentially reachable through io_uring until d729cf9acb93119, "io_uring: don't sleep when polling for I/O", but hybrid polling probably should not have been reachable through that async interface from the beginning. Fixes: 9650b453a3d4 ("block: ignore RWF_HIPRI hint for sync dio") Fixes: d729cf9acb93 ("io_uring: don't sleep when polling for I/O") Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20230320194926.3353144-1-kbusch@meta.com Signed-off-by: Jens Axboe --- Documentation/ABI/stable/sysfs-block | 15 +-- block/blk-core.c | 6 - block/blk-mq-debugfs.c | 26 ----- block/blk-mq.c | 205 +---------------------------------- block/blk-stat.c | 18 --- block/blk-sysfs.c | 25 +---- include/linux/blk-mq.h | 2 - include/linux/blkdev.h | 12 -- io_uring/rw.c | 2 +- 9 files changed, 12 insertions(+), 299 deletions(-) (limited to 'block/blk-mq.c') diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block index 282de3680367..c57e5b7cb532 100644 --- a/Documentation/ABI/stable/sysfs-block +++ b/Documentation/ABI/stable/sysfs-block @@ -336,18 +336,11 @@ What: /sys/block//queue/io_poll_delay Date: November 2016 Contact: linux-block@vger.kernel.org Description: - [RW] If polling is enabled, this controls what kind of polling - will be performed. It defaults to -1, which is classic polling. + [RW] This was used to control what kind of polling will be + performed. It is now fixed to -1, which is classic polling. In this mode, the CPU will repeatedly ask for completions - without giving up any time. If set to 0, a hybrid polling mode - is used, where the kernel will attempt to make an educated guess - at when the IO will complete. Based on this guess, the kernel - will put the process issuing IO to sleep for an amount of time, - before entering a classic poll loop. This mode might be a little - slower than pure classic polling, but it will be more efficient. - If set to a value larger than 0, the kernel will put the process - issuing IO to sleep for this amount of microseconds before - entering classic polling. + without giving up any time. 
+ What: /sys/block//queue/io_timeout diff --git a/block/blk-core.c b/block/blk-core.c index 9e5e0277a4d9..269765d16cfd 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -263,13 +263,7 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head) static void blk_free_queue(struct request_queue *q) { - if (q->poll_stat) - blk_stat_remove_callback(q, q->poll_cb); - blk_stat_free_callback(q->poll_cb); - blk_free_queue_stats(q->stats); - kfree(q->poll_stat); - if (queue_is_mq(q)) blk_mq_release(q); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index b01818f8e216..212a7f301e73 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -15,33 +15,8 @@ #include "blk-mq-tag.h" #include "blk-rq-qos.h" -static void print_stat(struct seq_file *m, struct blk_rq_stat *stat) -{ - if (stat->nr_samples) { - seq_printf(m, "samples=%d, mean=%llu, min=%llu, max=%llu", - stat->nr_samples, stat->mean, stat->min, stat->max); - } else { - seq_puts(m, "samples=0"); - } -} - static int queue_poll_stat_show(void *data, struct seq_file *m) { - struct request_queue *q = data; - int bucket; - - if (!q->poll_stat) - return 0; - - for (bucket = 0; bucket < (BLK_MQ_POLL_STATS_BKTS / 2); bucket++) { - seq_printf(m, "read (%d Bytes): ", 1 << (9 + bucket)); - print_stat(m, &q->poll_stat[2 * bucket]); - seq_puts(m, "\n"); - - seq_printf(m, "write (%d Bytes): ", 1 << (9 + bucket)); - print_stat(m, &q->poll_stat[2 * bucket + 1]); - seq_puts(m, "\n"); - } return 0; } @@ -282,7 +257,6 @@ static const char *const rqf_name[] = { RQF_NAME(STATS), RQF_NAME(SPECIAL_PAYLOAD), RQF_NAME(ZONE_WRITE_LOCKED), - RQF_NAME(MQ_POLL_SLEPT), RQF_NAME(TIMED_OUT), RQF_NAME(ELV), RQF_NAME(RESV), diff --git a/block/blk-mq.c b/block/blk-mq.c index a875b1cdff9b..4e30459df815 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -46,51 +46,15 @@ static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); -static void blk_mq_poll_stats_start(struct request_queue *q); -static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); - -static int blk_mq_poll_stats_bkt(const struct request *rq) -{ - int ddir, sectors, bucket; - - ddir = rq_data_dir(rq); - sectors = blk_rq_stats_sectors(rq); - - bucket = ddir + 2 * ilog2(sectors); - - if (bucket < 0) - return -1; - else if (bucket >= BLK_MQ_POLL_STATS_BKTS) - return ddir + BLK_MQ_POLL_STATS_BKTS - 2; - - return bucket; -} - -#define BLK_QC_T_SHIFT 16 -#define BLK_QC_T_INTERNAL (1U << 31) - static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q, blk_qc_t qc) { - return xa_load(&q->hctx_table, - (qc & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT); -} - -static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx, - blk_qc_t qc) -{ - unsigned int tag = qc & ((1U << BLK_QC_T_SHIFT) - 1); - - if (qc & BLK_QC_T_INTERNAL) - return blk_mq_tag_to_rq(hctx->sched_tags, tag); - return blk_mq_tag_to_rq(hctx->tags, tag); + return xa_load(&q->hctx_table, qc); } static inline blk_qc_t blk_rq_to_qc(struct request *rq) { - return (rq->mq_hctx->queue_num << BLK_QC_T_SHIFT) | - (rq->tag != -1 ? 
- rq->tag : (rq->internal_tag | BLK_QC_T_INTERNAL)); + return rq->mq_hctx->queue_num; } /* @@ -1038,10 +1002,8 @@ static inline void blk_account_io_start(struct request *req) static inline void __blk_mq_end_request_acct(struct request *rq, u64 now) { - if (rq->rq_flags & RQF_STATS) { - blk_mq_poll_stats_start(rq->q); + if (rq->rq_flags & RQF_STATS) blk_stat_add(rq, now); - } blk_mq_sched_completed_request(rq, now); blk_account_io_done(rq, now); @@ -4222,14 +4184,8 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, /* mark the queue as mq asap */ q->mq_ops = set->ops; - q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn, - blk_mq_poll_stats_bkt, - BLK_MQ_POLL_STATS_BKTS, q); - if (!q->poll_cb) - goto err_exit; - if (blk_mq_alloc_ctxs(q)) - goto err_poll; + goto err_exit; /* init q->mq_kobj and sw queues' kobjects */ blk_mq_sysfs_init(q); @@ -4257,11 +4213,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, q->nr_requests = set->queue_depth; - /* - * Default to classic polling - */ - q->poll_nsec = BLK_MQ_POLL_CLASSIC; - blk_mq_init_cpu_queues(q, set->nr_hw_queues); blk_mq_add_queue_tag_set(set, q); blk_mq_map_swqueue(q); @@ -4269,9 +4220,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, err_hctxs: blk_mq_release(q); -err_poll: - blk_stat_free_callback(q->poll_cb); - q->poll_cb = NULL; err_exit: q->mq_ops = NULL; return -ENOMEM; @@ -4768,138 +4716,8 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) } EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); -/* Enable polling stats and return whether they were already enabled. */ -static bool blk_poll_stats_enable(struct request_queue *q) -{ - if (q->poll_stat) - return true; - - return blk_stats_alloc_enable(q); -} - -static void blk_mq_poll_stats_start(struct request_queue *q) -{ - /* - * We don't arm the callback if polling stats are not enabled or the - * callback is already active. - */ - if (!q->poll_stat || blk_stat_is_active(q->poll_cb)) - return; - - blk_stat_activate_msecs(q->poll_cb, 100); -} - -static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb) -{ - struct request_queue *q = cb->data; - int bucket; - - for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) { - if (cb->stat[bucket].nr_samples) - q->poll_stat[bucket] = cb->stat[bucket]; - } -} - -static unsigned long blk_mq_poll_nsecs(struct request_queue *q, - struct request *rq) -{ - unsigned long ret = 0; - int bucket; - - /* - * If stats collection isn't on, don't sleep but turn it on for - * future users - */ - if (!blk_poll_stats_enable(q)) - return 0; - - /* - * As an optimistic guess, use half of the mean service time - * for this type of request. We can (and should) make this smarter. - * For instance, if the completion latencies are tight, we can - * get closer than just half the mean. This is especially - * important on devices where the completion latencies are longer - * than ~10 usec. We do use the stats for the relevant IO size - * if available which does lead to better estimates. 
- */ - bucket = blk_mq_poll_stats_bkt(rq); - if (bucket < 0) - return ret; - - if (q->poll_stat[bucket].nr_samples) - ret = (q->poll_stat[bucket].mean + 1) / 2; - - return ret; -} - -static bool blk_mq_poll_hybrid(struct request_queue *q, blk_qc_t qc) -{ - struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, qc); - struct request *rq = blk_qc_to_rq(hctx, qc); - struct hrtimer_sleeper hs; - enum hrtimer_mode mode; - unsigned int nsecs; - ktime_t kt; - - /* - * If a request has completed on queue that uses an I/O scheduler, we - * won't get back a request from blk_qc_to_rq. - */ - if (!rq || (rq->rq_flags & RQF_MQ_POLL_SLEPT)) - return false; - - /* - * If we get here, hybrid polling is enabled. Hence poll_nsec can be: - * - * 0: use half of prev avg - * >0: use this specific value - */ - if (q->poll_nsec > 0) - nsecs = q->poll_nsec; - else - nsecs = blk_mq_poll_nsecs(q, rq); - - if (!nsecs) - return false; - - rq->rq_flags |= RQF_MQ_POLL_SLEPT; - - /* - * This will be replaced with the stats tracking code, using - * 'avg_completion_time / 2' as the pre-sleep target. - */ - kt = nsecs; - - mode = HRTIMER_MODE_REL; - hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode); - hrtimer_set_expires(&hs.timer, kt); - - do { - if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE) - break; - set_current_state(TASK_UNINTERRUPTIBLE); - hrtimer_sleeper_start_expires(&hs, mode); - if (hs.task) - io_schedule(); - hrtimer_cancel(&hs.timer); - mode = HRTIMER_MODE_ABS; - } while (hs.task && !signal_pending(current)); - - __set_current_state(TASK_RUNNING); - destroy_hrtimer_on_stack(&hs.timer); - - /* - * If we sleep, have the caller restart the poll loop to reset the - * state. Like for the other success return cases, the caller is - * responsible for checking if the IO completed. If the IO isn't - * complete, we'll get called again and will go straight to the busy - * poll loop. 
- */ - return true; -} - -static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, - struct io_comp_batch *iob, unsigned int flags) +int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob, + unsigned int flags) { struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie); long state = get_current_state(); @@ -4926,17 +4744,6 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, return 0; } -int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob, - unsigned int flags) -{ - if (!(flags & BLK_POLL_NOSLEEP) && - q->poll_nsec != BLK_MQ_POLL_CLASSIC) { - if (blk_mq_poll_hybrid(q, cookie)) - return 1; - } - return blk_mq_poll_classic(q, cookie, iob, flags); -} - unsigned int blk_mq_rq_cpu(struct request *rq) { return rq->mq_ctx->cpu; diff --git a/block/blk-stat.c b/block/blk-stat.c index c6ca16abf911..74a1a8c32d86 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -231,21 +231,3 @@ void blk_free_queue_stats(struct blk_queue_stats *stats) kfree(stats); } - -bool blk_stats_alloc_enable(struct request_queue *q) -{ - struct blk_rq_stat *poll_stat; - - poll_stat = kcalloc(BLK_MQ_POLL_STATS_BKTS, sizeof(*poll_stat), - GFP_ATOMIC); - if (!poll_stat) - return false; - - if (cmpxchg(&q->poll_stat, NULL, poll_stat) != NULL) { - kfree(poll_stat); - return true; - } - - blk_stat_add_callback(q, q->poll_cb); - return false; -} diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index f1fce1c7fa44..1a743b4f2958 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -408,35 +408,12 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) static ssize_t queue_poll_delay_show(struct request_queue *q, char *page) { - int val; - - if (q->poll_nsec == BLK_MQ_POLL_CLASSIC) - val = BLK_MQ_POLL_CLASSIC; - else - val = q->poll_nsec / 1000; - - return sprintf(page, "%d\n", val); + return sprintf(page, "%d\n", -1); } static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page, size_t count) { - int err, val; - - if (!q->mq_ops || !q->mq_ops->poll) - return -EINVAL; - - err = kstrtoint(page, 10, &val); - if (err < 0) - return err; - - if (val == BLK_MQ_POLL_CLASSIC) - q->poll_nsec = BLK_MQ_POLL_CLASSIC; - else if (val >= 0) - q->poll_nsec = val * 1000; - else - return -EINVAL; - return count; } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index dd5ce1137f04..1dacb2c81fdd 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -57,8 +57,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) /* The per-zone write lock is held for this request */ #define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) -/* already slept for hybrid poll */ -#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20)) /* ->timeout has been called, don't expire again */ #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) /* queue has elevator attached */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d1aee08f8c18..6ede578dfbc6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -44,12 +44,6 @@ extern const struct device_type disk_type; extern struct device_type part_type; extern struct class block_class; -/* Must be consistent with blk_mq_poll_stats_bkt() */ -#define BLK_MQ_POLL_STATS_BKTS 16 - -/* Doing classic polling */ -#define BLK_MQ_POLL_CLASSIC -1 - /* * Maximum number of blkcg policies allowed to be registered concurrently. * Defined here to simplify include dependency. 
@@ -468,10 +462,6 @@ struct request_queue { #endif unsigned int rq_timeout; - int poll_nsec; - - struct blk_stat_callback *poll_cb; - struct blk_rq_stat *poll_stat; struct timer_list timeout; struct work_struct timeout_work; @@ -870,8 +860,6 @@ blk_status_t errno_to_blk_status(int errno); /* only poll the hardware once, don't continue until a completion was found */ #define BLK_POLL_ONESHOT (1 << 0) -/* do not sleep to wait for the expected completion time */ -#define BLK_POLL_NOSLEEP (1 << 1) int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags); int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob, unsigned int flags); diff --git a/io_uring/rw.c b/io_uring/rw.c index 4c233910e200..a099dc0543d9 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -1002,7 +1002,7 @@ void io_rw_fail(struct io_kiocb *req) int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) { struct io_wq_work_node *pos, *start, *prev; - unsigned int poll_flags = BLK_POLL_NOSLEEP; + unsigned int poll_flags = 0; DEFINE_IO_COMP_BATCH(iob); int nr_events = 0; -- cgit v1.2.3 From e165fb4dd6985b37215178e514a2e09dab8fef14 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 27 Mar 2023 00:34:26 -0700 Subject: block: open code __blk_account_io_start() There is only one caller for __blk_account_io_start(), the function is small enough to fit in its caller blk_account_io_start(). Remove the function and opencode in the its caller blk_account_io_start(). Signed-off-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20230327073427.4403-2-kch@nvidia.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index 4e30459df815..eb091f42cfc1 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -976,28 +976,24 @@ static inline void blk_account_io_done(struct request *req, u64 now) __blk_account_io_done(req, now); } -static void __blk_account_io_start(struct request *rq) -{ - /* - * All non-passthrough requests are created from a bio with one - * exception: when a flush command that is part of a flush sequence - * generated by the state machine in blk-flush.c is cloned onto the - * lower device by dm-multipath we can get here without a bio. - */ - if (rq->bio) - rq->part = rq->bio->bi_bdev; - else - rq->part = rq->q->disk->part0; - - part_stat_lock(); - update_io_ticks(rq->part, jiffies, false); - part_stat_unlock(); -} - static inline void blk_account_io_start(struct request *req) { - if (blk_do_io_stat(req)) - __blk_account_io_start(req); + if (blk_do_io_stat(req)) { + /* + * All non-passthrough requests are created from a bio with one + * exception: when a flush command that is part of a flush sequence + * generated by the state machine in blk-flush.c is cloned onto the + * lower device by dm-multipath we can get here without a bio. + */ + if (req->bio) + req->part = req->bio->bi_bdev; + else + req->part = req->q->disk->part0; + + part_stat_lock(); + update_io_ticks(req->part, jiffies, false); + part_stat_unlock(); + } } static inline void __blk_mq_end_request_acct(struct request *rq, u64 now) -- cgit v1.2.3 From 06965037ce942500c1ce3aa29ca217093a9c5720 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 27 Mar 2023 00:34:27 -0700 Subject: block: open code __blk_account_io_done() There is only one caller for __blk_account_io_done(), the function is small enough to fit in its caller blk_account_io_done(). 
Remove the function and opencode in the its caller blk_account_io_done(). Signed-off-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20230327073427.4403-2-kch@nvidia.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index eb091f42cfc1..52f8e0099c7f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -953,17 +953,6 @@ bool blk_update_request(struct request *req, blk_status_t error, } EXPORT_SYMBOL_GPL(blk_update_request); -static void __blk_account_io_done(struct request *req, u64 now) -{ - const int sgrp = op_stat_group(req_op(req)); - - part_stat_lock(); - update_io_ticks(req->part, jiffies, true); - part_stat_inc(req->part, ios[sgrp]); - part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns); - part_stat_unlock(); -} - static inline void blk_account_io_done(struct request *req, u64 now) { /* @@ -972,8 +961,15 @@ static inline void blk_account_io_done(struct request *req, u64 now) * containing request is enough. */ if (blk_do_io_stat(req) && req->part && - !(req->rq_flags & RQF_FLUSH_SEQ)) - __blk_account_io_done(req, now); + !(req->rq_flags & RQF_FLUSH_SEQ)) { + const int sgrp = op_stat_group(req_op(req)); + + part_stat_lock(); + update_io_ticks(req->part, jiffies, true); + part_stat_inc(req->part, ios[sgrp]); + part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns); + part_stat_unlock(); + } } static inline void blk_account_io_start(struct request *req) -- cgit v1.2.3 From 50947d7fe9fa6abe3ddc40769dfb02a51c58edb6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:38 +0200 Subject: blk-mq: don't plug for head insertions in blk_execute_rq_nowait Plugs never insert at head, so don't plug for head insertions. Fixes: 1c2d2fff6dc0 ("block: wire-up support for passthrough plugging") Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-2-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index 52f8e0099c7f..7908d19f1408 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1299,7 +1299,7 @@ void blk_execute_rq_nowait(struct request *rq, bool at_head) * device, directly accessing the plug instead of using blk_mq_plug() * should not have any consequences. */ - if (current->plug) + if (current->plug && !at_head) blk_add_rq_to_plug(current->plug, rq); else blk_mq_sched_insert_request(rq, at_head, true, false); -- cgit v1.2.3 From bebe84ebeec4d030aa65af58376305749762e5a0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:39 +0200 Subject: blk-mq: remove blk-mq-tag.h blk-mq-tag.h is always included by blk-mq.h, and causes recursive inclusion hell with further changes. Just merge it into blk-mq.h instead. 
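(For readers unfamiliar with the term, a generic sketch of the recursive-inclusion problem the message alludes to, with hypothetical headers rather than the real blk-mq ones: once two headers need pieces of each other, the include guards make the result depend on which header a source file pulls in first, and merging the two files, as this patch does, removes the cycle.)

/*
 * Hypothetical example, not from the kernel tree.  a.h and b.h include each
 * other; the include guards stop infinite recursion, but whichever header a
 * source file pulls in first ends up seeing the other one only partially,
 * so one of the two inclusion orders no longer builds.
 */

/* a.h */
#ifndef A_H
#define A_H
#include "b.h"
struct a {
	struct b *peer;
};
static inline int a_peer_val(struct a *x)
{
	return x->peer->v;	/* needs the full definition of struct b */
}
#endif

/* b.h */
#ifndef B_H
#define B_H
#include "a.h"
struct b {
	int v;
	struct a *owner;	/* needs at least a declaration of struct a */
};
#endif

/* user.c: #include "a.h" builds, but #include "b.h" alone does not,
 * because a.h is then expanded before struct b has been defined. */

(In the kernel case such a cycle would appear as soon as blk-mq-tag.h started needing definitions that live in blk-mq.h, which is why the smaller header is folded into the larger one here.)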
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-3-hch@lst.de Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 1 - block/blk-flush.c | 1 - block/blk-mq-debugfs.c | 1 - block/blk-mq-sched.c | 1 - block/blk-mq-sched.h | 1 - block/blk-mq-sysfs.c | 1 - block/blk-mq-tag.c | 1 - block/blk-mq-tag.h | 73 -------------------------------------------------- block/blk-mq.c | 1 - block/blk-mq.h | 61 ++++++++++++++++++++++++++++++++++++++++- block/blk-pm.c | 1 - block/kyber-iosched.c | 1 - block/mq-deadline.c | 1 - 13 files changed, 60 insertions(+), 85 deletions(-) delete mode 100644 block/blk-mq-tag.h (limited to 'block/blk-mq.c') diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index d9ed3108c17a..37f68c907ac0 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -129,7 +129,6 @@ #include "elevator.h" #include "blk.h" #include "blk-mq.h" -#include "blk-mq-tag.h" #include "blk-mq-sched.h" #include "bfq-iosched.h" #include "blk-wbt.h" diff --git a/block/blk-flush.c b/block/blk-flush.c index 53202eff545e..a13a1d6caa0f 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -73,7 +73,6 @@ #include "blk.h" #include "blk-mq.h" -#include "blk-mq-tag.h" #include "blk-mq-sched.h" /* PREFLUSH/FUA sequences */ diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 212a7f301e73..ace2bcf1cf9a 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -12,7 +12,6 @@ #include "blk-mq.h" #include "blk-mq-debugfs.h" #include "blk-mq-sched.h" -#include "blk-mq-tag.h" #include "blk-rq-qos.h" static int queue_poll_stat_show(void *data, struct seq_file *m) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 06b312c69114..1029e8eed5ee 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -15,7 +15,6 @@ #include "blk-mq.h" #include "blk-mq-debugfs.h" #include "blk-mq-sched.h" -#include "blk-mq-tag.h" #include "blk-wbt.h" /* diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 025013972453..65cab6e475be 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -4,7 +4,6 @@ #include "elevator.h" #include "blk-mq.h" -#include "blk-mq-tag.h" #define MAX_SCHED_RQ (16 * BLKDEV_DEFAULT_RQ) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 1b2b0d258e46..ba84caa868dd 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -13,7 +13,6 @@ #include #include "blk.h" #include "blk-mq.h" -#include "blk-mq-tag.h" static void blk_mq_sysfs_release(struct kobject *kobj) { diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 9eb968e14d31..1f8b065d72c5 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -14,7 +14,6 @@ #include "blk.h" #include "blk-mq.h" #include "blk-mq-sched.h" -#include "blk-mq-tag.h" /* * Recalculate wakeup batch when tag is shared by hctx. 
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h deleted file mode 100644 index 91ff37e3b43d..000000000000 --- a/block/blk-mq-tag.h +++ /dev/null @@ -1,73 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef INT_BLK_MQ_TAG_H -#define INT_BLK_MQ_TAG_H - -struct blk_mq_alloc_data; - -extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, - unsigned int reserved_tags, - int node, int alloc_policy); -extern void blk_mq_free_tags(struct blk_mq_tags *tags); -extern int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags, - struct sbitmap_queue *breserved_tags, - unsigned int queue_depth, - unsigned int reserved, - int node, int alloc_policy); - -extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); -unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags, - unsigned int *offset); -extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, - unsigned int tag); -void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags); -extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, - struct blk_mq_tags **tags, - unsigned int depth, bool can_grow); -extern void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, - unsigned int size); -extern void blk_mq_tag_update_sched_shared_tags(struct request_queue *q); - -extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); -void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn, - void *priv); -void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, - void *priv); - -static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt, - struct blk_mq_hw_ctx *hctx) -{ - if (!hctx) - return &bt->ws[0]; - return sbq_wait_ptr(bt, &hctx->wait_index); -} - -enum { - BLK_MQ_NO_TAG = -1U, - BLK_MQ_TAG_MIN = 1, - BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1, -}; - -extern void __blk_mq_tag_busy(struct blk_mq_hw_ctx *); -extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *); - -static inline void blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) -{ - if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) - __blk_mq_tag_busy(hctx); -} - -static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) -{ - if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) - return; - - __blk_mq_tag_idle(hctx); -} - -static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags, - unsigned int tag) -{ - return tag < tags->nr_reserved_tags; -} - -#endif diff --git a/block/blk-mq.c b/block/blk-mq.c index 7908d19f1408..545600be2063 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -37,7 +37,6 @@ #include "blk.h" #include "blk-mq.h" #include "blk-mq-debugfs.h" -#include "blk-mq-tag.h" #include "blk-pm.h" #include "blk-stat.h" #include "blk-mq-sched.h" diff --git a/block/blk-mq.h b/block/blk-mq.h index ef59fee62780..7a041fecea02 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -3,7 +3,6 @@ #define INT_BLK_MQ_H #include "blk-stat.h" -#include "blk-mq-tag.h" struct blk_mq_tag_set; @@ -30,6 +29,12 @@ struct blk_mq_ctx { struct kobject kobj; } ____cacheline_aligned_in_smp; +enum { + BLK_MQ_NO_TAG = -1U, + BLK_MQ_TAG_MIN = 1, + BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1, +}; + void blk_mq_submit_bio(struct bio *bio); int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob, unsigned int flags); @@ -164,6 +169,60 @@ struct blk_mq_alloc_data { struct blk_mq_hw_ctx *hctx; }; +struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, + unsigned int reserved_tags, int node, int alloc_policy); +void blk_mq_free_tags(struct blk_mq_tags *tags); +int 
blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags, + struct sbitmap_queue *breserved_tags, unsigned int queue_depth, + unsigned int reserved, int node, int alloc_policy); + +unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); +unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags, + unsigned int *offset); +void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, + unsigned int tag); +void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags); +int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, + struct blk_mq_tags **tags, unsigned int depth, bool can_grow); +void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, + unsigned int size); +void blk_mq_tag_update_sched_shared_tags(struct request_queue *q); + +void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); +void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn, + void *priv); +void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, + void *priv); + +static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt, + struct blk_mq_hw_ctx *hctx) +{ + if (!hctx) + return &bt->ws[0]; + return sbq_wait_ptr(bt, &hctx->wait_index); +} + +void __blk_mq_tag_busy(struct blk_mq_hw_ctx *); +void __blk_mq_tag_idle(struct blk_mq_hw_ctx *); + +static inline void blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) +{ + if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) + __blk_mq_tag_busy(hctx); +} + +static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) +{ + if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) + __blk_mq_tag_idle(hctx); +} + +static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags, + unsigned int tag) +{ + return tag < tags->nr_reserved_tags; +} + static inline bool blk_mq_is_shared_tags(unsigned int flags) { return flags & BLK_MQ_F_TAG_HCTX_SHARED; diff --git a/block/blk-pm.c b/block/blk-pm.c index 2dad62cc1572..8af5ee54feb4 100644 --- a/block/blk-pm.c +++ b/block/blk-pm.c @@ -5,7 +5,6 @@ #include #include #include "blk-mq.h" -#include "blk-mq-tag.h" /** * blk_pm_runtime_init - Block layer runtime PM initialization routine diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 2146969237bf..d0a4838ce7fc 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -19,7 +19,6 @@ #include "blk-mq.h" #include "blk-mq-debugfs.h" #include "blk-mq-sched.h" -#include "blk-mq-tag.h" #define CREATE_TRACE_POINTS #include diff --git a/block/mq-deadline.c b/block/mq-deadline.c index f10c2a0d18d4..a18526e11194 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -23,7 +23,6 @@ #include "blk.h" #include "blk-mq.h" #include "blk-mq-debugfs.h" -#include "blk-mq-tag.h" #include "blk-mq-sched.h" /* -- cgit v1.2.3 From 90110e04f265b95f59fbae09c228c5920b8a302f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:40 +0200 Subject: blk-mq: include in block/blk-mq.h block/blk-mq.h needs various definitions from , include it there instead of relying on the source files to include both. 
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-4-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-flush.c | 1 - block/blk-mq-cpumap.c | 1 - block/blk-mq-debugfs.c | 1 - block/blk-mq-pci.c | 1 - block/blk-mq-sched.c | 1 - block/blk-mq-sysfs.c | 1 - block/blk-mq-tag.c | 1 - block/blk-mq-virtio.c | 1 - block/blk-mq.c | 1 - block/blk-mq.h | 1 + block/blk-pm.c | 1 - block/blk-stat.c | 1 - block/blk-sysfs.c | 1 - block/kyber-iosched.c | 1 - block/mq-deadline.c | 1 - 15 files changed, 1 insertion(+), 14 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-flush.c b/block/blk-flush.c index a13a1d6caa0f..3c81b0af5b39 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -68,7 +68,6 @@ #include #include #include -#include #include #include "blk.h" diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index 0c612c19feb8..9638b25fd521 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -12,7 +12,6 @@ #include #include -#include #include "blk.h" #include "blk-mq.h" diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index ace2bcf1cf9a..d23a8554ec4a 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -7,7 +7,6 @@ #include #include -#include #include "blk.h" #include "blk-mq.h" #include "blk-mq-debugfs.h" diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c index a90b88fd1332..d47b5c73c9eb 100644 --- a/block/blk-mq-pci.c +++ b/block/blk-mq-pci.c @@ -4,7 +4,6 @@ */ #include #include -#include #include #include #include diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 1029e8eed5ee..c4b2d44b2d4e 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -6,7 +6,6 @@ */ #include #include -#include #include #include diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index ba84caa868dd..156e9bb07abf 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -10,7 +10,6 @@ #include #include -#include #include "blk.h" #include "blk-mq.h" diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 1f8b065d72c5..d6af9d431dc6 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -9,7 +9,6 @@ #include #include -#include #include #include "blk.h" #include "blk-mq.h" diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c index 6589f076a096..68d0945c0b08 100644 --- a/block/blk-mq-virtio.c +++ b/block/blk-mq-virtio.c @@ -3,7 +3,6 @@ * Copyright (c) 2016 Christoph Hellwig. 
*/ #include -#include #include #include #include diff --git a/block/blk-mq.c b/block/blk-mq.c index 545600be2063..29014a0f9f39 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -32,7 +32,6 @@ #include -#include #include #include "blk.h" #include "blk-mq.h" diff --git a/block/blk-mq.h b/block/blk-mq.h index 7a041fecea02..fa13b694ff27 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -2,6 +2,7 @@ #ifndef INT_BLK_MQ_H #define INT_BLK_MQ_H +#include #include "blk-stat.h" struct blk_mq_tag_set; diff --git a/block/blk-pm.c b/block/blk-pm.c index 8af5ee54feb4..6b72b2e03fc8 100644 --- a/block/blk-pm.c +++ b/block/blk-pm.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include #include #include #include diff --git a/block/blk-stat.c b/block/blk-stat.c index bc7e0ed81642..7ff76ae6c76a 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -6,7 +6,6 @@ */ #include #include -#include #include "blk-stat.h" #include "blk-mq.h" diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 1a743b4f2958..a64208583853 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include "blk.h" diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index d0a4838ce7fc..3f9fb2090c91 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -8,7 +8,6 @@ #include #include -#include #include #include diff --git a/block/mq-deadline.c b/block/mq-deadline.c index a18526e11194..af9e79050dcc 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 94aa228c2a2f6edc8e9b7c4745942ea4c5978977 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:41 +0200 Subject: blk-mq: move more logic into blk_mq_insert_requests Move all logic related to the direct insert (including the call to blk_mq_run_hw_queue) into blk_mq_insert_requests to streamline the code flow up a bit, and to allow marking blk_mq_try_issue_list_directly static. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-5-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 17 ++--------------- block/blk-mq.c | 20 ++++++++++++++++++-- block/blk-mq.h | 4 +--- 3 files changed, 21 insertions(+), 20 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c4b2d44b2d4e..811a9765b745 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -472,23 +472,10 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, e = hctx->queue->elevator; if (e) { e->type->ops.insert_requests(hctx, list, false); + blk_mq_run_hw_queue(hctx, run_queue_async); } else { - /* - * try to issue requests directly if the hw queue isn't - * busy in case of 'none' scheduler, and this way may save - * us one extra enqueue & dequeue to sw queue. 
- */ - if (!hctx->dispatch_busy && !run_queue_async) { - blk_mq_run_dispatch_ops(hctx->queue, - blk_mq_try_issue_list_directly(hctx, list)); - if (list_empty(list)) - goto out; - } - blk_mq_insert_requests(hctx, ctx, list); + blk_mq_insert_requests(hctx, ctx, list, run_queue_async); } - - blk_mq_run_hw_queue(hctx, run_queue_async); - out: percpu_ref_put(&q->q_usage_counter); } diff --git a/block/blk-mq.c b/block/blk-mq.c index 29014a0f9f39..536f001282bb 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -44,6 +44,9 @@ static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); +static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, + struct list_head *list); + static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q, blk_qc_t qc) { @@ -2495,12 +2498,23 @@ void blk_mq_request_bypass_insert(struct request *rq, bool at_head, } void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, - struct list_head *list) + struct list_head *list, bool run_queue_async) { struct request *rq; enum hctx_type type = hctx->type; + /* + * Try to issue requests directly if the hw queue isn't busy to save an + * extra enqueue & dequeue to the sw queue. + */ + if (!hctx->dispatch_busy && !run_queue_async) { + blk_mq_run_dispatch_ops(hctx->queue, + blk_mq_try_issue_list_directly(hctx, list)); + if (list_empty(list)) + goto out; + } + /* * preemption doesn't flush plug list, so it's possible ctx->cpu is * offline now @@ -2514,6 +2528,8 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, list_splice_tail_init(list, &ctx->rq_lists[type]); blk_mq_hctx_mark_pending(hctx, ctx); spin_unlock(&ctx->lock); +out: + blk_mq_run_hw_queue(hctx, run_queue_async); } static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, @@ -2755,7 +2771,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) } while (!rq_list_empty(plug->mq_list)); } -void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, +static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list) { int queued = 0; diff --git a/block/blk-mq.h b/block/blk-mq.h index fa13b694ff27..5d551f9ef2d6 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -70,9 +70,7 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, void blk_mq_request_bypass_insert(struct request *rq, bool at_head, bool run_queue); void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, - struct list_head *list); -void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, - struct list_head *list); + struct list_head *list, bool run_queue_async); /* * CPU -> queue mappings -- cgit v1.2.3 From 05a93117703e7b2e40fa9193e622079b30395bcc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:42 +0200 Subject: blk-mq: fold blk_mq_sched_insert_requests into blk_mq_dispatch_plug_list blk_mq_dispatch_plug_list is the only caller of blk_mq_sched_insert_requests, and it makes sense to just fold it there as blk_mq_sched_insert_requests isn't specific to I/O schedulers despite the name. 
Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-6-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 24 ------------------------ block/blk-mq-sched.h | 3 --- block/blk-mq.c | 17 +++++++++++++---- block/blk-mq.h | 2 -- block/mq-deadline.c | 2 +- 5 files changed, 14 insertions(+), 34 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 811a9765b745..9c0d231722d9 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -455,30 +455,6 @@ run: blk_mq_run_hw_queue(hctx, async); } -void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, - struct blk_mq_ctx *ctx, - struct list_head *list, bool run_queue_async) -{ - struct elevator_queue *e; - struct request_queue *q = hctx->queue; - - /* - * blk_mq_sched_insert_requests() is called from flush plug - * context only, and hold one usage counter to prevent queue - * from being released. - */ - percpu_ref_get(&q->q_usage_counter); - - e = hctx->queue->elevator; - if (e) { - e->type->ops.insert_requests(hctx, list, false); - blk_mq_run_hw_queue(hctx, run_queue_async); - } else { - blk_mq_insert_requests(hctx, ctx, list, run_queue_async); - } - percpu_ref_put(&q->q_usage_counter); -} - static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 65cab6e475be..1ec01e9934dc 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -18,9 +18,6 @@ void __blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx); void blk_mq_sched_insert_request(struct request *rq, bool at_head, bool run_queue, bool async); -void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, - struct blk_mq_ctx *ctx, - struct list_head *list, bool run_queue_async); void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx); diff --git a/block/blk-mq.c b/block/blk-mq.c index 536f001282bb..f1da4f053cc6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2497,9 +2497,9 @@ void blk_mq_request_bypass_insert(struct request *rq, bool at_head, blk_mq_run_hw_queue(hctx, false); } -void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, - struct list_head *list, bool run_queue_async) - +static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, + struct blk_mq_ctx *ctx, struct list_head *list, + bool run_queue_async) { struct request *rq; enum hctx_type type = hctx->type; @@ -2725,7 +2725,16 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched) plug->mq_list = requeue_list; trace_block_unplug(this_hctx->queue, depth, !from_sched); - blk_mq_sched_insert_requests(this_hctx, this_ctx, &list, from_sched); + + percpu_ref_get(&this_hctx->queue->q_usage_counter); + if (this_hctx->queue->elevator) { + this_hctx->queue->elevator->type->ops.insert_requests(this_hctx, + &list, false); + blk_mq_run_hw_queue(this_hctx, from_sched); + } else { + blk_mq_insert_requests(this_hctx, this_ctx, &list, from_sched); + } + percpu_ref_put(&this_hctx->queue->q_usage_counter); } void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) diff --git a/block/blk-mq.h b/block/blk-mq.h index 5d551f9ef2d6..bd7ae5e67a52 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -69,8 +69,6 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head); void blk_mq_request_bypass_insert(struct request *rq, bool at_head, bool run_queue); -void 
blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, - struct list_head *list, bool run_queue_async); /* * CPU -> queue mappings diff --git a/block/mq-deadline.c b/block/mq-deadline.c index af9e79050dcc..d62a3039c8e0 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -820,7 +820,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, } /* - * Called from blk_mq_sched_insert_request() or blk_mq_sched_insert_requests(). + * Called from blk_mq_sched_insert_request() or blk_mq_dispatch_plug_list(). */ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, struct list_head *list, bool at_head) -- cgit v1.2.3 From 2bd215df791b5d36ca1d20c07683100b48310cc2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:43 +0200 Subject: blk-mq: move blk_mq_sched_insert_request to blk-mq.c blk_mq_sched_insert_request is the main request insert helper and not directly I/O scheduler related. Move blk_mq_sched_insert_request to blk-mq.c, rename it to blk_mq_insert_request and mark it static. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-7-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 73 ------------------------------------------- block/blk-mq-sched.h | 3 -- block/blk-mq.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++---- block/mq-deadline.c | 2 +- 4 files changed, 82 insertions(+), 83 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 9c0d231722d9..f90fc42a88ca 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -382,79 +382,6 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge); -static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, - struct request *rq) -{ - /* - * dispatch flush and passthrough rq directly - * - * passthrough request has to be added to hctx->dispatch directly. - * For some reason, device may be in one situation which can't - * handle FS request, so STS_RESOURCE is always returned and the - * FS request will be added to hctx->dispatch. However passthrough - * request may be required at that time for fixing the problem. If - * passthrough request is added to scheduler queue, there isn't any - * chance to dispatch it given we prioritize requests in hctx->dispatch. - */ - if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq)) - return true; - - return false; -} - -void blk_mq_sched_insert_request(struct request *rq, bool at_head, - bool run_queue, bool async) -{ - struct request_queue *q = rq->q; - struct elevator_queue *e = q->elevator; - struct blk_mq_ctx *ctx = rq->mq_ctx; - struct blk_mq_hw_ctx *hctx = rq->mq_hctx; - - WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG)); - - if (blk_mq_sched_bypass_insert(hctx, rq)) { - /* - * Firstly normal IO request is inserted to scheduler queue or - * sw queue, meantime we add flush request to dispatch queue( - * hctx->dispatch) directly and there is at most one in-flight - * flush request for each hw queue, so it doesn't matter to add - * flush request to tail or front of the dispatch queue. - * - * Secondly in case of NCQ, flush request belongs to non-NCQ - * command, and queueing it will fail when there is any - * in-flight normal IO request(NCQ command). 
When adding flush - * rq to the front of hctx->dispatch, it is easier to introduce - * extra time to flush rq's latency because of S_SCHED_RESTART - * compared with adding to the tail of dispatch queue, then - * chance of flush merge is increased, and less flush requests - * will be issued to controller. It is observed that ~10% time - * is saved in blktests block/004 on disk attached to AHCI/NCQ - * drive when adding flush rq to the front of hctx->dispatch. - * - * Simply queue flush rq to the front of hctx->dispatch so that - * intensive flush workloads can benefit in case of NCQ HW. - */ - at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head; - blk_mq_request_bypass_insert(rq, at_head, false); - goto run; - } - - if (e) { - LIST_HEAD(list); - - list_add(&rq->queuelist, &list); - e->type->ops.insert_requests(hctx, &list, at_head); - } else { - spin_lock(&ctx->lock); - __blk_mq_insert_request(hctx, rq, at_head); - spin_unlock(&ctx->lock); - } - -run: - if (run_queue) - blk_mq_run_hw_queue(hctx, async); -} - static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 1ec01e9934dc..7c3cbad17f30 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -16,9 +16,6 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq, void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx); void __blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx); -void blk_mq_sched_insert_request(struct request *rq, bool at_head, - bool run_queue, bool async); - void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx); int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e); diff --git a/block/blk-mq.c b/block/blk-mq.c index f1da4f053cc6..78e54a64fe92 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -44,6 +44,8 @@ static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); +static void blk_mq_insert_request(struct request *rq, bool at_head, + bool run_queue, bool async); static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list); @@ -1303,7 +1305,7 @@ void blk_execute_rq_nowait(struct request *rq, bool at_head) if (current->plug && !at_head) blk_add_rq_to_plug(current->plug, rq); else - blk_mq_sched_insert_request(rq, at_head, true, false); + blk_mq_insert_request(rq, at_head, true, false); } EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); @@ -1364,7 +1366,7 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head) rq->end_io = blk_end_sync_rq; blk_account_io_start(rq); - blk_mq_sched_insert_request(rq, at_head, true, false); + blk_mq_insert_request(rq, at_head, true, false); if (blk_rq_is_poll(rq)) { blk_rq_poll_completion(rq, &wait.done); @@ -1438,13 +1440,13 @@ static void blk_mq_requeue_work(struct work_struct *work) if (rq->rq_flags & RQF_DONTPREP) blk_mq_request_bypass_insert(rq, false, false); else - blk_mq_sched_insert_request(rq, true, false, false); + blk_mq_insert_request(rq, true, false, false); } while (!list_empty(&rq_list)) { rq = list_entry(rq_list.next, struct request, queuelist); list_del_init(&rq->queuelist); - blk_mq_sched_insert_request(rq, false, false, false); + blk_mq_insert_request(rq, false, false, false); } blk_mq_run_hw_queues(q, false); @@ -2532,6 +2534,79 @@ out: blk_mq_run_hw_queue(hctx, run_queue_async); } +static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, + struct request *rq) +{ + /* + * dispatch flush and passthrough rq directly + * + * passthrough 
request has to be added to hctx->dispatch directly. + * For some reason, device may be in one situation which can't + * handle FS request, so STS_RESOURCE is always returned and the + * FS request will be added to hctx->dispatch. However passthrough + * request may be required at that time for fixing the problem. If + * passthrough request is added to scheduler queue, there isn't any + * chance to dispatch it given we prioritize requests in hctx->dispatch. + */ + if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq)) + return true; + + return false; +} + +static void blk_mq_insert_request(struct request *rq, bool at_head, + bool run_queue, bool async) +{ + struct request_queue *q = rq->q; + struct elevator_queue *e = q->elevator; + struct blk_mq_ctx *ctx = rq->mq_ctx; + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; + + WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG)); + + if (blk_mq_sched_bypass_insert(hctx, rq)) { + /* + * Firstly normal IO request is inserted to scheduler queue or + * sw queue, meantime we add flush request to dispatch queue( + * hctx->dispatch) directly and there is at most one in-flight + * flush request for each hw queue, so it doesn't matter to add + * flush request to tail or front of the dispatch queue. + * + * Secondly in case of NCQ, flush request belongs to non-NCQ + * command, and queueing it will fail when there is any + * in-flight normal IO request(NCQ command). When adding flush + * rq to the front of hctx->dispatch, it is easier to introduce + * extra time to flush rq's latency because of S_SCHED_RESTART + * compared with adding to the tail of dispatch queue, then + * chance of flush merge is increased, and less flush requests + * will be issued to controller. It is observed that ~10% time + * is saved in blktests block/004 on disk attached to AHCI/NCQ + * drive when adding flush rq to the front of hctx->dispatch. + * + * Simply queue flush rq to the front of hctx->dispatch so that + * intensive flush workloads can benefit in case of NCQ HW. + */ + at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head; + blk_mq_request_bypass_insert(rq, at_head, false); + goto run; + } + + if (e) { + LIST_HEAD(list); + + list_add(&rq->queuelist, &list); + e->type->ops.insert_requests(hctx, &list, at_head); + } else { + spin_lock(&ctx->lock); + __blk_mq_insert_request(hctx, rq, at_head); + spin_unlock(&ctx->lock); + } + +run: + if (run_queue) + blk_mq_run_hw_queue(hctx, async); +} + static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, unsigned int nr_segs) { @@ -2623,7 +2698,7 @@ insert: if (bypass_insert) return BLK_STS_RESOURCE; - blk_mq_sched_insert_request(rq, false, run_queue, false); + blk_mq_insert_request(rq, false, run_queue, false); return BLK_STS_OK; } @@ -2975,7 +3050,7 @@ void blk_mq_submit_bio(struct bio *bio) else if ((rq->rq_flags & RQF_ELV) || (rq->mq_hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) - blk_mq_sched_insert_request(rq, false, true, true); + blk_mq_insert_request(rq, false, true, true); else blk_mq_run_dispatch_ops(rq->q, blk_mq_try_issue_directly(rq->mq_hctx, rq)); diff --git a/block/mq-deadline.c b/block/mq-deadline.c index d62a3039c8e0..ceae477c3571 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -820,7 +820,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, } /* - * Called from blk_mq_sched_insert_request() or blk_mq_dispatch_plug_list(). + * Called from blk_mq_insert_request() or blk_mq_dispatch_plug_list(). 
*/ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, struct list_head *list, bool at_head) -- cgit v1.2.3 From a88db1e0003eda8adbe3c499b81f736d8065b952 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:44 +0200 Subject: blk-mq: fold __blk_mq_insert_request into blk_mq_insert_request There is no good point in keeping the __blk_mq_insert_request around for two function calls and a singler caller. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-8-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 14 ++------------ block/blk-mq.h | 2 -- 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index 78e54a64fe92..103caf1bae27 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2463,17 +2463,6 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx, list_add_tail(&rq->queuelist, &ctx->rq_lists[type]); } -void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, - bool at_head) -{ - struct blk_mq_ctx *ctx = rq->mq_ctx; - - lockdep_assert_held(&ctx->lock); - - __blk_mq_insert_req_list(hctx, rq, at_head); - blk_mq_hctx_mark_pending(hctx, ctx); -} - /** * blk_mq_request_bypass_insert - Insert a request at dispatch list. * @rq: Pointer to request to be inserted. @@ -2598,7 +2587,8 @@ static void blk_mq_insert_request(struct request *rq, bool at_head, e->type->ops.insert_requests(hctx, &list, at_head); } else { spin_lock(&ctx->lock); - __blk_mq_insert_request(hctx, rq, at_head); + __blk_mq_insert_req_list(hctx, rq, at_head); + blk_mq_hctx_mark_pending(hctx, ctx); spin_unlock(&ctx->lock); } diff --git a/block/blk-mq.h b/block/blk-mq.h index bd7ae5e67a52..e2d59e33046e 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -65,8 +65,6 @@ void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set, /* * Internal helpers for request insertion into sw queues */ -void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, - bool at_head); void blk_mq_request_bypass_insert(struct request *rq, bool at_head, bool run_queue); -- cgit v1.2.3 From 4ec5c0553c33e42f2d650785309de17d4cb8f5ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:45 +0200 Subject: blk-mq: fold __blk_mq_insert_req_list into blk_mq_insert_request Remove this very small helper and fold it into the only caller. Note that this moves the trace_block_rq_insert out of ctx->lock, matching the other calls to this tracepoint. 
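The same idea in miniature: a tracing hook that only reads the request can run before the per-context lock is taken, so the critical section covers nothing but the list manipulation. Below is a plain userspace sketch of that pattern, not blk-mq code; every name in it is invented for illustration.

#include <pthread.h>
#include <stdio.h>

struct node {
	struct node *next;
	int id;
};

static struct node *queue_head;
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for trace_block_rq_insert(): it only reads the node, so it
 * does not need to run under queue_lock. */
static void trace_insert(const struct node *n)
{
	printf("insert node %d\n", n->id);
}

static void queue_insert(struct node *n, int at_head)
{
	trace_insert(n);	/* tracing hoisted out of the lock */

	pthread_mutex_lock(&queue_lock);
	if (at_head || !queue_head) {
		n->next = queue_head;
		queue_head = n;
	} else {
		struct node *tail = queue_head;

		while (tail->next)
			tail = tail->next;
		n->next = NULL;
		tail->next = n;
	}
	pthread_mutex_unlock(&queue_lock);
}

int main(void)
{
	struct node a = { .id = 1 }, b = { .id = 2 };

	queue_insert(&a, 0);
	queue_insert(&b, 1);
	return 0;
}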
Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-9-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index 103caf1bae27..7e9f7d00452f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2446,23 +2446,6 @@ static void blk_mq_run_work_fn(struct work_struct *work) __blk_mq_run_hw_queue(hctx); } -static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx, - struct request *rq, - bool at_head) -{ - struct blk_mq_ctx *ctx = rq->mq_ctx; - enum hctx_type type = hctx->type; - - lockdep_assert_held(&ctx->lock); - - trace_block_rq_insert(rq); - - if (at_head) - list_add(&rq->queuelist, &ctx->rq_lists[type]); - else - list_add_tail(&rq->queuelist, &ctx->rq_lists[type]); -} - /** * blk_mq_request_bypass_insert - Insert a request at dispatch list. * @rq: Pointer to request to be inserted. @@ -2586,8 +2569,14 @@ static void blk_mq_insert_request(struct request *rq, bool at_head, list_add(&rq->queuelist, &list); e->type->ops.insert_requests(hctx, &list, at_head); } else { + trace_block_rq_insert(rq); + spin_lock(&ctx->lock); - __blk_mq_insert_req_list(hctx, rq, at_head); + if (at_head) + list_add(&rq->queuelist, &ctx->rq_lists[hctx->type]); + else + list_add_tail(&rq->queuelist, + &ctx->rq_lists[hctx->type]); blk_mq_hctx_mark_pending(hctx, ctx); spin_unlock(&ctx->lock); } -- cgit v1.2.3 From 53548d2a945eb2c277332c66f57505881392e5a9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:47 +0200 Subject: blk-mq: refactor passthrough vs flush handling in blk_mq_insert_request While both passthrough and flush requests call directly into blk_mq_request_bypass_insert, the parameters aren't the same. Split the handling into two separate conditionals and turn the whole function into an if/elif/elif/else flow instead of the gotos. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-11-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 50 ++++++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 32 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index 7e9f7d00452f..c3de03217f4f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2506,37 +2506,26 @@ out: blk_mq_run_hw_queue(hctx, run_queue_async); } -static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, - struct request *rq) -{ - /* - * dispatch flush and passthrough rq directly - * - * passthrough request has to be added to hctx->dispatch directly. - * For some reason, device may be in one situation which can't - * handle FS request, so STS_RESOURCE is always returned and the - * FS request will be added to hctx->dispatch. However passthrough - * request may be required at that time for fixing the problem. If - * passthrough request is added to scheduler queue, there isn't any - * chance to dispatch it given we prioritize requests in hctx->dispatch. 
- */ - if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq)) - return true; - - return false; -} - static void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, bool async) { struct request_queue *q = rq->q; - struct elevator_queue *e = q->elevator; struct blk_mq_ctx *ctx = rq->mq_ctx; struct blk_mq_hw_ctx *hctx = rq->mq_hctx; - WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG)); - - if (blk_mq_sched_bypass_insert(hctx, rq)) { + if (blk_rq_is_passthrough(rq)) { + /* + * Passthrough request have to be added to hctx->dispatch + * directly. The device may be in a situation where it can't + * handle FS request, and always returns BLK_STS_RESOURCE for + * them, which gets them added to hctx->dispatch. + * + * If a passthrough request is required to unblock the queues, + * and it is added to the scheduler queue, there is no chance to + * dispatch it given we prioritize requests in hctx->dispatch. + */ + blk_mq_request_bypass_insert(rq, at_head, false); + } else if (rq->rq_flags & RQF_FLUSH_SEQ) { /* * Firstly normal IO request is inserted to scheduler queue or * sw queue, meantime we add flush request to dispatch queue( @@ -2558,16 +2547,14 @@ static void blk_mq_insert_request(struct request *rq, bool at_head, * Simply queue flush rq to the front of hctx->dispatch so that * intensive flush workloads can benefit in case of NCQ HW. */ - at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head; - blk_mq_request_bypass_insert(rq, at_head, false); - goto run; - } - - if (e) { + blk_mq_request_bypass_insert(rq, true, false); + } else if (q->elevator) { LIST_HEAD(list); + WARN_ON_ONCE(rq->tag != BLK_MQ_NO_TAG); + list_add(&rq->queuelist, &list); - e->type->ops.insert_requests(hctx, &list, at_head); + q->elevator->type->ops.insert_requests(hctx, &list, at_head); } else { trace_block_rq_insert(rq); @@ -2581,7 +2568,6 @@ static void blk_mq_insert_request(struct request *rq, bool at_head, spin_unlock(&ctx->lock); } -run: if (run_queue) blk_mq_run_hw_queue(hctx, async); } -- cgit v1.2.3 From a1e948b81ad21d635b99c1284f945423cb02b4c4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:48 +0200 Subject: blk-mq: refactor the DONTPREP/SOFTBARRIER andling in blk_mq_requeue_work Split the RQF_DONTPREP and RQF_SOFTBARRIER in separate branches to make the code more readable. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-12-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index c3de03217f4f..d17871c237f7 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1427,20 +1427,21 @@ static void blk_mq_requeue_work(struct work_struct *work) spin_unlock_irq(&q->requeue_lock); list_for_each_entry_safe(rq, next, &rq_list, queuelist) { - if (!(rq->rq_flags & (RQF_SOFTBARRIER | RQF_DONTPREP))) - continue; - - rq->rq_flags &= ~RQF_SOFTBARRIER; - list_del_init(&rq->queuelist); /* - * If RQF_DONTPREP, rq has contained some driver specific - * data, so insert it to hctx dispatch list to avoid any - * merge. + * If RQF_DONTPREP ist set, the request has been started by the + * driver already and might have driver-specific data allocated + * already. Insert it into the hctx dispatch list to avoid + * block layer merges for the request. 
*/ - if (rq->rq_flags & RQF_DONTPREP) + if (rq->rq_flags & RQF_DONTPREP) { + rq->rq_flags &= ~RQF_SOFTBARRIER; + list_del_init(&rq->queuelist); blk_mq_request_bypass_insert(rq, false, false); - else + } else if (rq->rq_flags & RQF_SOFTBARRIER) { + rq->rq_flags &= ~RQF_SOFTBARRIER; + list_del_init(&rq->queuelist); blk_mq_insert_request(rq, true, false, false); + } } while (!list_empty(&rq_list)) { -- cgit v1.2.3 From 2b71b8770710f2913e29053f01b6c7df1a5c7f75 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:49 +0200 Subject: blk-mq: factor out a blk_mq_get_budget_and_tag helper Factor out a helper from __blk_mq_try_issue_directly in preparation of folding that function into its two callers. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-13-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index d17871c237f7..5cb7ebefc88c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2624,13 +2624,27 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, return ret; } +static bool blk_mq_get_budget_and_tag(struct request *rq) +{ + int budget_token; + + budget_token = blk_mq_get_dispatch_budget(rq->q); + if (budget_token < 0) + return false; + blk_mq_set_rq_budget_token(rq, budget_token); + if (!blk_mq_get_driver_tag(rq)) { + blk_mq_put_dispatch_budget(rq->q, budget_token); + return false; + } + return true; +} + static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, bool bypass_insert, bool last) { struct request_queue *q = rq->q; bool run_queue = true; - int budget_token; /* * RCU or SRCU read lock is needed before checking quiesced flag. @@ -2648,16 +2662,8 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, if ((rq->rq_flags & RQF_ELV) && !bypass_insert) goto insert; - budget_token = blk_mq_get_dispatch_budget(q); - if (budget_token < 0) - goto insert; - - blk_mq_set_rq_budget_token(rq, budget_token); - - if (!blk_mq_get_driver_tag(rq)) { - blk_mq_put_dispatch_budget(q, budget_token); + if (!blk_mq_get_budget_and_tag(rq)) goto insert; - } return __blk_mq_issue_directly(hctx, rq, last); insert: -- cgit v1.2.3 From e1f44ac0d7f48ec44a1eacfe637e545c408ede40 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:50 +0200 Subject: blk-mq: fold __blk_mq_try_issue_directly into its two callers Due to the wildly different behavior based on the bypass_insert argument, not a whole lot of code in __blk_mq_try_issue_directly is actually shared between blk_mq_try_issue_directly and blk_mq_request_issue_directly. Remove __blk_mq_try_issue_directly and fold the code into the two callers instead. 
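The motivation is easier to see in a stripped-down form: a helper whose behaviour pivots on a bypass flag is really two functions wearing one name. A rough userspace sketch of the post-refactor shape, with every name invented for illustration:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for budget/tag acquisition and actual issue. */
static bool get_resources(void)	{ return true; }
static int issue(int req)	{ printf("issued %d\n", req); return 0; }
static void queue_for_later(int req)	{ printf("queued %d\n", req); }

/* Caller 1 wants "try now, otherwise fall back to queueing". */
static void try_issue_directly(int req)
{
	if (!get_resources()) {
		queue_for_later(req);
		return;
	}
	issue(req);
}

/* Caller 2 wants "issue or report failure" and never queues itself. */
static int request_issue_directly(int req)
{
	if (!get_resources())
		return -1;	/* the caller decides what to do next */
	return issue(req);
}

int main(void)
{
	try_issue_directly(1);
	return request_issue_directly(2);
}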
Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-14-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 72 +++++++++++++++++++++++++--------------------------------- 1 file changed, 31 insertions(+), 41 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index 5cb7ebefc88c..c5b42476337c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2639,42 +2639,6 @@ static bool blk_mq_get_budget_and_tag(struct request *rq) return true; } -static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, - struct request *rq, - bool bypass_insert, bool last) -{ - struct request_queue *q = rq->q; - bool run_queue = true; - - /* - * RCU or SRCU read lock is needed before checking quiesced flag. - * - * When queue is stopped or quiesced, ignore 'bypass_insert' from - * blk_mq_request_issue_directly(), and return BLK_STS_OK to caller, - * and avoid driver to try to dispatch again. - */ - if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) { - run_queue = false; - bypass_insert = false; - goto insert; - } - - if ((rq->rq_flags & RQF_ELV) && !bypass_insert) - goto insert; - - if (!blk_mq_get_budget_and_tag(rq)) - goto insert; - - return __blk_mq_issue_directly(hctx, rq, last); -insert: - if (bypass_insert) - return BLK_STS_RESOURCE; - - blk_mq_insert_request(rq, false, run_queue, false); - - return BLK_STS_OK; -} - /** * blk_mq_try_issue_directly - Try to send a request directly to device driver. * @hctx: Pointer of the associated hardware queue. @@ -2688,18 +2652,44 @@ insert: static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq) { - blk_status_t ret = - __blk_mq_try_issue_directly(hctx, rq, false, true); + blk_status_t ret; + + if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { + blk_mq_insert_request(rq, false, false, false); + return; + } + + if ((rq->rq_flags & RQF_ELV) || !blk_mq_get_budget_and_tag(rq)) { + blk_mq_insert_request(rq, false, true, false); + return; + } - if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) + ret = __blk_mq_issue_directly(hctx, rq, true); + switch (ret) { + case BLK_STS_OK: + break; + case BLK_STS_RESOURCE: + case BLK_STS_DEV_RESOURCE: blk_mq_request_bypass_insert(rq, false, true); - else if (ret != BLK_STS_OK) + break; + default: blk_mq_end_request(rq, ret); + break; + } } static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last) { - return __blk_mq_try_issue_directly(rq->mq_hctx, rq, true, last); + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; + + if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { + blk_mq_insert_request(rq, false, false, false); + return BLK_STS_OK; + } + + if (!blk_mq_get_budget_and_tag(rq)) + return BLK_STS_RESOURCE; + return __blk_mq_issue_directly(hctx, rq, last); } static void blk_mq_plug_issue_direct(struct blk_plug *plug) -- cgit v1.2.3 From f0dbe6e88e1bf4003ef778527b975ff60dbdd35a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:51 +0200 Subject: blk-mq: don't run the hw_queue from blk_mq_insert_request blk_mq_insert_request takes two bool parameters to control how to run the queue at the end of the function. Move the blk_mq_run_hw_queue call to the callers that want it instead. 
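Decoupling "queue the request" from "run the hardware queue" lets a caller that inserts many requests, such as blk_mq_requeue_work(), kick the queues once at the end instead of once per request. A minimal userspace sketch of that calling convention; the names are placeholders, not block layer APIs:

#include <stdio.h>

#define NR_REQS 3

/* Inserting only queues the request ... */
static void insert_request(int req)
{
	printf("insert %d\n", req);
}

/* ... and running the queues is an explicit, separate step. */
static void run_queues(void)
{
	printf("run hardware queues\n");
}

int main(void)
{
	/* Batch pattern: many inserts, one run at the end. */
	for (int req = 0; req < NR_REQS; req++)
		insert_request(req);
	run_queues();
	return 0;
}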
Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-15-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 56 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 24 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index c5b42476337c..d1941db1ad3c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -44,8 +44,7 @@ static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); -static void blk_mq_insert_request(struct request *rq, bool at_head, - bool run_queue, bool async); +static void blk_mq_insert_request(struct request *rq, bool at_head); static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list); @@ -1292,6 +1291,8 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) */ void blk_execute_rq_nowait(struct request *rq, bool at_head) { + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; + WARN_ON(irqs_disabled()); WARN_ON(!blk_rq_is_passthrough(rq)); @@ -1302,10 +1303,13 @@ void blk_execute_rq_nowait(struct request *rq, bool at_head) * device, directly accessing the plug instead of using blk_mq_plug() * should not have any consequences. */ - if (current->plug && !at_head) + if (current->plug && !at_head) { blk_add_rq_to_plug(current->plug, rq); - else - blk_mq_insert_request(rq, at_head, true, false); + return; + } + + blk_mq_insert_request(rq, at_head); + blk_mq_run_hw_queue(hctx, false); } EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); @@ -1355,6 +1359,7 @@ static void blk_rq_poll_completion(struct request *rq, struct completion *wait) */ blk_status_t blk_execute_rq(struct request *rq, bool at_head) { + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; struct blk_rq_wait wait = { .done = COMPLETION_INITIALIZER_ONSTACK(wait.done), }; @@ -1366,7 +1371,8 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head) rq->end_io = blk_end_sync_rq; blk_account_io_start(rq); - blk_mq_insert_request(rq, at_head, true, false); + blk_mq_insert_request(rq, at_head); + blk_mq_run_hw_queue(hctx, false); if (blk_rq_is_poll(rq)) { blk_rq_poll_completion(rq, &wait.done); @@ -1440,14 +1446,14 @@ static void blk_mq_requeue_work(struct work_struct *work) } else if (rq->rq_flags & RQF_SOFTBARRIER) { rq->rq_flags &= ~RQF_SOFTBARRIER; list_del_init(&rq->queuelist); - blk_mq_insert_request(rq, true, false, false); + blk_mq_insert_request(rq, true); } } while (!list_empty(&rq_list)) { rq = list_entry(rq_list.next, struct request, queuelist); list_del_init(&rq->queuelist); - blk_mq_insert_request(rq, false, false, false); + blk_mq_insert_request(rq, false); } blk_mq_run_hw_queues(q, false); @@ -2507,8 +2513,7 @@ out: blk_mq_run_hw_queue(hctx, run_queue_async); } -static void blk_mq_insert_request(struct request *rq, bool at_head, - bool run_queue, bool async) +static void blk_mq_insert_request(struct request *rq, bool at_head) { struct request_queue *q = rq->q; struct blk_mq_ctx *ctx = rq->mq_ctx; @@ -2568,9 +2573,6 @@ static void blk_mq_insert_request(struct request *rq, bool at_head, blk_mq_hctx_mark_pending(hctx, ctx); spin_unlock(&ctx->lock); } - - if (run_queue) - blk_mq_run_hw_queue(hctx, async); } static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, @@ -2655,12 +2657,13 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, blk_status_t ret; if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { - blk_mq_insert_request(rq, false, false, false); + 
blk_mq_insert_request(rq, false); return; } if ((rq->rq_flags & RQF_ELV) || !blk_mq_get_budget_and_tag(rq)) { - blk_mq_insert_request(rq, false, true, false); + blk_mq_insert_request(rq, false); + blk_mq_run_hw_queue(hctx, false); return; } @@ -2683,7 +2686,7 @@ static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last) struct blk_mq_hw_ctx *hctx = rq->mq_hctx; if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { - blk_mq_insert_request(rq, false, false, false); + blk_mq_insert_request(rq, false); return BLK_STS_OK; } @@ -2963,6 +2966,7 @@ void blk_mq_submit_bio(struct bio *bio) struct request_queue *q = bdev_get_queue(bio->bi_bdev); struct blk_plug *plug = blk_mq_plug(bio); const int is_sync = op_is_sync(bio->bi_opf); + struct blk_mq_hw_ctx *hctx; struct request *rq; unsigned int nr_segs = 1; blk_status_t ret; @@ -3007,15 +3011,19 @@ void blk_mq_submit_bio(struct bio *bio) return; } - if (plug) + if (plug) { blk_add_rq_to_plug(plug, rq); - else if ((rq->rq_flags & RQF_ELV) || - (rq->mq_hctx->dispatch_busy && - (q->nr_hw_queues == 1 || !is_sync))) - blk_mq_insert_request(rq, false, true, true); - else - blk_mq_run_dispatch_ops(rq->q, - blk_mq_try_issue_directly(rq->mq_hctx, rq)); + return; + } + + hctx = rq->mq_hctx; + if ((rq->rq_flags & RQF_ELV) || + (hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) { + blk_mq_insert_request(rq, false); + blk_mq_run_hw_queue(hctx, true); + } else { + blk_mq_run_dispatch_ops(q, blk_mq_try_issue_directly(hctx, rq)); + } } #ifdef CONFIG_BLK_MQ_STACKING -- cgit v1.2.3 From 2394395cd598f6404c57ae0b63afb5d37e94924d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:52 +0200 Subject: blk-mq: don't run the hw_queue from blk_mq_request_bypass_insert blk_mq_request_bypass_insert takes a bool parameter to control how to run the queue at the end of the function. Move the blk_mq_run_hw_queue call to the callers that want it instead. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-16-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-flush.c | 4 +++- block/blk-mq.c | 24 +++++++++++------------- block/blk-mq.h | 3 +-- 3 files changed, 15 insertions(+), 16 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-flush.c b/block/blk-flush.c index 62ef98f604fb..3561aba8cc23 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -389,6 +389,7 @@ void blk_insert_flush(struct request *rq) unsigned long fflags = q->queue_flags; /* may change, cache */ unsigned int policy = blk_flush_policy(fflags, rq); struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx); + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; /* * @policy now records what operations need to be done. 
Adjust @@ -425,7 +426,8 @@ void blk_insert_flush(struct request *rq) */ if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { - blk_mq_request_bypass_insert(rq, false, true); + blk_mq_request_bypass_insert(rq, false); + blk_mq_run_hw_queue(hctx, false); return; } diff --git a/block/blk-mq.c b/block/blk-mq.c index d1941db1ad3c..cde7ba9c39bf 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1442,7 +1442,7 @@ static void blk_mq_requeue_work(struct work_struct *work) if (rq->rq_flags & RQF_DONTPREP) { rq->rq_flags &= ~RQF_SOFTBARRIER; list_del_init(&rq->queuelist); - blk_mq_request_bypass_insert(rq, false, false); + blk_mq_request_bypass_insert(rq, false); } else if (rq->rq_flags & RQF_SOFTBARRIER) { rq->rq_flags &= ~RQF_SOFTBARRIER; list_del_init(&rq->queuelist); @@ -2457,13 +2457,11 @@ static void blk_mq_run_work_fn(struct work_struct *work) * blk_mq_request_bypass_insert - Insert a request at dispatch list. * @rq: Pointer to request to be inserted. * @at_head: true if the request should be inserted at the head of the list. - * @run_queue: If we should run the hardware queue after inserting the request. * * Should only be used carefully, when the caller knows we want to * bypass a potential IO scheduler on the target device. */ -void blk_mq_request_bypass_insert(struct request *rq, bool at_head, - bool run_queue) +void blk_mq_request_bypass_insert(struct request *rq, bool at_head) { struct blk_mq_hw_ctx *hctx = rq->mq_hctx; @@ -2473,9 +2471,6 @@ void blk_mq_request_bypass_insert(struct request *rq, bool at_head, else list_add_tail(&rq->queuelist, &hctx->dispatch); spin_unlock(&hctx->lock); - - if (run_queue) - blk_mq_run_hw_queue(hctx, false); } static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, @@ -2530,7 +2525,7 @@ static void blk_mq_insert_request(struct request *rq, bool at_head) * and it is added to the scheduler queue, there is no chance to * dispatch it given we prioritize requests in hctx->dispatch. */ - blk_mq_request_bypass_insert(rq, at_head, false); + blk_mq_request_bypass_insert(rq, at_head); } else if (rq->rq_flags & RQF_FLUSH_SEQ) { /* * Firstly normal IO request is inserted to scheduler queue or @@ -2553,7 +2548,7 @@ static void blk_mq_insert_request(struct request *rq, bool at_head) * Simply queue flush rq to the front of hctx->dispatch so that * intensive flush workloads can benefit in case of NCQ HW. 
*/ - blk_mq_request_bypass_insert(rq, true, false); + blk_mq_request_bypass_insert(rq, true); } else if (q->elevator) { LIST_HEAD(list); @@ -2673,7 +2668,8 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, break; case BLK_STS_RESOURCE: case BLK_STS_DEV_RESOURCE: - blk_mq_request_bypass_insert(rq, false, true); + blk_mq_request_bypass_insert(rq, false); + blk_mq_run_hw_queue(hctx, false); break; default: blk_mq_end_request(rq, ret); @@ -2720,7 +2716,8 @@ static void blk_mq_plug_issue_direct(struct blk_plug *plug) break; case BLK_STS_RESOURCE: case BLK_STS_DEV_RESOURCE: - blk_mq_request_bypass_insert(rq, false, true); + blk_mq_request_bypass_insert(rq, false); + blk_mq_run_hw_queue(hctx, false); goto out; default: blk_mq_end_request(rq, ret); @@ -2838,8 +2835,9 @@ static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, break; case BLK_STS_RESOURCE: case BLK_STS_DEV_RESOURCE: - blk_mq_request_bypass_insert(rq, false, - list_empty(list)); + blk_mq_request_bypass_insert(rq, false); + if (list_empty(list)) + blk_mq_run_hw_queue(hctx, false); goto out; default: blk_mq_end_request(rq, ret); diff --git a/block/blk-mq.h b/block/blk-mq.h index e2d59e33046e..f30f99166f38 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -65,8 +65,7 @@ void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set, /* * Internal helpers for request insertion into sw queues */ -void blk_mq_request_bypass_insert(struct request *rq, bool at_head, - bool run_queue); +void blk_mq_request_bypass_insert(struct request *rq, bool at_head); /* * CPU -> queue mappings -- cgit v1.2.3 From 214a441805b8cc090930fb00193125e22466a95a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:53 +0200 Subject: blk-mq: don't kick the requeue_list in blk_mq_add_to_requeue_list blk_mq_add_to_requeue_list takes a bool parameter to control how to kick the requeue list at the end of the function. Move the call to blk_mq_kick_requeue_list to the callers that want it instead. 
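Driver-facing code keeps both steps: as the hunk below shows, the exported blk_mq_requeue_request() still adds the request and then kicks the list when asked. A rough sketch of how a driver completion path might use it; the helper itself is hypothetical and assumes only the declarations from <linux/blk-mq.h>:

#include <linux/blk-mq.h>

/*
 * Hypothetical completion helper (sketch only): on a retryable error,
 * put the request back on the requeue list and kick the list so that
 * blk_mq_requeue_work() reinserts it and reruns the hardware queues.
 */
static void example_complete_rq(struct request *rq, bool retryable)
{
	if (retryable) {
		blk_mq_requeue_request(rq, true);	/* add + kick */
		return;
	}
	blk_mq_end_request(rq, BLK_STS_OK);
}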
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-17-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-flush.c | 6 ++++-- block/blk-mq.c | 13 +++++++------ block/blk-mq.h | 3 +-- 3 files changed, 12 insertions(+), 10 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-flush.c b/block/blk-flush.c index 3561aba8cc23..015982bd2f7c 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -188,7 +188,8 @@ static void blk_flush_complete_seq(struct request *rq, case REQ_FSEQ_DATA: list_move_tail(&rq->flush.list, &fq->flush_data_in_flight); - blk_mq_add_to_requeue_list(rq, true, true); + blk_mq_add_to_requeue_list(rq, true); + blk_mq_kick_requeue_list(q); break; case REQ_FSEQ_DONE: @@ -345,7 +346,8 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, smp_wmb(); req_ref_set(flush_rq, 1); - blk_mq_add_to_requeue_list(flush_rq, false, true); + blk_mq_add_to_requeue_list(flush_rq, false); + blk_mq_kick_requeue_list(q); } static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq, diff --git a/block/blk-mq.c b/block/blk-mq.c index cde7ba9c39bf..db806c1a194c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1412,12 +1412,17 @@ static void __blk_mq_requeue_request(struct request *rq) void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list) { + struct request_queue *q = rq->q; + __blk_mq_requeue_request(rq); /* this request will be re-inserted to io scheduler queue */ blk_mq_sched_requeue_request(rq); - blk_mq_add_to_requeue_list(rq, true, kick_requeue_list); + blk_mq_add_to_requeue_list(rq, true); + + if (kick_requeue_list) + blk_mq_kick_requeue_list(q); } EXPORT_SYMBOL(blk_mq_requeue_request); @@ -1459,8 +1464,7 @@ static void blk_mq_requeue_work(struct work_struct *work) blk_mq_run_hw_queues(q, false); } -void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, - bool kick_requeue_list) +void blk_mq_add_to_requeue_list(struct request *rq, bool at_head) { struct request_queue *q = rq->q; unsigned long flags; @@ -1479,9 +1483,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, list_add_tail(&rq->queuelist, &q->requeue_list); } spin_unlock_irqrestore(&q->requeue_lock, flags); - - if (kick_requeue_list) - blk_mq_kick_requeue_list(q); } void blk_mq_kick_requeue_list(struct request_queue *q) diff --git a/block/blk-mq.h b/block/blk-mq.h index f30f99166f38..5d3761c50063 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -44,8 +44,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *, unsigned int); -void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, - bool kick_requeue_list); +void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *start); -- cgit v1.2.3 From 710fa3789ed94ceee9675f8e189aaf3e7525269a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:54 +0200 Subject: blk-mq: pass a flags argument to blk_mq_insert_request Replace the at_head bool with a flags argument that so far only contains a single BLK_MQ_INSERT_AT_HEAD value. This makes it much easier to grep for head insertions into the blk-mq dispatch queues. 
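Under sparse, the __bitwise/__force annotations make mixing a blk_insert_t with plain integers generate warnings; without sparse the pattern reduces to "a dedicated typedef plus named bit macros", which is still far easier to grep and to extend than a bare bool. A small userspace approximation of the same pattern, with invented names:

#include <stdio.h>

/* Plain-C stand-in for the sparse-checked blk_insert_t. */
typedef unsigned int insert_flags_t;
#define INSERT_AT_HEAD	((insert_flags_t)0x01)

static void insert_request(int req, insert_flags_t flags)
{
	if (flags & INSERT_AT_HEAD)
		printf("insert %d at head\n", req);
	else
		printf("insert %d at tail\n", req);
}

int main(void)
{
	insert_request(1, 0);
	insert_request(2, INSERT_AT_HEAD);	/* greppable, unlike "true" */
	return 0;
}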
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-18-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 27 ++++++++++++++------------- block/blk-mq.h | 3 +++ 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index db806c1a194c..ba64c4621e29 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -44,7 +44,7 @@ static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); -static void blk_mq_insert_request(struct request *rq, bool at_head); +static void blk_mq_insert_request(struct request *rq, blk_insert_t flags); static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list); @@ -1308,7 +1308,7 @@ void blk_execute_rq_nowait(struct request *rq, bool at_head) return; } - blk_mq_insert_request(rq, at_head); + blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0); blk_mq_run_hw_queue(hctx, false); } EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); @@ -1371,7 +1371,7 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head) rq->end_io = blk_end_sync_rq; blk_account_io_start(rq); - blk_mq_insert_request(rq, at_head); + blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0); blk_mq_run_hw_queue(hctx, false); if (blk_rq_is_poll(rq)) { @@ -1451,14 +1451,14 @@ static void blk_mq_requeue_work(struct work_struct *work) } else if (rq->rq_flags & RQF_SOFTBARRIER) { rq->rq_flags &= ~RQF_SOFTBARRIER; list_del_init(&rq->queuelist); - blk_mq_insert_request(rq, true); + blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD); } } while (!list_empty(&rq_list)) { rq = list_entry(rq_list.next, struct request, queuelist); list_del_init(&rq->queuelist); - blk_mq_insert_request(rq, false); + blk_mq_insert_request(rq, 0); } blk_mq_run_hw_queues(q, false); @@ -2509,7 +2509,7 @@ out: blk_mq_run_hw_queue(hctx, run_queue_async); } -static void blk_mq_insert_request(struct request *rq, bool at_head) +static void blk_mq_insert_request(struct request *rq, blk_insert_t flags) { struct request_queue *q = rq->q; struct blk_mq_ctx *ctx = rq->mq_ctx; @@ -2526,7 +2526,7 @@ static void blk_mq_insert_request(struct request *rq, bool at_head) * and it is added to the scheduler queue, there is no chance to * dispatch it given we prioritize requests in hctx->dispatch. 
*/ - blk_mq_request_bypass_insert(rq, at_head); + blk_mq_request_bypass_insert(rq, flags & BLK_MQ_INSERT_AT_HEAD); } else if (rq->rq_flags & RQF_FLUSH_SEQ) { /* * Firstly normal IO request is inserted to scheduler queue or @@ -2556,12 +2556,13 @@ static void blk_mq_insert_request(struct request *rq, bool at_head) WARN_ON_ONCE(rq->tag != BLK_MQ_NO_TAG); list_add(&rq->queuelist, &list); - q->elevator->type->ops.insert_requests(hctx, &list, at_head); + q->elevator->type->ops.insert_requests(hctx, &list, + flags & BLK_MQ_INSERT_AT_HEAD); } else { trace_block_rq_insert(rq); spin_lock(&ctx->lock); - if (at_head) + if (flags & BLK_MQ_INSERT_AT_HEAD) list_add(&rq->queuelist, &ctx->rq_lists[hctx->type]); else list_add_tail(&rq->queuelist, @@ -2653,12 +2654,12 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, blk_status_t ret; if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { - blk_mq_insert_request(rq, false); + blk_mq_insert_request(rq, 0); return; } if ((rq->rq_flags & RQF_ELV) || !blk_mq_get_budget_and_tag(rq)) { - blk_mq_insert_request(rq, false); + blk_mq_insert_request(rq, 0); blk_mq_run_hw_queue(hctx, false); return; } @@ -2683,7 +2684,7 @@ static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last) struct blk_mq_hw_ctx *hctx = rq->mq_hctx; if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(rq->q)) { - blk_mq_insert_request(rq, false); + blk_mq_insert_request(rq, 0); return BLK_STS_OK; } @@ -3018,7 +3019,7 @@ void blk_mq_submit_bio(struct bio *bio) hctx = rq->mq_hctx; if ((rq->rq_flags & RQF_ELV) || (hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) { - blk_mq_insert_request(rq, false); + blk_mq_insert_request(rq, 0); blk_mq_run_hw_queue(hctx, true); } else { blk_mq_run_dispatch_ops(q, blk_mq_try_issue_directly(hctx, rq)); diff --git a/block/blk-mq.h b/block/blk-mq.h index 5d3761c50063..273eee00524b 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -36,6 +36,9 @@ enum { BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1, }; +typedef unsigned int __bitwise blk_insert_t; +#define BLK_MQ_INSERT_AT_HEAD ((__force blk_insert_t)0x01) + void blk_mq_submit_bio(struct bio *bio); int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob, unsigned int flags); -- cgit v1.2.3 From 2b5976134bfbc753dec6281da0890c5f194c00c9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:55 +0200 Subject: blk-mq: pass a flags argument to blk_mq_request_bypass_insert Replace the boolean at_head argument with the same flags that are already passed to blk_mq_insert_request. 
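Head-or-tail placement on hctx->dispatch only matters because that list is always served before anything sitting in the scheduler or software queues; bypass insertion really does jump the queue. A deliberately tiny userspace model of that ordering, using single-slot queues and invented names:

#include <stdio.h>

#define NONE (-1)

/* The bypass slot models hctx->dispatch and is always drained first. */
static int bypass_slot = NONE;
static int sched_slot = NONE;

static void bypass_insert(int req)	{ bypass_slot = req; }
static void sched_insert(int req)	{ sched_slot = req; }

static int dispatch_one(void)
{
	int req;

	if (bypass_slot != NONE) {
		req = bypass_slot;
		bypass_slot = NONE;
	} else {
		req = sched_slot;
		sched_slot = NONE;
	}
	return req;
}

int main(void)
{
	sched_insert(1);
	bypass_insert(2);	/* e.g. a passthrough request */
	printf("dispatched %d first\n", dispatch_one());	/* prints 2 */
	return 0;
}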
Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-19-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-flush.c | 2 +- block/blk-mq.c | 18 +++++++++--------- block/blk-mq.h | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-flush.c b/block/blk-flush.c index 015982bd2f7c..1d3af17619de 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -428,7 +428,7 @@ void blk_insert_flush(struct request *rq) */ if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { - blk_mq_request_bypass_insert(rq, false); + blk_mq_request_bypass_insert(rq, 0); blk_mq_run_hw_queue(hctx, false); return; } diff --git a/block/blk-mq.c b/block/blk-mq.c index ba64c4621e29..ff74559d7da1 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1447,7 +1447,7 @@ static void blk_mq_requeue_work(struct work_struct *work) if (rq->rq_flags & RQF_DONTPREP) { rq->rq_flags &= ~RQF_SOFTBARRIER; list_del_init(&rq->queuelist); - blk_mq_request_bypass_insert(rq, false); + blk_mq_request_bypass_insert(rq, 0); } else if (rq->rq_flags & RQF_SOFTBARRIER) { rq->rq_flags &= ~RQF_SOFTBARRIER; list_del_init(&rq->queuelist); @@ -2457,17 +2457,17 @@ static void blk_mq_run_work_fn(struct work_struct *work) /** * blk_mq_request_bypass_insert - Insert a request at dispatch list. * @rq: Pointer to request to be inserted. - * @at_head: true if the request should be inserted at the head of the list. + * @flags: BLK_MQ_INSERT_* * * Should only be used carefully, when the caller knows we want to * bypass a potential IO scheduler on the target device. */ -void blk_mq_request_bypass_insert(struct request *rq, bool at_head) +void blk_mq_request_bypass_insert(struct request *rq, blk_insert_t flags) { struct blk_mq_hw_ctx *hctx = rq->mq_hctx; spin_lock(&hctx->lock); - if (at_head) + if (flags & BLK_MQ_INSERT_AT_HEAD) list_add(&rq->queuelist, &hctx->dispatch); else list_add_tail(&rq->queuelist, &hctx->dispatch); @@ -2526,7 +2526,7 @@ static void blk_mq_insert_request(struct request *rq, blk_insert_t flags) * and it is added to the scheduler queue, there is no chance to * dispatch it given we prioritize requests in hctx->dispatch. */ - blk_mq_request_bypass_insert(rq, flags & BLK_MQ_INSERT_AT_HEAD); + blk_mq_request_bypass_insert(rq, flags); } else if (rq->rq_flags & RQF_FLUSH_SEQ) { /* * Firstly normal IO request is inserted to scheduler queue or @@ -2549,7 +2549,7 @@ static void blk_mq_insert_request(struct request *rq, blk_insert_t flags) * Simply queue flush rq to the front of hctx->dispatch so that * intensive flush workloads can benefit in case of NCQ HW. 
*/ - blk_mq_request_bypass_insert(rq, true); + blk_mq_request_bypass_insert(rq, BLK_MQ_INSERT_AT_HEAD); } else if (q->elevator) { LIST_HEAD(list); @@ -2670,7 +2670,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, break; case BLK_STS_RESOURCE: case BLK_STS_DEV_RESOURCE: - blk_mq_request_bypass_insert(rq, false); + blk_mq_request_bypass_insert(rq, 0); blk_mq_run_hw_queue(hctx, false); break; default: @@ -2718,7 +2718,7 @@ static void blk_mq_plug_issue_direct(struct blk_plug *plug) break; case BLK_STS_RESOURCE: case BLK_STS_DEV_RESOURCE: - blk_mq_request_bypass_insert(rq, false); + blk_mq_request_bypass_insert(rq, 0); blk_mq_run_hw_queue(hctx, false); goto out; default: @@ -2837,7 +2837,7 @@ static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, break; case BLK_STS_RESOURCE: case BLK_STS_DEV_RESOURCE: - blk_mq_request_bypass_insert(rq, false); + blk_mq_request_bypass_insert(rq, 0); if (list_empty(list)) blk_mq_run_hw_queue(hctx, false); goto out; diff --git a/block/blk-mq.h b/block/blk-mq.h index 273eee00524b..bb16c0a54411 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -67,7 +67,7 @@ void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set, /* * Internal helpers for request insertion into sw queues */ -void blk_mq_request_bypass_insert(struct request *rq, bool at_head); +void blk_mq_request_bypass_insert(struct request *rq, blk_insert_t flags); /* * CPU -> queue mappings -- cgit v1.2.3 From 93fffe16f7ee18600f15838e2e8b5cf353f245c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:56 +0200 Subject: blk-mq: pass a flags argument to elevator_type->insert_requests Instead of passing a bool at_head, pass down the full flags from the blk_mq_insert_request interface. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-20-hch@lst.de Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 16 ++++++++-------- block/blk-mq.c | 5 ++--- block/elevator.h | 4 +++- block/kyber-iosched.c | 5 +++-- block/mq-deadline.c | 9 +++++---- 5 files changed, 21 insertions(+), 18 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 37f68c907ac0..b4c4b4808c6c 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -6231,7 +6231,7 @@ static inline void bfq_update_insert_stats(struct request_queue *q, static struct bfq_queue *bfq_init_rq(struct request *rq); static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, - bool at_head) + blk_insert_t flags) { struct request_queue *q = hctx->queue; struct bfq_data *bfqd = q->elevator->elevator_data; @@ -6254,11 +6254,10 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, trace_block_rq_insert(rq); - if (!bfqq || at_head) { - if (at_head) - list_add(&rq->queuelist, &bfqd->dispatch); - else - list_add_tail(&rq->queuelist, &bfqd->dispatch); + if (flags & BLK_MQ_INSERT_AT_HEAD) { + list_add(&rq->queuelist, &bfqd->dispatch); + } else if (!bfqq) { + list_add_tail(&rq->queuelist, &bfqd->dispatch); } else { idle_timer_disabled = __bfq_insert_request(bfqd, rq); /* @@ -6288,14 +6287,15 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, } static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx, - struct list_head *list, bool at_head) + struct list_head *list, + blk_insert_t flags) { while (!list_empty(list)) { struct request *rq; rq = list_first_entry(list, struct request, queuelist); 
list_del_init(&rq->queuelist); - bfq_insert_request(hctx, rq, at_head); + bfq_insert_request(hctx, rq, flags); } } diff --git a/block/blk-mq.c b/block/blk-mq.c index ff74559d7da1..6c3db1a15dad 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2556,8 +2556,7 @@ static void blk_mq_insert_request(struct request *rq, blk_insert_t flags) WARN_ON_ONCE(rq->tag != BLK_MQ_NO_TAG); list_add(&rq->queuelist, &list); - q->elevator->type->ops.insert_requests(hctx, &list, - flags & BLK_MQ_INSERT_AT_HEAD); + q->elevator->type->ops.insert_requests(hctx, &list, flags); } else { trace_block_rq_insert(rq); @@ -2768,7 +2767,7 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched) percpu_ref_get(&this_hctx->queue->q_usage_counter); if (this_hctx->queue->elevator) { this_hctx->queue->elevator->type->ops.insert_requests(this_hctx, - &list, false); + &list, 0); blk_mq_run_hw_queue(this_hctx, from_sched); } else { blk_mq_insert_requests(this_hctx, this_ctx, &list, from_sched); diff --git a/block/elevator.h b/block/elevator.h index 774a8f6b99e6..7ca3d7b6ed82 100644 --- a/block/elevator.h +++ b/block/elevator.h @@ -4,6 +4,7 @@ #include #include +#include "blk-mq.h" struct io_cq; struct elevator_type; @@ -37,7 +38,8 @@ struct elevator_mq_ops { void (*limit_depth)(blk_opf_t, struct blk_mq_alloc_data *); void (*prepare_request)(struct request *); void (*finish_request)(struct request *); - void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); + void (*insert_requests)(struct blk_mq_hw_ctx *hctx, struct list_head *list, + blk_insert_t flags); struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); bool (*has_work)(struct blk_mq_hw_ctx *); void (*completed_request)(struct request *, u64); diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 3f9fb2090c91..4155594aefc6 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -588,7 +588,8 @@ static void kyber_prepare_request(struct request *rq) } static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx, - struct list_head *rq_list, bool at_head) + struct list_head *rq_list, + blk_insert_t flags) { struct kyber_hctx_data *khd = hctx->sched_data; struct request *rq, *next; @@ -600,7 +601,7 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx, spin_lock(&kcq->lock); trace_block_rq_insert(rq); - if (at_head) + if (flags & BLK_MQ_INSERT_AT_HEAD) list_move(&rq->queuelist, head); else list_move_tail(&rq->queuelist, head); diff --git a/block/mq-deadline.c b/block/mq-deadline.c index ceae477c3571..5839a027e0f0 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -766,7 +766,7 @@ static bool dd_bio_merge(struct request_queue *q, struct bio *bio, * add rq to rbtree and fifo */ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, - bool at_head) + blk_insert_t flags) { struct request_queue *q = hctx->queue; struct deadline_data *dd = q->elevator->elevator_data; @@ -799,7 +799,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, trace_block_rq_insert(rq); - if (at_head) { + if (flags & BLK_MQ_INSERT_AT_HEAD) { list_add(&rq->queuelist, &per_prio->dispatch); rq->fifo_time = jiffies; } else { @@ -823,7 +823,8 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, * Called from blk_mq_insert_request() or blk_mq_dispatch_plug_list(). 
*/ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, - struct list_head *list, bool at_head) + struct list_head *list, + blk_insert_t flags) { struct request_queue *q = hctx->queue; struct deadline_data *dd = q->elevator->elevator_data; @@ -834,7 +835,7 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, rq = list_first_entry(list, struct request, queuelist); list_del_init(&rq->queuelist); - dd_insert_request(hctx, rq, at_head); + dd_insert_request(hctx, rq, flags); } spin_unlock(&dd->lock); } -- cgit v1.2.3 From b12e5c6c755ae8bec44723f77f037873e3d08021 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:40:57 +0200 Subject: blk-mq: pass a flags argument to blk_mq_add_to_requeue_list Replace the boolean at_head argument with the same flags that are already passed to blk_mq_insert_request. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413064057.707578-21-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-flush.c | 4 ++-- block/blk-mq.c | 6 +++--- block/blk-mq.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-flush.c b/block/blk-flush.c index 1d3af17619de..00dd2f61312d 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -188,7 +188,7 @@ static void blk_flush_complete_seq(struct request *rq, case REQ_FSEQ_DATA: list_move_tail(&rq->flush.list, &fq->flush_data_in_flight); - blk_mq_add_to_requeue_list(rq, true); + blk_mq_add_to_requeue_list(rq, BLK_MQ_INSERT_AT_HEAD); blk_mq_kick_requeue_list(q); break; @@ -346,7 +346,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, smp_wmb(); req_ref_set(flush_rq, 1); - blk_mq_add_to_requeue_list(flush_rq, false); + blk_mq_add_to_requeue_list(flush_rq, BLK_MQ_INSERT_AT_HEAD); blk_mq_kick_requeue_list(q); } diff --git a/block/blk-mq.c b/block/blk-mq.c index 6c3db1a15dad..1e35c829bddd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1419,7 +1419,7 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list) /* this request will be re-inserted to io scheduler queue */ blk_mq_sched_requeue_request(rq); - blk_mq_add_to_requeue_list(rq, true); + blk_mq_add_to_requeue_list(rq, BLK_MQ_INSERT_AT_HEAD); if (kick_requeue_list) blk_mq_kick_requeue_list(q); @@ -1464,7 +1464,7 @@ static void blk_mq_requeue_work(struct work_struct *work) blk_mq_run_hw_queues(q, false); } -void blk_mq_add_to_requeue_list(struct request *rq, bool at_head) +void blk_mq_add_to_requeue_list(struct request *rq, blk_insert_t insert_flags) { struct request_queue *q = rq->q; unsigned long flags; @@ -1476,7 +1476,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head) BUG_ON(rq->rq_flags & RQF_SOFTBARRIER); spin_lock_irqsave(&q->requeue_lock, flags); - if (at_head) { + if (insert_flags & BLK_MQ_INSERT_AT_HEAD) { rq->rq_flags |= RQF_SOFTBARRIER; list_add(&rq->queuelist, &q->requeue_list); } else { diff --git a/block/blk-mq.h b/block/blk-mq.h index bb16c0a54411..f882677ff106 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -47,7 +47,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *, unsigned int); -void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); +void blk_mq_add_to_requeue_list(struct request *rq, blk_insert_t insert_flags); void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); 
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *start); -- cgit v1.2.3 From c20a1a2c1a9f5b1081121cd18be444e7610b0c6f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:06:48 +0200 Subject: blk-mq: remove the blk_mq_hctx_stopped check in blk_mq_run_work_fn blk_mq_hctx_stopped is already checked in blk_mq_sched_dispatch_requests under blk_mq_run_dispatch_ops() protection, so remove the duplicate check. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413060651.694656-3-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index 1e35c829bddd..ad13d2aa948b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2441,15 +2441,8 @@ EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues); static void blk_mq_run_work_fn(struct work_struct *work) { - struct blk_mq_hw_ctx *hctx; - - hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work); - - /* - * If we are stopped, don't run the queue. - */ - if (blk_mq_hctx_stopped(hctx)) - return; + struct blk_mq_hw_ctx *hctx = + container_of(work, struct blk_mq_hw_ctx, run_work.work); __blk_mq_run_hw_queue(hctx); } -- cgit v1.2.3 From cd735e11130d4c84a073e1056aa019ca0f3305f9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:06:49 +0200 Subject: blk-mq: move the blk_mq_hctx_stopped check in __blk_mq_delay_run_hw_queue For the in-context dispatch, blk_mq_hctx_stopped is alredy checked in blk_mq_sched_dispatch_requests under blk_mq_run_dispatch_ops() protection. For the async dispatch case having a check before scheduling the work still makes sense to avoid needless workqueue scheduling, so just keep it for that case. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413060651.694656-4-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index ad13d2aa948b..2213117bb1b5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2223,9 +2223,6 @@ select_cpu: static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, unsigned long msecs) { - if (unlikely(blk_mq_hctx_stopped(hctx))) - return; - if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { if (cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) { __blk_mq_run_hw_queue(hctx); @@ -2233,6 +2230,8 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, } } + if (unlikely(blk_mq_hctx_stopped(hctx))) + return; kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work, msecs_to_jiffies(msecs)); } -- cgit v1.2.3 From 1aa8d875b523d61347a6887e4a4ab65a6d799d40 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:06:50 +0200 Subject: blk-mq: move the !async handling out of __blk_mq_delay_run_hw_queue Only blk_mq_run_hw_queue can call __blk_mq_delay_run_hw_queue with async=false, so move the handling there. With this __blk_mq_delay_run_hw_queue can be merged into blk_mq_delay_run_hw_queue. 
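After this change the policy reads as a single branch: defer to the delayed work whenever the call is asynchronous, the hctx is blocking, or the current CPU is not mapped to the hctx; otherwise dispatch inline. A compilable sketch of that shape with stubbed predicates; all names are placeholders and the quiesced/pending checks are omitted:

#include <stdbool.h>
#include <stdio.h>

static bool queue_stopped(void)	{ return false; }
static bool blocking_hctx(void)	{ return false; }
static bool on_mapped_cpu(void)	{ return true; }
static void run_inline(void)	{ printf("dispatch inline\n"); }
static void schedule_work(void)	{ printf("defer to worker\n"); }

static void run_hw_queue(bool async)
{
	if (async || blocking_hctx() || !on_mapped_cpu()) {
		/* The delayed-work path still rechecks the stopped state. */
		if (!queue_stopped())
			schedule_work();
		return;
	}
	run_inline();
}

int main(void)
{
	run_hw_queue(false);	/* inline when it is safe */
	run_hw_queue(true);	/* always via the workqueue */
	return 0;
}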
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413060651.694656-5-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index 2213117bb1b5..68575c71d167 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2212,41 +2212,19 @@ select_cpu: } /** - * __blk_mq_delay_run_hw_queue - Run (or schedule to run) a hardware queue. + * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously. * @hctx: Pointer to the hardware queue to run. - * @async: If we want to run the queue asynchronously. * @msecs: Milliseconds of delay to wait before running the queue. * - * If !@async, try to run the queue now. Else, run the queue asynchronously and - * with a delay of @msecs. + * Run a hardware queue asynchronously with a delay of @msecs. */ -static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, - unsigned long msecs) +void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) { - if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { - if (cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) { - __blk_mq_run_hw_queue(hctx); - return; - } - } - if (unlikely(blk_mq_hctx_stopped(hctx))) return; kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work, msecs_to_jiffies(msecs)); } - -/** - * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously. - * @hctx: Pointer to the hardware queue to run. - * @msecs: Milliseconds of delay to wait before running the queue. - * - * Run a hardware queue asynchronously with a delay of @msecs. - */ -void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) -{ - __blk_mq_delay_run_hw_queue(hctx, true, msecs); -} EXPORT_SYMBOL(blk_mq_delay_run_hw_queue); /** @@ -2274,8 +2252,16 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) need_run = !blk_queue_quiesced(hctx->queue) && blk_mq_hctx_has_pending(hctx)); - if (need_run) - __blk_mq_delay_run_hw_queue(hctx, async, 0); + if (!need_run) + return; + + if (async || (hctx->flags & BLK_MQ_F_BLOCKING) || + !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) { + blk_mq_delay_run_hw_queue(hctx, 0); + return; + } + + __blk_mq_run_hw_queue(hctx); } EXPORT_SYMBOL(blk_mq_run_hw_queue); -- cgit v1.2.3 From 4d5bba5bee0aa002523125e51789e95d47794a06 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2023 08:06:51 +0200 Subject: blk-mq: remove __blk_mq_run_hw_queue __blk_mq_run_hw_queue just contains a WARN_ON_ONCE for calls from interrupt context and a blk_mq_run_dispatch_ops-protected call to blk_mq_sched_dispatch_requests. Open code the call to blk_mq_sched_dispatch_requests in both callers, and move the WARN_ON_ONCE to blk_mq_run_hw_queue where it can be extended to all !async calls, while the other call is from workqueue context and thus obviously does not need the assert. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230413060651.694656-6-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index 68575c71d167..c2d297efe229 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2138,24 +2138,6 @@ out: return true; } -/** - * __blk_mq_run_hw_queue - Run a hardware queue. 
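Moving the WARN_ON_ONCE amounts to asserting the precondition where callers enter rather than inside an internal helper that only some paths went through; with the helper gone, every synchronous caller is covered by one check. A userspace analogue with assert(); the in_atomic_context flag is a stand-in, since userspace has no in_interrupt():

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

static bool in_atomic_context;	/* stand-in for in_interrupt() */

static void dispatch(void)
{
	printf("dispatching\n");
}

/* Public entry point: the precondition is checked once, here. */
static void run_queue(bool async)
{
	assert(async || !in_atomic_context);

	if (async)
		return;		/* would be handed off to a worker */
	dispatch();
}

int main(void)
{
	run_queue(false);
	return 0;
}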
- * @hctx: Pointer to the hardware queue to run. - * - * Send pending requests to the hardware. - */ -static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) -{ - /* - * We can't run the queue inline with ints disabled. Ensure that - * we catch bad users of this early. - */ - WARN_ON_ONCE(in_interrupt()); - - blk_mq_run_dispatch_ops(hctx->queue, - blk_mq_sched_dispatch_requests(hctx)); -} - static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx) { int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask); @@ -2240,6 +2222,11 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) { bool need_run; + /* + * We can't run the queue inline with interrupts disabled. + */ + WARN_ON_ONCE(!async && in_interrupt()); + /* * When queue is quiesced, we may be switching io scheduler, or * updating nr_hw_queues, or other things, and we can't run queue @@ -2261,7 +2248,8 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) return; } - __blk_mq_run_hw_queue(hctx); + blk_mq_run_dispatch_ops(hctx->queue, + blk_mq_sched_dispatch_requests(hctx)); } EXPORT_SYMBOL(blk_mq_run_hw_queue); @@ -2429,7 +2417,8 @@ static void blk_mq_run_work_fn(struct work_struct *work) struct blk_mq_hw_ctx *hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work); - __blk_mq_run_hw_queue(hctx); + blk_mq_run_dispatch_ops(hctx->queue, + blk_mq_sched_dispatch_requests(hctx)); } /** -- cgit v1.2.3