summaryrefslogtreecommitdiff
path: root/block/blk-mq.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-02-21 22:02:48 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2021-02-21 22:02:48 +0300
commit582cd91f69de8e44857cb610ebca661dac8656b7 (patch)
tree0d680db02a5c236ee87b408b3f13ce33ebaca907 /block/blk-mq.c
parentbd018bbaa58640da786d4289563e71c5ef3938c7 (diff)
parentf885056a48ccf4ad4332def91e973f3993fa8695 (diff)
downloadlinux-582cd91f69de8e44857cb610ebca661dac8656b7.tar.xz
Merge tag 'for-5.12/block-2021-02-17' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe: "Another nice round of removing more code than what is added, mostly due to Christoph's relentless pursuit of tech debt removal/cleanups. This pull request contains: - Two series of BFQ improvements (Paolo, Jan, Jia) - Block iov_iter improvements (Pavel) - bsg error path fix (Pan) - blk-mq scheduler improvements (Jan) - -EBUSY discard fix (Jan) - bvec allocation improvements (Ming, Christoph) - bio allocation and init improvements (Christoph) - Store bdev pointer in bio instead of gendisk + partno (Christoph) - Block trace point cleanups (Christoph) - hard read-only vs read-only split (Christoph) - Block based swap cleanups (Christoph) - Zoned write granularity support (Damien) - Various fixes/tweaks (Chunguang, Guoqing, Lei, Lukas, Huhai)" * tag 'for-5.12/block-2021-02-17' of git://git.kernel.dk/linux-block: (104 commits) mm: simplify swapdev_block sd_zbc: clear zone resources for non-zoned case block: introduce blk_queue_clear_zone_settings() zonefs: use zone write granularity as block size block: introduce zone_write_granularity limit block: use blk_queue_set_zoned in add_partition() nullb: use blk_queue_set_zoned() to setup zoned devices nvme: cleanup zone information initialization block: document zone_append_max_bytes attribute block: use bi_max_vecs to find the bvec pool md/raid10: remove dead code in reshape_request block: mark the bio as cloned in bio_iov_bvec_set block: set BIO_NO_PAGE_REF in bio_iov_bvec_set block: remove a layer of indentation in bio_iov_iter_get_pages block: turn the nr_iovecs argument to bio_alloc* into an unsigned short block: remove the 1 and 4 vec bvec_slabs entries block: streamline bvec_alloc block: factor out a bvec_alloc_gfp helper block: move struct biovec_slab to bio.c block: reuse BIO_INLINE_VECS for integrity bvecs ...
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--block/blk-mq.c69
1 files changed, 61 insertions, 8 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f285a9123a8b..f21d922ecfaf 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1646,6 +1646,42 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
}
EXPORT_SYMBOL(blk_mq_run_hw_queue);
+/*
+ * Is the request queue handled by an IO scheduler that does not respect
+ * hardware queues when dispatching?
+ */
+static bool blk_mq_has_sqsched(struct request_queue *q)
+{
+ struct elevator_queue *e = q->elevator;
+
+ if (e && e->type->ops.dispatch_request &&
+ !(e->type->elevator_features & ELEVATOR_F_MQ_AWARE))
+ return true;
+ return false;
+}
+
+/*
+ * Return prefered queue to dispatch from (if any) for non-mq aware IO
+ * scheduler.
+ */
+static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
+{
+ struct blk_mq_hw_ctx *hctx;
+
+ /*
+ * If the IO scheduler does not respect hardware queues when
+ * dispatching, we just don't bother with multiple HW queues and
+ * dispatch from hctx for the current CPU since running multiple queues
+ * just causes lock contention inside the scheduler and pointless cache
+ * bouncing.
+ */
+ hctx = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT,
+ raw_smp_processor_id());
+ if (!blk_mq_hctx_stopped(hctx))
+ return hctx;
+ return NULL;
+}
+
/**
* blk_mq_run_hw_queues - Run all hardware queues in a request queue.
* @q: Pointer to the request queue to run.
@@ -1653,14 +1689,23 @@ EXPORT_SYMBOL(blk_mq_run_hw_queue);
*/
void blk_mq_run_hw_queues(struct request_queue *q, bool async)
{
- struct blk_mq_hw_ctx *hctx;
+ struct blk_mq_hw_ctx *hctx, *sq_hctx;
int i;
+ sq_hctx = NULL;
+ if (blk_mq_has_sqsched(q))
+ sq_hctx = blk_mq_get_sq_hctx(q);
queue_for_each_hw_ctx(q, hctx, i) {
if (blk_mq_hctx_stopped(hctx))
continue;
-
- blk_mq_run_hw_queue(hctx, async);
+ /*
+ * Dispatch from this hctx either if there's no hctx preferred
+ * by IO scheduler or if it has requests that bypass the
+ * scheduler.
+ */
+ if (!sq_hctx || sq_hctx == hctx ||
+ !list_empty_careful(&hctx->dispatch))
+ blk_mq_run_hw_queue(hctx, async);
}
}
EXPORT_SYMBOL(blk_mq_run_hw_queues);
@@ -1672,14 +1717,23 @@ EXPORT_SYMBOL(blk_mq_run_hw_queues);
*/
void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
{
- struct blk_mq_hw_ctx *hctx;
+ struct blk_mq_hw_ctx *hctx, *sq_hctx;
int i;
+ sq_hctx = NULL;
+ if (blk_mq_has_sqsched(q))
+ sq_hctx = blk_mq_get_sq_hctx(q);
queue_for_each_hw_ctx(q, hctx, i) {
if (blk_mq_hctx_stopped(hctx))
continue;
-
- blk_mq_delay_run_hw_queue(hctx, msecs);
+ /*
+ * Dispatch from this hctx either if there's no hctx preferred
+ * by IO scheduler or if it has requests that bypass the
+ * scheduler.
+ */
+ if (!sq_hctx || sq_hctx == hctx ||
+ !list_empty_careful(&hctx->dispatch))
+ blk_mq_delay_run_hw_queue(hctx, msecs);
}
}
EXPORT_SYMBOL(blk_mq_delay_run_hw_queues);
@@ -2128,7 +2182,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
*/
blk_qc_t blk_mq_submit_bio(struct bio *bio)
{
- struct request_queue *q = bio->bi_disk->queue;
+ struct request_queue *q = bio->bi_bdev->bd_disk->queue;
const int is_sync = op_is_sync(bio->bi_opf);
const int is_flush_fua = op_is_flush(bio->bi_opf);
struct blk_mq_alloc_data data = {
@@ -2653,7 +2707,6 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
goto free_hctx;
atomic_set(&hctx->nr_active, 0);
- atomic_set(&hctx->elevator_queued, 0);
if (node == NUMA_NO_NODE)
node = set->numa_node;
hctx->numa_node = node;