From 15f73f5b3e5958f2d169fe13c420eeeeae07bbf2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 11 Jun 2020 08:44:47 +0200
Subject: blk-mq: move failure injection out of blk_mq_complete_request

Move the call to blk_should_fake_timeout out of blk_mq_complete_request
and into the drivers, skipping call sites that are obvious error
handlers, and remove the now superfluous blk_mq_force_complete_rq
helper. This ensures we don't keep injecting errors into completions
that just terminate the Linux request after the hardware has been reset
or the command has been aborted.

Reviewed-by: Daniel Wagner
Signed-off-by: Christoph Hellwig
Signed-off-by: Jens Axboe
---
 block/blk.h | 9 ---------
 1 file changed, 9 deletions(-)
(limited to 'block/blk.h')

diff --git a/block/blk.h b/block/blk.h
index b5d1f0fc6547..8ba4a5e4fe07 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -223,18 +223,9 @@ ssize_t part_fail_show(struct device *dev, struct device_attribute *attr,
 		char *buf);
 ssize_t part_fail_store(struct device *dev, struct device_attribute *attr,
			const char *buf, size_t count);
-
-#ifdef CONFIG_FAIL_IO_TIMEOUT
-int blk_should_fake_timeout(struct request_queue *);
 ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
 ssize_t part_timeout_store(struct device *, struct device_attribute *,
				const char *, size_t);
-#else
-static inline int blk_should_fake_timeout(struct request_queue *q)
-{
-	return 0;
-}
-#endif
 
 void __blk_queue_split(struct request_queue *q, struct bio **bio,
		unsigned int *nr_segs);
--
cgit v1.2.3

From 85e0cbbb8a79537dbc465e9deb449a08b2b092a6 Mon Sep 17 00:00:00 2001
From: Luis Chamberlain
Date: Fri, 19 Jun 2020 20:47:30 +0000
Subject: block: create the request_queue debugfs_dir on registration

We were creating the request_queue debugfs_dir only for make_request
block drivers (multiqueue), never for request-based block drivers. We
did this because we were only adding non-blktrace debugfs files to that
directory for make_request drivers. However, since blktrace *always*
creates that directory anyway, we had to special-case the use of that
directory in blktrace.

Besides being an eyesore, this exposes request-based block drivers to
the same fragile debugfs race that used to exist with make_request
block drivers: if we start adding files to that directory, we can later
race into a double removal of dentries on the directory unless blktrace
handles this carefully.

Instead, simplify things by always creating the request_queue
debugfs_dir on request_queue registration. Also rename the mutex to
reflect that it is now used outside of the blktrace context.
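As an illustrative sketch (not part of the patch, and with hypothetical
example_* helper names), the locking pattern the hunks below adopt in
blk_register_queue() and blk_release_queue() boils down to:

#include <linux/blkdev.h>
#include <linux/debugfs.h>
#include <linux/mutex.h>

/* Create the per-queue debugfs dir exactly once, at registration time. */
static void example_register_queue_debugfs(struct request_queue *q)
{
	mutex_lock(&q->debugfs_mutex);
	q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
					    blk_debugfs_root);
	mutex_unlock(&q->debugfs_mutex);
}

/* Tear it down exactly once, at release time, under the same mutex that
 * blktrace now also takes around q->blk_trace. */
static void example_release_queue_debugfs(struct request_queue *q)
{
	mutex_lock(&q->debugfs_mutex);
	debugfs_remove_recursive(q->debugfs_dir);
	q->debugfs_dir = NULL;	/* illustrative; see the real hunks below */
	mutex_unlock(&q->debugfs_mutex);
}

With one create and one remove for every queue, blktrace no longer has
to guess whether the directory already exists.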
Signed-off-by: Luis Chamberlain
Reviewed-by: Christoph Hellwig
Signed-off-by: Jens Axboe
---
 block/blk-core.c        |  8 +------
 block/blk-mq-debugfs.c  |  5 -----
 block/blk-sysfs.c       |  9 ++++++++
 block/blk.h             |  2 --
 include/linux/blkdev.h  |  5 +++--
 kernel/trace/blktrace.c | 58 ++++++++++++++++++++++---------------------
 6 files changed, 39 insertions(+), 48 deletions(-)
(limited to 'block/blk.h')

diff --git a/block/blk-core.c b/block/blk-core.c
index a99b22fac38a..a9769c1a2875 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -51,9 +51,7 @@
 #include "blk-pm.h"
 #include "blk-rq-qos.h"
 
-#ifdef CONFIG_DEBUG_FS
 struct dentry *blk_debugfs_root;
-#endif
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
@@ -555,9 +553,7 @@ struct request_queue *__blk_alloc_queue(int node_id)
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
 
-#ifdef CONFIG_BLK_DEV_IO_TRACE
-	mutex_init(&q->blk_trace_mutex);
-#endif
+	mutex_init(&q->debugfs_mutex);
 	mutex_init(&q->sysfs_lock);
 	mutex_init(&q->sysfs_dir_lock);
 	spin_lock_init(&q->queue_lock);
@@ -1931,9 +1927,7 @@ int __init blk_dev_init(void)
 	blk_requestq_cachep = kmem_cache_create("request_queue",
 			sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
 
-#ifdef CONFIG_DEBUG_FS
 	blk_debugfs_root = debugfs_create_dir("block", NULL);
-#endif
 
 	return 0;
 }

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 15df3a36e9fa..a2800bc56fb4 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -824,9 +824,6 @@ void blk_mq_debugfs_register(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
-	q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
-					    blk_debugfs_root);
-
 	debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
 
 	/*
@@ -857,9 +854,7 @@ void blk_mq_debugfs_register(struct request_queue *q)
 
 void blk_mq_debugfs_unregister(struct request_queue *q)
 {
-	debugfs_remove_recursive(q->debugfs_dir);
 	q->sched_debugfs_dir = NULL;
-	q->debugfs_dir = NULL;
 }
 
 static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 561624d4cc4e..be67952e7be2 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -11,6 +11,7 @@
 #include <linux/blktrace_api.h>
 #include <linux/blk-mq.h>
 #include <linux/blk-cgroup.h>
+#include <linux/debugfs.h>
 
 #include "blk.h"
 #include "blk-mq.h"
@@ -917,6 +918,9 @@ static void blk_release_queue(struct kobject *kobj)
 	blk_mq_release(q);
 
 	blk_trace_shutdown(q);
+	mutex_lock(&q->debugfs_mutex);
+	debugfs_remove_recursive(q->debugfs_dir);
+	mutex_unlock(&q->debugfs_mutex);
 
 	if (queue_is_mq(q))
 		blk_mq_debugfs_unregister(q);
@@ -989,6 +993,11 @@ int blk_register_queue(struct gendisk *disk)
 		goto unlock;
 	}
 
+	mutex_lock(&q->debugfs_mutex);
+	q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
+					    blk_debugfs_root);
+	mutex_unlock(&q->debugfs_mutex);
+
 	if (queue_is_mq(q)) {
 		__blk_mq_register_dev(dev, q);
 		blk_mq_debugfs_register(q);

diff --git a/block/blk.h b/block/blk.h
index 8ba4a5e4fe07..3a120a070dac 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -14,9 +14,7 @@
 /* Max future timer expiry for timeouts */
 #define BLK_MAX_TIMEOUT		(5 * HZ)
 
-#ifdef CONFIG_DEBUG_FS
 extern struct dentry *blk_debugfs_root;
-#endif
 
 struct blk_flush_queue {
	unsigned int		flush_pending_idx:1;

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e214e0e9f868..c0701237116d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -528,9 +528,9 @@ struct request_queue {
 	unsigned int		sg_timeout;
 	unsigned int		sg_reserved_size;
 	int			node;
+	struct mutex		debugfs_mutex;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
struct blk_trace __rcu *blk_trace; - struct mutex blk_trace_mutex; #endif /* * for flush operations @@ -574,8 +574,9 @@ struct request_queue { struct list_head tag_set_list; struct bio_set bio_split; -#ifdef CONFIG_BLK_DEBUG_FS struct dentry *debugfs_dir; + +#ifdef CONFIG_BLK_DEBUG_FS struct dentry *sched_debugfs_dir; struct dentry *rqos_debugfs_dir; #endif diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 46c273f4dec5..c086c38f4954 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -348,7 +348,7 @@ static int __blk_trace_remove(struct request_queue *q) struct blk_trace *bt; bt = rcu_replace_pointer(q->blk_trace, NULL, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); if (!bt) return -EINVAL; @@ -362,9 +362,9 @@ int blk_trace_remove(struct request_queue *q) { int ret; - mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex); ret = __blk_trace_remove(q); - mutex_unlock(&q->blk_trace_mutex); + mutex_unlock(&q->debugfs_mutex); return ret; } @@ -483,14 +483,11 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct dentry *dir = NULL; int ret; - lockdep_assert_held(&q->blk_trace_mutex); + lockdep_assert_held(&q->debugfs_mutex); if (!buts->buf_size || !buts->buf_nr) return -EINVAL; - if (!blk_debugfs_root) - return -ENOENT; - strncpy(buts->name, name, BLKTRACE_BDEV_SIZE); buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0'; @@ -505,7 +502,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, * we can be. */ if (rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex))) { + lockdep_is_held(&q->debugfs_mutex))) { pr_warn("Concurrent blktraces are not allowed on %s\n", buts->name); return -EBUSY; @@ -524,18 +521,15 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (!bt->msg_data) goto err; -#ifdef CONFIG_BLK_DEBUG_FS /* - * When tracing whole make_request drivers (multiqueue) block devices, - * reuse the existing debugfs directory created by the block layer on - * init. For request-based block devices, all partitions block devices, + * When tracing the whole disk reuse the existing debugfs directory + * created by the block layer on init. For partitions block devices, * and scsi-generic block devices we create a temporary new debugfs * directory that will be removed once the trace ends. 
*/ - if (queue_is_mq(q) && bdev && bdev == bdev->bd_contains) + if (bdev && bdev == bdev->bd_contains) dir = q->debugfs_dir; else -#endif bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root); /* @@ -617,9 +611,9 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, { int ret; - mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex); ret = __blk_trace_setup(q, name, dev, bdev, arg); - mutex_unlock(&q->blk_trace_mutex); + mutex_unlock(&q->debugfs_mutex); return ret; } @@ -665,7 +659,7 @@ static int __blk_trace_startstop(struct request_queue *q, int start) struct blk_trace *bt; bt = rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); if (bt == NULL) return -EINVAL; @@ -705,9 +699,9 @@ int blk_trace_startstop(struct request_queue *q, int start) { int ret; - mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex); ret = __blk_trace_startstop(q, start); - mutex_unlock(&q->blk_trace_mutex); + mutex_unlock(&q->debugfs_mutex); return ret; } @@ -736,7 +730,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) if (!q) return -ENXIO; - mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex); switch (cmd) { case BLKTRACESETUP: @@ -763,7 +757,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) break; } - mutex_unlock(&q->blk_trace_mutex); + mutex_unlock(&q->debugfs_mutex); return ret; } @@ -774,14 +768,14 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) **/ void blk_trace_shutdown(struct request_queue *q) { - mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex); if (rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex))) { + lockdep_is_held(&q->debugfs_mutex))) { __blk_trace_startstop(q, 0); __blk_trace_remove(q); } - mutex_unlock(&q->blk_trace_mutex); + mutex_unlock(&q->debugfs_mutex); } #ifdef CONFIG_BLK_CGROUP @@ -1662,7 +1656,7 @@ static int blk_trace_remove_queue(struct request_queue *q) struct blk_trace *bt; bt = rcu_replace_pointer(q->blk_trace, NULL, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); if (bt == NULL) return -EINVAL; @@ -1837,10 +1831,10 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, if (q == NULL) goto out_bdput; - mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex); bt = rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); if (attr == &dev_attr_enable) { ret = sprintf(buf, "%u\n", !!bt); goto out_unlock_bdev; @@ -1858,7 +1852,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, ret = sprintf(buf, "%llu\n", bt->end_lba); out_unlock_bdev: - mutex_unlock(&q->blk_trace_mutex); + mutex_unlock(&q->debugfs_mutex); out_bdput: bdput(bdev); out: @@ -1901,10 +1895,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, if (q == NULL) goto out_bdput; - mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex); bt = rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); if (attr == &dev_attr_enable) { if (!!value == !!bt) { ret = 0; @@ -1921,7 +1915,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, if (bt == NULL) { ret = blk_trace_setup_queue(q, bdev); bt = rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); } if (ret == 0) { @@ -1936,7 
+1930,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 	}
 
 out_unlock_bdev:
-	mutex_unlock(&q->blk_trace_mutex);
+	mutex_unlock(&q->debugfs_mutex);
 out_bdput:
 	bdput(bdev);
 out:
--
cgit v1.2.3

From db18a53e5ba840993a3fc908dec648402ed740bd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Sat, 27 Jun 2020 09:31:58 +0200
Subject: blk-cgroup: remove blkcg_bio_issue_check

blkcg_bio_issue_check is a giant inline function that does three
entirely different things. Factor out the blk-cgroup related bio
initialization into a new helper, and open code the sequence in the
only caller, relying on the fact that all the actual functionality is
stubbed out for non-cgroup builds.

Acked-by: Tejun Heo
Signed-off-by: Christoph Hellwig
Signed-off-by: Jens Axboe
---
 block/blk-cgroup.c         | 34 ++++++++++++++++++++++++++++
 block/blk-core.c           |  7 +++++-
 block/blk-throttle.c       |  5 +++--
 block/blk.h                |  2 ++
 include/linux/blk-cgroup.h | 56 ++--------------------------------------------
 5 files changed, 47 insertions(+), 57 deletions(-)
(limited to 'block/blk.h')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index d21ec2acd716..1ce94afc03bc 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1813,6 +1813,40 @@ void bio_clone_blkg_association(struct bio *dst, struct bio *src)
 }
 EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
 
+static int blk_cgroup_io_type(struct bio *bio)
+{
+	if (op_is_discard(bio->bi_opf))
+		return BLKG_IOSTAT_DISCARD;
+	if (op_is_write(bio->bi_opf))
+		return BLKG_IOSTAT_WRITE;
+	return BLKG_IOSTAT_READ;
+}
+
+void blk_cgroup_bio_start(struct bio *bio)
+{
+	int rwd = blk_cgroup_io_type(bio), cpu;
+	struct blkg_iostat_set *bis;
+
+	cpu = get_cpu();
+	bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu);
+	u64_stats_update_begin(&bis->sync);
+
+	/*
+	 * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split
+	 * bio and we would have already accounted for the size of the bio.
+ */ + if (!bio_flagged(bio, BIO_CGROUP_ACCT)) { + bio_set_flag(bio, BIO_CGROUP_ACCT); + bis->cur.bytes[rwd] += bio->bi_iter.bi_size; + } + bis->cur.ios[rwd]++; + + u64_stats_update_end(&bis->sync); + if (cgroup_subsys_on_dfl(io_cgrp_subsys)) + cgroup_rstat_updated(bio->bi_blkg->blkcg->css.cgroup, cpu); + put_cpu(); +} + static int __init blkcg_init(void) { blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio", diff --git a/block/blk-core.c b/block/blk-core.c index a9769c1a2875..76cfd5709f66 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1073,8 +1073,13 @@ generic_make_request_checks(struct bio *bio) if (unlikely(!current->io_context)) create_task_io_context(current, GFP_ATOMIC, q->node); - if (!blkcg_bio_issue_check(q, bio)) + if (blk_throtl_bio(bio)) { + blkcg_bio_issue_init(bio); return false; + } + + blk_cgroup_bio_start(bio); + blkcg_bio_issue_init(bio); if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) { trace_block_bio_queue(q, bio); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index ac0083450500..9d00f62c05ec 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2158,9 +2158,10 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td) } #endif -bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, - struct bio *bio) +bool blk_throtl_bio(struct bio *bio) { + struct request_queue *q = bio->bi_disk->queue; + struct blkcg_gq *blkg = bio->bi_blkg; struct throtl_qnode *qn = NULL; struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg); struct throtl_service_queue *sq; diff --git a/block/blk.h b/block/blk.h index 3a120a070dac..41a50880c94e 100644 --- a/block/blk.h +++ b/block/blk.h @@ -288,10 +288,12 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node); extern int blk_throtl_init(struct request_queue *q); extern void blk_throtl_exit(struct request_queue *q); extern void blk_throtl_register_queue(struct request_queue *q); +bool blk_throtl_bio(struct bio *bio); #else /* CONFIG_BLK_DEV_THROTTLING */ static inline int blk_throtl_init(struct request_queue *q) { return 0; } static inline void blk_throtl_exit(struct request_queue *q) { } static inline void blk_throtl_register_queue(struct request_queue *q) { } +static inline bool blk_throtl_bio(struct bio *bio) { return false; } #endif /* CONFIG_BLK_DEV_THROTTLING */ #ifdef CONFIG_BLK_DEV_THROTTLING_LOW extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 8ab043c911f2..431b2d18bf40 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -517,14 +517,6 @@ static inline void blkg_put(struct blkcg_gq *blkg) if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ (p_blkg)->q, false))) -#ifdef CONFIG_BLK_DEV_THROTTLING -extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, - struct bio *bio); -#else -static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, - struct bio *bio) { return false; } -#endif - bool __blkcg_punt_bio_submit(struct bio *bio); static inline bool blkcg_punt_bio_submit(struct bio *bio) @@ -540,50 +532,6 @@ static inline void blkcg_bio_issue_init(struct bio *bio) bio_issue_init(&bio->bi_issue, bio_sectors(bio)); } -static inline bool blkcg_bio_issue_check(struct request_queue *q, - struct bio *bio) -{ - struct blkcg_gq *blkg = bio->bi_blkg; - bool throtl = false; - - throtl = blk_throtl_bio(q, blkg, bio); - if (!throtl) { - struct blkg_iostat_set *bis; - int rwd, cpu; - - if 
(op_is_discard(bio->bi_opf))
-			rwd = BLKG_IOSTAT_DISCARD;
-		else if (op_is_write(bio->bi_opf))
-			rwd = BLKG_IOSTAT_WRITE;
-		else
-			rwd = BLKG_IOSTAT_READ;
-
-		cpu = get_cpu();
-		bis = per_cpu_ptr(blkg->iostat_cpu, cpu);
-		u64_stats_update_begin(&bis->sync);
-
-		/*
-		 * If the bio is flagged with BIO_CGROUP_ACCT it means this is a
-		 * split bio and we would have already accounted for the size of
-		 * the bio.
-		 */
-		if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
-			bio_set_flag(bio, BIO_CGROUP_ACCT);
-			bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
-		}
-		bis->cur.ios[rwd]++;
-
-		u64_stats_update_end(&bis->sync);
-		if (cgroup_subsys_on_dfl(io_cgrp_subsys))
-			cgroup_rstat_updated(blkg->blkcg->css.cgroup, cpu);
-		put_cpu();
-	}
-
-	blkcg_bio_issue_init(bio);
-
-	return !throtl;
-}
-
 static inline void blkcg_use_delay(struct blkcg_gq *blkg)
 {
 	if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
@@ -657,6 +605,7 @@ static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
 		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
 }
 
+void blk_cgroup_bio_start(struct bio *bio);
 void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
 void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
 void blkcg_maybe_throttle_current(void);
@@ -710,8 +659,7 @@ static inline void blkg_put(struct blkcg_gq *blkg) { }
 
 static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
 static inline void blkcg_bio_issue_init(struct bio *bio) { }
-static inline bool blkcg_bio_issue_check(struct request_queue *q,
-					 struct bio *bio) { return true; }
+static inline void blk_cgroup_bio_start(struct bio *bio) { }
 
 #define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
--
cgit v1.2.3

From 37f4a24c2469a10a4c16c641671bd766e276cf9f Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Tue, 30 Jun 2020 22:03:57 +0800
Subject: blk-mq: centralise related handling into blk_mq_get_driver_tag

Move the .nr_active update and the request assignment into
blk_mq_get_driver_tag(); both are naturally done when the driver tag is
obtained. Meanwhile, the blk-flush related code is simplified: the
flush request no longer needs to update the tag-to-request table
manually.
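In effect, the consolidated helper ends up looking like the sketch
below (condensed from the blk-mq.c hunks that follow; the name
example_get_driver_tag is a hypothetical stand-in for the real
blk_mq_get_driver_tag()):

static bool example_get_driver_tag(struct request *rq)
{
	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

	/* Flush requests now simply inherit first_rq's tag, so they take
	 * this path too instead of patching the tag table by hand. */
	if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq))
		return false;

	/* Shared-tag accounting and the tag -> request mapping now live
	 * in exactly one place, for every tag holder. */
	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
		rq->rq_flags |= RQF_MQ_INFLIGHT;
		atomic_inc(&hctx->nr_active);
	}
	hctx->tags->rqs[rq->tag] = rq;
	return true;
}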
Signed-off-by: Ming Lei Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-flush.c | 17 ++++++----------- block/blk-mq-tag.h | 12 ------------ block/blk-mq.c | 30 ++++++++++++++---------------- block/blk.h | 5 ----- 4 files changed, 20 insertions(+), 44 deletions(-) (limited to 'block/blk.h') diff --git a/block/blk-flush.c b/block/blk-flush.c index 21108a550fbf..e756db088d84 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -236,12 +236,10 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) error = fq->rq_status; hctx = flush_rq->mq_hctx; - if (!q->elevator) { - blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq); - flush_rq->tag = -1; - } else { - flush_rq->internal_tag = -1; - } + if (!q->elevator) + flush_rq->tag = BLK_MQ_NO_TAG; + else + flush_rq->internal_tag = BLK_MQ_NO_TAG; running = &fq->flush_queue[fq->flush_running_idx]; BUG_ON(fq->flush_pending_idx == fq->flush_running_idx); @@ -315,13 +313,10 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, flush_rq->mq_ctx = first_rq->mq_ctx; flush_rq->mq_hctx = first_rq->mq_hctx; - if (!q->elevator) { - fq->orig_rq = first_rq; + if (!q->elevator) flush_rq->tag = first_rq->tag; - blk_mq_tag_set_rq(flush_rq->mq_hctx, first_rq->tag, flush_rq); - } else { + else flush_rq->internal_tag = first_rq->internal_tag; - } flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH; flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK); diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 3945c7f5b944..b1acac518c4e 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -101,18 +101,6 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, return atomic_read(&hctx->nr_active) < depth; } -/* - * This helper should only be used for flush request to share tag - * with the request cloned from, and both the two requests can't be - * in flight at the same time. The caller has to make sure the tag - * can't be freed. 
- */ -static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx, - unsigned int tag, struct request *rq) -{ - hctx->tags->rqs[tag] = rq; -} - static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags, unsigned int tag) { diff --git a/block/blk-mq.c b/block/blk-mq.c index 803d4ee3b8de..65e0846fd065 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -277,26 +277,20 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, { struct blk_mq_tags *tags = blk_mq_tags_from_data(data); struct request *rq = tags->static_rqs[tag]; - req_flags_t rq_flags = 0; if (data->q->elevator) { rq->tag = BLK_MQ_NO_TAG; rq->internal_tag = tag; } else { - if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) { - rq_flags = RQF_MQ_INFLIGHT; - atomic_inc(&data->hctx->nr_active); - } rq->tag = tag; rq->internal_tag = BLK_MQ_NO_TAG; - data->hctx->tags->rqs[rq->tag] = rq; } /* csd/requeue_work/fifo_time is initialized before use */ rq->q = data->q; rq->mq_ctx = data->ctx; rq->mq_hctx = data->hctx; - rq->rq_flags = rq_flags; + rq->rq_flags = 0; rq->cmd_flags = data->cmd_flags; if (data->flags & BLK_MQ_REQ_PREEMPT) rq->rq_flags |= RQF_PREEMPT; @@ -1127,9 +1121,10 @@ static bool __blk_mq_get_driver_tag(struct request *rq) { struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags; - bool shared = blk_mq_tag_busy(rq->mq_hctx); int tag; + blk_mq_tag_busy(rq->mq_hctx); + if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) { bt = &rq->mq_hctx->tags->breserved_tags; tag_offset = 0; @@ -1142,19 +1137,22 @@ static bool __blk_mq_get_driver_tag(struct request *rq) return false; rq->tag = tag + tag_offset; - if (shared) { - rq->rq_flags |= RQF_MQ_INFLIGHT; - atomic_inc(&rq->mq_hctx->nr_active); - } - rq->mq_hctx->tags->rqs[rq->tag] = rq; return true; } static bool blk_mq_get_driver_tag(struct request *rq) { - if (rq->tag != BLK_MQ_NO_TAG) - return true; - return __blk_mq_get_driver_tag(rq); + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; + + if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq)) + return false; + + if (hctx->flags & BLK_MQ_F_TAG_SHARED) { + rq->rq_flags |= RQF_MQ_INFLIGHT; + atomic_inc(&hctx->nr_active); + } + hctx->tags->rqs[rq->tag] = rq; + return true; } static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, diff --git a/block/blk.h b/block/blk.h index 41a50880c94e..0184a31fe4df 100644 --- a/block/blk.h +++ b/block/blk.h @@ -25,11 +25,6 @@ struct blk_flush_queue { struct list_head flush_data_in_flight; struct request *flush_rq; - /* - * flush_rq shares tag with this rq, both can't be active - * at the same time - */ - struct request *orig_rq; struct lock_class_key key; spinlock_t mq_flush_lock; }; -- cgit v1.2.3 From f695ca3886ce72b027af7aa6040cd420cae2088c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 Jul 2020 10:59:39 +0200 Subject: block: remove the request_queue argument from blk_queue_split The queue can be trivially derived from the bio, so pass one less argument. 
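For driver code the conversion is mechanical, as in this minimal sketch
(hypothetical example_make_request; the real conversions follow in the
driver hunks below):

#include <linux/blkdev.h>

static blk_qc_t example_make_request(struct request_queue *q, struct bio *bio)
{
	/*
	 * Before this patch: blk_queue_split(q, &bio);
	 * The queue is now derived internally from bio->bi_disk->queue,
	 * so the caller no longer passes it.
	 */
	blk_queue_split(&bio);

	/* ... driver-specific handling of the (possibly split) bio ... */
	return BLK_QC_T_NONE;
}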
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-merge.c | 21 ++++++++++----------- block/blk-mq.c | 2 +- block/blk.h | 3 +-- drivers/block/drbd/drbd_req.c | 2 +- drivers/block/pktcdvd.c | 2 +- drivers/block/ps3vram.c | 2 +- drivers/block/rsxx/dev.c | 2 +- drivers/block/umem.c | 2 +- drivers/lightnvm/pblk-init.c | 4 ++-- drivers/md/dm.c | 2 +- drivers/md/md.c | 2 +- drivers/nvme/host/multipath.c | 9 ++++----- drivers/s390/block/dcssblk.c | 2 +- drivers/s390/block/xpram.c | 2 +- include/linux/blkdev.h | 2 +- 15 files changed, 28 insertions(+), 31 deletions(-) (limited to 'block/blk.h') diff --git a/block/blk-merge.c b/block/blk-merge.c index 9c9fb21584b6..20fa22906041 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -283,20 +283,20 @@ split: /** * __blk_queue_split - split a bio and submit the second half - * @q: [in] request queue pointer * @bio: [in, out] bio to be split * @nr_segs: [out] number of segments in the first bio * * Split a bio into two bios, chain the two bios, submit the second half and * store a pointer to the first half in *@bio. If the second bio is still too * big it will be split by a recursive call to this function. Since this - * function may allocate a new bio from @q->bio_split, it is the responsibility - * of the caller to ensure that @q is only released after processing of the + * function may allocate a new bio from @bio->bi_disk->queue->bio_split, it is + * the responsibility of the caller to ensure that + * @bio->bi_disk->queue->bio_split is only released after processing of the * split bio has finished. */ -void __blk_queue_split(struct request_queue *q, struct bio **bio, - unsigned int *nr_segs) +void __blk_queue_split(struct bio **bio, unsigned int *nr_segs) { + struct request_queue *q = (*bio)->bi_disk->queue; struct bio *split = NULL; switch (bio_op(*bio)) { @@ -345,20 +345,19 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio, /** * blk_queue_split - split a bio and submit the second half - * @q: [in] request queue pointer * @bio: [in, out] bio to be split * * Split a bio into two bios, chains the two bios, submit the second half and * store a pointer to the first half in *@bio. Since this function may allocate - * a new bio from @q->bio_split, it is the responsibility of the caller to - * ensure that @q is only released after processing of the split bio has - * finished. + * a new bio from @bio->bi_disk->queue->bio_split, it is the responsibility of + * the caller to ensure that @bio->bi_disk->queue->bio_split is only released + * after processing of the split bio has finished. 
*/ -void blk_queue_split(struct request_queue *q, struct bio **bio) +void blk_queue_split(struct bio **bio) { unsigned int nr_segs; - __blk_queue_split(q, bio, &nr_segs); + __blk_queue_split(bio, &nr_segs); } EXPORT_SYMBOL(blk_queue_split); diff --git a/block/blk-mq.c b/block/blk-mq.c index 65e0846fd065..dbadb7defd61 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2166,7 +2166,7 @@ blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_status_t ret; blk_queue_bounce(q, &bio); - __blk_queue_split(q, &bio, &nr_segs); + __blk_queue_split(&bio, &nr_segs); if (!bio_integrity_prep(bio)) goto queue_exit; diff --git a/block/blk.h b/block/blk.h index 0184a31fe4df..0114fd92c8a0 100644 --- a/block/blk.h +++ b/block/blk.h @@ -220,8 +220,7 @@ ssize_t part_timeout_show(struct device *, struct device_attribute *, char *); ssize_t part_timeout_store(struct device *, struct device_attribute *, const char *, size_t); -void __blk_queue_split(struct request_queue *q, struct bio **bio, - unsigned int *nr_segs); +void __blk_queue_split(struct bio **bio, unsigned int *nr_segs); int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs); int ll_front_merge_fn(struct request *req, struct bio *bio, diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3f09b2ab9778..936868047422 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1598,7 +1598,7 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio) struct drbd_device *device = bio->bi_disk->private_data; unsigned long start_jif; - blk_queue_split(q, &bio); + blk_queue_split(&bio); start_jif = jiffies; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 27a33adc41e4..29b0c62dc86c 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2434,7 +2434,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio) char b[BDEVNAME_SIZE]; struct bio *split; - blk_queue_split(q, &bio); + blk_queue_split(&bio); pd = q->queuedata; if (!pd) { diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 5a1d1d137c72..76cc584aa763 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -593,7 +593,7 @@ static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio) dev_dbg(&dev->core, "%s\n", __func__); - blk_queue_split(q, &bio); + blk_queue_split(&bio); spin_lock_irq(&priv->lock); busy = !bio_list_empty(&priv->list); diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 6a4d8d26e32c..1d52bc73dd0f 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -123,7 +123,7 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio) struct rsxx_bio_meta *bio_meta; blk_status_t st = BLK_STS_IOERR; - blk_queue_split(q, &bio); + blk_queue_split(&bio); might_sleep(); diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 5498f1cf36b3..3b89c07f9e9d 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -527,7 +527,7 @@ static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio) (unsigned long long)bio->bi_iter.bi_sector, bio->bi_iter.bi_size); - blk_queue_split(q, &bio); + blk_queue_split(&bio); spin_lock_irq(&card->lock); *card->biotail = bio; diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 6e677ff62cc9..7a4a1b7a941d 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -63,7 +63,7 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio 
*bio) * constraint. Writes can be of arbitrary size. */ if (bio_data_dir(bio) == READ) { - blk_queue_split(q, &bio); + blk_queue_split(&bio); pblk_submit_read(pblk, bio); } else { /* Prevent deadlock in the case of a modest LUN configuration @@ -71,7 +71,7 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio) * leaves at least 256KB available for user I/O. */ if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl)) - blk_queue_split(q, &bio); + blk_queue_split(&bio); pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c8d91f271c27..5aa7a604f4cb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1776,7 +1776,7 @@ static blk_qc_t dm_process_bio(struct mapped_device *md, */ if (current->bio_list) { if (is_abnormal_io(bio)) - blk_queue_split(md->queue, &bio); + blk_queue_split(&bio); else dm_queue_split(md, ti, &bio); } diff --git a/drivers/md/md.c b/drivers/md/md.c index f567f536b529..ff20868e5e1b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -475,7 +475,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } - blk_queue_split(q, &bio); + blk_queue_split(&bio); if (mddev == NULL || mddev->pers == NULL) { bio_io_error(bio); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index da78e499947a..5a5205ea570a 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -301,12 +301,11 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, int srcu_idx; /* - * The namespace might be going away and the bio might - * be moved to a different queue via blk_steal_bios(), - * so we need to use the bio_split pool from the original - * queue to allocate the bvecs from. + * The namespace might be going away and the bio might be moved to a + * different queue via blk_steal_bios(), so we need to use the bio_split + * pool from the original queue to allocate the bvecs from. 
*/ - blk_queue_split(q, &bio); + blk_queue_split(&bio); srcu_idx = srcu_read_lock(&head->srcu); ns = nvme_find_path(head); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 384edffe5cb4..dfe21eb72760 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -878,7 +878,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio) unsigned long source_addr; unsigned long bytes_done; - blk_queue_split(q, &bio); + blk_queue_split(&bio); bytes_done = 0; dev_info = bio->bi_disk->private_data; diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 45a04daec89e..5456f0ad5a40 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -191,7 +191,7 @@ static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio) unsigned long page_addr; unsigned long bytes; - blk_queue_split(q, &bio); + blk_queue_split(&bio); if ((bio->bi_iter.bi_sector & 7) != 0 || (bio->bi_iter.bi_size & 4095) != 0) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 15497782c176..d002defc1789 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -871,7 +871,7 @@ extern void blk_rq_unprep_clone(struct request *rq); extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern int blk_rq_append_bio(struct request *rq, struct bio **bio); -extern void blk_queue_split(struct request_queue *, struct bio **); +extern void blk_queue_split(struct bio **); extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t, unsigned int, void __user *); -- cgit v1.2.3 From c62b37d96b6eb3ec5ae4cbe00db107bf15aebc93 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 Jul 2020 10:59:43 +0200 Subject: block: move ->make_request_fn to struct block_device_operations The make_request_fn is a little weird in that it sits directly in struct request_queue instead of an operation vector. Replace it with a block_device_operations method called submit_bio (which describes much better what it does). Also remove the request_queue argument to it, as the queue can be derived pretty trivially from the bio. 
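A converted bio-based driver then looks roughly like the sketch below
(hypothetical example_* names, condensed from the brd/nfblock-style
conversions in the hunks that follow):

#include <linux/blkdev.h>
#include <linux/genhd.h>

struct example_dev {
	struct request_queue *queue;
	struct gendisk *disk;
};

static blk_qc_t example_submit_bio(struct bio *bio)
{
	struct example_dev *dev = bio->bi_disk->private_data;

	/* ... handle the bio; no request_queue argument any more ... */
	return BLK_QC_T_NONE;
}

static const struct block_device_operations example_fops = {
	.owner		= THIS_MODULE,
	.submit_bio	= example_submit_bio,	/* was q->make_request_fn */
};

static int example_init_disk(struct example_dev *dev)
{
	/* Assumes dev->disk was allocated earlier; blk_alloc_queue() no
	 * longer takes a make_request callback. */
	dev->queue = blk_alloc_queue(NUMA_NO_NODE);
	if (!dev->queue)
		return -ENOMEM;
	dev->disk->fops = &example_fops;
	dev->disk->queue = dev->queue;
	return 0;
}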
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- Documentation/block/biodoc.rst | 2 +- Documentation/block/writeback_cache_control.rst | 2 +- arch/m68k/emu/nfblock.c | 5 ++- arch/xtensa/platforms/iss/simdisk.c | 5 ++- block/blk-cgroup.c | 2 +- block/blk-core.c | 53 +++++++++---------------- block/blk-mq.c | 10 ++--- block/blk.h | 2 - drivers/block/brd.c | 5 ++- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 9 +++-- drivers/block/drbd/drbd_req.c | 2 +- drivers/block/null_blk_main.c | 17 ++++++-- drivers/block/pktcdvd.c | 11 ++--- drivers/block/ps3vram.c | 15 ++++--- drivers/block/rsxx/dev.c | 7 +++- drivers/block/umem.c | 5 ++- drivers/block/zram/zram_drv.c | 11 ++--- drivers/lightnvm/core.c | 8 +--- drivers/lightnvm/pblk-init.c | 12 ++++-- drivers/md/bcache/request.c | 4 +- drivers/md/bcache/request.h | 4 +- drivers/md/bcache/super.c | 23 +++++++---- drivers/md/dm.c | 23 ++++++----- drivers/md/md.c | 5 ++- drivers/nvdimm/blk.c | 5 ++- drivers/nvdimm/btt.c | 5 ++- drivers/nvdimm/pmem.c | 5 ++- drivers/nvme/host/core.c | 1 + drivers/nvme/host/multipath.c | 5 +-- drivers/nvme/host/nvme.h | 1 + drivers/s390/block/dcssblk.c | 9 ++--- drivers/s390/block/xpram.c | 6 +-- include/linux/blk-mq.h | 2 +- include/linux/blkdev.h | 7 +--- include/linux/lightnvm.h | 3 +- 36 files changed, 153 insertions(+), 140 deletions(-) (limited to 'block/blk.h') diff --git a/Documentation/block/biodoc.rst b/Documentation/block/biodoc.rst index b964796ec9c7..267384159bf7 100644 --- a/Documentation/block/biodoc.rst +++ b/Documentation/block/biodoc.rst @@ -1036,7 +1036,7 @@ Now the generic block layer performs partition-remapping early and thus provides drivers with a sector number relative to whole device, rather than having to take partition number into account in order to arrive at the true sector number. The routine blk_partition_remap() is invoked by -generic_make_request even before invoking the queue specific make_request_fn, +generic_make_request even before invoking the queue specific ->submit_bio, so the i/o scheduler also gets to operate on whole disk sector numbers. This should typically not require changes to block drivers, it just never gets to invoke its own partition sector offset calculations since all bios diff --git a/Documentation/block/writeback_cache_control.rst b/Documentation/block/writeback_cache_control.rst index 2c752c57c14c..b208488d0aae 100644 --- a/Documentation/block/writeback_cache_control.rst +++ b/Documentation/block/writeback_cache_control.rst @@ -47,7 +47,7 @@ the Forced Unit Access is implemented. The REQ_PREFLUSH and REQ_FUA flags may both be set on a single bio. 
-Implementation details for make_request_fn based block drivers +Implementation details for bio based block drivers -------------------------------------------------------------- These drivers will always see the REQ_PREFLUSH and REQ_FUA bits as they sit diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index 87e8b1700acd..92d26c812441 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -59,7 +59,7 @@ struct nfhd_device { struct gendisk *disk; }; -static blk_qc_t nfhd_make_request(struct request_queue *queue, struct bio *bio) +static blk_qc_t nfhd_submit_bio(struct bio *bio) { struct nfhd_device *dev = bio->bi_disk->private_data; struct bio_vec bvec; @@ -93,6 +93,7 @@ static int nfhd_getgeo(struct block_device *bdev, struct hd_geometry *geo) static const struct block_device_operations nfhd_ops = { .owner = THIS_MODULE, + .submit_bio = nfhd_submit_bio, .getgeo = nfhd_getgeo, }; @@ -118,7 +119,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize) dev->bsize = bsize; dev->bshift = ffs(bsize) - 10; - dev->queue = blk_alloc_queue(nfhd_make_request, NUMA_NO_NODE); + dev->queue = blk_alloc_queue(NUMA_NO_NODE); if (dev->queue == NULL) goto free_dev; diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index 31b5020077a0..5107140dbb7e 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -101,7 +101,7 @@ static void simdisk_transfer(struct simdisk *dev, unsigned long sector, spin_unlock(&dev->lock); } -static blk_qc_t simdisk_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t simdisk_submit_bio(struct bio *bio) { struct simdisk *dev = bio->bi_disk->private_data; struct bio_vec bvec; @@ -144,6 +144,7 @@ static void simdisk_release(struct gendisk *disk, fmode_t mode) static const struct block_device_operations simdisk_ops = { .owner = THIS_MODULE, + .submit_bio = simdisk_submit_bio, .open = simdisk_open, .release = simdisk_release, }; @@ -267,7 +268,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which, spin_lock_init(&dev->lock); dev->users = 0; - dev->queue = blk_alloc_queue(simdisk_make_request, NUMA_NO_NODE); + dev->queue = blk_alloc_queue(NUMA_NO_NODE); if (dev->queue == NULL) { pr_err("blk_alloc_queue failed\n"); goto out_alloc_queue; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 748c4b2a9273..594f1d0b0e5a 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1012,7 +1012,7 @@ static int blkcg_css_online(struct cgroup_subsys_state *css) * blkcg_init_queue - initialize blkcg part of request queue * @q: request_queue to initialize * - * Called from __blk_alloc_queue(). Responsible for initializing blkcg + * Called from blk_alloc_queue(). Responsible for initializing blkcg * part of new request_queue @q. * * RETURNS: diff --git a/block/blk-core.c b/block/blk-core.c index 28f60985dc75..cb07a726dd71 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -283,7 +283,7 @@ EXPORT_SYMBOL(blk_dump_rq_flags); * A block device may call blk_sync_queue to ensure that any * such activity is cancelled, thus allowing it to release resources * that the callbacks might use. The caller must already have made sure - * that its ->make_request_fn will not re-add plugging prior to calling + * that its ->submit_bio will not re-add plugging prior to calling * this function. 
* * This function does not cancel any asynchronous activity arising @@ -510,7 +510,7 @@ static void blk_timeout_work(struct work_struct *work) { } -struct request_queue *__blk_alloc_queue(int node_id) +struct request_queue *blk_alloc_queue(int node_id) { struct request_queue *q; int ret; @@ -575,6 +575,7 @@ struct request_queue *__blk_alloc_queue(int node_id) blk_queue_dma_alignment(q, 511); blk_set_default_limits(&q->limits); + q->nr_requests = BLKDEV_MAX_RQ; return q; @@ -592,21 +593,6 @@ fail_q: kmem_cache_free(blk_requestq_cachep, q); return NULL; } - -struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id) -{ - struct request_queue *q; - - if (WARN_ON_ONCE(!make_request)) - return NULL; - - q = __blk_alloc_queue(node_id); - if (!q) - return NULL; - q->make_request_fn = make_request; - q->nr_requests = BLKDEV_MAX_RQ; - return q; -} EXPORT_SYMBOL(blk_alloc_queue); /** @@ -1088,15 +1074,15 @@ end_io: static blk_qc_t do_make_request(struct bio *bio) { - struct request_queue *q = bio->bi_disk->queue; + struct gendisk *disk = bio->bi_disk; blk_qc_t ret = BLK_QC_T_NONE; if (blk_crypto_bio_prep(&bio)) { - if (!q->make_request_fn) - return blk_mq_make_request(q, bio); - ret = q->make_request_fn(q, bio); + if (!disk->fops->submit_bio) + return blk_mq_submit_bio(bio); + ret = disk->fops->submit_bio(bio); } - blk_queue_exit(q); + blk_queue_exit(disk->queue); return ret; } @@ -1113,10 +1099,9 @@ blk_qc_t generic_make_request(struct bio *bio) { /* * bio_list_on_stack[0] contains bios submitted by the current - * make_request_fn. - * bio_list_on_stack[1] contains bios that were submitted before - * the current make_request_fn, but that haven't been processed - * yet. + * ->submit_bio. + * bio_list_on_stack[1] contains bios that were submitted before the + * current ->submit_bio_bio, but that haven't been processed yet. */ struct bio_list bio_list_on_stack[2]; blk_qc_t ret = BLK_QC_T_NONE; @@ -1125,10 +1110,10 @@ blk_qc_t generic_make_request(struct bio *bio) goto out; /* - * We only want one ->make_request_fn to be active at a time, else + * We only want one ->submit_bio to be active at a time, else * stack usage with stacked devices could be a problem. So use * current->bio_list to keep a list of requests submited by a - * make_request_fn function. current->bio_list is also used as a + * ->submit_bio method. current->bio_list is also used as a * flag to say if generic_make_request is currently active in this * task or not. If it is NULL, then no make_request is active. If * it is non-NULL, then a make_request is active, and new requests @@ -1146,12 +1131,12 @@ blk_qc_t generic_make_request(struct bio *bio) * We pretend that we have just taken it off a longer list, so * we assign bio_list to a pointer to the bio_list_on_stack, * thus initialising the bio_list of new bios to be - * added. ->make_request() may indeed add some more bios + * added. ->submit_bio() may indeed add some more bios * through a recursive call to generic_make_request. If it * did, we find a non-NULL value in bio_list and re-enter the loop * from the top. In this case we really did just take the bio * of the top of the list (no pretending) and so remove it from - * bio_list, and call into ->make_request() again. + * bio_list, and call into ->submit_bio() again. 
*/ BUG_ON(bio->bi_next); bio_list_init(&bio_list_on_stack[0]); @@ -1201,9 +1186,9 @@ EXPORT_SYMBOL(generic_make_request); */ blk_qc_t direct_make_request(struct bio *bio) { - struct request_queue *q = bio->bi_disk->queue; + struct gendisk *disk = bio->bi_disk; - if (WARN_ON_ONCE(q->make_request_fn)) { + if (WARN_ON_ONCE(!disk->queue->mq_ops)) { bio_io_error(bio); return BLK_QC_T_NONE; } @@ -1212,10 +1197,10 @@ blk_qc_t direct_make_request(struct bio *bio) if (unlikely(bio_queue_enter(bio))) return BLK_QC_T_NONE; if (!blk_crypto_bio_prep(&bio)) { - blk_queue_exit(q); + blk_queue_exit(disk->queue); return BLK_QC_T_NONE; } - return blk_mq_make_request(q, bio); + return blk_mq_submit_bio(bio); } EXPORT_SYMBOL_GPL(direct_make_request); diff --git a/block/blk-mq.c b/block/blk-mq.c index dbadb7defd61..948987e9b6ab 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2136,8 +2136,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) } /** - * blk_mq_make_request - Create and send a request to block device. - * @q: Request queue pointer. + * blk_mq_submit_bio - Create and send a request to block device. * @bio: Bio pointer. * * Builds up a request structure from @q and @bio and send to the device. The @@ -2151,8 +2150,9 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) * * Returns: Request queue cookie. */ -blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) +blk_qc_t blk_mq_submit_bio(struct bio *bio) { + struct request_queue *q = bio->bi_disk->queue; const int is_sync = op_is_sync(bio->bi_opf); const int is_flush_fua = op_is_flush(bio->bi_opf); struct blk_mq_alloc_data data = { @@ -2277,7 +2277,7 @@ queue_exit: blk_queue_exit(q); return BLK_QC_T_NONE; } -EXPORT_SYMBOL_GPL(blk_mq_make_request); /* only for request based dm */ +EXPORT_SYMBOL_GPL(blk_mq_submit_bio); /* only for request based dm */ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, unsigned int hctx_idx) @@ -3017,7 +3017,7 @@ struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, { struct request_queue *uninit_q, *q; - uninit_q = __blk_alloc_queue(set->numa_node); + uninit_q = blk_alloc_queue(set->numa_node); if (!uninit_q) return ERR_PTR(-ENOMEM); uninit_q->queuedata = queuedata; diff --git a/block/blk.h b/block/blk.h index 0114fd92c8a0..9dcf51c94096 100644 --- a/block/blk.h +++ b/block/blk.h @@ -419,8 +419,6 @@ static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) #endif } -struct request_queue *__blk_alloc_queue(int node_id); - int bio_add_hw_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, unsigned int max_sectors, bool *same_page); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 2fb25c348d53..2723a70eb855 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -282,7 +282,7 @@ out: return err; } -static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t brd_submit_bio(struct bio *bio) { struct brd_device *brd = bio->bi_disk->private_data; struct bio_vec bvec; @@ -330,6 +330,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector, static const struct block_device_operations brd_fops = { .owner = THIS_MODULE, + .submit_bio = brd_submit_bio, .rw_page = brd_rw_page, }; @@ -381,7 +382,7 @@ static struct brd_device *brd_alloc(int i) spin_lock_init(&brd->brd_lock); INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC); - brd->brd_queue = blk_alloc_queue(brd_make_request, NUMA_NO_NODE); + 
brd->brd_queue = blk_alloc_queue(NUMA_NO_NODE); if (!brd->brd_queue) goto out_free_dev; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 33d0831c99b6..0327408da79c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1451,7 +1451,7 @@ extern void conn_free_crypto(struct drbd_connection *connection); /* drbd_req */ extern void do_submit(struct work_struct *ws); extern void __drbd_make_request(struct drbd_device *, struct bio *, unsigned long); -extern blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio); +extern blk_qc_t drbd_submit_bio(struct bio *bio); extern int drbd_read_remote(struct drbd_device *device, struct drbd_request *req); extern int is_valid_ar_handle(struct drbd_request *, sector_t); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 26f4e0aa7393..2b05de0896e2 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -132,9 +132,10 @@ wait_queue_head_t drbd_pp_wait; DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5); static const struct block_device_operations drbd_ops = { - .owner = THIS_MODULE, - .open = drbd_open, - .release = drbd_release, + .owner = THIS_MODULE, + .submit_bio = drbd_submit_bio, + .open = drbd_open, + .release = drbd_release, }; struct bio *bio_alloc_drbd(gfp_t gfp_mask) @@ -2801,7 +2802,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - q = blk_alloc_queue(drbd_make_request, NUMA_NO_NODE); + q = blk_alloc_queue(NUMA_NO_NODE); if (!q) goto out_no_q; device->rq_queue = q; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 936868047422..c7e14c9a6e5f 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1593,7 +1593,7 @@ void do_submit(struct work_struct *ws) } } -blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio) +blk_qc_t drbd_submit_bio(struct bio *bio) { struct drbd_device *device = bio->bi_disk->private_data; unsigned long start_jif; diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 93ce0a00b2ae..907c6858aec0 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1388,7 +1388,7 @@ static struct nullb_queue *nullb_to_queue(struct nullb *nullb) return &nullb->queues[index]; } -static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio) +static blk_qc_t null_submit_bio(struct bio *bio) { sector_t sector = bio->bi_iter.bi_sector; sector_t nr_sectors = bio_sectors(bio); @@ -1575,7 +1575,13 @@ static void null_config_discard(struct nullb *nullb) blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q); } -static const struct block_device_operations null_ops = { +static const struct block_device_operations null_bio_ops = { + .owner = THIS_MODULE, + .submit_bio = null_submit_bio, + .report_zones = null_report_zones, +}; + +static const struct block_device_operations null_rq_ops = { .owner = THIS_MODULE, .report_zones = null_report_zones, }; @@ -1647,7 +1653,10 @@ static int null_gendisk_register(struct nullb *nullb) disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO; disk->major = null_major; disk->first_minor = nullb->index; - disk->fops = &null_ops; + if (queue_is_mq(nullb->q)) + disk->fops = &null_rq_ops; + else + disk->fops = &null_bio_ops; disk->private_data = nullb; disk->queue = nullb->q; strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); @@ -1792,7 +1801,7 @@ static int 
null_add_dev(struct nullb_device *dev) goto out_cleanup_tags; } } else if (dev->queue_mode == NULL_Q_BIO) { - nullb->q = blk_alloc_queue(null_queue_bio, dev->home_node); + nullb->q = blk_alloc_queue(dev->home_node); if (!nullb->q) { rv = -ENOMEM; goto out_cleanup_queues; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 29b0c62dc86c..5588bd4cd267 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -36,7 +36,7 @@ * block device, assembling the pieces to full packets and queuing them to the * packet I/O scheduler. * - * At the top layer there is a custom make_request_fn function that forwards + * At the top layer there is a custom ->submit_bio function that forwards * read requests directly to the iosched queue and puts write requests in the * unaligned write queue. A kernel thread performs the necessary read * gathering to convert the unaligned writes to aligned writes and then feeds @@ -2428,7 +2428,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio) } } -static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t pkt_submit_bio(struct bio *bio) { struct pktcdvd_device *pd; char b[BDEVNAME_SIZE]; @@ -2436,7 +2436,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio) blk_queue_split(&bio); - pd = q->queuedata; + pd = bio->bi_disk->queue->queuedata; if (!pd) { pr_err("%s incorrect request queue\n", bio_devname(bio, b)); goto end_io; @@ -2480,7 +2480,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio) split = bio; } - pkt_make_request_write(q, split); + pkt_make_request_write(bio->bi_disk->queue, split); } while (split != bio); return BLK_QC_T_NONE; @@ -2685,6 +2685,7 @@ static char *pkt_devnode(struct gendisk *disk, umode_t *mode) static const struct block_device_operations pktcdvd_ops = { .owner = THIS_MODULE, + .submit_bio = pkt_submit_bio, .open = pkt_open, .release = pkt_close, .ioctl = pkt_ioctl, @@ -2749,7 +2750,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) disk->flags = GENHD_FL_REMOVABLE; strcpy(disk->disk_name, pd->name); disk->private_data = pd; - disk->queue = blk_alloc_queue(pkt_make_request, NUMA_NO_NODE); + disk->queue = blk_alloc_queue(NUMA_NO_NODE); if (!disk->queue) goto out_mem2; diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 76cc584aa763..1088798c8dd0 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -90,12 +90,6 @@ struct ps3vram_priv { static int ps3vram_major; - -static const struct block_device_operations ps3vram_fops = { - .owner = THIS_MODULE, -}; - - #define DMA_NOTIFIER_HANDLE_BASE 0x66604200 /* first DMA notifier handle */ #define DMA_NOTIFIER_OFFSET_BASE 0x1000 /* first DMA notifier offset */ #define DMA_NOTIFIER_SIZE 0x40 @@ -585,7 +579,7 @@ out: return next; } -static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t ps3vram_submit_bio(struct bio *bio) { struct ps3_system_bus_device *dev = bio->bi_disk->private_data; struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); @@ -610,6 +604,11 @@ static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } +static const struct block_device_operations ps3vram_fops = { + .owner = THIS_MODULE, + .submit_bio = ps3vram_submit_bio, +}; + static int ps3vram_probe(struct ps3_system_bus_device *dev) { struct ps3vram_priv *priv; @@ -737,7 +736,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) ps3vram_proc_init(dev); - queue 
= blk_alloc_queue(ps3vram_make_request, NUMA_NO_NODE); + queue = blk_alloc_queue(NUMA_NO_NODE); if (!queue) { dev_err(&dev->core, "blk_alloc_queue failed\n"); error = -ENOMEM; diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 1d52bc73dd0f..edacefff6e35 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -50,6 +50,8 @@ struct rsxx_bio_meta { static struct kmem_cache *bio_meta_pool; +static blk_qc_t rsxx_submit_bio(struct bio *bio); + /*----------------- Block Device Operations -----------------*/ static int rsxx_blkdev_ioctl(struct block_device *bdev, fmode_t mode, @@ -92,6 +94,7 @@ static int rsxx_getgeo(struct block_device *bdev, struct hd_geometry *geo) static const struct block_device_operations rsxx_fops = { .owner = THIS_MODULE, + .submit_bio = rsxx_submit_bio, .getgeo = rsxx_getgeo, .ioctl = rsxx_blkdev_ioctl, }; @@ -117,7 +120,7 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card, } } -static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t rsxx_submit_bio(struct bio *bio) { struct rsxx_cardinfo *card = bio->bi_disk->private_data; struct rsxx_bio_meta *bio_meta; @@ -233,7 +236,7 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card) return -ENOMEM; } - card->queue = blk_alloc_queue(rsxx_make_request, NUMA_NO_NODE); + card->queue = blk_alloc_queue(NUMA_NO_NODE); if (!card->queue) { dev_err(CARD_TO_DEV(card), "Failed queue alloc\n"); unregister_blkdev(card->major, DRIVER_NAME); diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 3b89c07f9e9d..2b95d7b33b91 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -519,7 +519,7 @@ static int mm_check_plugged(struct cardinfo *card) return !!blk_check_plugged(mm_unplug, card, sizeof(struct blk_plug_cb)); } -static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t mm_submit_bio(struct bio *bio) { struct cardinfo *card = bio->bi_disk->private_data; @@ -779,6 +779,7 @@ static int mm_getgeo(struct block_device *bdev, struct hd_geometry *geo) static const struct block_device_operations mm_fops = { .owner = THIS_MODULE, + .submit_bio = mm_submit_bio, .getgeo = mm_getgeo, .revalidate_disk = mm_revalidate, }; @@ -886,7 +887,7 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) card->biotail = &card->bio; spin_lock_init(&card->lock); - card->queue = blk_alloc_queue(mm_make_request, NUMA_NO_NODE); + card->queue = blk_alloc_queue(NUMA_NO_NODE); if (!card->queue) goto failed_alloc; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 0564e3f38408..f9a57f147ee1 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -793,9 +793,9 @@ static void zram_sync_read(struct work_struct *work) } /* - * Block layer want one ->make_request_fn to be active at a time - * so if we use chained IO with parent IO in same context, - * it's a deadlock. To avoid, it, it uses worker thread context. + * Block layer want one ->submit_bio to be active at a time, so if we use + * chained IO with parent IO in same context, it's a deadlock. To avoid that, + * use a worker thread context. */ static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, unsigned long entry, struct bio *bio) @@ -1584,7 +1584,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) /* * Handler function for all zram I/O requests. 
*/ -static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio) +static blk_qc_t zram_submit_bio(struct bio *bio) { struct zram *zram = bio->bi_disk->private_data; @@ -1813,6 +1813,7 @@ static int zram_open(struct block_device *bdev, fmode_t mode) static const struct block_device_operations zram_devops = { .open = zram_open, + .submit_bio = zram_submit_bio, .swap_slot_free_notify = zram_slot_free_notify, .rw_page = zram_rw_page, .owner = THIS_MODULE @@ -1891,7 +1892,7 @@ static int zram_add(void) #ifdef CONFIG_ZRAM_WRITEBACK spin_lock_init(&zram->wb_limit_lock); #endif - queue = blk_alloc_queue(zram_make_request, NUMA_NO_NODE); + queue = blk_alloc_queue(NUMA_NO_NODE); if (!queue) { pr_err("Error allocating disk queue for device %d\n", device_id); diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index db38a68abb6c..fe78bf0fdce5 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -236,10 +236,6 @@ err_dev: return tgt_dev; } -static const struct block_device_operations nvm_fops = { - .owner = THIS_MODULE, -}; - static struct nvm_tgt_type *__nvm_find_target_type(const char *name) { struct nvm_tgt_type *tt; @@ -380,7 +376,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) goto err_dev; } - tqueue = blk_alloc_queue(tt->make_rq, dev->q->node); + tqueue = blk_alloc_queue(dev->q->node); if (!tqueue) { ret = -ENOMEM; goto err_disk; @@ -390,7 +386,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) tdisk->flags = GENHD_FL_EXT_DEVT; tdisk->major = 0; tdisk->first_minor = 0; - tdisk->fops = &nvm_fops; + tdisk->fops = tt->bops; tdisk->queue = tqueue; targetdata = tt->init(tgt_dev, tdisk, create->flags); diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 7a4a1b7a941d..b6246f73895c 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -47,9 +47,9 @@ static struct pblk_global_caches pblk_caches = { struct bio_set pblk_bio_set; -static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio) +static blk_qc_t pblk_submit_bio(struct bio *bio) { - struct pblk *pblk = q->queuedata; + struct pblk *pblk = bio->bi_disk->queue->queuedata; if (bio_op(bio) == REQ_OP_DISCARD) { pblk_discard(pblk, bio); @@ -79,6 +79,12 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } +static const struct block_device_operations pblk_bops = { + .owner = THIS_MODULE, + .submit_bio = pblk_submit_bio, +}; + + static size_t pblk_trans_map_size(struct pblk *pblk) { int entry_size = 8; @@ -1280,7 +1286,7 @@ static struct nvm_tgt_type tt_pblk = { .name = "pblk", .version = {1, 0, 0}, - .make_rq = pblk_make_rq, + .bops = &pblk_bops, .capacity = pblk_capacity, .init = pblk_init, diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 7acf024e99f3..fc5702b10074 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1158,7 +1158,7 @@ static void quit_max_writeback_rate(struct cache_set *c, /* Cached devices - read & write stuff */ -blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio) +blk_qc_t cached_dev_submit_bio(struct bio *bio) { struct search *s; struct bcache_device *d = bio->bi_disk->private_data; @@ -1291,7 +1291,7 @@ static void flash_dev_nodata(struct closure *cl) continue_at(cl, search_free, NULL); } -blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio) +blk_qc_t flash_dev_submit_bio(struct bio *bio) { struct search *s; 
struct closure *cl; diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index bb005c93dd72..82b38366a95d 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -37,10 +37,10 @@ unsigned int bch_get_congested(const struct cache_set *c); void bch_data_insert(struct closure *cl); void bch_cached_dev_request_init(struct cached_dev *dc); -blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio); +blk_qc_t cached_dev_submit_bio(struct bio *bio); void bch_flash_dev_request_init(struct bcache_device *d); -blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio); +blk_qc_t flash_dev_submit_bio(struct bio *bio); extern struct kmem_cache *bch_search_cache; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 21aa168113d3..de13f6e91696 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -680,7 +680,16 @@ static int ioctl_dev(struct block_device *b, fmode_t mode, return d->ioctl(d, mode, cmd, arg); } -static const struct block_device_operations bcache_ops = { +static const struct block_device_operations bcache_cached_ops = { + .submit_bio = cached_dev_submit_bio, + .open = open_dev, + .release = release_dev, + .ioctl = ioctl_dev, + .owner = THIS_MODULE, +}; + +static const struct block_device_operations bcache_flash_ops = { + .submit_bio = flash_dev_submit_bio, .open = open_dev, .release = release_dev, .ioctl = ioctl_dev, @@ -820,8 +829,8 @@ static void bcache_device_free(struct bcache_device *d) } static int bcache_device_init(struct bcache_device *d, unsigned int block_size, - sector_t sectors, make_request_fn make_request_fn, - struct block_device *cached_bdev) + sector_t sectors, struct block_device *cached_bdev, + const struct block_device_operations *ops) { struct request_queue *q; const size_t max_stripes = min_t(size_t, INT_MAX, @@ -868,10 +877,10 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, d->disk->major = bcache_major; d->disk->first_minor = idx_to_first_minor(idx); - d->disk->fops = &bcache_ops; + d->disk->fops = ops; d->disk->private_data = d; - q = blk_alloc_queue(make_request_fn, NUMA_NO_NODE); + q = blk_alloc_queue(NUMA_NO_NODE); if (!q) return -ENOMEM; @@ -1355,7 +1364,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) ret = bcache_device_init(&dc->disk, block_size, dc->bdev->bd_part->nr_sects - dc->sb.data_offset, - cached_dev_make_request, dc->bdev); + dc->bdev, &bcache_cached_ops); if (ret) return ret; @@ -1468,7 +1477,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) kobject_init(&d->kobj, &bch_flash_dev_ktype); if (bcache_device_init(d, block_bytes(c), u->sectors, - flash_dev_make_request, NULL)) + NULL, &bcache_flash_ops)) goto err; bcache_device_attach(d, c, u - c->uuids); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 5aa7a604f4cb..5acfaba3700d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1770,7 +1770,7 @@ static blk_qc_t dm_process_bio(struct mapped_device *md, } /* - * If in ->make_request_fn we need to use blk_queue_split(), otherwise + * If in ->queue_bio we need to use blk_queue_split(), otherwise * queue_limits for abnormal requests (e.g. discard, writesame, etc) * won't be imposed. 
*/ @@ -1787,7 +1787,7 @@ static blk_qc_t dm_process_bio(struct mapped_device *md, return __split_and_process_bio(md, map, bio); } -static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t dm_submit_bio(struct bio *bio) { struct mapped_device *md = bio->bi_disk->private_data; blk_qc_t ret = BLK_QC_T_NONE; @@ -1798,12 +1798,12 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) /* * We are called with a live reference on q_usage_counter, but * that one will be released as soon as we return. Grab an - * extra one as blk_mq_make_request expects to be able to - * consume a reference (which lives until the request is freed - * in case a request is allocated). + * extra one as blk_mq_submit_bio expects to be able to consume + * a reference (which lives until the request is freed in case a + * request is allocated). */ - percpu_ref_get(&q->q_usage_counter); - return blk_mq_make_request(q, bio); + percpu_ref_get(&bio->bi_disk->queue->q_usage_counter); + return blk_mq_submit_bio(bio); } map = dm_get_live_table(md, &srcu_idx); @@ -1988,11 +1988,11 @@ static struct mapped_device *alloc_dev(int minor) spin_lock_init(&md->uevent_lock); /* - * default to bio-based required ->make_request_fn until DM - * table is loaded and md->type established. If request-based - * table is loaded: blk-mq will override accordingly. + * default to bio-based until DM table is loaded and md->type + * established. If request-based table is loaded: blk-mq will + * override accordingly. */ - md->queue = blk_alloc_queue(dm_make_request, numa_node_id); + md->queue = blk_alloc_queue(numa_node_id); if (!md->queue) goto bad; @@ -3232,6 +3232,7 @@ static const struct pr_ops dm_pr_ops = { }; static const struct block_device_operations dm_blk_dops = { + .submit_bio = dm_submit_bio, .open = dm_blk_open, .release = dm_blk_close, .ioctl = dm_blk_ioctl, diff --git a/drivers/md/md.c b/drivers/md/md.c index ff20868e5e1b..7b7cb49be351 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -463,7 +463,7 @@ check_suspended: } EXPORT_SYMBOL(md_handle_request); -static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t md_submit_bio(struct bio *bio) { const int rw = bio_data_dir(bio); const int sgrp = op_stat_group(bio_op(bio)); @@ -5641,7 +5641,7 @@ static int md_alloc(dev_t dev, char *name) mddev->hold_active = UNTIL_STOP; error = -ENOMEM; - mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE); + mddev->queue = blk_alloc_queue(NUMA_NO_NODE); if (!mddev->queue) goto abort; @@ -7823,6 +7823,7 @@ static int md_revalidate(struct gendisk *disk) static const struct block_device_operations md_fops = { .owner = THIS_MODULE, + .submit_bio = md_submit_bio, .open = md_open, .release = md_release, .ioctl = md_ioctl, diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 39030a324d7f..1f718381a045 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -162,7 +162,7 @@ static int nsblk_do_bvec(struct nd_namespace_blk *nsblk, return err; } -static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t nd_blk_submit_bio(struct bio *bio) { struct bio_integrity_payload *bip; struct nd_namespace_blk *nsblk = bio->bi_disk->private_data; @@ -225,6 +225,7 @@ static int nsblk_rw_bytes(struct nd_namespace_common *ndns, static const struct block_device_operations nd_blk_fops = { .owner = THIS_MODULE, + .submit_bio = nd_blk_submit_bio, .revalidate_disk = nvdimm_revalidate_disk, }; @@ -250,7 +251,7 @@ static int 
nsblk_attach_disk(struct nd_namespace_blk *nsblk) internal_nlba = div_u64(nsblk->size, nsblk_internal_lbasize(nsblk)); available_disk_size = internal_nlba * nsblk_sector_size(nsblk); - q = blk_alloc_queue(nd_blk_make_request, NUMA_NO_NODE); + q = blk_alloc_queue(NUMA_NO_NODE); if (!q) return -ENOMEM; if (devm_add_action_or_reset(dev, nd_blk_release_queue, q)) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 48e9d169b6f9..412d21d8f643 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1439,7 +1439,7 @@ static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, return ret; } -static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t btt_submit_bio(struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); struct btt *btt = bio->bi_disk->private_data; @@ -1512,6 +1512,7 @@ static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo) static const struct block_device_operations btt_fops = { .owner = THIS_MODULE, + .submit_bio = btt_submit_bio, .rw_page = btt_rw_page, .getgeo = btt_getgeo, .revalidate_disk = nvdimm_revalidate_disk, @@ -1523,7 +1524,7 @@ static int btt_blk_init(struct btt *btt) struct nd_namespace_common *ndns = nd_btt->ndns; /* create a new disk and request queue for btt */ - btt->btt_queue = blk_alloc_queue(btt_make_request, NUMA_NO_NODE); + btt->btt_queue = blk_alloc_queue(NUMA_NO_NODE); if (!btt->btt_queue) return -ENOMEM; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index d25e66fd942d..94790e6e0e4c 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -189,7 +189,7 @@ static blk_status_t pmem_do_write(struct pmem_device *pmem, return rc; } -static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t pmem_submit_bio(struct bio *bio) { int ret = 0; blk_status_t rc = 0; @@ -281,6 +281,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, static const struct block_device_operations pmem_fops = { .owner = THIS_MODULE, + .submit_bio = pmem_submit_bio, .rw_page = pmem_rw_page, .revalidate_disk = nvdimm_revalidate_disk, }; @@ -423,7 +424,7 @@ static int pmem_attach_disk(struct device *dev, return -EBUSY; } - q = blk_alloc_queue(pmem_make_request, dev_to_node(dev)); + q = blk_alloc_queue(dev_to_node(dev)); if (!q) return -ENOMEM; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6810c8812aed..5192a024dc1b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2178,6 +2178,7 @@ static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode) const struct block_device_operations nvme_ns_head_ops = { .owner = THIS_MODULE, + .submit_bio = nvme_ns_head_submit_bio, .open = nvme_ns_head_open, .release = nvme_ns_head_release, .ioctl = nvme_ioctl, diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 5a5205ea570a..89afcf943bf8 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -291,8 +291,7 @@ static bool nvme_available_path(struct nvme_ns_head *head) return false; } -static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, - struct bio *bio) +blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) { struct nvme_ns_head *head = bio->bi_disk->private_data; struct device *dev = disk_to_dev(head->disk); @@ -374,7 +373,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath) return 0; - q = 
blk_alloc_queue(nvme_ns_head_make_request, ctrl->numa_node); + q = blk_alloc_queue(ctrl->numa_node); if (!q) goto out; blk_queue_flag_set(QUEUE_FLAG_NONROT, q); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 61780c38f51f..9f2b0e0b4558 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -586,6 +586,7 @@ void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl); struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); +blk_qc_t nvme_ns_head_submit_bio(struct bio *bio); static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) { diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index dfe21eb72760..35666c3537de 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -31,8 +31,7 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode); static void dcssblk_release(struct gendisk *disk, fmode_t mode); -static blk_qc_t dcssblk_make_request(struct request_queue *q, - struct bio *bio); +static blk_qc_t dcssblk_submit_bio(struct bio *bio); static long dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); @@ -41,6 +40,7 @@ static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; static int dcssblk_major; static const struct block_device_operations dcssblk_devops = { .owner = THIS_MODULE, + .submit_bio = dcssblk_submit_bio, .open = dcssblk_open, .release = dcssblk_release, }; @@ -651,8 +651,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char } dev_info->gd->major = dcssblk_major; dev_info->gd->fops = &dcssblk_devops; - dev_info->dcssblk_queue = - blk_alloc_queue(dcssblk_make_request, NUMA_NO_NODE); + dev_info->dcssblk_queue = blk_alloc_queue(NUMA_NO_NODE); dev_info->gd->queue = dev_info->dcssblk_queue; dev_info->gd->private_data = dev_info; blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096); @@ -868,7 +867,7 @@ dcssblk_release(struct gendisk *disk, fmode_t mode) } static blk_qc_t -dcssblk_make_request(struct request_queue *q, struct bio *bio) +dcssblk_submit_bio(struct bio *bio) { struct dcssblk_dev_info *dev_info; struct bio_vec bvec; diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 5456f0ad5a40..c2536f7767b3 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -182,7 +182,7 @@ static unsigned long xpram_highest_page_index(void) /* * Block device make request function. 
*/ -static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t xpram_submit_bio(struct bio *bio) { xpram_device_t *xdev = bio->bi_disk->private_data; struct bio_vec bvec; @@ -250,6 +250,7 @@ static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo) static const struct block_device_operations xpram_devops = { .owner = THIS_MODULE, + .submit_bio = xpram_submit_bio, .getgeo = xpram_getgeo, }; @@ -343,8 +344,7 @@ static int __init xpram_setup_blkdev(void) xpram_disks[i] = alloc_disk(1); if (!xpram_disks[i]) goto out; - xpram_queues[i] = blk_alloc_queue(xpram_make_request, - NUMA_NO_NODE); + xpram_queues[i] = blk_alloc_queue(NUMA_NO_NODE); if (!xpram_queues[i]) { put_disk(xpram_disks[i]); goto out; } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index faa340b70123..23230c1d031e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -596,6 +596,6 @@ static inline void blk_mq_cleanup_rq(struct request *rq) rq->q->mq_ops->cleanup_rq(rq); } -blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio); +blk_qc_t blk_mq_submit_bio(struct bio *bio); #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d002defc1789..083ffc5bc51b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -286,8 +286,6 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; -typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); - struct bio_vec; enum blk_eh_timer_return { @@ -398,8 +396,6 @@ struct request_queue { struct blk_queue_stats *stats; struct rq_qos *rq_qos; - make_request_fn *make_request_fn; - const struct blk_mq_ops *mq_ops; /* sw queues */ @@ -1162,7 +1158,7 @@ static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, extern void blk_dump_rq_flags(struct request *, char *); bool __must_check blk_get_queue(struct request_queue *); -struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id); +struct request_queue *blk_alloc_queue(int node_id); extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); @@ -1778,6 +1774,7 @@ static inline void blk_ksm_unregister(struct request_queue *q) { } struct block_device_operations { + blk_qc_t (*submit_bio) (struct bio *bio); int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index ee8ec2e68055..1db223710b28 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -631,7 +631,6 @@ static inline int nvm_next_ppa_in_chk(struct nvm_tgt_dev *dev, return last; } -typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, int flags); @@ -650,7 +649,7 @@ struct nvm_tgt_type { int flags; /* target entry points */ - nvm_tgt_make_rq_fn *make_rq; + const struct block_device_operations *bops; nvm_tgt_capacity_fn *capacity; /* module-specific init/teardown */ -- cgit v1.2.3 From 4e2f62e566b5bddec00682f36a404e05a3b64b6a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 1 Jul 2020 22:58:32 -0600 Subject: Revert "blk-mq: put driver tag when this request is completed" This reverts the following commits: 37f4a24c2469a10a4c16c641671bd766e276cf9f 723bf178f158abd1ce6069cb049581b3cb003aab
36a3df5a4574d5ddf59804fcd0c4e9654c514d9a The last one is the culprit, but we have to go a bit deeper to get this to revert cleanly. There's been a report that this breaks some MMC setups [1], and also causes an issue with swap [2]. Until this can be figured out, revert the offending commits. [1] https://lore.kernel.org/linux-block/57fb09b1-54ba-f3aa-f82c-d709b0e6b281@samsung.com/ [2] https://lore.kernel.org/linux-block/20200702043721.GA1087@lca.pw/ Reported-by: Marek Szyprowski Reported-by: Qian Cai Signed-off-by: Jens Axboe --- block/blk-flush.c | 23 +++++++++++++++++------ block/blk-mq-tag.h | 12 ++++++++++++ block/blk-mq.c | 52 ++++++++++++++++------------------------------------ block/blk-mq.h | 20 ++++++++++++++++++++ block/blk.h | 5 +++++ 5 files changed, 70 insertions(+), 42 deletions(-) (limited to 'block/blk.h') diff --git a/block/blk-flush.c b/block/blk-flush.c index e756db088d84..15ae0155ec07 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -236,10 +236,13 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) error = fq->rq_status; hctx = flush_rq->mq_hctx; - if (!q->elevator) - flush_rq->tag = BLK_MQ_NO_TAG; - else - flush_rq->internal_tag = BLK_MQ_NO_TAG; + if (!q->elevator) { + blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq); + flush_rq->tag = -1; + } else { + blk_mq_put_driver_tag(flush_rq); + flush_rq->internal_tag = -1; + } running = &fq->flush_queue[fq->flush_running_idx]; BUG_ON(fq->flush_pending_idx == fq->flush_running_idx); @@ -313,10 +316,13 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, flush_rq->mq_ctx = first_rq->mq_ctx; flush_rq->mq_hctx = first_rq->mq_hctx; - if (!q->elevator) + if (!q->elevator) { + fq->orig_rq = first_rq; flush_rq->tag = first_rq->tag; - else + blk_mq_tag_set_rq(flush_rq->mq_hctx, first_rq->tag, flush_rq); + } else { flush_rq->internal_tag = first_rq->internal_tag; + } flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH; flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK); @@ -335,6 +341,11 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error) unsigned long flags; struct blk_flush_queue *fq = blk_get_flush_queue(q, ctx); + if (q->elevator) { + WARN_ON(rq->tag < 0); + blk_mq_put_driver_tag(rq); + } + /* * After populating an empty queue, kick it to avoid stall. Read * the comment in flush_end_io(). diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index b1acac518c4e..3945c7f5b944 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -101,6 +101,18 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, return atomic_read(&hctx->nr_active) < depth; } +/* + * This helper should only be used for flush request to share tag + * with the request cloned from, and both the two requests can't be + * in flight at the same time. The caller has to make sure the tag + * can't be freed. 
+ */ +static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx, + unsigned int tag, struct request *rq) +{ + hctx->tags->rqs[tag] = rq; +} + static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags, unsigned int tag) { diff --git a/block/blk-mq.c b/block/blk-mq.c index 948987e9b6ab..abcf590f6238 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -277,20 +277,26 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, { struct blk_mq_tags *tags = blk_mq_tags_from_data(data); struct request *rq = tags->static_rqs[tag]; + req_flags_t rq_flags = 0; if (data->q->elevator) { rq->tag = BLK_MQ_NO_TAG; rq->internal_tag = tag; } else { + if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) { + rq_flags = RQF_MQ_INFLIGHT; + atomic_inc(&data->hctx->nr_active); + } rq->tag = tag; rq->internal_tag = BLK_MQ_NO_TAG; + data->hctx->tags->rqs[rq->tag] = rq; } /* csd/requeue_work/fifo_time is initialized before use */ rq->q = data->q; rq->mq_ctx = data->ctx; rq->mq_hctx = data->hctx; - rq->rq_flags = 0; + rq->rq_flags = rq_flags; rq->cmd_flags = data->cmd_flags; if (data->flags & BLK_MQ_REQ_PREEMPT) rq->rq_flags |= RQF_PREEMPT; @@ -660,32 +666,10 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq) return cpu_online(rq->mq_ctx->cpu); } -static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, - struct request *rq) -{ - blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag); - rq->tag = BLK_MQ_NO_TAG; - - if (rq->rq_flags & RQF_MQ_INFLIGHT) { - rq->rq_flags &= ~RQF_MQ_INFLIGHT; - atomic_dec(&hctx->nr_active); - } -} - -static inline void blk_mq_put_driver_tag(struct request *rq) -{ - if (rq->tag == BLK_MQ_NO_TAG || rq->internal_tag == BLK_MQ_NO_TAG) - return; - - __blk_mq_put_driver_tag(rq->mq_hctx, rq); -} - bool blk_mq_complete_request_remote(struct request *rq) { WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); - blk_mq_put_driver_tag(rq); - /* * For a polled request, always complete locallly, it's pointless * to redirect the completion. 
@@ -1121,10 +1105,9 @@ static bool __blk_mq_get_driver_tag(struct request *rq) { struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags; + bool shared = blk_mq_tag_busy(rq->mq_hctx); int tag; - blk_mq_tag_busy(rq->mq_hctx); - if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) { bt = &rq->mq_hctx->tags->breserved_tags; tag_offset = 0; @@ -1137,22 +1120,19 @@ static bool __blk_mq_get_driver_tag(struct request *rq) return false; rq->tag = tag + tag_offset; + if (shared) { + rq->rq_flags |= RQF_MQ_INFLIGHT; + atomic_inc(&rq->mq_hctx->nr_active); + } + rq->mq_hctx->tags->rqs[rq->tag] = rq; return true; } static bool blk_mq_get_driver_tag(struct request *rq) { - struct blk_mq_hw_ctx *hctx = rq->mq_hctx; - - if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq)) - return false; - - if (hctx->flags & BLK_MQ_F_TAG_SHARED) { - rq->rq_flags |= RQF_MQ_INFLIGHT; - atomic_inc(&hctx->nr_active); - } - hctx->tags->rqs[rq->tag] = rq; - return true; + if (rq->tag != BLK_MQ_NO_TAG) + return true; + return __blk_mq_get_driver_tag(rq); } static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, diff --git a/block/blk-mq.h b/block/blk-mq.h index 3661e821d460..863a2f3346d4 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -193,6 +193,26 @@ static inline bool blk_mq_get_dispatch_budget(struct request_queue *q) return true; } +static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, + struct request *rq) +{ + blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag); + rq->tag = BLK_MQ_NO_TAG; + + if (rq->rq_flags & RQF_MQ_INFLIGHT) { + rq->rq_flags &= ~RQF_MQ_INFLIGHT; + atomic_dec(&hctx->nr_active); + } +} + +static inline void blk_mq_put_driver_tag(struct request *rq) +{ + if (rq->tag == BLK_MQ_NO_TAG || rq->internal_tag == BLK_MQ_NO_TAG) + return; + + __blk_mq_put_driver_tag(rq->mq_hctx, rq); +} + static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap) { int cpu; diff --git a/block/blk.h b/block/blk.h index 9dcf51c94096..94f7c084f68f 100644 --- a/block/blk.h +++ b/block/blk.h @@ -25,6 +25,11 @@ struct blk_flush_queue { struct list_head flush_data_in_flight; struct request *flush_rq; + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; struct lock_class_key key; spinlock_t mq_flush_lock; }; -- cgit v1.2.3 From 568f2700657794b8258e72983ba508793a658942 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 6 Jul 2020 22:41:11 +0800 Subject: blk-mq: centralise related handling into blk_mq_get_driver_tag Move the .nr_active update and request assignment into blk_mq_get_driver_tag(); both are naturally done while getting the driver tag. Meanwhile, the blk-flush related code is simplified, and the flush request no longer needs to update the request table manually.
Signed-off-by: Ming Lei Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-flush.c | 14 ++++---------- block/blk-mq-tag.h | 12 ------------ block/blk-mq.c | 31 +++++++++++++++---------------- block/blk.h | 5 ----- 4 files changed, 19 insertions(+), 43 deletions(-) (limited to 'block/blk.h') diff --git a/block/blk-flush.c b/block/blk-flush.c index 15ae0155ec07..71c7f520e040 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -219,7 +219,6 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) struct request *rq, *n; unsigned long flags = 0; struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx); - struct blk_mq_hw_ctx *hctx; blk_account_io_flush(flush_rq); @@ -235,13 +234,11 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) if (fq->rq_status != BLK_STS_OK) error = fq->rq_status; - hctx = flush_rq->mq_hctx; if (!q->elevator) { - blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq); - flush_rq->tag = -1; + flush_rq->tag = BLK_MQ_NO_TAG; } else { blk_mq_put_driver_tag(flush_rq); - flush_rq->internal_tag = -1; + flush_rq->internal_tag = BLK_MQ_NO_TAG; } running = &fq->flush_queue[fq->flush_running_idx]; @@ -316,13 +313,10 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, flush_rq->mq_ctx = first_rq->mq_ctx; flush_rq->mq_hctx = first_rq->mq_hctx; - if (!q->elevator) { - fq->orig_rq = first_rq; + if (!q->elevator) flush_rq->tag = first_rq->tag; - blk_mq_tag_set_rq(flush_rq->mq_hctx, first_rq->tag, flush_rq); - } else { + else flush_rq->internal_tag = first_rq->internal_tag; - } flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH; flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK); diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 3945c7f5b944..b1acac518c4e 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -101,18 +101,6 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, return atomic_read(&hctx->nr_active) < depth; } -/* - * This helper should only be used for flush request to share tag - * with the request cloned from, and both the two requests can't be - * in flight at the same time. The caller has to make sure the tag - * can't be freed. 
- */ -static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx, - unsigned int tag, struct request *rq) -{ - hctx->tags->rqs[tag] = rq; -} - static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags, unsigned int tag) { diff --git a/block/blk-mq.c b/block/blk-mq.c index 89c83fa97ba0..b6dd080d3962 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -277,26 +277,20 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, { struct blk_mq_tags *tags = blk_mq_tags_from_data(data); struct request *rq = tags->static_rqs[tag]; - req_flags_t rq_flags = 0; if (data->q->elevator) { rq->tag = BLK_MQ_NO_TAG; rq->internal_tag = tag; } else { - if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) { - rq_flags = RQF_MQ_INFLIGHT; - atomic_inc(&data->hctx->nr_active); - } rq->tag = tag; rq->internal_tag = BLK_MQ_NO_TAG; - data->hctx->tags->rqs[rq->tag] = rq; } /* csd/requeue_work/fifo_time is initialized before use */ rq->q = data->q; rq->mq_ctx = data->ctx; rq->mq_hctx = data->hctx; - rq->rq_flags = rq_flags; + rq->rq_flags = 0; rq->cmd_flags = data->cmd_flags; if (data->flags & BLK_MQ_REQ_PREEMPT) rq->rq_flags |= RQF_PREEMPT; @@ -1105,9 +1099,10 @@ static bool __blk_mq_get_driver_tag(struct request *rq) { struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags; - bool shared = blk_mq_tag_busy(rq->mq_hctx); int tag; + blk_mq_tag_busy(rq->mq_hctx); + if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) { bt = &rq->mq_hctx->tags->breserved_tags; tag_offset = 0; @@ -1120,19 +1115,23 @@ static bool __blk_mq_get_driver_tag(struct request *rq) return false; rq->tag = tag + tag_offset; - if (shared) { - rq->rq_flags |= RQF_MQ_INFLIGHT; - atomic_inc(&rq->mq_hctx->nr_active); - } - rq->mq_hctx->tags->rqs[rq->tag] = rq; return true; } static bool blk_mq_get_driver_tag(struct request *rq) { - if (rq->tag != BLK_MQ_NO_TAG) - return true; - return __blk_mq_get_driver_tag(rq); + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; + + if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq)) + return false; + + if ((hctx->flags & BLK_MQ_F_TAG_SHARED) && + !(rq->rq_flags & RQF_MQ_INFLIGHT)) { + rq->rq_flags |= RQF_MQ_INFLIGHT; + atomic_inc(&hctx->nr_active); + } + hctx->tags->rqs[rq->tag] = rq; + return true; } static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, diff --git a/block/blk.h b/block/blk.h index 94f7c084f68f..9dcf51c94096 100644 --- a/block/blk.h +++ b/block/blk.h @@ -25,11 +25,6 @@ struct blk_flush_queue { struct list_head flush_data_in_flight; struct request *flush_rq; - /* - * flush_rq shares tag with this rq, both can't be active - * at the same time - */ - struct request *orig_rq; struct lock_class_key key; spinlock_t mq_flush_lock; }; -- cgit v1.2.3 From 9b15d109a6b2c0c604c588d49a5a927dac6dd616 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Fri, 17 Jul 2020 10:42:30 +0800 Subject: block: improve discard bio alignment in __blkdev_issue_discard() This patch improves discard bio splitting for address and size alignment in __blkdev_issue_discard(). An aligned discard bio may help the underlying device controller to perform better discard and internal garbage collection, and avoids unnecessary internal fragmentation. The current discard bio split algorithm in __blkdev_issue_discard() may leave non-discarded fragments on the device even when the discard bio's LBA and size are both aligned to the device's discard granularity. Here are the example steps to reproduce the problem.
- On a VMWare ESXi 6.5 update3 installation, create a 51GB virtual disk with thin mode and give it to a Linux virtual machine. - Inside the Linux virtual machine, if the virtual disk shows up as /dev/sdb, fill data into the first 50GB by, # dd if=/dev/zero of=/dev/sdb bs=4096 count=13107200 - Discard the 50GB range from offset 0 on /dev/sdb, # blkdiscard /dev/sdb -o 0 -l 53687091200 - Observe the underlying mapping status of the device # sg_get_lba_status /dev/sdb -m 1048 --lba=0 descriptor LBA: 0x0000000000000000 blocks: 2048 mapped (or unknown) descriptor LBA: 0x0000000000000800 blocks: 16773120 deallocated descriptor LBA: 0x0000000000fff800 blocks: 2048 mapped (or unknown) descriptor LBA: 0x0000000001000000 blocks: 8386560 deallocated descriptor LBA: 0x00000000017ff800 blocks: 2048 mapped (or unknown) descriptor LBA: 0x0000000001800000 blocks: 8386560 deallocated descriptor LBA: 0x0000000001fff800 blocks: 2048 mapped (or unknown) descriptor LBA: 0x0000000002000000 blocks: 8386560 deallocated descriptor LBA: 0x00000000027ff800 blocks: 2048 mapped (or unknown) descriptor LBA: 0x0000000002800000 blocks: 8386560 deallocated descriptor LBA: 0x0000000002fff800 blocks: 2048 mapped (or unknown) descriptor LBA: 0x0000000003000000 blocks: 8386560 deallocated descriptor LBA: 0x00000000037ff800 blocks: 2048 mapped (or unknown) descriptor LBA: 0x0000000003800000 blocks: 8386560 deallocated descriptor LBA: 0x0000000003fff800 blocks: 2048 mapped (or unknown) descriptor LBA: 0x0000000004000000 blocks: 8386560 deallocated descriptor LBA: 0x00000000047ff800 blocks: 2048 mapped (or unknown) descriptor LBA: 0x0000000004800000 blocks: 8386560 deallocated descriptor LBA: 0x0000000004fff800 blocks: 2048 mapped (or unknown) descriptor LBA: 0x0000000005000000 blocks: 8386560 deallocated descriptor LBA: 0x00000000057ff800 blocks: 2048 mapped (or unknown) descriptor LBA: 0x0000000005800000 blocks: 8386560 deallocated descriptor LBA: 0x0000000005fff800 blocks: 2048 mapped (or unknown) descriptor LBA: 0x0000000006000000 blocks: 6291456 deallocated descriptor LBA: 0x0000000006600000 blocks: 0 deallocated Although the discard bio starts at LBA 0 and has a size of 50<<30 bytes, both perfectly aligned to the discard granularity, the above list shows that many 1MB (2048-sector) internal fragments unexpectedly remain. The problem is in __blkdev_issue_discard(): the split algorithm produces bio sizes that are not aligned. 25 int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, 26 sector_t nr_sects, gfp_t gfp_mask, int flags, 27 struct bio **biop) 28 { 29 struct request_queue *q = bdev_get_queue(bdev); [snipped] 56 57 while (nr_sects) { 58 sector_t req_sects = min_t(sector_t, nr_sects, 59 bio_allowed_max_sectors(q)); 60 61 WARN_ON_ONCE((req_sects << 9) > UINT_MAX); 62 63 bio = blk_next_bio(bio, 0, gfp_mask); 64 bio->bi_iter.bi_sector = sector; 65 bio_set_dev(bio, bdev); 66 bio_set_op_attrs(bio, op, 0); 67 68 bio->bi_iter.bi_size = req_sects << 9; 69 sector += req_sects; 70 nr_sects -= req_sects; [snipped] 79 } 80 81 *biop = bio; 82 return 0; 83 } 84 EXPORT_SYMBOL(__blkdev_issue_discard); At lines 58-59, to discard a 50GB range, req_sects is set to the return value of bio_allowed_max_sectors(q), which is 8388607 sectors. In the above case, the discard granularity is 2048 sectors; although the start LBA and discard length are aligned to the discard granularity, req_sects never has a chance to be aligned to the discard granularity.
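To make the misalignment concrete, here is a minimal user-space sketch that replays the pre-patch split arithmetic (an illustration only, not kernel code; it assumes the 512-byte logical blocks and 2048-sector discard granularity from the example above):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t nr_sects = 104857600ULL;	/* 50GB in 512-byte sectors */
	uint64_t sector = 0;
	/* bio_allowed_max_sectors(q): round_down(UINT_MAX, 512) >> 9 */
	uint64_t max_sects = (UINT32_MAX / 512 * 512) >> 9;	/* 8388607 */

	while (nr_sects) {
		uint64_t req_sects = nr_sects < max_sects ? nr_sects : max_sects;

		printf("bio: start %llu, len %llu, start %% 2048 = %llu\n",
		       (unsigned long long)sector,
		       (unsigned long long)req_sects,
		       (unsigned long long)(sector % 2048));
		sector += req_sects;
		nr_sects -= req_sects;
	}
	return 0;
}

Every bio after the first starts mid-granule, because 8388607 is not a multiple of 2048.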
This is why there are still-mapped 2048-sector fragments in every 4 or 8 GB range. If req_sects at line 58 is set to a value aligned to discard_granularity and close to UINT_MAX, then all subsequent split bios inside the device driver are (almost always) aligned to the discard_granularity of the device's queue, and the 2048-sector still-mapped fragments disappear. This patch introduces bio_aligned_discard_max_sectors() to return the value which is aligned to q->limits.discard_granularity and closest to UINT_MAX. Then this patch replaces bio_allowed_max_sectors() with this new routine to decide a more suitable split bio length. But we still need to handle the situation when the discard start LBA is not aligned to q->limits.discard_granularity; otherwise, even if the length is aligned, the current code may still leave a 2048-sector fragment in every 4GB range. Therefore, to calculate req_sects, the start LBA of the discard range is checked first (including the partition offset); if it is not aligned to the discard granularity, the first split location makes sure the following bio has bi_sector aligned to the discard granularity. Then there won't be still-mapped fragments in the middle of the discard range. The above is how this patch improves discard bio alignment in __blkdev_issue_discard(). Now with this patch, after a discard with the same command line mentioned previously, sg_get_lba_status returns, descriptor LBA: 0x0000000000000000 blocks: 106954752 deallocated descriptor LBA: 0x0000000006600000 blocks: 0 deallocated We can see there is no 2048-sector segment anymore; everything is clean. Reported-and-tested-by: Acshai Manoj Signed-off-by: Coly Li Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Reviewed-by: Xiao Ni Cc: Bart Van Assche Cc: Christoph Hellwig Cc: Enzo Matsumiya Cc: Jens Axboe Signed-off-by: Jens Axboe --- block/blk-lib.c | 31 ++++++++++++++++++++++++++++--- block/blk.h | 14 ++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) (limited to 'block/blk.h') diff --git a/block/blk-lib.c b/block/blk-lib.c index 5f2c429d4378..019e09bb9c0e 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -29,7 +29,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, struct request_queue *q = bdev_get_queue(bdev); struct bio *bio = *biop; unsigned int op; - sector_t bs_mask; + sector_t bs_mask, part_offset = 0; if (!q) return -ENXIO; @@ -54,9 +54,34 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, if (!nr_sects) return -EINVAL; + /* In case the discard request is in a partition */ + if (bdev->bd_partno) + part_offset = bdev->bd_part->start_sect; + while (nr_sects) { - sector_t req_sects = min_t(sector_t, nr_sects, - bio_allowed_max_sectors(q)); + sector_t granularity_aligned_lba, req_sects; + sector_t sector_mapped = sector + part_offset; + + granularity_aligned_lba = round_up(sector_mapped, + q->limits.discard_granularity >> SECTOR_SHIFT); + + /* + * Check whether the discard bio starts at a discard_granularity + * aligned LBA, + * - If no: set (granularity_aligned_lba - sector_mapped) to + * bi_size of the first split bio, then the second bio will + * start at a discard_granularity aligned LBA on the device. + * - If yes: use bio_aligned_discard_max_sectors() as the max + * possible bi_size of the first split bio. Then when this bio + * is split in device drive, the split ones are very probably + * to be aligned to discard_granularity of the device's queue.
+ */ + if (granularity_aligned_lba == sector_mapped) + req_sects = min_t(sector_t, nr_sects, + bio_aligned_discard_max_sectors(q)); + else + req_sects = min_t(sector_t, nr_sects, + granularity_aligned_lba - sector_mapped); WARN_ON_ONCE((req_sects << 9) > UINT_MAX); diff --git a/block/blk.h b/block/blk.h index 9dcf51c94096..49e2928a1632 100644 --- a/block/blk.h +++ b/block/blk.h @@ -264,6 +264,20 @@ static inline unsigned int bio_allowed_max_sectors(struct request_queue *q) return round_down(UINT_MAX, queue_logical_block_size(q)) >> 9; } +/* + * The max bio size which is aligned to q->limits.discard_granularity. This + * is a hint to split large discard bio in generic block layer, then if device + * driver needs to split the discard bio into smaller ones, their bi_size can + * be very probably and easily aligned to discard_granularity of the device's + * queue. + */ +static inline unsigned int bio_aligned_discard_max_sectors( + struct request_queue *q) +{ + return round_down(UINT_MAX, q->limits.discard_granularity) >> + SECTOR_SHIFT; +} + /* * Internal io_context interface */ -- cgit v1.2.3
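As a companion to the sketch earlier, the post-patch arithmetic can be replayed the same way (again a user-space illustration under the same assumptions; round_up_u64()/round_down_u64() are local stand-ins for the kernel round_up()/round_down() macros, and the partition offset is taken as 0):

#include <stdio.h>
#include <stdint.h>

#define SECTOR_SHIFT 9

static uint64_t round_up_u64(uint64_t v, uint64_t g) { return (v + g - 1) / g * g; }
static uint64_t round_down_u64(uint64_t v, uint64_t g) { return v / g * g; }

int main(void)
{
	uint64_t granularity_bytes = 1048576;	/* 1MB, i.e. 2048 sectors */
	uint64_t gran_sects = granularity_bytes >> SECTOR_SHIFT;
	uint64_t nr_sects = 104857600ULL;	/* the 50GB example again */
	uint64_t sector = 0, part_offset = 0;
	/* bio_aligned_discard_max_sectors():
	 * round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT -> 8386560 here */
	uint64_t max_aligned = round_down_u64(UINT32_MAX, granularity_bytes) >> SECTOR_SHIFT;

	while (nr_sects) {
		uint64_t sector_mapped = sector + part_offset;
		uint64_t aligned_lba = round_up_u64(sector_mapped, gran_sects);
		uint64_t req_sects;

		if (aligned_lba == sector_mapped)	/* start already aligned */
			req_sects = nr_sects < max_aligned ? nr_sects : max_aligned;
		else					/* align the next split */
			req_sects = aligned_lba - sector_mapped;
		if (req_sects > nr_sects)
			req_sects = nr_sects;

		printf("bio: start %llu, len %llu, start %% %llu = %llu\n",
		       (unsigned long long)sector,
		       (unsigned long long)req_sects,
		       (unsigned long long)gran_sects,
		       (unsigned long long)(sector % gran_sects));
		sector += req_sects;
		nr_sects -= req_sects;
	}
	return 0;
}

With an aligned start LBA, every split is 8386560 sectors, a multiple of 2048, which is consistent with the single large deallocated extent in the post-patch sg_get_lba_status output above.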