From cb8432d650fe3be58bb962bc8e602dc405510327 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 Nov 2020 18:47:17 +0100 Subject: block: allocate struct hd_struct as part of struct bdev_inode Allocate hd_struct together with struct block_device to pre-load the lifetime rule changes in preparation for merging the two structures. Note that part0 was previously embedded into struct gendisk, but is now a separate allocation, and already points to the block_device instead of the hd_struct. The lifetime of struct gendisk is still controlled by the struct device embedded in the part0 hd_struct. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-merge.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'block/blk-merge.c') diff --git a/block/blk-merge.c b/block/blk-merge.c index bcf5e4580603..cb351ab9b77d 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -683,8 +683,6 @@ static void blk_account_io_merge_request(struct request *req) part_stat_lock(); part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); part_stat_unlock(); - - hd_struct_put(req->part); } } -- cgit v1.2.3 From e8a676d61c07eccfcd9d6fddfe4dcb630651c29a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Dec 2020 17:21:36 +0100 Subject: block: simplify and extend the block_bio_merge tracepoint class The block_bio_merge tracepoint class can be reused for most bio-based tracepoints. For that it just needs to lose the superfluous q and rq parameters. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-merge.c | 4 +- block/blk-mq.c | 2 +- block/bounce.c | 2 +- include/trace/events/block.h | 158 +++++++++---------------------------------- kernel/trace/blktrace.c | 41 +++-------- 6 files changed, 48 insertions(+), 161 deletions(-) (limited to 'block/blk-merge.c') diff --git a/block/blk-core.c b/block/blk-core.c index cee568389b7e..cb24654983e1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -907,7 +907,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio) blkcg_bio_issue_init(bio); if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) { - trace_block_bio_queue(q, bio); + trace_block_bio_queue(bio); /* Now that enqueuing has been traced, we need to trace * completion as well. 
*/ diff --git a/block/blk-merge.c b/block/blk-merge.c index cb351ab9b77d..1a46d5bbd399 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -922,7 +922,7 @@ static enum bio_merge_status bio_attempt_back_merge(struct request *req, if (!ll_back_merge_fn(req, bio, nr_segs)) return BIO_MERGE_FAILED; - trace_block_bio_backmerge(req->q, req, bio); + trace_block_bio_backmerge(bio); rq_qos_merge(req->q, req, bio); if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) @@ -946,7 +946,7 @@ static enum bio_merge_status bio_attempt_front_merge(struct request *req, if (!ll_front_merge_fn(req, bio, nr_segs)) return BIO_MERGE_FAILED; - trace_block_bio_frontmerge(req->q, req, bio); + trace_block_bio_frontmerge(bio); rq_qos_merge(req->q, req, bio); if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) diff --git a/block/blk-mq.c b/block/blk-mq.c index 37c682855a63..21e2b4b6b742 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2184,7 +2184,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) goto queue_exit; } - trace_block_getrq(q, bio, bio->bi_opf); + trace_block_getrq(bio); rq_qos_track(q, rq, bio); diff --git a/block/bounce.c b/block/bounce.c index 162a6eee8999..d3f51acd6e3b 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -340,7 +340,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, } } - trace_block_bio_bounce(q, *bio_orig); + trace_block_bio_bounce(*bio_orig); bio->bi_flags |= (1 << BIO_BOUNCED); diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 76459cf750e1..506c29dc7c76 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -226,45 +226,6 @@ DEFINE_EVENT(block_rq, block_rq_merge, TP_ARGS(q, rq) ); -/** - * block_bio_bounce - used bounce buffer when processing block operation - * @q: queue holding the block operation - * @bio: block operation - * - * A bounce buffer was used to handle the block operation @bio in @q. - * This occurs when hardware limitations prevent a direct transfer of - * data between the @bio data memory area and the IO device. Use of a - * bounce buffer requires extra copying of data and decreases - * performance. 
- */ -TRACE_EVENT(block_bio_bounce, - - TP_PROTO(struct request_queue *q, struct bio *bio), - - TP_ARGS(q, bio), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, RWBS_LEN ) - __array( char, comm, TASK_COMM_LEN ) - ), - - TP_fast_assign( - __entry->dev = bio_dev(bio); - __entry->sector = bio->bi_iter.bi_sector; - __entry->nr_sector = bio_sectors(bio); - blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), - - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) -); - /** * block_bio_complete - completed all work on the block operation * @q: queue holding the block operation @@ -301,11 +262,11 @@ TRACE_EVENT(block_bio_complete, __entry->nr_sector, __entry->error) ); -DECLARE_EVENT_CLASS(block_bio_merge, +DECLARE_EVENT_CLASS(block_bio, - TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), + TP_PROTO(struct bio *bio), - TP_ARGS(q, rq, bio), + TP_ARGS(bio), TP_STRUCT__entry( __field( dev_t, dev ) @@ -329,116 +290,63 @@ DECLARE_EVENT_CLASS(block_bio_merge, __entry->nr_sector, __entry->comm) ); +/** + * block_bio_bounce - used bounce buffer when processing block operation + * @bio: block operation + * + * A bounce buffer was used to handle the block operation @bio. + * This occurs when hardware limitations prevent a direct transfer of + * data between the @bio data memory area and the IO device. Use of a + * bounce buffer requires extra copying of data and decreases + * performance. + */ +DEFINE_EVENT(block_bio, block_bio_bounce, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) +); + /** * block_bio_backmerge - merging block operation to the end of an existing operation - * @q: queue holding operation - * @rq: request bio is being merged into * @bio: new block operation to merge * - * Merging block request @bio to the end of an existing block request - * in queue @q. + * Merging block request @bio to the end of an existing block request. */ -DEFINE_EVENT(block_bio_merge, block_bio_backmerge, - - TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - - TP_ARGS(q, rq, bio) +DEFINE_EVENT(block_bio, block_bio_backmerge, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) ); /** * block_bio_frontmerge - merging block operation to the beginning of an existing operation - * @q: queue holding operation - * @rq: request bio is being merged into * @bio: new block operation to merge * - * Merging block IO operation @bio to the beginning of an existing block - * operation in queue @q. + * Merging block IO operation @bio to the beginning of an existing block request. */ -DEFINE_EVENT(block_bio_merge, block_bio_frontmerge, - - TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - - TP_ARGS(q, rq, bio) +DEFINE_EVENT(block_bio, block_bio_frontmerge, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) ); /** * block_bio_queue - putting new block IO operation in queue - * @q: queue holding operation * @bio: new block operation * * About to place the block IO operation @bio into the queue. 
*/ -TRACE_EVENT(block_bio_queue, - - TP_PROTO(struct request_queue *q, struct bio *bio), - - TP_ARGS(q, bio), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, RWBS_LEN ) - __array( char, comm, TASK_COMM_LEN ) - ), - - TP_fast_assign( - __entry->dev = bio_dev(bio); - __entry->sector = bio->bi_iter.bi_sector; - __entry->nr_sector = bio_sectors(bio); - blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), - - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) -); - -DECLARE_EVENT_CLASS(block_get_rq, - - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - - TP_ARGS(q, bio, rw), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, RWBS_LEN ) - __array( char, comm, TASK_COMM_LEN ) - ), - - TP_fast_assign( - __entry->dev = bio ? bio_dev(bio) : 0; - __entry->sector = bio ? bio->bi_iter.bi_sector : 0; - __entry->nr_sector = bio ? bio_sectors(bio) : 0; - blk_fill_rwbs(__entry->rwbs, - bio ? bio->bi_opf : 0, __entry->nr_sector); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), - - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) +DEFINE_EVENT(block_bio, block_bio_queue, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) ); /** * block_getrq - get a free request entry in queue for block IO operations - * @q: queue for operations * @bio: pending block IO operation (can be %NULL) - * @rw: low bit indicates a read (%0) or a write (%1) * - * A request struct for queue @q has been allocated to handle the - * block IO operation @bio. + * A request struct has been allocated to handle the block IO operation @bio. 
*/ -DEFINE_EVENT(block_get_rq, block_getrq, - - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - - TP_ARGS(q, bio, rw) +DEFINE_EVENT(block_bio, block_getrq, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) ); /** diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index ced589df304b..7ab88e00c157 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -906,10 +906,9 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, rcu_read_unlock(); } -static void blk_add_trace_bio_bounce(void *ignore, - struct request_queue *q, struct bio *bio) +static void blk_add_trace_bio_bounce(void *ignore, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_BOUNCE, 0); } static void blk_add_trace_bio_complete(void *ignore, @@ -919,44 +918,24 @@ static void blk_add_trace_bio_complete(void *ignore, blk_status_to_errno(bio->bi_status)); } -static void blk_add_trace_bio_backmerge(void *ignore, - struct request_queue *q, - struct request *rq, - struct bio *bio) +static void blk_add_trace_bio_backmerge(void *ignore, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_BACKMERGE, 0); } -static void blk_add_trace_bio_frontmerge(void *ignore, - struct request_queue *q, - struct request *rq, - struct bio *bio) +static void blk_add_trace_bio_frontmerge(void *ignore, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_FRONTMERGE, 0); } -static void blk_add_trace_bio_queue(void *ignore, - struct request_queue *q, struct bio *bio) +static void blk_add_trace_bio_queue(void *ignore, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_QUEUE, 0); } -static void blk_add_trace_getrq(void *ignore, - struct request_queue *q, - struct bio *bio, int rw) +static void blk_add_trace_getrq(void *ignore, struct bio *bio) { - if (bio) - blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); - else { - struct blk_trace *bt; - - rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); - if (bt) - __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0, - NULL, 0); - rcu_read_unlock(); - } + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_GETRQ, 0); } static void blk_add_trace_plug(void *ignore, struct request_queue *q) -- cgit v1.2.3 From eb6f7f7cd3af0f67ce57b21fab1bc64beb643581 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Dec 2020 17:21:37 +0100 Subject: block: remove the request_queue argument to the block_split tracepoint The request_queue can trivially be derived from the bio. 
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-merge.c | 2 +- drivers/md/dm.c | 2 +- include/trace/events/block.h | 14 ++++++-------- kernel/trace/blktrace.c | 5 ++--- 4 files changed, 10 insertions(+), 13 deletions(-) (limited to 'block/blk-merge.c') diff --git a/block/blk-merge.c b/block/blk-merge.c index 1a46d5bbd399..4071daa88a5e 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -338,7 +338,7 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs) split->bi_opf |= REQ_NOMERGE; bio_chain(split, *bio); - trace_block_split(q, split, (*bio)->bi_iter.bi_sector); + trace_block_split(split, (*bio)->bi_iter.bi_sector); submit_bio_noacct(*bio); *bio = split; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ed7e836efbcd..9a5bd90779c7 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1612,7 +1612,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, part_stat_unlock(); bio_chain(b, bio); - trace_block_split(md->queue, b, bio->bi_iter.bi_sector); + trace_block_split(b, bio->bi_iter.bi_sector); ret = submit_bio_noacct(bio); break; } diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 506c29dc7c76..b415e4cba843 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -411,21 +411,19 @@ DEFINE_EVENT(block_unplug, block_unplug, /** * block_split - split a single bio struct into two bio structs - * @q: queue containing the bio * @bio: block operation being split * @new_sector: The starting sector for the new bio * - * The bio request @bio in request queue @q needs to be split into two - * bio requests. The newly created @bio request starts at - * @new_sector. This split may be required due to hardware limitation - * such as operation crossing device boundaries in a RAID system. + * The bio request @bio needs to be split into two bio requests. The newly + * created @bio request starts at @new_sector. This split may be required due to + * hardware limitations such as operation crossing device boundaries in a RAID + * system. */ TRACE_EVENT(block_split, - TP_PROTO(struct request_queue *q, struct bio *bio, - unsigned int new_sector), + TP_PROTO(struct bio *bio, unsigned int new_sector), - TP_ARGS(q, bio, new_sector), + TP_ARGS(bio, new_sector), TP_STRUCT__entry( __field( dev_t, dev ) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7ab88e00c157..3ca6d62114f4 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -970,10 +970,9 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q, rcu_read_unlock(); } -static void blk_add_trace_split(void *ignore, - struct request_queue *q, struct bio *bio, - unsigned int pdu) +static void blk_add_trace_split(void *ignore, struct bio *bio, unsigned int pdu) { + struct request_queue *q = bio->bi_disk->queue; struct blk_trace *bt; rcu_read_lock(); -- cgit v1.2.3 From a54895fa057c67700270777f7661d8d3c7fda88a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Dec 2020 17:21:39 +0100 Subject: block: remove the request_queue argument to request based tracepoints The request_queue can trivially be derived from the request. 
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-merge.c | 2 +- block/blk-mq-sched.c | 2 +- block/blk-mq.c | 8 ++++---- drivers/md/dm-rq.c | 2 +- drivers/s390/scsi/zfcp_fsf.c | 3 +-- include/linux/blktrace_api.h | 5 ++--- include/trace/events/block.h | 30 ++++++++++++------------------ kernel/trace/blktrace.c | 44 +++++++++++++++++--------------------------- 8 files changed, 39 insertions(+), 57 deletions(-) (limited to 'block/blk-merge.c') diff --git a/block/blk-merge.c b/block/blk-merge.c index 4071daa88a5e..7497d86fff38 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -799,7 +799,7 @@ static struct request *attempt_merge(struct request_queue *q, */ blk_account_io_merge_request(next); - trace_block_rq_merge(q, next); + trace_block_rq_merge(next); /* * ownership of bio passed from next to req, return 'next' for diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index d1eafe2c045c..deff4e826e23 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -386,7 +386,7 @@ EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge); void blk_mq_sched_request_inserted(struct request *rq) { - trace_block_rq_insert(rq->q, rq); + trace_block_rq_insert(rq); } EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted); diff --git a/block/blk-mq.c b/block/blk-mq.c index 21e2b4b6b742..cf3916e2852f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -733,7 +733,7 @@ void blk_mq_start_request(struct request *rq) { struct request_queue *q = rq->q; - trace_block_rq_issue(q, rq); + trace_block_rq_issue(rq); if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { rq->io_start_time_ns = ktime_get_ns(); @@ -760,7 +760,7 @@ static void __blk_mq_requeue_request(struct request *rq) blk_mq_put_driver_tag(rq); - trace_block_rq_requeue(q, rq); + trace_block_rq_requeue(rq); rq_qos_requeue(q, rq); if (blk_mq_request_started(rq)) { @@ -1821,7 +1821,7 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx, lockdep_assert_held(&ctx->lock); - trace_block_rq_insert(hctx->queue, rq); + trace_block_rq_insert(rq); if (at_head) list_add(&rq->queuelist, &ctx->rq_lists[type]); @@ -1878,7 +1878,7 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, */ list_for_each_entry(rq, list, queuelist) { BUG_ON(rq->mq_ctx != ctx); - trace_block_rq_insert(hctx->queue, rq); + trace_block_rq_insert(rq); } spin_lock(&ctx->lock); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 729a72ec30cc..13b4385f4d5a 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -397,7 +397,7 @@ static int map_request(struct dm_rq_target_io *tio) } /* The target has remapped the I/O so dispatch it */ - trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), + trace_block_rq_remap(clone, disk_devt(dm_disk(md)), blk_rq_pos(rq)); ret = dm_dispatch_clone_request(clone, rq); if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 6cb963a06777..37d450f46952 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -2359,8 +2359,7 @@ static void zfcp_fsf_req_trace(struct zfcp_fsf_req *req, struct scsi_cmnd *scsi) } } - blk_add_driver_data(scsi->request->q, scsi->request, &blktrc, - sizeof(blktrc)); + blk_add_driver_data(scsi->request, &blktrc, sizeof(blktrc)); } /** diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 3b6ff5902edc..05556573b896 
100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -75,8 +75,7 @@ static inline bool blk_trace_note_message_enabled(struct request_queue *q) return ret; } -extern void blk_add_driver_data(struct request_queue *q, struct request *rq, - void *data, size_t len); +extern void blk_add_driver_data(struct request *rq, void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, char __user *arg); @@ -90,7 +89,7 @@ extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ # define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) # define blk_trace_shutdown(q) do { } while (0) -# define blk_add_driver_data(q, rq, data, len) do {} while (0) +# define blk_add_driver_data(rq, data, len) do {} while (0) # define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 8fb89574d867..0d782663a005 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -64,7 +64,6 @@ DEFINE_EVENT(block_buffer, block_dirty_buffer, /** * block_rq_requeue - place block IO request back on a queue - * @q: queue holding operation * @rq: block IO operation request * * The block operation request @rq is being placed back into queue @@ -73,9 +72,9 @@ DEFINE_EVENT(block_buffer, block_dirty_buffer, */ TRACE_EVENT(block_rq_requeue, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq), + TP_ARGS(rq), TP_STRUCT__entry( __field( dev_t, dev ) @@ -147,9 +146,9 @@ TRACE_EVENT(block_rq_complete, DECLARE_EVENT_CLASS(block_rq, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq), + TP_ARGS(rq), TP_STRUCT__entry( __field( dev_t, dev ) @@ -181,7 +180,6 @@ DECLARE_EVENT_CLASS(block_rq, /** * block_rq_insert - insert block operation request into queue - * @q: target queue * @rq: block IO operation request * * Called immediately before block operation request @rq is inserted @@ -191,14 +189,13 @@ DECLARE_EVENT_CLASS(block_rq, */ DEFINE_EVENT(block_rq, block_rq_insert, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq) + TP_ARGS(rq) ); /** * block_rq_issue - issue pending block IO request operation to device driver - * @q: queue holding operation * @rq: block IO operation operation request * * Called when block operation request @rq from queue @q is sent to a @@ -206,14 +203,13 @@ DEFINE_EVENT(block_rq, block_rq_insert, */ DEFINE_EVENT(block_rq, block_rq_issue, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq) + TP_ARGS(rq) ); /** * block_rq_merge - merge request with another one in the elevator - * @q: queue holding operation * @rq: block IO operation operation request * * Called when block operation request @rq from queue @q is merged to another @@ -221,9 +217,9 @@ DEFINE_EVENT(block_rq, block_rq_issue, */ DEFINE_EVENT(block_rq, block_rq_merge, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq) + TP_ARGS(rq) ); /** @@ -491,7 +487,6 @@ TRACE_EVENT(block_bio_remap, /** * block_rq_remap - map request for a block operation request - * @q: queue holding the operation * @rq: block IO operation request * @dev: device for the operation * @from: original sector for the operation @@ -502,10 
+497,9 @@ TRACE_EVENT(block_bio_remap, */ TRACE_EVENT(block_rq_remap, - TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, - sector_t from), + TP_PROTO(struct request *rq, dev_t dev, sector_t from), - TP_ARGS(q, rq, dev, from), + TP_ARGS(rq, dev, from), TP_STRUCT__entry( __field( dev_t, dev ) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 405637144a03..7839a78205c2 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -795,12 +795,12 @@ static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) #endif static u64 -blk_trace_request_get_cgid(struct request_queue *q, struct request *rq) +blk_trace_request_get_cgid(struct request *rq) { if (!rq->bio) return 0; /* Use the first bio */ - return blk_trace_bio_get_cgid(q, rq->bio); + return blk_trace_bio_get_cgid(rq->q, rq->bio); } /* @@ -841,40 +841,35 @@ static void blk_add_trace_rq(struct request *rq, int error, rcu_read_unlock(); } -static void blk_add_trace_rq_insert(void *ignore, - struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_insert(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } -static void blk_add_trace_rq_issue(void *ignore, - struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_issue(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } -static void blk_add_trace_rq_merge(void *ignore, - struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_merge(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } -static void blk_add_trace_rq_requeue(void *ignore, - struct request_queue *q, - struct request *rq) +static void blk_add_trace_rq_requeue(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } static void blk_add_trace_rq_complete(void *ignore, struct request *rq, int error, unsigned int nr_bytes) { blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE, - blk_trace_request_get_cgid(rq->q, rq)); + blk_trace_request_get_cgid(rq)); } /** @@ -1037,16 +1032,14 @@ static void blk_add_trace_bio_remap(void *ignore, struct bio *bio, dev_t dev, * Add a trace for that action. 
* **/ -static void blk_add_trace_rq_remap(void *ignore, - struct request_queue *q, - struct request *rq, dev_t dev, +static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, sector_t from) { struct blk_trace *bt; struct blk_io_trace_remap r; rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); + bt = rcu_dereference(rq->q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); return; @@ -1058,13 +1051,12 @@ static void blk_add_trace_rq_remap(void *ignore, __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rq_data_dir(rq), 0, BLK_TA_REMAP, 0, - sizeof(r), &r, blk_trace_request_get_cgid(q, rq)); + sizeof(r), &r, blk_trace_request_get_cgid(rq)); rcu_read_unlock(); } /** * blk_add_driver_data - Add binary message with driver-specific data - * @q: queue the io is for * @rq: io request * @data: driver-specific data * @len: length of driver-specific data @@ -1073,14 +1065,12 @@ static void blk_add_trace_rq_remap(void *ignore, * Some drivers might want to write driver-specific data per request. * **/ -void blk_add_driver_data(struct request_queue *q, - struct request *rq, - void *data, size_t len) +void blk_add_driver_data(struct request *rq, void *data, size_t len) { struct blk_trace *bt; rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); + bt = rcu_dereference(rq->q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); return; @@ -1088,7 +1078,7 @@ void blk_add_driver_data(struct request_queue *q, __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, BLK_TA_DRV_DATA, 0, len, data, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(blk_add_driver_data); -- cgit v1.2.3 From cc29e1bf0d63f728a5bd60ef22638bbf77369552 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Thu, 26 Nov 2020 17:18:52 +0800 Subject: block: disable iopoll for split bio iopoll is primarily intended for small, latency-sensitive IO. It doesn't work well for big IO, especially when it needs to be split into multiple bios. In this case, the cookie returned by __submit_bio_noacct_mq() is in fact the cookie of the last split bio. The completion of *this* last split bio by iopoll doesn't mean the whole original bio has completed; callers of iopoll still need to wait for the completion of the other split bios. Besides, bio splitting may cause more trouble for iopoll, which isn't supposed to be used for big IO in the first place. iopoll for split bios may cause a potential race if CPU migration happens during bio submission. Since the returned cookie is that of the last split bio, polling on the corresponding hardware queue doesn't help complete the other split bios if they are enqueued into different hardware queues. Since interrupts are disabled for polling queues, the completion of these other split bios then depends on the timeout mechanism, thus causing a potential hang. iopoll for split bios may also cause a hang for sync polling. Currently both blkdev and the iomap-based filesystems (ext4/xfs, etc.) support sync polling in their direct IO routines. These routines submit bios without the REQ_NOWAIT flag set and then start sync polling in the current process context. The process may hang in blk_mq_get_tag() if the submitted bio has to be split into multiple bios that rapidly exhaust the queue depth: the process is waiting for the completion of the previously allocated requests, which should be reaped by the subsequent polling, thus causing a deadlock. To avoid the subtle problems described above, just disable iopoll for split bios and return BLK_QC_T_NONE in this case. 
The side effect is that non-HIPRI IO also returns BLK_QC_T_NONE now. It should be acceptable since the returned cookie is never used for non-HIPRI IO. Suggested-by: Ming Lei Signed-off-by: Jeffle Xu Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-merge.c | 8 ++++++++ block/blk-mq.c | 5 +++++ 2 files changed, 13 insertions(+) (limited to 'block/blk-merge.c') diff --git a/block/blk-merge.c b/block/blk-merge.c index 7497d86fff38..c3399bf29e9c 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -279,6 +279,14 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, return NULL; split: *segs = nsegs; + + /* + * Bio splitting may cause subtle trouble such as a hang when doing + * sync iopoll in the direct IO routine. Given that the performance + * gain of iopoll for big IO can be trivial, disable iopoll when a + * split is needed. + */ + bio->bi_opf &= ~REQ_HIPRI; + return bio_split(bio, sectors, GFP_NOIO, bs); } diff --git a/block/blk-mq.c b/block/blk-mq.c index 2881a457de83..95ecc4c69969 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2159,6 +2159,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) unsigned int nr_segs; blk_qc_t cookie; blk_status_t ret; + bool hipri; blk_queue_bounce(q, &bio); __blk_queue_split(&bio, &nr_segs); @@ -2175,6 +2176,8 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) rq_qos_throttle(q, bio); + hipri = bio->bi_opf & REQ_HIPRI; + data.cmd_flags = bio->bi_opf; rq = __blk_mq_alloc_request(&data); if (unlikely(!rq)) { @@ -2267,6 +2270,8 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) blk_mq_sched_insert_request(rq, false, true, true); } + if (!hipri) + return BLK_QC_T_NONE; return cookie; queue_exit: blk_queue_exit(q); -- cgit v1.2.3
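
A note on the tracepoint commits above: together they change the probe signature of every bio- and request-based hook, so any consumer has to be updated the same way blktrace was. Below is a minimal sketch of a built-in consumer after this series, along the lines of blk_register_tracepoints() in kernel/trace/blktrace.c; the probe name and message format are made up for illustration and are not part of the patches.

#include <linux/blkdev.h>
#include <linux/init.h>
#include <trace/events/block.h>

/* After this series the probe receives only the bio... */
static void sample_bio_queue_probe(void *ignore, struct bio *bio)
{
	/* ...and derives the queue itself, exactly as blktrace now does. */
	struct request_queue *q = bio->bi_disk->queue;

	pr_info("q %p: bio %u:%u sector %llu + %u\n", q,
		MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
		(unsigned long long)bio->bi_iter.bi_sector,
		bio_sectors(bio));
}

static int __init sample_probe_init(void)
{
	/* register_trace_block_bio_queue() is generated by DEFINE_EVENT. */
	return register_trace_block_bio_queue(sample_bio_queue_probe, NULL);
}
device_initcall(sample_probe_init);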
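
The deadlock reasoning in the final commit (disable iopoll for split bio) is easiest to see from the caller's side. Below is a minimal sketch of the sync polling loop used by direct IO in this kernel generation, modeled loosely on __blkdev_direct_IO_simple(); the function name and the 'done' flag (which the bio's ->bi_end_io handler would set) are illustrative, not code from the series. With the fix applied, a split bio reaches such a caller with REQ_HIPRI cleared and a BLK_QC_T_NONE cookie, so the loop falls back to IRQ-driven completion instead of polling a queue that only covers the last fragment.

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/sched.h>

/* Illustrative only: 'done' is set to true by the bio's completion handler. */
static void sync_poll_bio(struct request_queue *q, struct bio *bio, bool *done)
{
	blk_qc_t qc = submit_bio(bio);	/* caller set REQ_HIPRI on the bio */

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (READ_ONCE(*done))
			break;
		/*
		 * BLK_QC_T_NONE (now returned whenever the bio was split)
		 * means there is no single hardware queue worth polling:
		 * sleep until the interrupt path completes the fragments.
		 */
		if (qc == BLK_QC_T_NONE || !blk_poll(q, qc, true))
			blk_io_schedule();
	}
	__set_current_state(TASK_RUNNING);
}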