diff options
author | Yu Kuai <yukuai3@huawei.com> | 2024-05-09 15:37:16 +0300 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2024-05-30 10:49:03 +0300 |
commit | ac18de98a60f1d162248cb4e362ad18a264594a0 (patch) | |
tree | 037035db8a07906a76274df5075a2ef850c0b113 | |
parent | df799a71c6cc322197673d93ce472c61168a6e50 (diff) | |
download | linux-ac18de98a60f1d162248cb4e362ad18a264594a0.tar.xz |
block: support to account io_ticks precisely
[ Upstream commit 99dc422335d8b2bd4d105797241d3e715bae90e9 ]
Currently, io_ticks is accounted based on sampling, specifically
update_io_ticks() will always account io_ticks by 1 jiffies from
bdev_start_io_acct()/blk_account_io_start(), and the result can be
inaccurate, for example(HZ is 250):
Test script:
fio -filename=/dev/sda -bs=4k -rw=write -direct=1 -name=test -thinktime=4ms
Test result: util is about 90%, while the disk is really idle.
This behaviour is introduced by commit 5b18b5a73760 ("block: delete
part_round_stats and switch to less precise counting"), however, there
was a key point that is missed that this patch also improve performance
a lot:
Before the commit:
part_round_stats:
if (part->stamp != now)
stats |= 1;
part_in_flight()
-> there can be lots of task here in 1 jiffies.
part_round_stats_single()
__part_stat_add()
part->stamp = now;
After the commit:
update_io_ticks:
stamp = part->bd_stamp;
if (time_after(now, stamp))
if (try_cmpxchg())
__part_stat_add()
-> only one task can reach here in 1 jiffies.
Hence in order to account io_ticks precisely, we only need to know if
there are IO inflight at most once in one jiffies. Noted that for
rq-based device, iterating tags should not be used here because
'tags->lock' is grabbed in blk_mq_find_and_get_req(), hence
part_stat_lock_inc/dec() and part_in_flight() is used to trace inflight.
The additional overhead is quite little:
- per cpu add/dec for each IO for rq-based device;
- per cpu sum for each jiffies;
And it's verified by null-blk that there are no performance degration
under heavy IO pressure.
Fixes: 5b18b5a73760 ("block: delete part_round_stats and switch to less precise counting")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20240509123717.3223892-2-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
-rw-r--r-- | block/blk-core.c | 9 | ||||
-rw-r--r-- | block/blk-merge.c | 2 | ||||
-rw-r--r-- | block/blk-mq.c | 4 | ||||
-rw-r--r-- | block/blk.h | 1 | ||||
-rw-r--r-- | block/genhd.c | 2 |
5 files changed, 13 insertions, 5 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 99d684085719..923b7d91e6dc 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -976,10 +976,11 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end) unsigned long stamp; again: stamp = READ_ONCE(part->bd_stamp); - if (unlikely(time_after(now, stamp))) { - if (likely(try_cmpxchg(&part->bd_stamp, &stamp, now))) - __part_stat_add(part, io_ticks, end ? now - stamp : 1); - } + if (unlikely(time_after(now, stamp)) && + likely(try_cmpxchg(&part->bd_stamp, &stamp, now)) && + (end || part_in_flight(part))) + __part_stat_add(part, io_ticks, now - stamp); + if (part->bd_partno) { part = bdev_whole(part); goto again; diff --git a/block/blk-merge.c b/block/blk-merge.c index 2d470cf2173e..925c5eaac581 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -779,6 +779,8 @@ static void blk_account_io_merge_request(struct request *req) if (blk_do_io_stat(req)) { part_stat_lock(); part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); + part_stat_local_dec(req->part, + in_flight[op_is_write(req_op(req))]); part_stat_unlock(); } } diff --git a/block/blk-mq.c b/block/blk-mq.c index 25d2f3239eb6..f1d071810893 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -998,6 +998,8 @@ static inline void blk_account_io_done(struct request *req, u64 now) update_io_ticks(req->part, jiffies, true); part_stat_inc(req->part, ios[sgrp]); part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns); + part_stat_local_dec(req->part, + in_flight[op_is_write(req_op(req))]); part_stat_unlock(); } } @@ -1020,6 +1022,8 @@ static inline void blk_account_io_start(struct request *req) part_stat_lock(); update_io_ticks(req->part, jiffies, false); + part_stat_local_inc(req->part, + in_flight[op_is_write(req_op(req))]); part_stat_unlock(); } } diff --git a/block/blk.h b/block/blk.h index 1ef920f72e0f..1154e87a4022 100644 --- a/block/blk.h +++ b/block/blk.h @@ -344,6 +344,7 @@ static inline bool blk_do_io_stat(struct request *rq) } void update_io_ticks(struct block_device *part, unsigned long now, bool end); +unsigned int part_in_flight(struct block_device *part); static inline void req_set_nomerge(struct request_queue *q, struct request *req) { diff --git a/block/genhd.c b/block/genhd.c index d0471f469f7d..2e4c2521584a 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -118,7 +118,7 @@ static void part_stat_read_all(struct block_device *part, } } -static unsigned int part_in_flight(struct block_device *part) +unsigned int part_in_flight(struct block_device *part) { unsigned int inflight = 0; int cpu; |