summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2020-04-13 19:27:55 +0300
committerJens Axboe <axboe@kernel.dk>2020-05-01 00:54:45 +0300
commit54c52e10dc9b939084a7e6e3d32ce8fd8dee7898 (patch)
treeea26acd9b9827ca2287e8027baca7a5ff3bce47a /block
parentaccea322f5439df22b19465bbe67b836f36165e8 (diff)
downloadlinux-54c52e10dc9b939084a7e6e3d32ce8fd8dee7898.tar.xz
blk-iocost: switch to fixed non-auto-decaying use_delay
The use_delay mechanism was introduced by blk-iolatency to hold memory allocators accountable for the reclaim and other shared IOs they cause. The duration of the delay is dynamically balanced between iolatency increasing the value on each target miss and it auto-decaying as time passes and threads get delayed on it. While this works well for iolatency, iocost's control model isn't compatible with it. There is no repeated "violation" events which can be balanced against auto-decaying. iocost instead knows how much a given cgroup is over budget and wants to prevent that cgroup from issuing IOs while over budget. Until now, iocost has been adding the cost of force-issued IOs. However, this doesn't reflect the amount which is already over budget and is simply not enough to counter the auto-decaying allowing anon-memory leaking low priority cgroup to go over its alloted share of IOs. As auto-decaying doesn't make much sense for iocost, this patch introduces a different mode of operation for use_delay - when blkcg_set_delay() are used insted of blkcg_add/use_delay(), the delay duration is not auto-decayed until it is explicitly cleared with blkcg_clear_delay(). iocost is updated to keep the delay duration synchronized to the budget overage amount. With this change, iocost can effectively police cgroups which generate significant amount of force-issued IOs. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
-rw-r--r--block/blk-cgroup.c6
-rw-r--r--block/blk-iocost.c23
2 files changed, 15 insertions, 14 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c5dc833212e1..0a63c6cbbcb1 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1530,6 +1530,10 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
{
u64 old = atomic64_read(&blkg->delay_start);
+ /* negative use_delay means no scaling, see blkcg_set_delay() */
+ if (atomic_read(&blkg->use_delay) < 0)
+ return;
+
/*
* We only want to scale down every second. The idea here is that we
* want to delay people for min(delay_nsec, NSEC_PER_SEC) in a certain
@@ -1717,6 +1721,8 @@ void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay)
*/
void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
{
+ if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
+ return;
blkcg_scale_delay(blkg, now);
atomic64_add(delta, &blkg->delay_nsec);
}
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index db35ee682294..a8e99ef76a08 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1209,14 +1209,14 @@ static enum hrtimer_restart iocg_waitq_timer_fn(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
+static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
{
struct ioc *ioc = iocg->ioc;
struct blkcg_gq *blkg = iocg_to_blkg(iocg);
u64 vtime = atomic64_read(&iocg->vtime);
u64 vmargin = ioc->margin_us * now->vrate;
u64 margin_ns = ioc->margin_us * NSEC_PER_USEC;
- u64 expires, oexpires;
+ u64 delta_ns, expires, oexpires;
u32 hw_inuse;
/* debt-adjust vtime */
@@ -1233,15 +1233,10 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
return false;
/* use delay */
- if (cost) {
- u64 cost_ns = DIV64_U64_ROUND_UP(cost * NSEC_PER_USEC,
- now->vrate);
- blkcg_add_delay(blkg, now->now_ns, cost_ns);
- }
- blkcg_use_delay(blkg);
-
- expires = now->now_ns + DIV64_U64_ROUND_UP(vtime - now->vnow,
- now->vrate) * NSEC_PER_USEC;
+ delta_ns = DIV64_U64_ROUND_UP(vtime - now->vnow,
+ now->vrate) * NSEC_PER_USEC;
+ blkcg_set_delay(blkg, delta_ns);
+ expires = now->now_ns + delta_ns;
/* if already active and close enough, don't bother */
oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->delay_timer));
@@ -1260,7 +1255,7 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
struct ioc_now now;
ioc_now(iocg->ioc, &now);
- iocg_kick_delay(iocg, &now, 0);
+ iocg_kick_delay(iocg, &now);
return HRTIMER_NORESTART;
}
@@ -1378,7 +1373,7 @@ static void ioc_timer_fn(struct timer_list *timer)
atomic64_read(&iocg->abs_vdebt)) {
/* might be oversleeping vtime / hweight changes, kick */
iocg_kick_waitq(iocg, &now);
- iocg_kick_delay(iocg, &now, 0);
+ iocg_kick_delay(iocg, &now);
} else if (iocg_is_idle(iocg)) {
/* no waiter and idle, deactivate */
iocg->last_inuse = iocg->inuse;
@@ -1737,7 +1732,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
*/
if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) {
atomic64_add(abs_cost, &iocg->abs_vdebt);
- if (iocg_kick_delay(iocg, &now, cost))
+ if (iocg_kick_delay(iocg, &now))
blkcg_schedule_throttle(rqos->q,
(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
return;