summaryrefslogtreecommitdiff
path: root/block/blk-iocost.c
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2020-09-01 21:52:47 +0300
committer: Jens Axboe <axboe@kernel.dk> 2020-09-02 04:38:32 +0300
commit: 93f7d2db80e4aea2731619d7b907a029e0d14259 (patch)
tree: 9acf67d3477d6647a98939d66ff24a2cc3cc2d7b /block/blk-iocost.c
parent: 065655c862fedf4b04e1b28b83ca6f338d81cf0b (diff)
download: linux-93f7d2db80e4aea2731619d7b907a029e0d14259.tar.xz
blk-iocost: restructure surplus donation logic
The way the surplus donation logic is structured isn't great. There are two separate paths for starting/increasing donations and decreasing them, which makes the logic harder to follow and prone to unnecessary behavior differences.

In preparation for improved donation handling, this patch restructures the code so that

* All donors - new, increasing and decreasing - are funneled through the same code path.

* The target donation calculation is factored into hweight_after_donation() which is called once from the same spot for all possible donors.

* Actual inuse adjustment is factored into transfer_surpluses().

This change introduces a few behavior differences - e.g. donation amount reduction now uses the max usage of the recent three periods just like new and increasing donations, and inuse now gets adjusted upwards the same way it gets adjusted downwards. These differences are unlikely to have severely negative implications and the whole logic will be revamped soon.

This patch also removes two tracepoints. The existing TPs don't quite fit the new implementation. A later patch will update and reinstate them.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/blk-iocost.c')
-rw-r--r-- block/blk-iocost.c 179
1 files changed, 103 insertions, 76 deletions
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index a3889a8b0a33..61b008d0801f 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -494,6 +494,7 @@ struct ioc_gq {
int hweight_gen;
u32 hweight_active;
u32 hweight_inuse;
+ u32 hweight_after_donation;
struct list_head walk_list;
struct list_head surplus_list;
@@ -1070,6 +1071,32 @@ out:
*hw_inusep = iocg->hweight_inuse;
}
+/*
+ * Calculate the hweight_inuse @iocg would get with max @inuse assuming all the
+ * other weights stay unchanged.
+ */
+static u32 current_hweight_max(struct ioc_gq *iocg)
+{
+ u32 hwm = WEIGHT_ONE;
+ u32 inuse = iocg->active;
+ u64 child_inuse_sum;
+ int lvl;
+
+ lockdep_assert_held(&iocg->ioc->lock);
+
+ for (lvl = iocg->level - 1; lvl >= 0; lvl--) {
+ struct ioc_gq *parent = iocg->ancestors[lvl];
+ struct ioc_gq *child = iocg->ancestors[lvl + 1];
+
+ child_inuse_sum = parent->child_inuse_sum + inuse - child->inuse;
+ hwm = div64_u64((u64)hwm * inuse, child_inuse_sum);
+ inuse = DIV64_U64_ROUND_UP(parent->active * child_inuse_sum,
+ parent->child_active_sum);
+ }
+
+ return max_t(u32, hwm, 1);
+}
+
static void weight_updated(struct ioc_gq *iocg)
{
struct ioc *ioc = iocg->ioc;
@@ -1488,20 +1515,58 @@ static void iocg_flush_stat(struct list_head *target_iocgs, struct ioc_now *now)
}
}
-/* returns usage with margin added if surplus is large enough */
-static u32 surplus_adjusted_hweight_inuse(u32 usage, u32 hw_inuse)
+/*
+ * Determine what @iocg's hweight_inuse should be after donating unused
+ * capacity. @hwm is the upper bound and used to signal no donation. This
+ * function also throws away @iocg's excess budget.
+ */
+static u32 hweight_after_donation(struct ioc_gq *iocg, u32 hwm, u32 usage,
+ struct ioc_now *now)
{
+ struct ioc *ioc = iocg->ioc;
+ u64 vtime = atomic64_read(&iocg->vtime);
+ s64 excess;
+
+ /* see whether minimum margin requirement is met */
+ if (waitqueue_active(&iocg->waitq) ||
+ time_after64(vtime, now->vnow - ioc->margins.min))
+ return hwm;
+
+ /* throw away excess above max */
+ excess = now->vnow - vtime - ioc->margins.max;
+ if (excess > 0) {
+ atomic64_add(excess, &iocg->vtime);
+ atomic64_add(excess, &iocg->done_vtime);
+ vtime += excess;
+ }
+
/* add margin */
usage = DIV_ROUND_UP(usage * SURPLUS_SCALE_PCT, 100);
usage += SURPLUS_SCALE_ABS;
/* don't bother if the surplus is too small */
- if (usage + SURPLUS_MIN_ADJ_DELTA > hw_inuse)
- return 0;
+ if (usage + SURPLUS_MIN_ADJ_DELTA > hwm)
+ return hwm;
return usage;
}
+static void transfer_surpluses(struct list_head *surpluses, struct ioc_now *now)
+{
+ struct ioc_gq *iocg;
+
+ list_for_each_entry(iocg, surpluses, surplus_list) {
+ u32 old_hwi, new_hwi, new_inuse;
+
+ current_hweight(iocg, NULL, &old_hwi);
+ new_hwi = iocg->hweight_after_donation;
+
+ new_inuse = DIV64_U64_ROUND_UP((u64)iocg->inuse * new_hwi,
+ old_hwi);
+ __propagate_weights(iocg, iocg->weight, new_inuse);
+ }
+}
+
static void ioc_timer_fn(struct timer_list *timer)
{
struct ioc *ioc = container_of(timer, struct ioc, timer);
@@ -1560,9 +1625,9 @@ static void ioc_timer_fn(struct timer_list *timer)
/* calc usages and see whether some weights need to be moved around */
list_for_each_entry(iocg, &ioc->active_iocgs, active_list) {
- u64 vdone, vtime, usage_us, vmin;
+ u64 vdone, vtime, usage_us;
u32 hw_active, hw_inuse, usage;
- int uidx;
+ int uidx, nr_valid;
/*
* Collect unused and wind vtime closer to vnow to prevent
@@ -1618,92 +1683,54 @@ static void ioc_timer_fn(struct timer_list *timer)
started_at = ioc->period_at;
dur = max_t(u64, now.now - started_at, 1);
- usage = clamp_t(u32,
+
+ iocg->usage_idx = uidx;
+ iocg->usages[uidx] = clamp_t(u32,
DIV64_U64_ROUND_UP(usage_us * WEIGHT_ONE, dur),
1, WEIGHT_ONE);
+ }
- iocg->usage_idx = uidx;
- iocg->usages[uidx] = usage;
- } else {
- usage = 0;
+ /* base the decision on max historical usage */
+ for (i = 0, usage = 0, nr_valid = 0; i < NR_USAGE_SLOTS; i++) {
+ if (iocg->usages[i]) {
+ usage = max(usage, iocg->usages[i]);
+ nr_valid++;
+ }
}
+ if (nr_valid < MIN_VALID_USAGES)
+ usage = WEIGHT_ONE;
/* see whether there's surplus vtime */
- vmin = now.vnow - ioc->margins.max;
-
WARN_ON_ONCE(!list_empty(&iocg->surplus_list));
- if (!waitqueue_active(&iocg->waitq) &&
- time_before64(vtime, vmin)) {
- u64 delta = vmin - vtime;
-
- /* throw away surplus vtime */
- atomic64_add(delta, &iocg->vtime);
- atomic64_add(delta, &iocg->done_vtime);
- /* if usage is sufficiently low, maybe it can donate */
- if (surplus_adjusted_hweight_inuse(usage, hw_inuse))
- list_add(&iocg->surplus_list, &surpluses);
- } else if (hw_inuse < hw_active) {
- u32 new_hwi, new_inuse;
+ if (hw_inuse < hw_active ||
+ (!waitqueue_active(&iocg->waitq) &&
+ time_before64(vtime, now.vnow - ioc->margins.max))) {
+ u32 hwm, new_hwi;
- /* was donating but might need to take back some */
- if (waitqueue_active(&iocg->waitq)) {
- new_hwi = hw_active;
+ /*
+ * Already donating or accumulated enough to start.
+ * Determine the donation amount.
+ */
+ hwm = current_hweight_max(iocg);
+ new_hwi = hweight_after_donation(iocg, hwm, usage,
+ &now);
+ if (new_hwi < hwm) {
+ iocg->hweight_after_donation = new_hwi;
+ list_add(&iocg->surplus_list, &surpluses);
} else {
- new_hwi = max(hw_inuse,
- usage * SURPLUS_SCALE_PCT / 100 +
- SURPLUS_SCALE_ABS);
- }
-
- new_inuse = div64_u64((u64)iocg->inuse * new_hwi,
- hw_inuse);
- new_inuse = clamp_t(u32, new_inuse, 1, iocg->active);
-
- if (new_inuse > iocg->inuse) {
- TRACE_IOCG_PATH(inuse_takeback, iocg, &now,
- iocg->inuse, new_inuse,
- hw_inuse, new_hwi);
- __propagate_weights(iocg, iocg->weight,
- new_inuse);
+ __propagate_weights(iocg, iocg->active,
+ iocg->active);
+ nr_shortages++;
}
} else {
- /* genuninely out of vtime */
+ /* genuinely short on vtime */
nr_shortages++;
}
}
- if (!nr_shortages || list_empty(&surpluses))
- goto skip_surplus_transfers;
+ if (!list_empty(&surpluses) && nr_shortages)
+ transfer_surpluses(&surpluses, &now);
- /* there are both shortages and surpluses, transfer surpluses */
- list_for_each_entry(iocg, &surpluses, surplus_list) {
- u32 usage, hw_active, hw_inuse, new_hwi, new_inuse;
- int nr_valid = 0;
-
- /* base the decision on max historical usage */
- for (i = 0, usage = 0; i < NR_USAGE_SLOTS; i++) {
- if (iocg->usages[i]) {
- usage = max(usage, iocg->usages[i]);
- nr_valid++;
- }
- }
- if (nr_valid < MIN_VALID_USAGES)
- continue;
-
- current_hweight(iocg, &hw_active, &hw_inuse);
- new_hwi = surplus_adjusted_hweight_inuse(usage, hw_inuse);
- if (!new_hwi)
- continue;
-
- new_inuse = DIV64_U64_ROUND_UP((u64)iocg->inuse * new_hwi,
- hw_inuse);
- if (new_inuse < iocg->inuse) {
- TRACE_IOCG_PATH(inuse_giveaway, iocg, &now,
- iocg->inuse, new_inuse,
- hw_inuse, new_hwi);
- __propagate_weights(iocg, iocg->weight, new_inuse);
- }
- }
-skip_surplus_transfers:
commit_weights(ioc);
/* surplus list should be dissolved after use */