Diffstat (limited to 'block/blk-throttle.c')
-rw-r--r--  block/blk-throttle.c  280
1 file changed, 175 insertions(+), 105 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 9f5fe62afff9..847721dc2b2b 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -329,8 +329,8 @@ static struct bio *throtl_pop_queued(struct list_head *queued,
/* init a service_queue, assumes the caller zeroed it */
static void throtl_service_queue_init(struct throtl_service_queue *sq)
{
- INIT_LIST_HEAD(&sq->queued[0]);
- INIT_LIST_HEAD(&sq->queued[1]);
+ INIT_LIST_HEAD(&sq->queued[READ]);
+ INIT_LIST_HEAD(&sq->queued[WRITE]);
sq->pending_tree = RB_ROOT_CACHED;
timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0);
}
@@ -420,24 +420,17 @@ static void tg_update_has_rules(struct throtl_grp *tg)
struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq);
struct throtl_data *td = tg->td;
int rw;
- int has_iops_limit = 0;
for (rw = READ; rw <= WRITE; rw++) {
- unsigned int iops_limit = tg_iops_limit(tg, rw);
-
- tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||
+ tg->has_rules_iops[rw] =
+ (parent_tg && parent_tg->has_rules_iops[rw]) ||
(td->limit_valid[td->limit_index] &&
- (tg_bps_limit(tg, rw) != U64_MAX ||
- iops_limit != UINT_MAX));
-
- if (iops_limit != UINT_MAX)
- has_iops_limit = 1;
+ tg_iops_limit(tg, rw) != UINT_MAX);
+ tg->has_rules_bps[rw] =
+ (parent_tg && parent_tg->has_rules_bps[rw]) ||
+ (td->limit_valid[td->limit_index] &&
+ (tg_bps_limit(tg, rw) != U64_MAX));
}
-
- if (has_iops_limit)
- tg->flags |= THROTL_TG_HAS_IOPS_LIMIT;
- else
- tg->flags &= ~THROTL_TG_HAS_IOPS_LIMIT;
}
static void throtl_pd_online(struct blkg_policy_data *pd)
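The single has_rules[] array is replaced by has_rules_bps[] and has_rules_iops[], tracked per direction, so a caller can test only the rule type that actually applies to a bio. A minimal caller-side sketch under that assumption (the helper name is hypothetical; the real fast-path check lives outside this file and may differ):

/*
 * Hypothetical helper (not part of this patch): a bio whose bytes were
 * already charged against a bps limit (BIO_BPS_THROTTLED, introduced
 * further down in this diff) only needs to enter blk-throttle if an
 * iops rule applies in its direction.
 */
static inline bool tg_bio_has_rules(struct throtl_grp *tg, struct bio *bio)
{
	int rw = bio_data_dir(bio);

	if (bio_flagged(bio, BIO_BPS_THROTTLED))
		return tg->has_rules_iops[rw];

	return tg->has_rules_bps[rw] || tg->has_rules_iops[rw];
}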
@@ -520,7 +513,6 @@ static void throtl_rb_erase(struct rb_node *n,
{
rb_erase_cached(n, &parent_sq->pending_tree);
RB_CLEAR_NODE(n);
- --parent_sq->nr_pending;
}
static void update_min_dispatch_time(struct throtl_service_queue *parent_sq)
@@ -572,7 +564,11 @@ static void throtl_enqueue_tg(struct throtl_grp *tg)
static void throtl_dequeue_tg(struct throtl_grp *tg)
{
if (tg->flags & THROTL_TG_PENDING) {
- throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
+ struct throtl_service_queue *parent_sq =
+ tg->service_queue.parent_sq;
+
+ throtl_rb_erase(&tg->rb_node, parent_sq);
+ --parent_sq->nr_pending;
tg->flags &= ~THROTL_TG_PENDING;
}
}
@@ -639,6 +635,8 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
{
tg->bytes_disp[rw] = 0;
tg->io_disp[rw] = 0;
+ tg->carryover_bytes[rw] = 0;
+ tg->carryover_ios[rw] = 0;
/*
* Previous slice has expired. We must have trimmed it after last
@@ -656,12 +654,17 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
tg->slice_end[rw], jiffies);
}
-static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
+static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw,
+ bool clear_carryover)
{
tg->bytes_disp[rw] = 0;
tg->io_disp[rw] = 0;
tg->slice_start[rw] = jiffies;
tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
+ if (clear_carryover) {
+ tg->carryover_bytes[rw] = 0;
+ tg->carryover_ios[rw] = 0;
+ }
throtl_log(&tg->service_queue,
"[%c] new slice start=%lu end=%lu jiffies=%lu",
@@ -754,33 +757,20 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
tg->slice_start[rw], tg->slice_end[rw], jiffies);
}
-static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
- u32 iops_limit, unsigned long *wait)
+static unsigned int calculate_io_allowed(u32 iops_limit,
+ unsigned long jiffy_elapsed)
{
- bool rw = bio_data_dir(bio);
unsigned int io_allowed;
- unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
u64 tmp;
- if (iops_limit == UINT_MAX) {
- if (wait)
- *wait = 0;
- return true;
- }
-
- jiffy_elapsed = jiffies - tg->slice_start[rw];
-
- /* Round up to the next throttle slice, wait time must be nonzero */
- jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
-
/*
- * jiffy_elapsed_rnd should not be a big value as minimum iops can be
+ * jiffy_elapsed should not be a big value as minimum iops can be
* 1 then at max jiffy elapsed should be equivalent of 1 second as we
* will allow dispatch after 1 second and after that slice should
* have been trimmed.
*/
- tmp = (u64)iops_limit * jiffy_elapsed_rnd;
+ tmp = (u64)iops_limit * jiffy_elapsed;
do_div(tmp, HZ);
if (tmp > UINT_MAX)
@@ -788,6 +778,68 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
else
io_allowed = tmp;
+ return io_allowed;
+}
+
+static u64 calculate_bytes_allowed(u64 bps_limit, unsigned long jiffy_elapsed)
+{
+ return mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed, (u64)HZ);
+}
+
+static void __tg_update_carryover(struct throtl_grp *tg, bool rw)
+{
+ unsigned long jiffy_elapsed = jiffies - tg->slice_start[rw];
+ u64 bps_limit = tg_bps_limit(tg, rw);
+ u32 iops_limit = tg_iops_limit(tg, rw);
+
+ /*
+ * If the config is updated while bios are still throttled, calculate
+ * and accumulate how many bytes/ios have been allowed but not yet
+ * dispatched across the changes. carryover_bytes/ios will then be used
+ * to calculate the wait time under the new configuration.
+ */
+ if (bps_limit != U64_MAX)
+ tg->carryover_bytes[rw] +=
+ calculate_bytes_allowed(bps_limit, jiffy_elapsed) -
+ tg->bytes_disp[rw];
+ if (iops_limit != UINT_MAX)
+ tg->carryover_ios[rw] +=
+ calculate_io_allowed(iops_limit, jiffy_elapsed) -
+ tg->io_disp[rw];
+}
+
+static void tg_update_carryover(struct throtl_grp *tg)
+{
+ if (tg->service_queue.nr_queued[READ])
+ __tg_update_carryover(tg, READ);
+ if (tg->service_queue.nr_queued[WRITE])
+ __tg_update_carryover(tg, WRITE);
+
+ /* see comments in struct throtl_grp for meaning of these fields. */
+ throtl_log(&tg->service_queue, "%s: %llu %llu %u %u\n", __func__,
+ tg->carryover_bytes[READ], tg->carryover_bytes[WRITE],
+ tg->carryover_ios[READ], tg->carryover_ios[WRITE]);
+}
+
+static bool tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio,
+ u32 iops_limit, unsigned long *wait)
+{
+ bool rw = bio_data_dir(bio);
+ unsigned int io_allowed;
+ unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
+
+ if (iops_limit == UINT_MAX) {
+ if (wait)
+ *wait = 0;
+ return true;
+ }
+
+ jiffy_elapsed = jiffies - tg->slice_start[rw];
+
+ /* Round up to the next throttle slice, wait time must be nonzero */
+ jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
+ io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed_rnd) +
+ tg->carryover_ios[rw];
if (tg->io_disp[rw] + 1 <= io_allowed) {
if (wait)
*wait = 0;
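calculate_bytes_allowed()/calculate_io_allowed() and the carryover fields combine as follows. A small standalone sketch of the bytes-side arithmetic (userspace C, illustrative only; HZ, the limits and the elapsed times are made-up numbers, and representing the carryover as a signed value is an assumption of this sketch):

#include <stdio.h>
#include <stdint.h>

#define HZ 1000ULL	/* assumed tick rate for the example */

/* mirrors calculate_bytes_allowed(): budget earned after jiffy_elapsed */
static uint64_t calc_bytes_allowed(uint64_t bps_limit, uint64_t jiffy_elapsed)
{
	return bps_limit * jiffy_elapsed / HZ;
}

int main(void)
{
	/* old configuration: 1 MiB/s, 500 jiffies into the slice, 200 KiB sent */
	uint64_t old_limit = 1024 * 1024;
	uint64_t elapsed_old = 500;
	uint64_t dispatched = 200 * 1024;

	/* __tg_update_carryover(): bank the budget granted but not yet used */
	int64_t carryover = (int64_t)(calc_bytes_allowed(old_limit, elapsed_old)
				      - dispatched);

	/* new configuration: 256 KiB/s, 100 jiffies into the restarted slice */
	uint64_t new_limit = 256 * 1024;
	uint64_t elapsed_new = 100;

	/* tg_within_bps_limit(): carryover is added back to the new allowance */
	uint64_t allowed = calc_bytes_allowed(new_limit, elapsed_new) + carryover;

	printf("carryover=%lld bytes, allowed under new limit=%llu bytes\n",
	       (long long)carryover, (unsigned long long)allowed);
	return 0;
}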
@@ -802,16 +854,16 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
return false;
}
-static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
- u64 bps_limit, unsigned long *wait)
+static bool tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
+ u64 bps_limit, unsigned long *wait)
{
bool rw = bio_data_dir(bio);
- u64 bytes_allowed, extra_bytes, tmp;
+ u64 bytes_allowed, extra_bytes;
unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
unsigned int bio_size = throtl_bio_data_size(bio);
/* no need to throttle if this bio's bytes have been accounted */
- if (bps_limit == U64_MAX || bio_flagged(bio, BIO_THROTTLED)) {
+ if (bps_limit == U64_MAX || bio_flagged(bio, BIO_BPS_THROTTLED)) {
if (wait)
*wait = 0;
return true;
@@ -824,11 +876,8 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
jiffy_elapsed_rnd = tg->td->throtl_slice;
jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
-
- tmp = bps_limit * jiffy_elapsed_rnd;
- do_div(tmp, HZ);
- bytes_allowed = tmp;
-
+ bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd) +
+ tg->carryover_bytes[rw];
if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
if (wait)
*wait = 0;
@@ -889,7 +938,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
* slice and it should be extended instead.
*/
if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw]))
- throtl_start_new_slice(tg, rw);
+ throtl_start_new_slice(tg, rw, true);
else {
if (time_before(tg->slice_end[rw],
jiffies + tg->td->throtl_slice))
@@ -897,8 +946,8 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
jiffies + tg->td->throtl_slice);
}
- if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
- tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
+ if (tg_within_bps_limit(tg, bio, bps_limit, &bps_wait) &&
+ tg_within_iops_limit(tg, bio, iops_limit, &iops_wait)) {
if (wait)
*wait = 0;
return true;
@@ -921,22 +970,13 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
unsigned int bio_size = throtl_bio_data_size(bio);
/* Charge the bio to the group */
- if (!bio_flagged(bio, BIO_THROTTLED)) {
+ if (!bio_flagged(bio, BIO_BPS_THROTTLED)) {
tg->bytes_disp[rw] += bio_size;
tg->last_bytes_disp[rw] += bio_size;
}
tg->io_disp[rw]++;
tg->last_io_disp[rw]++;
-
- /*
- * BIO_THROTTLED is used to prevent the same bio to be throttled
- * more than once as a throttled bio will go through blk-throtl the
- * second time when it eventually gets issued. Set it when a bio
- * is being charged to a tg.
- */
- if (!bio_flagged(bio, BIO_THROTTLED))
- bio_set_flag(bio, BIO_THROTTLED);
}
/**
@@ -990,9 +1030,9 @@ static void tg_update_disptime(struct throtl_grp *tg)
disptime = jiffies + min_wait;
/* Update dispatch time */
- throtl_dequeue_tg(tg);
+ throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
tg->disptime = disptime;
- throtl_enqueue_tg(tg);
+ tg_service_queue_add(tg);
/* see throtl_add_bio_tg() */
tg->flags &= ~THROTL_TG_WAS_EMPTY;
@@ -1026,6 +1066,7 @@ static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
sq->nr_queued[rw]--;
throtl_charge_bio(tg, bio);
+ bio_set_flag(bio, BIO_BPS_THROTTLED);
/*
* If our parent is another tg, we just need to transfer @bio to
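The flag formerly known as BIO_THROTTLED is renamed BIO_BPS_THROTTLED and narrowed in meaning: it now records only that a bio's bytes have been charged against a bps limit, and it is set when a queued bio is dispatched (above) or when a bio reaches the top of the hierarchy within its limits (further down in __blk_throtl_bio()), rather than unconditionally at out_unlock. A tiny sketch of the resulting invariant, using only names that appear in this diff (the helper itself is hypothetical):

/*
 * Hypothetical helper: whether a resubmitted bio still has to pass the
 * bps check.  Once BIO_BPS_THROTTLED is set, tg_within_bps_limit()
 * returns true immediately and only the iops limit can delay the bio.
 */
static inline bool bio_still_owes_bytes(struct bio *bio)
{
	return !bio_flagged(bio, BIO_BPS_THROTTLED);
}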
@@ -1101,13 +1142,13 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
if (time_before(jiffies, tg->disptime))
break;
- throtl_dequeue_tg(tg);
-
nr_disp += throtl_dispatch_tg(tg);
sq = &tg->service_queue;
- if (sq->nr_queued[0] || sq->nr_queued[1])
+ if (sq->nr_queued[READ] || sq->nr_queued[WRITE])
tg_update_disptime(tg);
+ else
+ throtl_dequeue_tg(tg);
if (nr_disp >= THROTL_QUANTUM)
break;
@@ -1321,8 +1362,8 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global)
* that a group's limit are dropped suddenly and we don't want to
* account recently dispatched IO with new low rate.
*/
- throtl_start_new_slice(tg, READ);
- throtl_start_new_slice(tg, WRITE);
+ throtl_start_new_slice(tg, READ, false);
+ throtl_start_new_slice(tg, WRITE, false);
if (tg->flags & THROTL_TG_PENDING) {
tg_update_disptime(tg);
@@ -1350,6 +1391,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
v = U64_MAX;
tg = blkg_to_tg(ctx.blkg);
+ tg_update_carryover(tg);
if (is_u64)
*(u64 *)((void *)tg + of_cft(of)->private) = v;
@@ -1536,6 +1578,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
return ret;
tg = blkg_to_tg(ctx.blkg);
+ tg_update_carryover(tg);
v[0] = tg->bps_conf[READ][index];
v[1] = tg->bps_conf[WRITE][index];
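Taken together with the tg_conf_updated() hunk above, the configuration-update path now runs roughly as sketched below (call chain simplified; details of how tg_set_conf()/tg_set_limit() store the new values are abbreviated):

/*
 * tg_set_conf() / tg_set_limit()
 *     tg_update_carryover(tg);            // bank the unused budget per rw
 *     <store the new bps/iops limits>
 *     tg_conf_updated(tg, ...)
 *         throtl_start_new_slice(tg, READ,  false);   // keep carryover
 *         throtl_start_new_slice(tg, WRITE, false);
 *
 * tg_within_bps_limit()/tg_within_iops_limit() later add
 * carryover_bytes[rw]/carryover_ios[rw] back into the allowance computed
 * for the restarted slice, so bios queued under the old limits do not
 * start from a zero budget.  A genuinely fresh slice - started with
 * clear_carryover == true or via throtl_start_new_slice_with_credit() -
 * resets the carried-over budget to zero.
 */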
@@ -1673,6 +1716,41 @@ struct blkcg_policy blkcg_policy_throtl = {
.pd_free_fn = throtl_pd_free,
};
+void blk_throtl_cancel_bios(struct gendisk *disk)
+{
+ struct request_queue *q = disk->queue;
+ struct cgroup_subsys_state *pos_css;
+ struct blkcg_gq *blkg;
+
+ spin_lock_irq(&q->queue_lock);
+ /*
+ * queue_lock is held, so the RCU lock is not technically needed here.
+ * However, it is still taken to emphasize that the following path
+ * needs RCU protection and to avoid a warning from lockdep.
+ */
+ rcu_read_lock();
+ blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
+ struct throtl_grp *tg = blkg_to_tg(blkg);
+ struct throtl_service_queue *sq = &tg->service_queue;
+
+ /*
+ * Set the flag to make sure throtl_pending_timer_fn() won't
+ * stop until all throttled bios are dispatched.
+ */
+ blkg_to_tg(blkg)->flags |= THROTL_TG_CANCELING;
+ /*
+ * Update disptime after setting the above flag to make sure
+ * throtl_select_dispatch() won't exit without dispatching.
+ */
+ tg_update_disptime(tg);
+
+ throtl_schedule_pending_timer(sq, jiffies + 1);
+ }
+ rcu_read_unlock();
+ spin_unlock_irq(&q->queue_lock);
+}
+
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
static unsigned long __tg_last_low_overflow_time(struct throtl_grp *tg)
{
unsigned long rtime = jiffies, wtime = jiffies;
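blk_throtl_cancel_bios() is moved ahead of the (now larger) CONFIG_BLK_DEV_THROTTLING_LOW region so it remains built unconditionally, and it now takes a gendisk rather than a request_queue, matching blk_throtl_init()/blk_throtl_exit()/blk_throtl_register() below. A hedged sketch of a call site under that assumption (the exact teardown hook is not shown in this diff):

/*
 * Hypothetical call-site sketch: a disk teardown path cancels throttled
 * bios by passing the gendisk directly instead of reaching into
 * disk->queue.
 */
static void example_disk_teardown(struct gendisk *disk)
{
	/* flush out bios held by blk-throttle before the disk goes away */
	blk_throtl_cancel_bios(disk);

	/* ...rest of teardown (device removal, queue cleanup, etc.)... */
}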
@@ -1777,39 +1855,6 @@ static bool throtl_hierarchy_can_upgrade(struct throtl_grp *tg)
return false;
}
-void blk_throtl_cancel_bios(struct request_queue *q)
-{
- struct cgroup_subsys_state *pos_css;
- struct blkcg_gq *blkg;
-
- spin_lock_irq(&q->queue_lock);
- /*
- * queue_lock is held, rcu lock is not needed here technically.
- * However, rcu lock is still held to emphasize that following
- * path need RCU protection and to prevent warning from lockdep.
- */
- rcu_read_lock();
- blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
- struct throtl_grp *tg = blkg_to_tg(blkg);
- struct throtl_service_queue *sq = &tg->service_queue;
-
- /*
- * Set the flag to make sure throtl_pending_timer_fn() won't
- * stop until all throttled bios are dispatched.
- */
- blkg_to_tg(blkg)->flags |= THROTL_TG_CANCELING;
- /*
- * Update disptime after setting the above flag to make sure
- * throtl_select_dispatch() won't exit without dispatching.
- */
- tg_update_disptime(tg);
-
- throtl_schedule_pending_timer(sq, jiffies + 1);
- }
- rcu_read_unlock();
- spin_unlock_irq(&q->queue_lock);
-}
-
static bool throtl_can_upgrade(struct throtl_data *td,
struct throtl_grp *this_tg)
{
@@ -2005,7 +2050,6 @@ static void blk_throtl_update_idletime(struct throtl_grp *tg)
tg->checked_last_finish_time = last_finish_time;
}
-#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
static void throtl_update_latency_buckets(struct throtl_data *td)
{
struct avg_latency_bucket avg_latency[2][LATENCY_BUCKET_SIZE];
@@ -2086,6 +2130,28 @@ static void throtl_update_latency_buckets(struct throtl_data *td)
static inline void throtl_update_latency_buckets(struct throtl_data *td)
{
}
+
+static void blk_throtl_update_idletime(struct throtl_grp *tg)
+{
+}
+
+static void throtl_downgrade_check(struct throtl_grp *tg)
+{
+}
+
+static void throtl_upgrade_check(struct throtl_grp *tg)
+{
+}
+
+static bool throtl_can_upgrade(struct throtl_data *td,
+ struct throtl_grp *this_tg)
+{
+ return false;
+}
+
+static void throtl_upgrade_state(struct throtl_data *td)
+{
+}
#endif
bool __blk_throtl_bio(struct bio *bio)
@@ -2159,8 +2225,10 @@ again:
qn = &tg->qnode_on_parent[rw];
sq = sq->parent_sq;
tg = sq_to_tg(sq);
- if (!tg)
+ if (!tg) {
+ bio_set_flag(bio, BIO_BPS_THROTTLED);
goto out_unlock;
+ }
}
/* out-of-limit, queue to @tg */
@@ -2189,8 +2257,6 @@ again:
}
out_unlock:
- bio_set_flag(bio, BIO_THROTTLED);
-
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
if (throttled || !td->track_bio_latency)
bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
@@ -2286,8 +2352,9 @@ void blk_throtl_bio_endio(struct bio *bio)
}
#endif
-int blk_throtl_init(struct request_queue *q)
+int blk_throtl_init(struct gendisk *disk)
{
+ struct request_queue *q = disk->queue;
struct throtl_data *td;
int ret;
@@ -2329,8 +2396,10 @@ int blk_throtl_init(struct request_queue *q)
return ret;
}
-void blk_throtl_exit(struct request_queue *q)
+void blk_throtl_exit(struct gendisk *disk)
{
+ struct request_queue *q = disk->queue;
+
BUG_ON(!q->td);
del_timer_sync(&q->td->service_queue.pending_timer);
throtl_shutdown_wq(q);
@@ -2340,8 +2409,9 @@ void blk_throtl_exit(struct request_queue *q)
kfree(q->td);
}
-void blk_throtl_register_queue(struct request_queue *q)
+void blk_throtl_register(struct gendisk *disk)
{
+ struct request_queue *q = disk->queue;
struct throtl_data *td;
int i;