@@ -456,6 +456,7 @@ static void blk_throtl_update_valid_limit(struct throtl_data *td)
td->limit_valid[LIMIT_HIGH] = false;
}
+static void throtl_upgrade_state(struct throtl_data *td);
static void throtl_pd_offline(struct blkg_policy_data *pd)
{
struct throtl_grp *tg = pd_to_tg(pd);
@@ -467,9 +468,8 @@ static void throtl_pd_offline(struct blkg_policy_data *pd)
blk_throtl_update_valid_limit(tg->td);
- if (tg->td->limit_index == LIMIT_HIGH &&
- !tg->td->limit_valid[LIMIT_HIGH])
- tg->td->limit_index = LIMIT_MAX;
+ if (!tg->td->limit_valid[tg->td->limit_index])
+ throtl_upgrade_state(tg->td);
}
static void throtl_pd_free(struct blkg_policy_data *pd)
@@ -1077,6 +1077,8 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
return nr_disp;
}
+static bool throtl_can_upgrade(struct throtl_data *td,
+ struct throtl_grp *this_tg);
/**
* throtl_pending_timer_fn - timer function for service_queue->pending_timer
* @arg: the throtl_service_queue being serviced
@@ -1103,6 +1105,9 @@ static void throtl_pending_timer_fn(unsigned long arg)
int ret;
spin_lock_irq(q->queue_lock);
+ if (throtl_can_upgrade(td, NULL))
+ throtl_upgrade_state(td);
+
again:
parent_sq = sq->parent_sq;
dispatched = false;
@@ -1505,6 +1510,91 @@ static struct blkcg_policy blkcg_policy_throtl = {
.pd_free_fn = throtl_pd_free,
};
+static bool throtl_upgrade_check_one(struct throtl_grp *tg)
+{
+ struct throtl_service_queue *sq = &tg->service_queue;
+ bool read_limit, write_limit;
+
+ /* if cgroup reaches high/max limit, it's ok to next limit */
+ read_limit = tg->bps[READ][LIMIT_HIGH] != -1 ||
+ tg->iops[READ][LIMIT_HIGH] != -1 ||
+ tg->bps[READ][LIMIT_MAX] != -1 ||
+ tg->iops[READ][LIMIT_MAX] != -1;
+ write_limit = tg->bps[WRITE][LIMIT_HIGH] != -1 ||
+ tg->iops[WRITE][LIMIT_HIGH] != -1 ||
+ tg->bps[WRITE][LIMIT_MAX] != -1 ||
+ tg->iops[WRITE][LIMIT_MAX] != -1;
+ if (read_limit && sq->nr_queued[READ] &&
+ (!write_limit || sq->nr_queued[WRITE]))
+ return true;
+ if (write_limit && sq->nr_queued[WRITE] &&
+ (!read_limit || sq->nr_queued[READ]))
+ return true;
+ return false;
+}
+
+static bool throtl_upgrade_check_hierarchy(struct throtl_grp *tg)
+{
+ if (throtl_upgrade_check_one(tg))
+ return true;
+ while (true) {
+ if (!tg || (cgroup_subsys_on_dfl(io_cgrp_subsys) &&
+ !tg_to_blkg(tg)->parent))
+ return false;
+ if (throtl_upgrade_check_one(tg))
+ return true;
+ tg = sq_to_tg(tg->service_queue.parent_sq);
+ }
+ return false;
+}
+
+static bool throtl_can_upgrade(struct throtl_data *td,
+ struct throtl_grp *this_tg)
+{
+ struct cgroup_subsys_state *pos_css;
+ struct blkcg_gq *blkg;
+
+ if (td->limit_index != LIMIT_HIGH)
+ return false;
+
+ rcu_read_lock();
+ blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
+ struct throtl_grp *tg = blkg_to_tg(blkg);
+
+ if (tg == this_tg)
+ continue;
+ if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children))
+ continue;
+ if (!throtl_upgrade_check_hierarchy(tg)) {
+ rcu_read_unlock();
+ return false;
+ }
+ }
+ rcu_read_unlock();
+ return true;
+}
+
+static void throtl_upgrade_state(struct throtl_data *td)
+{
+ struct cgroup_subsys_state *pos_css;
+ struct blkcg_gq *blkg;
+
+ td->limit_index = LIMIT_MAX;
+ rcu_read_lock();
+ blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
+ struct throtl_grp *tg = blkg_to_tg(blkg);
+ struct throtl_service_queue *sq = &tg->service_queue;
+
+ tg->disptime = jiffies - 1;
+ throtl_select_dispatch(sq);
+ throtl_schedule_next_dispatch(sq, false);
+ }
+ rcu_read_unlock();
+ throtl_select_dispatch(&td->service_queue);
+ throtl_schedule_next_dispatch(&td->service_queue, false);
+ queue_work(kthrotld_workqueue, &td->dispatch_work);
+}
+
bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
struct bio *bio)
{
@@ -1527,14 +1617,20 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
sq = &tg->service_queue;
+again:
while (true) {
/* throtl is FIFO - if bios are already queued, should queue */
if (sq->nr_queued[rw])
break;
/* if above limits, break to queue */
- if (!tg_may_dispatch(tg, bio, NULL))
+ if (!tg_may_dispatch(tg, bio, NULL)) {
+ if (throtl_can_upgrade(tg->td, tg)) {
+ throtl_upgrade_state(tg->td);
+ goto again;
+ }
break;
+ }
/* within limits, let's charge and dispatch directly */
throtl_charge_bio(tg, bio);
When queue is in LIMIT_HIGH state and all cgroups with high limit cross the bps/iops limitation, we will upgrade queue's state to LIMIT_MAX For a cgroup hierarchy, there are two cases. Children has lower high limit than parent. Parent's high limit is meaningless. If children's bps/iops cross high limit, we can upgrade queue state. The other case is children has higher high limit than parent. Children's high limit is meaningless. As long as parent's bps/iops cross high limit, we can upgrade queue state. Signed-off-by: Shaohua Li <shli@fb.com> --- block/blk-throttle.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 100 insertions(+), 4 deletions(-)