diff mbox

[V4,02/15] blk-throttle: add .high interface

Message ID 239e20c62e028b570f48403faea1419c4d84f2bd.1479161136.git.shli@fb.com (mailing list archive)
State New, archived
Headers show

Commit Message

Shaohua Li Nov. 14, 2016, 10:22 p.m. UTC
Add high limit for cgroup and corresponding cgroup interface.

Signed-off-by: Shaohua Li <shli@fb.com>
---
 block/blk-throttle.c | 132 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 103 insertions(+), 29 deletions(-)

Comments

Tejun Heo Nov. 22, 2016, 8:02 p.m. UTC | #1
Hello, Shaohua.

Sorry about the delay.

On Mon, Nov 14, 2016 at 02:22:09PM -0800, Shaohua Li wrote:
> @@ -1376,11 +1414,37 @@ static ssize_t tg_set_max(struct kernfs_open_file *of,
>  			goto out_finish;
>  	}
>  
> -	tg->bps[READ][LIMIT_MAX] = v[0];
> -	tg->bps[WRITE][LIMIT_MAX] = v[1];
> -	tg->iops[READ][LIMIT_MAX] = v[2];
> -	tg->iops[WRITE][LIMIT_MAX] = v[3];
> -
> +	if (index == LIMIT_MAX) {
> +		if ((v[0] < tg->bps[READ][LIMIT_HIGH] &&
> +		       tg->bps[READ][LIMIT_HIGH] != -1) ||
> +		    (v[1] < tg->bps[WRITE][LIMIT_HIGH] &&
> +		       tg->bps[WRITE][LIMIT_HIGH] != -1) ||
> +		    (v[2] < tg->iops[READ][LIMIT_HIGH] &&
> +		       tg->iops[READ][LIMIT_HIGH] != -1) ||
> +		    (v[3] < tg->iops[WRITE][LIMIT_HIGH] &&
> +		       tg->iops[WRITE][LIMIT_HIGH] != -1)) {
> +			ret = -EINVAL;
> +			goto out_finish;

Is this necessary?  memcg doesn't put restrictions on input but just
enforces whatever is configured.  I think it'd be better to follow the
same model here too.  Hmm... is this because throtl will be able to
choose either all high or max limits per cgroup?

And this isn't from your patches but can we please switch to
UINT64_MAX instead of -1?

> +		}
> +	} else if (index == LIMIT_HIGH) {
> +		if ((v[0] > tg->bps[READ][LIMIT_MAX] && v[0] != -1) ||
> +		    (v[1] > tg->bps[WRITE][LIMIT_MAX] && v[1] != -1) ||
> +		    (v[2] > tg->iops[READ][LIMIT_MAX] && v[2] != -1) ||
> +		    (v[3] > tg->iops[WRITE][LIMIT_MAX] && v[3] != -1)) {

Ditto here.

> @@ -1412,6 +1484,7 @@ static struct blkcg_policy blkcg_policy_throtl = {
>  	.pd_alloc_fn		= throtl_pd_alloc,
>  	.pd_init_fn		= throtl_pd_init,
>  	.pd_online_fn		= throtl_pd_online,
> +	.pd_offline_fn		= throtl_pd_offline,
>  	.pd_free_fn		= throtl_pd_free,
>  };

I haven't read the whole thing yet but this looks a bit suspicious.  A
css going offline indicates that the destruction of the css started.
I don't get why that'd reset high limits.  There can be a lot of async
IOs after offline.

Thanks.
Shaohua Li Nov. 22, 2016, 11:08 p.m. UTC | #2
On Tue, Nov 22, 2016 at 03:02:53PM -0500, Tejun Heo wrote:
> Hello, Shaohua.
> 
> Sorry about the delay.
> 
> On Mon, Nov 14, 2016 at 02:22:09PM -0800, Shaohua Li wrote:
> > @@ -1376,11 +1414,37 @@ static ssize_t tg_set_max(struct kernfs_open_file *of,
> >  			goto out_finish;
> >  	}
> >  
> > -	tg->bps[READ][LIMIT_MAX] = v[0];
> > -	tg->bps[WRITE][LIMIT_MAX] = v[1];
> > -	tg->iops[READ][LIMIT_MAX] = v[2];
> > -	tg->iops[WRITE][LIMIT_MAX] = v[3];
> > -
> > +	if (index == LIMIT_MAX) {
> > +		if ((v[0] < tg->bps[READ][LIMIT_HIGH] &&
> > +		       tg->bps[READ][LIMIT_HIGH] != -1) ||
> > +		    (v[1] < tg->bps[WRITE][LIMIT_HIGH] &&
> > +		       tg->bps[WRITE][LIMIT_HIGH] != -1) ||
> > +		    (v[2] < tg->iops[READ][LIMIT_HIGH] &&
> > +		       tg->iops[READ][LIMIT_HIGH] != -1) ||
> > +		    (v[3] < tg->iops[WRITE][LIMIT_HIGH] &&
> > +		       tg->iops[WRITE][LIMIT_HIGH] != -1)) {
> > +			ret = -EINVAL;
> > +			goto out_finish;
> 
> Is this necessary?  memcg doesn't put restrictions on input but just
> enforces whatever is configured.  I think it'd be better to follow the
> same model here too.  Hmm... is this because throtl will be able to
> choose either all high or max limits per cgroup?

Thanks for your time!

Yep, the limit could be high or max. It's doable moving the restrictions on
input, but will increase trouble using the limits. If high is bigger than max,
can I set high to max? if not, I'd prefer to keep the restrictions.
 
> And this isn't from your patches but can we please switch to
> UINT64_MAX instead of -1?

Sure, will fix this.

> > +		}
> > +	} else if (index == LIMIT_HIGH) {
> > +		if ((v[0] > tg->bps[READ][LIMIT_MAX] && v[0] != -1) ||
> > +		    (v[1] > tg->bps[WRITE][LIMIT_MAX] && v[1] != -1) ||
> > +		    (v[2] > tg->iops[READ][LIMIT_MAX] && v[2] != -1) ||
> > +		    (v[3] > tg->iops[WRITE][LIMIT_MAX] && v[3] != -1)) {
> 
> Ditto here.
> 
> > @@ -1412,6 +1484,7 @@ static struct blkcg_policy blkcg_policy_throtl = {
> >  	.pd_alloc_fn		= throtl_pd_alloc,
> >  	.pd_init_fn		= throtl_pd_init,
> >  	.pd_online_fn		= throtl_pd_online,
> > +	.pd_offline_fn		= throtl_pd_offline,
> >  	.pd_free_fn		= throtl_pd_free,
> >  };
> 
> I haven't read the whole thing yet but this looks a bit suspicious.  A
> css going offline indicates that the destruction of the css started.
> I don't get why that'd reset high limits.  There can be a lot of async
> IOs after offline.

Ok. We do want to reset the limits. Because if no cgroup has high limit, we
definitively should use max limit for all cgroups. The whole state machine
(switching between high and max limit) is meaningless in that case.

Is pd_free_fn a good place to guarantee anyc IOs finish?

Thanks,
Shaohua
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tejun Heo Nov. 23, 2016, 9:11 p.m. UTC | #3
Hello,

On Tue, Nov 22, 2016 at 03:08:36PM -0800, Shaohua Li wrote:
> Yep, the limit could be high or max. It's doable moving the restrictions on
> input, but will increase trouble using the limits. If high is bigger than max,
> can I set high to max? if not, I'd prefer to keep the restrictions.

You can do that internally but userland should keep seeing what it
configured.

> > I haven't read the whole thing yet but this looks a bit suspicious.  A
> > css going offline indicates that the destruction of the css started.
> > I don't get why that'd reset high limits.  There can be a lot of async
> > IOs after offline.
> 
> Ok. We do want to reset the limits. Because if no cgroup has high limit, we
> definitively should use max limit for all cgroups. The whole state machine
> (switching between high and max limit) is meaningless in that case.
> 
> Is pd_free_fn a good place to guarantee anyc IOs finish?

Yeap.

Thanks.
diff mbox

Patch

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 925aa1ed..a564215 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -84,6 +84,7 @@  enum tg_state_flags {
 #define rb_entry_tg(node)	rb_entry((node), struct throtl_grp, rb_node)
 
 enum {
+	LIMIT_HIGH,
 	LIMIT_MAX,
 	LIMIT_CNT,
 };
@@ -414,6 +415,46 @@  static void throtl_pd_online(struct blkg_policy_data *pd)
 	tg_update_has_rules(pd_to_tg(pd));
 }
 
+static void blk_throtl_update_valid_limit(struct throtl_data *td)
+{
+	struct cgroup_subsys_state *pos_css;
+	struct blkcg_gq *blkg;
+	bool high_valid = false;
+
+	rcu_read_lock();
+	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
+		struct throtl_grp *tg = blkg_to_tg(blkg);
+
+		if (tg->bps[READ][LIMIT_HIGH] != -1 ||
+		    tg->bps[WRITE][LIMIT_HIGH] != -1 ||
+		    tg->iops[READ][LIMIT_HIGH] != -1 ||
+		    tg->iops[WRITE][LIMIT_HIGH] != -1)
+			high_valid = true;
+	}
+	rcu_read_unlock();
+
+	if (high_valid)
+		td->limit_valid[LIMIT_HIGH] = true;
+	else
+		td->limit_valid[LIMIT_HIGH] = false;
+}
+
+static void throtl_pd_offline(struct blkg_policy_data *pd)
+{
+	struct throtl_grp *tg = pd_to_tg(pd);
+
+	tg->bps[READ][LIMIT_HIGH] = -1;
+	tg->bps[WRITE][LIMIT_HIGH] = -1;
+	tg->iops[READ][LIMIT_HIGH] = -1;
+	tg->iops[WRITE][LIMIT_HIGH] = -1;
+
+	blk_throtl_update_valid_limit(tg->td);
+
+	if (tg->td->limit_index == LIMIT_HIGH &&
+	    !tg->td->limit_valid[LIMIT_HIGH])
+		tg->td->limit_index = LIMIT_MAX;
+}
+
 static void throtl_pd_free(struct blkg_policy_data *pd)
 {
 	struct throtl_grp *tg = pd_to_tg(pd);
@@ -1283,7 +1324,7 @@  static struct cftype throtl_legacy_files[] = {
 	{ }	/* terminate */
 };
 
-static u64 tg_prfill_max(struct seq_file *sf, struct blkg_policy_data *pd,
+static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
 			 int off)
 {
 	struct throtl_grp *tg = pd_to_tg(pd);
@@ -1292,36 +1333,32 @@  static u64 tg_prfill_max(struct seq_file *sf, struct blkg_policy_data *pd,
 
 	if (!dname)
 		return 0;
-	if (tg->bps[READ][LIMIT_MAX] == -1 && tg->bps[WRITE][LIMIT_MAX] == -1 &&
-	    tg->iops[READ][LIMIT_MAX] == -1 && tg->iops[WRITE][LIMIT_MAX] == -1)
+	if (tg->bps[READ][off] == -1 && tg->bps[WRITE][off] == -1 &&
+	    tg->iops[READ][off] == -1 && tg->iops[WRITE][off] == -1)
 		return 0;
 
-	if (tg->bps[READ][LIMIT_MAX] != -1)
-		snprintf(bufs[0], sizeof(bufs[0]), "%llu",
-			tg->bps[READ][LIMIT_MAX]);
-	if (tg->bps[WRITE][LIMIT_MAX] != -1)
-		snprintf(bufs[1], sizeof(bufs[1]), "%llu",
-			tg->bps[WRITE][LIMIT_MAX]);
-	if (tg->iops[READ][LIMIT_MAX] != -1)
-		snprintf(bufs[2], sizeof(bufs[2]), "%u",
-			tg->iops[READ][LIMIT_MAX]);
-	if (tg->iops[WRITE][LIMIT_MAX] != -1)
-		snprintf(bufs[3], sizeof(bufs[3]), "%u",
-			tg->iops[WRITE][LIMIT_MAX]);
+	if (tg->bps[READ][off] != -1)
+		snprintf(bufs[0], sizeof(bufs[0]), "%llu", tg->bps[READ][off]);
+	if (tg->bps[WRITE][off] != -1)
+		snprintf(bufs[1], sizeof(bufs[1]), "%llu", tg->bps[WRITE][off]);
+	if (tg->iops[READ][off] != -1)
+		snprintf(bufs[2], sizeof(bufs[2]), "%u", tg->iops[READ][off]);
+	if (tg->iops[WRITE][off] != -1)
+		snprintf(bufs[3], sizeof(bufs[3]), "%u", tg->iops[WRITE][off]);
 
 	seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s\n",
 		   dname, bufs[0], bufs[1], bufs[2], bufs[3]);
 	return 0;
 }
 
-static int tg_print_max(struct seq_file *sf, void *v)
+static int tg_print_limit(struct seq_file *sf, void *v)
 {
-	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_max,
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_limit,
 			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
 	return 0;
 }
 
-static ssize_t tg_set_max(struct kernfs_open_file *of,
+static ssize_t tg_set_limit(struct kernfs_open_file *of,
 			  char *buf, size_t nbytes, loff_t off)
 {
 	struct blkcg *blkcg = css_to_blkcg(of_css(of));
@@ -1329,6 +1366,7 @@  static ssize_t tg_set_max(struct kernfs_open_file *of,
 	struct throtl_grp *tg;
 	u64 v[4];
 	int ret;
+	int index = of_cft(of)->private;
 
 	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
 	if (ret)
@@ -1336,10 +1374,10 @@  static ssize_t tg_set_max(struct kernfs_open_file *of,
 
 	tg = blkg_to_tg(ctx.blkg);
 
-	v[0] = tg->bps[READ][LIMIT_MAX];
-	v[1] = tg->bps[WRITE][LIMIT_MAX];
-	v[2] = tg->iops[READ][LIMIT_MAX];
-	v[3] = tg->iops[WRITE][LIMIT_MAX];
+	v[0] = tg->bps[READ][index];
+	v[1] = tg->bps[WRITE][index];
+	v[2] = tg->iops[READ][index];
+	v[3] = tg->iops[WRITE][index];
 
 	while (true) {
 		char tok[27];	/* wiops=18446744073709551616 */
@@ -1376,11 +1414,37 @@  static ssize_t tg_set_max(struct kernfs_open_file *of,
 			goto out_finish;
 	}
 
-	tg->bps[READ][LIMIT_MAX] = v[0];
-	tg->bps[WRITE][LIMIT_MAX] = v[1];
-	tg->iops[READ][LIMIT_MAX] = v[2];
-	tg->iops[WRITE][LIMIT_MAX] = v[3];
-
+	if (index == LIMIT_MAX) {
+		if ((v[0] < tg->bps[READ][LIMIT_HIGH] &&
+		       tg->bps[READ][LIMIT_HIGH] != -1) ||
+		    (v[1] < tg->bps[WRITE][LIMIT_HIGH] &&
+		       tg->bps[WRITE][LIMIT_HIGH] != -1) ||
+		    (v[2] < tg->iops[READ][LIMIT_HIGH] &&
+		       tg->iops[READ][LIMIT_HIGH] != -1) ||
+		    (v[3] < tg->iops[WRITE][LIMIT_HIGH] &&
+		       tg->iops[WRITE][LIMIT_HIGH] != -1)) {
+			ret = -EINVAL;
+			goto out_finish;
+		}
+	} else if (index == LIMIT_HIGH) {
+		if ((v[0] > tg->bps[READ][LIMIT_MAX] && v[0] != -1) ||
+		    (v[1] > tg->bps[WRITE][LIMIT_MAX] && v[1] != -1) ||
+		    (v[2] > tg->iops[READ][LIMIT_MAX] && v[2] != -1) ||
+		    (v[3] > tg->iops[WRITE][LIMIT_MAX] && v[3] != -1)) {
+			ret = -EINVAL;
+			goto out_finish;
+		}
+	}
+	tg->bps[READ][index] = v[0];
+	tg->bps[WRITE][index] = v[1];
+	tg->iops[READ][index] = v[2];
+	tg->iops[WRITE][index] = v[3];
+
+	if (index == LIMIT_HIGH) {
+		blk_throtl_update_valid_limit(tg->td);
+		if (tg->td->limit_valid[LIMIT_HIGH])
+			tg->td->limit_index = LIMIT_HIGH;
+	}
 	tg_conf_updated(tg);
 	ret = 0;
 out_finish:
@@ -1390,10 +1454,18 @@  static ssize_t tg_set_max(struct kernfs_open_file *of,
 
 static struct cftype throtl_files[] = {
 	{
+		.name = "high",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = tg_print_limit,
+		.write = tg_set_limit,
+		.private = LIMIT_HIGH,
+	},
+	{
 		.name = "max",
 		.flags = CFTYPE_NOT_ON_ROOT,
-		.seq_show = tg_print_max,
-		.write = tg_set_max,
+		.seq_show = tg_print_limit,
+		.write = tg_set_limit,
+		.private = LIMIT_MAX,
 	},
 	{ }	/* terminate */
 };
@@ -1412,6 +1484,7 @@  static struct blkcg_policy blkcg_policy_throtl = {
 	.pd_alloc_fn		= throtl_pd_alloc,
 	.pd_init_fn		= throtl_pd_init,
 	.pd_online_fn		= throtl_pd_online,
+	.pd_offline_fn		= throtl_pd_offline,
 	.pd_free_fn		= throtl_pd_free,
 };
 
@@ -1591,6 +1664,7 @@  int blk_throtl_init(struct request_queue *q)
 	td->queue = q;
 
 	td->limit_valid[LIMIT_MAX] = true;
+	td->limit_index = LIMIT_MAX;
 	/* activate policy */
 	ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
 	if (ret)