
[v2,2/2] blk-mq: Add a polling specific stats function

Message ID 1491413957-30885-3-git-send-email-sbates@raithlin.com (mailing list archive)
State New, archived

Commit Message

Stephen Bates April 5, 2017, 5:39 p.m. UTC
From: Stephen Bates <sbates@raithlin.com>

Rather than bucketing IO statistics based on direction only, we also
bucket based on the IO size. This leads to improved polling
performance. Update the bucket callback function and use it in the
polling latency estimation.

Signed-off-by: Stephen Bates <sbates@raithlin.com>
---
 block/blk-mq.c | 53 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 10 deletions(-)

Comments

Jens Axboe April 5, 2017, 6:14 p.m. UTC | #1
On 04/05/2017 11:39 AM, sbates@raithlin.com wrote:
> @@ -42,6 +42,33 @@ static LIST_HEAD(all_q_list);
>  static void blk_mq_poll_stats_start(struct request_queue *q);
>  static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
>  
> +static int blk_mq_poll_stats_bkt(const struct request *rq)
> +{
> +	int dir, bytes;
> +
> +	dir = blk_stat_rq_ddir(rq);
> +	bytes = blk_rq_bytes(rq);
> +
> +	if (bytes <= 512)
> +		return dir;
> +	else if (bytes <= 4096)
> +		return dir + 2;
> +	else if (bytes <= 8192)
> +		return dir + 4;
> +	else if (bytes <= 16384)
> +		return dir + 6;
> +	else if (bytes <= 32768)
> +		return dir + 8;
> +	else if (bytes <= 65536)
> +		return dir + 10;
> +	else
> +		return dir + 12;

Why not just have 8 buckets, and make it:

	bucket = ddir + ilog2(bytes) - 9;

and cap it at MAX_BUCKET (8) and put all those above into the top
bucket.
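
For illustration, that suggestion could be sketched roughly as below; the helper name, the guard for tiny requests, and the exact cap handling (8 buckets, indices 0..7) are assumptions, not part of the thread:

	/* sketch of the capped ilog2 bucketing suggested above */
	static int poll_stats_bkt_capped(const struct request *rq)
	{
		unsigned int bytes = blk_rq_bytes(rq);
		int ddir = blk_stat_rq_ddir(rq);

		if (bytes <= 512)
			return ddir;
		/* cap so that anything above the largest size lands in the top bucket */
		return min(ddir + ilog2(bytes) - 9, 7);
	}
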
Stephen Bates April 7, 2017, 12:11 p.m. UTC | #2
On 2017-04-05, 7:14 PM, "Jens Axboe" <axboe@kernel.dk> wrote:

> Why not just have 8 buckets, and make it:
>
> 	bucket = ddir + ilog2(bytes) - 9;
>
> and cap it at MAX_BUCKET (8) and put all those above into the top
> bucket.

Thanks. However, that equation does not keep the buckets for the two directions distinct; for example, a 512B write and a 1KB read both land in bucket 1. Instead we can use

bucket = ddir + 2*(ilog2(bytes) - 9);

and then bin any IO over 64K into the top bucket for its direction. I’ll implement this in a v3….
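
A rough sketch of that corrected mapping (the helper name, the eight sizes per direction from 512B to 64K, and the use of min() for the cap are assumptions for illustration, not something the thread specifies):

	static int poll_stats_bkt_interleaved(const struct request *rq)
	{
		int ddir = blk_stat_rq_ddir(rq);
		unsigned int bytes = blk_rq_bytes(rq);

		if (bytes <= 512)
			return ddir;
		/* read/write buckets interleave; IO over 64K shares the top pair */
		return ddir + 2 * min(ilog2(bytes) - 9, 7);
	}
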

Cheers

Stephen
Jens Axboe April 7, 2017, 2:01 p.m. UTC | #3
On 04/07/2017 06:11 AM, Stephen Bates wrote:
> On 2017-04-05, 7:14 PM, "Jens Axboe" <axboe@kernel.dk> wrote:
> 
>> Why not just have 8 buckets, and make it:
>>
>> 	bucket = ddir + ilog2(bytes) - 9;
>>
>> and cap it at MAX_BUCKET (8) and put all those above into the top
>> bucket.
> 
> Thanks. However, that equation does not keep the buckets for the two
> directions distinct. Instead we can use
> 
> bucket = ddir + 2*(ilog2(bytes) - 9);

It would be cleaner to just embed the fact that we have 2 sets of
identical buckets, and return 

	bucket = ilog2(bytes) - 9;

and have poll_stat be indexed by:

	->poll_stat[ddir][bucket];

instead.
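
A rough sketch of that two-dimensional scheme (the helper name, the bucket bound, and the lookup shown are assumptions for illustration):

	/* size-only bucket: 512B up to 64K, one per power of two */
	static int poll_stats_size_bkt(const struct request *rq)
	{
		unsigned int bytes = blk_rq_bytes(rq);

		if (bytes <= 512)
			return 0;
		return min(ilog2(bytes) - 9, 7);
	}

	/* the per-queue stats would then be read as, e.g.: */
	/*	stat = &q->poll_stat[blk_stat_rq_ddir(rq)][poll_stats_size_bkt(rq)]; */
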

Patch

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 061fc2c..8fb1fb0 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -42,6 +42,33 @@  static LIST_HEAD(all_q_list);
 static void blk_mq_poll_stats_start(struct request_queue *q);
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
 
+static int blk_mq_poll_stats_bkt(const struct request *rq)
+{
+	int dir, bytes;
+
+	dir = blk_stat_rq_ddir(rq);
+	bytes = blk_rq_bytes(rq);
+
+	if (bytes <= 512)
+		return dir;
+	else if (bytes <= 4096)
+		return dir + 2;
+	else if (bytes <= 8192)
+		return dir + 4;
+	else if (bytes <= 16384)
+		return dir + 6;
+	else if (bytes <= 32768)
+		return dir + 8;
+	else if (bytes <= 65536)
+		return dir + 10;
+	else
+		return dir + 12;
+
+	return -1;
+}
+/* Must be consistent with function above */
+#define BLK_MQ_POLL_STATS_BKTS 14
+
 /*
  * Check if any of the ctx's have pending work in this hardware queue
  */
@@ -2245,7 +2272,8 @@  struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	q->mq_ops = set->ops;
 
 	q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn,
-					     blk_stat_rq_ddir, 2, q);
+					     blk_mq_poll_stats_bkt,
+					     BLK_MQ_POLL_STATS_BKTS, q);
 	if (!q->poll_cb)
 		goto err_exit;
 
@@ -2663,11 +2691,12 @@  static void blk_mq_poll_stats_start(struct request_queue *q)
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
 {
 	struct request_queue *q = cb->data;
+	int bucket;
 
-	if (cb->stat[READ].nr_samples)
-		q->poll_stat[READ] = cb->stat[READ];
-	if (cb->stat[WRITE].nr_samples)
-		q->poll_stat[WRITE] = cb->stat[WRITE];
+	for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) {
+		if (cb->stat[bucket].nr_samples)
+			q->poll_stat[bucket] = cb->stat[bucket];
+	}
 }
 
 static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
@@ -2675,6 +2704,7 @@  static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
 				       struct request *rq)
 {
 	unsigned long ret = 0;
+	int bucket;
 
 	/*
 	 * If stats collection isn't on, don't sleep but turn it on for
@@ -2689,12 +2719,15 @@  static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
 	 * For instance, if the completion latencies are tight, we can
 	 * get closer than just half the mean. This is especially
 	 * important on devices where the completion latencies are longer
-	 * than ~10 usec.
+	 * than ~10 usec. We do use the stats for the relevant IO size
+	 * if available which does lead to better estimates.
 	 */
-	if (req_op(rq) == REQ_OP_READ && q->poll_stat[READ].nr_samples)
-		ret = (q->poll_stat[READ].mean + 1) / 2;
-	else if (req_op(rq) == REQ_OP_WRITE && q->poll_stat[WRITE].nr_samples)
-		ret = (q->poll_stat[WRITE].mean + 1) / 2;
+	bucket = blk_mq_poll_stats_bkt(rq);
+	if (bucket < 0)
+		return ret;
+
+	if (q->poll_stat[bucket].nr_samples)
+		ret = (q->poll_stat[bucket].mean + 1) / 2;
 
 	return ret;
 }