diff mbox

[v3,RESEND,1/2] blk-throttle: track read and write request individually

Message ID ce8504cb-d3ad-d8cb-4db0-11788578d1af@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Joseph Qi Nov. 21, 2017, 1:38 a.m. UTC
From: Joseph Qi <qijiang.qj@alibaba-inc.com>

In mixed read/write workload on SSD, write latency is much lower than
read. But now we only track and record read latency and then use it as
threshold base for both read and write io latency accounting. As a
result, write io latency will always be considered as good and
bad_bio_cnt is much smaller than 20% of bio_cnt. That is to mean, the
tg to be checked will be treated as idle most of the time and still let
others dispatch more ios, even it is truly running under low limit and
wants its low limit to be guaranteed, which is not we expected in fact.
So track read and write request individually, which can bring more
precise latency control for low limit idle detection.

Signed-off-by: Joseph Qi <qijiang.qj@alibaba-inc.com>
Reviewed-by: Shaohua Li <shli@fb.com>
---
 block/blk-throttle.c | 134 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 79 insertions(+), 55 deletions(-)

Comments

Joseph Qi Nov. 24, 2017, 1:31 a.m. UTC | #1
Hi Jens,
Could you please give your advice for the two patches or pick them up if
you think they are good?

Thanks,
Joseph

On 17/11/21 09:38, Joseph Qi wrote:
> From: Joseph Qi <qijiang.qj@alibaba-inc.com>
> 
> In mixed read/write workload on SSD, write latency is much lower than
> read. But now we only track and record read latency and then use it as
> threshold base for both read and write io latency accounting. As a
> result, write io latency will always be considered as good and
> bad_bio_cnt is much smaller than 20% of bio_cnt. That is to mean, the
> tg to be checked will be treated as idle most of the time and still let
> others dispatch more ios, even it is truly running under low limit and
> wants its low limit to be guaranteed, which is not we expected in fact.
> So track read and write request individually, which can bring more
> precise latency control for low limit idle detection.
> 
> Signed-off-by: Joseph Qi <qijiang.qj@alibaba-inc.com>
> Reviewed-by: Shaohua Li <shli@fb.com>
> ---
>  block/blk-throttle.c | 134 ++++++++++++++++++++++++++++++---------------------
>  1 file changed, 79 insertions(+), 55 deletions(-)
> 
> diff --git a/block/blk-throttle.c b/block/blk-throttle.c
> index 96ad326..bf52035 100644
> --- a/block/blk-throttle.c
> +++ b/block/blk-throttle.c
> @@ -216,9 +216,9 @@ struct throtl_data
>  
>  	unsigned int scale;
>  
> -	struct latency_bucket tmp_buckets[LATENCY_BUCKET_SIZE];
> -	struct avg_latency_bucket avg_buckets[LATENCY_BUCKET_SIZE];
> -	struct latency_bucket __percpu *latency_buckets;
> +	struct latency_bucket tmp_buckets[2][LATENCY_BUCKET_SIZE];
> +	struct avg_latency_bucket avg_buckets[2][LATENCY_BUCKET_SIZE];
> +	struct latency_bucket __percpu *latency_buckets[2];
>  	unsigned long last_calculate_time;
>  	unsigned long filtered_latency;
>  
> @@ -2041,10 +2041,10 @@ static void blk_throtl_update_idletime(struct throtl_grp *tg)
>  #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
>  static void throtl_update_latency_buckets(struct throtl_data *td)
>  {
> -	struct avg_latency_bucket avg_latency[LATENCY_BUCKET_SIZE];
> -	int i, cpu;
> -	unsigned long last_latency = 0;
> -	unsigned long latency;
> +	struct avg_latency_bucket avg_latency[2][LATENCY_BUCKET_SIZE];
> +	int i, cpu, rw;
> +	unsigned long last_latency[2] = { 0 };
> +	unsigned long latency[2];
>  
>  	if (!blk_queue_nonrot(td->queue))
>  		return;
> @@ -2053,56 +2053,67 @@ static void throtl_update_latency_buckets(struct throtl_data *td)
>  	td->last_calculate_time = jiffies;
>  
>  	memset(avg_latency, 0, sizeof(avg_latency));
> -	for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
> -		struct latency_bucket *tmp = &td->tmp_buckets[i];
> -
> -		for_each_possible_cpu(cpu) {
> -			struct latency_bucket *bucket;
> -
> -			/* this isn't race free, but ok in practice */
> -			bucket = per_cpu_ptr(td->latency_buckets, cpu);
> -			tmp->total_latency += bucket[i].total_latency;
> -			tmp->samples += bucket[i].samples;
> -			bucket[i].total_latency = 0;
> -			bucket[i].samples = 0;
> -		}
> +	for (rw = READ; rw <= WRITE; rw++) {
> +		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
> +			struct latency_bucket *tmp = &td->tmp_buckets[rw][i];
> +
> +			for_each_possible_cpu(cpu) {
> +				struct latency_bucket *bucket;
> +
> +				/* this isn't race free, but ok in practice */
> +				bucket = per_cpu_ptr(td->latency_buckets[rw],
> +					cpu);
> +				tmp->total_latency += bucket[i].total_latency;
> +				tmp->samples += bucket[i].samples;
> +				bucket[i].total_latency = 0;
> +				bucket[i].samples = 0;
> +			}
>  
> -		if (tmp->samples >= 32) {
> -			int samples = tmp->samples;
> +			if (tmp->samples >= 32) {
> +				int samples = tmp->samples;
>  
> -			latency = tmp->total_latency;
> +				latency[rw] = tmp->total_latency;
>  
> -			tmp->total_latency = 0;
> -			tmp->samples = 0;
> -			latency /= samples;
> -			if (latency == 0)
> -				continue;
> -			avg_latency[i].latency = latency;
> +				tmp->total_latency = 0;
> +				tmp->samples = 0;
> +				latency[rw] /= samples;
> +				if (latency[rw] == 0)
> +					continue;
> +				avg_latency[rw][i].latency = latency[rw];
> +			}
>  		}
>  	}
>  
> -	for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
> -		if (!avg_latency[i].latency) {
> -			if (td->avg_buckets[i].latency < last_latency)
> -				td->avg_buckets[i].latency = last_latency;
> -			continue;
> -		}
> +	for (rw = READ; rw <= WRITE; rw++) {
> +		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
> +			if (!avg_latency[rw][i].latency) {
> +				if (td->avg_buckets[rw][i].latency < last_latency[rw])
> +					td->avg_buckets[rw][i].latency =
> +						last_latency[rw];
> +				continue;
> +			}
>  
> -		if (!td->avg_buckets[i].valid)
> -			latency = avg_latency[i].latency;
> -		else
> -			latency = (td->avg_buckets[i].latency * 7 +
> -				avg_latency[i].latency) >> 3;
> +			if (!td->avg_buckets[rw][i].valid)
> +				latency[rw] = avg_latency[rw][i].latency;
> +			else
> +				latency[rw] = (td->avg_buckets[rw][i].latency * 7 +
> +					avg_latency[rw][i].latency) >> 3;
>  
> -		td->avg_buckets[i].latency = max(latency, last_latency);
> -		td->avg_buckets[i].valid = true;
> -		last_latency = td->avg_buckets[i].latency;
> +			td->avg_buckets[rw][i].latency = max(latency[rw],
> +				last_latency[rw]);
> +			td->avg_buckets[rw][i].valid = true;
> +			last_latency[rw] = td->avg_buckets[rw][i].latency;
> +		}
>  	}
>  
>  	for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
>  		throtl_log(&td->service_queue,
> -			"Latency bucket %d: latency=%ld, valid=%d", i,
> -			td->avg_buckets[i].latency, td->avg_buckets[i].valid);
> +			"Latency bucket %d: read latency=%ld, read valid=%d, "
> +			"write latency=%ld, write valid=%d", i,
> +			td->avg_buckets[READ][i].latency,
> +			td->avg_buckets[READ][i].valid,
> +			td->avg_buckets[WRITE][i].latency,
> +			td->avg_buckets[WRITE][i].valid);
>  }
>  #else
>  static inline void throtl_update_latency_buckets(struct throtl_data *td)
> @@ -2249,16 +2260,17 @@ static void throtl_track_latency(struct throtl_data *td, sector_t size,
>  	struct latency_bucket *latency;
>  	int index;
>  
> -	if (!td || td->limit_index != LIMIT_LOW || op != REQ_OP_READ ||
> +	if (!td || td->limit_index != LIMIT_LOW ||
> +	    !(op == REQ_OP_READ || op == REQ_OP_WRITE) ||
>  	    !blk_queue_nonrot(td->queue))
>  		return;
>  
>  	index = request_bucket_index(size);
>  
> -	latency = get_cpu_ptr(td->latency_buckets);
> +	latency = get_cpu_ptr(td->latency_buckets[op]);
>  	latency[index].total_latency += time;
>  	latency[index].samples++;
> -	put_cpu_ptr(td->latency_buckets);
> +	put_cpu_ptr(td->latency_buckets[op]);
>  }
>  
>  void blk_throtl_stat_add(struct request *rq, u64 time_ns)
> @@ -2277,6 +2289,7 @@ void blk_throtl_bio_endio(struct bio *bio)
>  	unsigned long finish_time;
>  	unsigned long start_time;
>  	unsigned long lat;
> +	int rw = bio_data_dir(bio);
>  
>  	tg = bio->bi_cg_private;
>  	if (!tg)
> @@ -2305,7 +2318,7 @@ void blk_throtl_bio_endio(struct bio *bio)
>  
>  		bucket = request_bucket_index(
>  			blk_stat_size(&bio->bi_issue_stat));
> -		threshold = tg->td->avg_buckets[bucket].latency +
> +		threshold = tg->td->avg_buckets[rw][bucket].latency +
>  			tg->latency_target;
>  		if (lat > threshold)
>  			tg->bad_bio_cnt++;
> @@ -2398,9 +2411,16 @@ int blk_throtl_init(struct request_queue *q)
>  	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
>  	if (!td)
>  		return -ENOMEM;
> -	td->latency_buckets = __alloc_percpu(sizeof(struct latency_bucket) *
> +	td->latency_buckets[READ] = __alloc_percpu(sizeof(struct latency_bucket) *
> +		LATENCY_BUCKET_SIZE, __alignof__(u64));
> +	if (!td->latency_buckets[READ]) {
> +		kfree(td);
> +		return -ENOMEM;
> +	}
> +	td->latency_buckets[WRITE] = __alloc_percpu(sizeof(struct latency_bucket) *
>  		LATENCY_BUCKET_SIZE, __alignof__(u64));
> -	if (!td->latency_buckets) {
> +	if (!td->latency_buckets[WRITE]) {
> +		free_percpu(td->latency_buckets[READ]);
>  		kfree(td);
>  		return -ENOMEM;
>  	}
> @@ -2419,7 +2439,8 @@ int blk_throtl_init(struct request_queue *q)
>  	/* activate policy */
>  	ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
>  	if (ret) {
> -		free_percpu(td->latency_buckets);
> +		free_percpu(td->latency_buckets[READ]);
> +		free_percpu(td->latency_buckets[WRITE]);
>  		kfree(td);
>  	}
>  	return ret;
> @@ -2430,7 +2451,8 @@ void blk_throtl_exit(struct request_queue *q)
>  	BUG_ON(!q->td);
>  	throtl_shutdown_wq(q);
>  	blkcg_deactivate_policy(q, &blkcg_policy_throtl);
> -	free_percpu(q->td->latency_buckets);
> +	free_percpu(q->td->latency_buckets[READ]);
> +	free_percpu(q->td->latency_buckets[WRITE]);
>  	kfree(q->td);
>  }
>  
> @@ -2448,8 +2470,10 @@ void blk_throtl_register_queue(struct request_queue *q)
>  	} else {
>  		td->throtl_slice = DFL_THROTL_SLICE_HD;
>  		td->filtered_latency = LATENCY_FILTERED_HD;
> -		for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
> -			td->avg_buckets[i].latency = DFL_HD_BASELINE_LATENCY;
> +		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
> +			td->avg_buckets[READ][i].latency = DFL_HD_BASELINE_LATENCY;
> +			td->avg_buckets[WRITE][i].latency = DFL_HD_BASELINE_LATENCY;
> +		}
>  	}
>  #ifndef CONFIG_BLK_DEV_THROTTLING_LOW
>  	/* if no low limit, use previous default */
>
Jens Axboe Nov. 24, 2017, 5:13 a.m. UTC | #2
On 11/23/2017 06:31 PM, Joseph Qi wrote:
> Hi Jens,
> Could you please give your advice for the two patches or pick them up if
> you think they are good?

It looks OK to me, but my preference would be to push this until
4.16.
Joseph Qi Jan. 8, 2018, 12:05 p.m. UTC | #3
A polite ping for the two pending patches...

Thanks,
Joseph

On 17/11/24 13:13, Jens Axboe wrote:
> On 11/23/2017 06:31 PM, Joseph Qi wrote:
>> Hi Jens,
>> Could you please give your advice for the two patches or pick them up if
>> you think they are good?
> 
> It looks OK to me, but my preference would be to push this until
> 4.16.
>
Joseph Qi Jan. 19, 2018, 2:11 a.m. UTC | #4
Hi Jens,
Could you please pick the two pending patches for 4.16?
They all have been reviewed by Shaohua.

Thanks,
Joseph

On 18/1/8 20:05, Joseph Qi wrote:
> A polite ping for the two pending patches...
> 
> Thanks,
> Joseph
> 
> On 17/11/24 13:13, Jens Axboe wrote:
>> On 11/23/2017 06:31 PM, Joseph Qi wrote:
>>> Hi Jens,
>>> Could you please give your advice for the two patches or pick them up if
>>> you think they are good?
>>
>> It looks OK to me, but my preference would be to push this until
>> 4.16.
>>
Jens Axboe Jan. 19, 2018, 2:52 a.m. UTC | #5
On 1/18/18 7:11 PM, Joseph Qi wrote:
> Hi Jens,
> Could you please pick the two pending patches for 4.16?
> They all have been reviewed by Shaohua.

Sorry, I guess I forgot about this series. I picked up 1/2,
for some reason I don't have 2/2 and I can't find it on
linux-block either. The thread only shows the first patch.

Can you resend 2/2?
Joseph Qi Jan. 19, 2018, 2:54 a.m. UTC | #6
Sure, I will resend it.
Thanks.

Joseph

On 18/1/19 10:52, Jens Axboe wrote:
> On 1/18/18 7:11 PM, Joseph Qi wrote:
>> Hi Jens,
>> Could you please pick the two pending patches for 4.16?
>> They all have been reviewed by Shaohua.
> 
> Sorry, I guess I forgot about this series. I picked up 1/2,
> for some reason I don't have 2/2 and I can't find it on
> linux-block either. The thread only shows the first patch.
> 
> Can you resend 2/2?
>
diff mbox

Patch

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 96ad326..bf52035 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -216,9 +216,9 @@  struct throtl_data
 
 	unsigned int scale;
 
-	struct latency_bucket tmp_buckets[LATENCY_BUCKET_SIZE];
-	struct avg_latency_bucket avg_buckets[LATENCY_BUCKET_SIZE];
-	struct latency_bucket __percpu *latency_buckets;
+	struct latency_bucket tmp_buckets[2][LATENCY_BUCKET_SIZE];
+	struct avg_latency_bucket avg_buckets[2][LATENCY_BUCKET_SIZE];
+	struct latency_bucket __percpu *latency_buckets[2];
 	unsigned long last_calculate_time;
 	unsigned long filtered_latency;
 
@@ -2041,10 +2041,10 @@  static void blk_throtl_update_idletime(struct throtl_grp *tg)
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 static void throtl_update_latency_buckets(struct throtl_data *td)
 {
-	struct avg_latency_bucket avg_latency[LATENCY_BUCKET_SIZE];
-	int i, cpu;
-	unsigned long last_latency = 0;
-	unsigned long latency;
+	struct avg_latency_bucket avg_latency[2][LATENCY_BUCKET_SIZE];
+	int i, cpu, rw;
+	unsigned long last_latency[2] = { 0 };
+	unsigned long latency[2];
 
 	if (!blk_queue_nonrot(td->queue))
 		return;
@@ -2053,56 +2053,67 @@  static void throtl_update_latency_buckets(struct throtl_data *td)
 	td->last_calculate_time = jiffies;
 
 	memset(avg_latency, 0, sizeof(avg_latency));
-	for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
-		struct latency_bucket *tmp = &td->tmp_buckets[i];
-
-		for_each_possible_cpu(cpu) {
-			struct latency_bucket *bucket;
-
-			/* this isn't race free, but ok in practice */
-			bucket = per_cpu_ptr(td->latency_buckets, cpu);
-			tmp->total_latency += bucket[i].total_latency;
-			tmp->samples += bucket[i].samples;
-			bucket[i].total_latency = 0;
-			bucket[i].samples = 0;
-		}
+	for (rw = READ; rw <= WRITE; rw++) {
+		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
+			struct latency_bucket *tmp = &td->tmp_buckets[rw][i];
+
+			for_each_possible_cpu(cpu) {
+				struct latency_bucket *bucket;
+
+				/* this isn't race free, but ok in practice */
+				bucket = per_cpu_ptr(td->latency_buckets[rw],
+					cpu);
+				tmp->total_latency += bucket[i].total_latency;
+				tmp->samples += bucket[i].samples;
+				bucket[i].total_latency = 0;
+				bucket[i].samples = 0;
+			}
 
-		if (tmp->samples >= 32) {
-			int samples = tmp->samples;
+			if (tmp->samples >= 32) {
+				int samples = tmp->samples;
 
-			latency = tmp->total_latency;
+				latency[rw] = tmp->total_latency;
 
-			tmp->total_latency = 0;
-			tmp->samples = 0;
-			latency /= samples;
-			if (latency == 0)
-				continue;
-			avg_latency[i].latency = latency;
+				tmp->total_latency = 0;
+				tmp->samples = 0;
+				latency[rw] /= samples;
+				if (latency[rw] == 0)
+					continue;
+				avg_latency[rw][i].latency = latency[rw];
+			}
 		}
 	}
 
-	for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
-		if (!avg_latency[i].latency) {
-			if (td->avg_buckets[i].latency < last_latency)
-				td->avg_buckets[i].latency = last_latency;
-			continue;
-		}
+	for (rw = READ; rw <= WRITE; rw++) {
+		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
+			if (!avg_latency[rw][i].latency) {
+				if (td->avg_buckets[rw][i].latency < last_latency[rw])
+					td->avg_buckets[rw][i].latency =
+						last_latency[rw];
+				continue;
+			}
 
-		if (!td->avg_buckets[i].valid)
-			latency = avg_latency[i].latency;
-		else
-			latency = (td->avg_buckets[i].latency * 7 +
-				avg_latency[i].latency) >> 3;
+			if (!td->avg_buckets[rw][i].valid)
+				latency[rw] = avg_latency[rw][i].latency;
+			else
+				latency[rw] = (td->avg_buckets[rw][i].latency * 7 +
+					avg_latency[rw][i].latency) >> 3;
 
-		td->avg_buckets[i].latency = max(latency, last_latency);
-		td->avg_buckets[i].valid = true;
-		last_latency = td->avg_buckets[i].latency;
+			td->avg_buckets[rw][i].latency = max(latency[rw],
+				last_latency[rw]);
+			td->avg_buckets[rw][i].valid = true;
+			last_latency[rw] = td->avg_buckets[rw][i].latency;
+		}
 	}
 
 	for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
 		throtl_log(&td->service_queue,
-			"Latency bucket %d: latency=%ld, valid=%d", i,
-			td->avg_buckets[i].latency, td->avg_buckets[i].valid);
+			"Latency bucket %d: read latency=%ld, read valid=%d, "
+			"write latency=%ld, write valid=%d", i,
+			td->avg_buckets[READ][i].latency,
+			td->avg_buckets[READ][i].valid,
+			td->avg_buckets[WRITE][i].latency,
+			td->avg_buckets[WRITE][i].valid);
 }
 #else
 static inline void throtl_update_latency_buckets(struct throtl_data *td)
@@ -2249,16 +2260,17 @@  static void throtl_track_latency(struct throtl_data *td, sector_t size,
 	struct latency_bucket *latency;
 	int index;
 
-	if (!td || td->limit_index != LIMIT_LOW || op != REQ_OP_READ ||
+	if (!td || td->limit_index != LIMIT_LOW ||
+	    !(op == REQ_OP_READ || op == REQ_OP_WRITE) ||
 	    !blk_queue_nonrot(td->queue))
 		return;
 
 	index = request_bucket_index(size);
 
-	latency = get_cpu_ptr(td->latency_buckets);
+	latency = get_cpu_ptr(td->latency_buckets[op]);
 	latency[index].total_latency += time;
 	latency[index].samples++;
-	put_cpu_ptr(td->latency_buckets);
+	put_cpu_ptr(td->latency_buckets[op]);
 }
 
 void blk_throtl_stat_add(struct request *rq, u64 time_ns)
@@ -2277,6 +2289,7 @@  void blk_throtl_bio_endio(struct bio *bio)
 	unsigned long finish_time;
 	unsigned long start_time;
 	unsigned long lat;
+	int rw = bio_data_dir(bio);
 
 	tg = bio->bi_cg_private;
 	if (!tg)
@@ -2305,7 +2318,7 @@  void blk_throtl_bio_endio(struct bio *bio)
 
 		bucket = request_bucket_index(
 			blk_stat_size(&bio->bi_issue_stat));
-		threshold = tg->td->avg_buckets[bucket].latency +
+		threshold = tg->td->avg_buckets[rw][bucket].latency +
 			tg->latency_target;
 		if (lat > threshold)
 			tg->bad_bio_cnt++;
@@ -2398,9 +2411,16 @@  int blk_throtl_init(struct request_queue *q)
 	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
 	if (!td)
 		return -ENOMEM;
-	td->latency_buckets = __alloc_percpu(sizeof(struct latency_bucket) *
+	td->latency_buckets[READ] = __alloc_percpu(sizeof(struct latency_bucket) *
+		LATENCY_BUCKET_SIZE, __alignof__(u64));
+	if (!td->latency_buckets[READ]) {
+		kfree(td);
+		return -ENOMEM;
+	}
+	td->latency_buckets[WRITE] = __alloc_percpu(sizeof(struct latency_bucket) *
 		LATENCY_BUCKET_SIZE, __alignof__(u64));
-	if (!td->latency_buckets) {
+	if (!td->latency_buckets[WRITE]) {
+		free_percpu(td->latency_buckets[READ]);
 		kfree(td);
 		return -ENOMEM;
 	}
@@ -2419,7 +2439,8 @@  int blk_throtl_init(struct request_queue *q)
 	/* activate policy */
 	ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
 	if (ret) {
-		free_percpu(td->latency_buckets);
+		free_percpu(td->latency_buckets[READ]);
+		free_percpu(td->latency_buckets[WRITE]);
 		kfree(td);
 	}
 	return ret;
@@ -2430,7 +2451,8 @@  void blk_throtl_exit(struct request_queue *q)
 	BUG_ON(!q->td);
 	throtl_shutdown_wq(q);
 	blkcg_deactivate_policy(q, &blkcg_policy_throtl);
-	free_percpu(q->td->latency_buckets);
+	free_percpu(q->td->latency_buckets[READ]);
+	free_percpu(q->td->latency_buckets[WRITE]);
 	kfree(q->td);
 }
 
@@ -2448,8 +2470,10 @@  void blk_throtl_register_queue(struct request_queue *q)
 	} else {
 		td->throtl_slice = DFL_THROTL_SLICE_HD;
 		td->filtered_latency = LATENCY_FILTERED_HD;
-		for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
-			td->avg_buckets[i].latency = DFL_HD_BASELINE_LATENCY;
+		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
+			td->avg_buckets[READ][i].latency = DFL_HD_BASELINE_LATENCY;
+			td->avg_buckets[WRITE][i].latency = DFL_HD_BASELINE_LATENCY;
+		}
 	}
 #ifndef CONFIG_BLK_DEV_THROTTLING_LOW
 	/* if no low limit, use previous default */