diff mbox series

[v2,2/2] block/mq-deadline: Fix the tag reservation code

Message ID 20240509170149.7639-3-bvanassche@acm.org (mailing list archive)
State New, archived
Headers show
Series Fix the mq-deadline async_depth implementation | expand

Commit Message

Bart Van Assche May 9, 2024, 5:01 p.m. UTC
The current tag reservation code is based on a misunderstanding of the
meaning of data->shallow_depth. Fix the tag reservation code as follows:
* By default, do not reserve any tags for synchronous requests because
  for certain use cases reserving tags reduces performance. See also
  Harshit Mogalapalli, [bug-report] Performance regression with fio
  sequential-write on a multipath setup, 2024-03-07
  (https://lore.kernel.org/linux-block/5ce2ae5d-61e2-4ede-ad55-551112602401@oracle.com/)
* Reduce min_shallow_depth to one because min_shallow_depth must be less
  than or equal to any shallow_depth value.
* Scale dd->async_depth from the range [1, nr_requests] to [1,
  bits_per_sbitmap_word].

Cc: Christoph Hellwig <hch@lst.de>
Cc: Damien Le Moal <dlemoal@kernel.org>
Cc: Zhiguo Niu <zhiguo.niu@unisoc.com>
Fixes: 07757588e507 ("block/mq-deadline: Reserve 25% of scheduler tags for synchronous requests")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/mq-deadline.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

Comments

YangYang May 16, 2024, 8:14 a.m. UTC | #1
On 2024/5/10 1:01, Bart Van Assche wrote:
> The current tag reservation code is based on a misunderstanding of the
> meaning of data->shallow_depth. Fix the tag reservation code as follows:
> * By default, do not reserve any tags for synchronous requests because
>    for certain use cases reserving tags reduces performance. See also
>    Harshit Mogalapalli, [bug-report] Performance regression with fio
>    sequential-write on a multipath setup, 2024-03-07
>    (https://lore.kernel.org/linux-block/5ce2ae5d-61e2-4ede-ad55-551112602401@oracle.com/)
> * Reduce min_shallow_depth to one because min_shallow_depth must be less
>    than or equal any shallow_depth value.
> * Scale dd->async_depth from the range [1, nr_requests] to [1,
>    bits_per_sbitmap_word].
> 
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Damien Le Moal <dlemoal@kernel.org>
> Cc: Zhiguo Niu <zhiguo.niu@unisoc.com>
> Fixes: 07757588e507 ("block/mq-deadline: Reserve 25% of scheduler tags for synchronous requests")
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>   block/mq-deadline.c | 20 +++++++++++++++++---
>   1 file changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/block/mq-deadline.c b/block/mq-deadline.c
> index 94eede4fb9eb..acdc28756d9d 100644
> --- a/block/mq-deadline.c
> +++ b/block/mq-deadline.c
> @@ -487,6 +487,20 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
>   	return rq;
>   }
>   
> +/*
> + * 'depth' is a number in the range 1..INT_MAX representing a number of
> + * requests. Scale it with a factor (1 << bt->sb.shift) / q->nr_requests since
> + * 1..(1 << bt->sb.shift) is the range expected by sbitmap_get_shallow().
> + * Values larger than q->nr_requests have the same effect as q->nr_requests.
> + */
> +static int dd_to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth)
> +{
> +	struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags;
> +	const unsigned int nrr = hctx->queue->nr_requests;
> +
> +	return ((qdepth << bt->sb.shift) + nrr - 1) / nrr;
> +}
> +
>   /*
>    * Called by __blk_mq_alloc_request(). The shallow_depth value set by this
>    * function is used by __blk_mq_get_tag().
> @@ -503,7 +517,7 @@ static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
>   	 * Throttle asynchronous requests and writes such that these requests
>   	 * do not block the allocation of synchronous requests.
>   	 */
> -	data->shallow_depth = dd->async_depth;
> +	data->shallow_depth = dd_to_word_depth(data->hctx, dd->async_depth);
>   }
>   
>   /* Called by blk_mq_update_nr_requests(). */
> @@ -513,9 +527,9 @@ static void dd_depth_updated(struct blk_mq_hw_ctx *hctx)
>   	struct deadline_data *dd = q->elevator->elevator_data;
>   	struct blk_mq_tags *tags = hctx->sched_tags;
>   
> -	dd->async_depth = max(1UL, 3 * q->nr_requests / 4);
> +	dd->async_depth = q->nr_requests;
>   
> -	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth);
> +	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1);

If sbq->min_shallow_depth is set to 1, sbq->wake_batch will also be set
to 1. I guess this may result in batch wakeup not working as expected.

>   }
>   
>   /* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */
>
Bart Van Assche May 16, 2024, 9:27 p.m. UTC | #2
On 5/16/24 02:14, YangYang wrote:
>> @@ -513,9 +527,9 @@ static void dd_depth_updated(struct blk_mq_hw_ctx 
>> *hctx)
>>       struct deadline_data *dd = q->elevator->elevator_data;
>>       struct blk_mq_tags *tags = hctx->sched_tags;
>> -    dd->async_depth = max(1UL, 3 * q->nr_requests / 4);
>> +    dd->async_depth = q->nr_requests;
>> -    sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 
>> dd->async_depth);
>> +    sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1);
> 
> If sbq->min_shallow_depth is set to 1, sbq->wake_batch will also be set
> to 1. I guess this may result in batch wakeup not working as expected.

The value of the sbq->min_shallow_depth parameter may affect performance
but does not affect correctness. See also the comment above the
sbitmap_queue_min_shallow_depth() declaration. Is this sufficient to
address your concern?

Thanks,

Bart.
Zhiguo Niu June 4, 2024, 9:08 a.m. UTC | #3
Hi Bart Van Assche and Jens Axboe

Sorry to disturb you.
Do you have a plan for when these patch sets will be merged into the mainline?
Thanks!

-----邮件原件-----
发件人: Bart Van Assche <bvanassche@acm.org> 
发送时间: 2024年5月17日 5:28
收件人: YangYang <yang.yang@vivo.com>
抄送: linux-block@vger.kernel.org; Christoph Hellwig <hch@lst.de>; Damien Le Moal <dlemoal@kernel.org>; 牛志国 (Zhiguo Niu) <Zhiguo.Niu@unisoc.com>; Jens Axboe <axboe@kernel.dk>
主题: Re: [PATCH v2 2/2] block/mq-deadline: Fix the tag reservation code


注意: 这封邮件来自于外部。除非你确定邮件内容安全,否则不要点击任何链接和附件。
CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you recognize the sender and know the content is safe.



On 5/16/24 02:14, YangYang wrote:
>> @@ -513,9 +527,9 @@ static void dd_depth_updated(struct blk_mq_hw_ctx
>> *hctx)
>>       struct deadline_data *dd = q->elevator->elevator_data;
>>       struct blk_mq_tags *tags = hctx->sched_tags;
>> -    dd->async_depth = max(1UL, 3 * q->nr_requests / 4);
>> +    dd->async_depth = q->nr_requests;
>> -    sbitmap_queue_min_shallow_depth(&tags->bitmap_tags,
>> dd->async_depth);
>> +    sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1);
>
> If sbq->min_shallow_depth is set to 1, sbq->wake_batch will also be 
> set to 1. I guess this may result in batch wakeup not working as expected.

The value of the sbq->min_shallow_depth parameter may affect performance but does not affect correctness. See also the comment above the
sbitmap_queue_min_shallow_depth() declaration. Is this sufficient to address your concern?

Thanks,

Bart.
Bart Van Assche June 4, 2024, 11:48 a.m. UTC | #4
On 6/4/24 02:08, 牛志国 (Zhiguo Niu) wrote:
> Would you have a plan When will these patch sets be merged into the mainline?

These patches still apply without any changes to Jens' block/for-next
branch. I think the next step is that someone helps by posting
Reviewed-by or Tested-by tags for these patches.

Thanks,

Bart.
Zhiguo Niu July 1, 2024, 1:30 a.m. UTC | #5
Hi block developers,

Can you help review this patch series from Bart Van Assche?
https://lore.kernel.org/all/20240509170149.7639-1-bvanassche@acm.org/
[PATCH v2 1/2] block: Call .limit_depth() after .hctx has been set
[PATCH v2 2/2] block/mq-deadline: Fix the tag reservation code

These patches will fix the issue "there may warning happen if we set dd async_depth from user".
For more information about the warnings, please refer to the commit message:
https://lore.kernel.org/all/CAHJ8P3KEOC_DXQmZK3u7PHgZFmWpMVzPa6pgkOgpyoH7wgT5nw@mail.gmail.com/

If you need any tests, you can ask me; I can help if my experimental environment supports them.
Thanks!
-----邮件原件-----
发件人: Bart Van Assche <bvanassche@acm.org> 
发送时间: 2024年6月4日 19:49
收件人: 牛志国 (Zhiguo Niu) <Zhiguo.Niu@unisoc.com>; Jens Axboe <axboe@kernel.dk>
抄送: linux-block@vger.kernel.org; Christoph Hellwig <hch@lst.de>; Damien Le Moal <dlemoal@kernel.org>; 王皓 (Hao_hao Wang) <Hao_hao.Wang@unisoc.com>
主题: Re: 答复: [PATCH v2 2/2] block/mq-deadline: Fix the tag reservation code


注意: 这封邮件来自于外部。除非你确定邮件内容安全,否则不要点击任何链接和附件。
CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you recognize the sender and know the content is safe.



On 6/4/24 02:08, 牛志国 (Zhiguo Niu) wrote:
> Would you have a plan When will these patch sets be merged into the mainline?

These patches still apply without any changes to Jens' block/for-next branch. I think the next step is that someone helps by posting Reviewed-by or Tested-by tags for these patches.

Thanks,

Bart.
Bart Van Assche July 1, 2024, 11:21 p.m. UTC | #6
On 6/30/24 6:30 PM, 牛志国 (Zhiguo Niu) wrote:
> Can you help review this serials patch from Bart Van Assche?
> https://lore.kernel.org/all/20240509170149.7639-1-bvanassche@acm.org/
> [PATCH v2 1/2] block: Call .limit_depth() after .hctx has been set
> [PATCH v2 2/2] block/mq-deadline: Fix the tag reservation code
> 
> These patch will fix the issue "there may warning happen if we set dd async_depth from user",
> For more information about warnings, please refer to commit msg:
> https://lore.kernel.org/all/CAHJ8P3KEOC_DXQmZK3u7PHgZFmWpMVzPa6pgkOgpyoH7wgT5nw@mail.gmail.com/

Hi Zhiguo,

If these patches pass your tests then you can reply to these patches 
with your own Tested-by.

Thanks,

Bart.
Christoph Hellwig July 2, 2024, 12:09 p.m. UTC | #7
Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
diff mbox series

Patch

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 94eede4fb9eb..acdc28756d9d 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -487,6 +487,20 @@  static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
 	return rq;
 }
 
+/*
+ * 'depth' is a number in the range 1..INT_MAX representing a number of
+ * requests. Scale it with a factor (1 << bt->sb.shift) / q->nr_requests since
+ * 1..(1 << bt->sb.shift) is the range expected by sbitmap_get_shallow().
+ * Values larger than q->nr_requests have the same effect as q->nr_requests.
+ */
+static int dd_to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth)
+{
+	struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags;
+	const unsigned int nrr = hctx->queue->nr_requests;
+
+	return ((qdepth << bt->sb.shift) + nrr - 1) / nrr;
+}
+
 /*
  * Called by __blk_mq_alloc_request(). The shallow_depth value set by this
  * function is used by __blk_mq_get_tag().
@@ -503,7 +517,7 @@  static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
 	 * Throttle asynchronous requests and writes such that these requests
 	 * do not block the allocation of synchronous requests.
 	 */
-	data->shallow_depth = dd->async_depth;
+	data->shallow_depth = dd_to_word_depth(data->hctx, dd->async_depth);
 }
 
 /* Called by blk_mq_update_nr_requests(). */
@@ -513,9 +527,9 @@  static void dd_depth_updated(struct blk_mq_hw_ctx *hctx)
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct blk_mq_tags *tags = hctx->sched_tags;
 
-	dd->async_depth = max(1UL, 3 * q->nr_requests / 4);
+	dd->async_depth = q->nr_requests;
 
-	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth);
+	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1);
 }
 
 /* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */