[v2,5/5] block/null_blk: add queue_rqs() support

Message ID 20230913151616.3164338-6-chengming.zhou@linux.dev (mailing list archive)
State New, archived
Series blk-mq: optimize queue_rqs() support

Commit Message

Chengming Zhou Sept. 13, 2023, 3:16 p.m. UTC
From: Chengming Zhou <zhouchengming@bytedance.com>

Add batched mq_ops.queue_rqs() support to null_blk for testing. The
implementation is quite simple since null_blk doesn't have commit_rqs().

We simply handle each request one by one; if an error is encountered,
the request is left in the passed-in list and returned to the caller.

There is about a 3.6% IOPS improvement with fio/t/io_uring on null_blk
with hw_queue_depth=256 on my test VM, from 1.09M to 1.13M.

Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
 drivers/block/null_blk/main.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

Comments

Ming Lei Sept. 22, 2023, 8:54 a.m. UTC | #1
On Wed, Sep 13, 2023 at 03:16:16PM +0000, chengming.zhou@linux.dev wrote:
> From: Chengming Zhou <zhouchengming@bytedance.com>
> 
> Add batched mq_ops.queue_rqs() support to null_blk for testing. The
> implementation is quite simple since null_blk doesn't have commit_rqs().
> 
> We simply handle each request one by one; if an error is encountered,
> the request is left in the passed-in list and returned to the caller.
> 
> There is about a 3.6% IOPS improvement with fio/t/io_uring on null_blk
> with hw_queue_depth=256 on my test VM, from 1.09M to 1.13M.

I guess you pass 'shared_tags' to null_blk for the verification?

> 
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> ---
>  drivers/block/null_blk/main.c | 20 ++++++++++++++++++++
>  1 file changed, 20 insertions(+)
> 
> diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
> index 968090935eb2..79d6cd3c3d41 100644
> --- a/drivers/block/null_blk/main.c
> +++ b/drivers/block/null_blk/main.c
> @@ -1750,6 +1750,25 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	return null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
>  }
>  
> +static void null_queue_rqs(struct request **rqlist)
> +{
> +	struct request *requeue_list = NULL;
> +	struct request **requeue_lastp = &requeue_list;
> +	struct blk_mq_queue_data bd = { };
> +	blk_status_t ret;
> +
> +	do {
> +		struct request *rq = rq_list_pop(rqlist);
> +
> +		bd.rq = rq;
> +		ret = null_queue_rq(rq->mq_hctx, &bd);
> +		if (ret != BLK_STS_OK)
> +			rq_list_add_tail(&requeue_lastp, rq);
> +	} while (!rq_list_empty(*rqlist));
> +
> +	*rqlist = requeue_list;
> +}
> +

null_blk may not be the perfect example for showing queue_rqs(),
which is usually meant for handling requests in batch, but for test
or demo purposes it is fine:

Reviewed-by: Ming Lei <ming.lei@redhat.com>


Thanks
Ming
Chengming Zhou Sept. 23, 2023, 7:01 a.m. UTC | #2
On 2023/9/22 16:54, Ming Lei wrote:
> On Wed, Sep 13, 2023 at 03:16:16PM +0000, chengming.zhou@linux.dev wrote:
>> From: Chengming Zhou <zhouchengming@bytedance.com>
>>
>> Add batched mq_ops.queue_rqs() support to null_blk for testing. The
>> implementation is quite simple since null_blk doesn't have commit_rqs().
>>
>> We simply handle each request one by one; if an error is encountered,
>> the request is left in the passed-in list and returned to the caller.
>>
>> There is about a 3.6% IOPS improvement with fio/t/io_uring on null_blk
>> with hw_queue_depth=256 on my test VM, from 1.09M to 1.13M.
> 
> I guess you pass 'shared_tags' to null_blk for the verification?
IIRC it should be "modprobe null_blk hw_queue_depth=256 nr_devices=2 shared_tags=1".
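
For completeness, the whole run can be reproduced roughly as below; the
t/io_uring flags are only an illustration of a typical invocation, not
the exact command used for the numbers above:

  modprobe null_blk hw_queue_depth=256 nr_devices=2 shared_tags=1
  # fio's io_uring micro-benchmark; -d is the queue depth, -p0 disables
  # polled IO, -B1/-F1 use registered buffers/files
  ./t/io_uring -d128 -p0 -B1 -F1 /dev/nullb0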

> 
>>
>> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
>> ---
>>  drivers/block/null_blk/main.c | 20 ++++++++++++++++++++
>>  1 file changed, 20 insertions(+)
>>
>> diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
>> index 968090935eb2..79d6cd3c3d41 100644
>> --- a/drivers/block/null_blk/main.c
>> +++ b/drivers/block/null_blk/main.c
>> @@ -1750,6 +1750,25 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
>>  	return null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
>>  }
>>  
>> +static void null_queue_rqs(struct request **rqlist)
>> +{
>> +	struct request *requeue_list = NULL;
>> +	struct request **requeue_lastp = &requeue_list;
>> +	struct blk_mq_queue_data bd = { };
>> +	blk_status_t ret;
>> +
>> +	do {
>> +		struct request *rq = rq_list_pop(rqlist);
>> +
>> +		bd.rq = rq;
>> +		ret = null_queue_rq(rq->mq_hctx, &bd);
>> +		if (ret != BLK_STS_OK)
>> +			rq_list_add_tail(&requeue_lastp, rq);
>> +	} while (!rq_list_empty(*rqlist));
>> +
>> +	*rqlist = requeue_list;
>> +}
>> +
> 
> null_blk may not be the perfect example for showing queue_rqs(),
> which is usually meant for handling requests in batch, but for test
> or demo purposes it is fine:
> 
> Reviewed-by: Ming Lei <ming.lei@redhat.com>
> 

Yes, some other "real" driver would be a better choice, since it could
handle more things in batch to improve performance. Maybe the ublk
driver can benefit from this too.
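
To illustrate what such a driver would gain, a batched queue_rqs()
usually prepares every request first and notifies the hardware only once
at the end, roughly like the sketch below (drv_prep_rq() and
drv_kick_hw() are made-up placeholders, not an existing API):

	static void drv_queue_rqs(struct request **rqlist)
	{
		struct request *requeue_list = NULL;
		struct request **requeue_lastp = &requeue_list;
		struct request *rq;
		bool queued = false;

		while ((rq = rq_list_pop(rqlist))) {
			/* map/prepare the request, but don't notify the hw yet */
			if (drv_prep_rq(rq) == BLK_STS_OK)
				queued = true;
			else
				rq_list_add_tail(&requeue_lastp, rq);
		}

		/* the batching win: one doorbell/kick for the whole list */
		if (queued)
			drv_kick_hw();

		*rqlist = requeue_list;
	}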

Thanks!

Patch

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 968090935eb2..79d6cd3c3d41 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1750,6 +1750,25 @@  static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
 }
 
+static void null_queue_rqs(struct request **rqlist)
+{
+	struct request *requeue_list = NULL;
+	struct request **requeue_lastp = &requeue_list;
+	struct blk_mq_queue_data bd = { };
+	blk_status_t ret;
+
+	do {
+		struct request *rq = rq_list_pop(rqlist);
+
+		bd.rq = rq;
+		ret = null_queue_rq(rq->mq_hctx, &bd);
+		if (ret != BLK_STS_OK)
+			rq_list_add_tail(&requeue_lastp, rq);
+	} while (!rq_list_empty(*rqlist));
+
+	*rqlist = requeue_list;
+}
+
 static void cleanup_queue(struct nullb_queue *nq)
 {
 	bitmap_free(nq->tag_map);
@@ -1802,6 +1821,7 @@  static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
 
 static const struct blk_mq_ops null_mq_ops = {
 	.queue_rq       = null_queue_rq,
+	.queue_rqs	= null_queue_rqs,
 	.complete	= null_complete_rq,
 	.timeout	= null_timeout_rq,
 	.poll		= null_poll,