
[2/2] blk-mq: always call into the scheduler in blk_mq_make_request()

Message ID 20190919094547.67194-3-hare@suse.de (mailing list archive)
State New, archived
Series blk-mq I/O scheduling fixes

Commit Message

Hannes Reinecke Sept. 19, 2019, 9:45 a.m. UTC
From: Hannes Reinecke <hare@suse.com>

A scheduler might be attached even to devices exposing more than
one hardware queue, so the check for the number of hardware queues
is pointless and should be removed.

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 block/blk-mq.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

Comments

Damien Le Moal Sept. 19, 2019, 10:21 a.m. UTC | #1
On 2019/09/19 11:45, Hannes Reinecke wrote:
> From: Hannes Reinecke <hare@suse.com>
> 
> A scheduler might be attached even to devices exposing more than
> one hardware queue, so the check for the number of hardware queues
> is pointless and should be removed.
> 
> Signed-off-by: Hannes Reinecke <hare@suse.com>
> ---
>  block/blk-mq.c | 6 +-----
>  1 file changed, 1 insertion(+), 5 deletions(-)
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 44ff3c1442a4..faab542e4836 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -1931,7 +1931,6 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
>  
>  static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
>  {
> -	const int is_sync = op_is_sync(bio->bi_opf);
>  	const int is_flush_fua = op_is_flush(bio->bi_opf);
>  	struct blk_mq_alloc_data data = { .flags = 0};
>  	struct request *rq;
> @@ -1977,7 +1976,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
>  		/* bypass scheduler for flush rq */
>  		blk_insert_flush(rq);
>  		blk_mq_run_hw_queue(data.hctx, true);
> -	} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) {
> +	} else if (plug && q->mq_ops->commit_rqs) {
>  		/*
>  		 * Use plugging if we have a ->commit_rqs() hook as well, as
>  		 * we know the driver uses bd->last in a smart fashion.
> @@ -2020,9 +2019,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
>  			blk_mq_try_issue_directly(data.hctx, same_queue_rq,
>  					&cookie);
>  		}
> -	} else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
> -			!data.hctx->dispatch_busy)) {
> -		blk_mq_try_issue_directly(data.hctx, rq, &cookie);

It may be worth mentioning that blk_mq_sched_insert_request() will do a direct
insert of the request using __blk_mq_insert_request(). But that insert is
slightly different from what blk_mq_try_issue_directly() does with
__blk_mq_issue_directly() as the request in that case is passed along to the
device using queue->mq_ops->queue_rq() while __blk_mq_insert_request() will put
the request in ctx->rq_lists[type].

This removes the optimized case !q->elevator && !data.hctx->dispatch_busy, but I
am not sure of the actual performance impact yet. We may want to patch
blk_mq_sched_insert_request() to handle that case.

>  	} else {
>  		blk_mq_sched_insert_request(rq, false, true, true);
>  	}
>
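To make the contrast in #1 concrete, here is a heavily abridged sketch of the
two functions Damien names, condensed from block/blk-mq.c of this era. Tag
allocation, locking, budget handling and error paths are all omitted, so treat
this as an illustration of the two paths rather than the actual code:

/* Direct issue: the request is handed to the driver right away. */
static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
					    struct request *rq,
					    blk_qc_t *cookie, bool last)
{
	struct request_queue *q = rq->q;
	struct blk_mq_queue_data bd = {
		.rq = rq,
		.last = last,
	};

	/* Straight to the device via the driver's ->queue_rq() hook. */
	return q->mq_ops->queue_rq(hctx, &bd);
}

/* Insert path: the request only lands in the per-CPU software queue
 * list; it reaches the driver later, when the hw queue is run. */
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
			     bool at_head)
{
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	enum hctx_type type = hctx->type;

	if (at_head)
		list_add(&rq->queuelist, &ctx->rq_lists[type]);
	else
		list_add_tail(&rq->queuelist, &ctx->rq_lists[type]);
	blk_mq_hctx_mark_pending(hctx, ctx);
}

The cost at issue is thus the extra round-trip through ctx->rq_lists[] before
blk_mq_run_hw_queue() eventually dispatches the request.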
Ming Lei Sept. 19, 2019, 2:23 p.m. UTC | #2
On Thu, Sep 19, 2019 at 10:21:54AM +0000, Damien Le Moal wrote:
> On 2019/09/19 11:45, Hannes Reinecke wrote:
> > From: Hannes Reinecke <hare@suse.com>
> > 
> > A scheduler might be attached even to devices exposing more than
> > one hardware queue, so the check for the number of hardware queues
> > is pointless and should be removed.
> > 
> > Signed-off-by: Hannes Reinecke <hare@suse.com>
> > ---
> >  block/blk-mq.c | 6 +-----
> >  1 file changed, 1 insertion(+), 5 deletions(-)
> > 
> > diff --git a/block/blk-mq.c b/block/blk-mq.c
> > index 44ff3c1442a4..faab542e4836 100644
> > --- a/block/blk-mq.c
> > +++ b/block/blk-mq.c
> > @@ -1931,7 +1931,6 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
> >  
> >  static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
> >  {
> > -	const int is_sync = op_is_sync(bio->bi_opf);
> >  	const int is_flush_fua = op_is_flush(bio->bi_opf);
> >  	struct blk_mq_alloc_data data = { .flags = 0};
> >  	struct request *rq;
> > @@ -1977,7 +1976,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
> >  		/* bypass scheduler for flush rq */
> >  		blk_insert_flush(rq);
> >  		blk_mq_run_hw_queue(data.hctx, true);
> > -	} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) {
> > +	} else if (plug && q->mq_ops->commit_rqs) {
> >  		/*
> >  		 * Use plugging if we have a ->commit_rqs() hook as well, as
> >  		 * we know the driver uses bd->last in a smart fashion.
> > @@ -2020,9 +2019,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
> >  			blk_mq_try_issue_directly(data.hctx, same_queue_rq,
> >  					&cookie);
> >  		}
> > -	} else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
> > -			!data.hctx->dispatch_busy)) {
> > -		blk_mq_try_issue_directly(data.hctx, rq, &cookie);
> 
> It may be worth mentioning that blk_mq_sched_insert_request() will do a direct
> insert of the request using __blk_mq_insert_request(). But that insert is
> slightly different from what blk_mq_try_issue_directly() does with
> __blk_mq_issue_directly() as the request in that case is passed along to the
> device using queue->mq_ops->queue_rq() while __blk_mq_insert_request() will put
> the request in ctx->rq_lists[type].
> 
> This removes the optimized case !q->elevator && !data.hctx->dispatch_busy, but I
> am not sure of the actual performance impact yet. We may want to patch
> blk_mq_sched_insert_request() to handle that case.

The optimization did improve IOPS of single-queue SCSI SSDs a lot; see

commit 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8
Author: Ming Lei <ming.lei@redhat.com>
Date:   Tue Jul 10 09:03:31 2018 +0800

    blk-mq: issue directly if hw queue isn't busy in case of 'none'

    In case of the 'none' io scheduler, when the hw queue isn't busy, it
    isn't necessary to enqueue the request to the sw queue and dequeue it
    from the sw queue, because the request may be submitted to the hw
    queue asap without extra cost; meanwhile there shouldn't be many
    requests in the sw queue, so we don't need to worry about the effect
    on IO merging.

    There are still some single hw queue SCSI HBAs (HPSA, megaraid_sas, ...)
    which may connect high performance devices, so 'none' is often required
    for obtaining good performance.

    This patch improves IOPS and decreases CPU utilization on megaraid_sas,
    per Kashyap's test.


Thanks,
Ming
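Putting the commit text from #2 next to the code: the branch being deleted by
this patch is the one that implements that fast path. Below is a sketch of the
removed hunk with comments added for this discussion; the comments are
annotation, not part of the tree:

	} else if ((q->nr_hw_queues > 1 && is_sync) ||
		   (!q->elevator && !data.hctx->dispatch_busy)) {
		/*
		 * No scheduler ('none') and the hw queue is idle: issue
		 * straight to the driver, skipping the sw queue round-trip
		 * (enqueue to ctx->rq_lists[] plus a later dequeue) that
		 * commit 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8 set out
		 * to avoid.
		 */
		blk_mq_try_issue_directly(data.hctx, rq, &cookie);
	} else {
		/* Everything else goes through the sw queue / elevator. */
		blk_mq_sched_insert_request(rq, false, true, true);
	}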
Kashyap Desai Sept. 19, 2019, 3:48 p.m. UTC | #3
> > > -	} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) {
> > > +	} else if (plug && q->mq_ops->commit_rqs) {
> > >  		/*
> > >  		 * Use plugging if we have a ->commit_rqs() hook as well, as
> > >  		 * we know the driver uses bd->last in a smart fashion.
> > > @@ -2020,9 +2019,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
> > >  			blk_mq_try_issue_directly(data.hctx, same_queue_rq,
> > >  					&cookie);
> > >  		}
> > > -	} else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
> > > -			!data.hctx->dispatch_busy)) {
> > > -		blk_mq_try_issue_directly(data.hctx, rq, &cookie);
Hannes -

Prior to commit 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8, the check here
was only (q->nr_hw_queues > 1 && is_sync).
I am not sure whether the nr_hw_queues check is required at this place,
but the other part of the check (!q->elevator && !data.hctx->dispatch_busy),
which qualifies a request for direct dispatch, is required for higher
performance.

The recent MegaRaid and MPT HBA Aero series controllers are capable of
~3.0M IOPS, and for such high performance over a single hardware queue
commit 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8 is very important.

Kashyap


> >
> > It may be worth mentioning that blk_mq_sched_insert_request() will do
> > a direct insert of the request using __blk_mq_insert_request(). But
> > that insert is slightly different from what
> > blk_mq_try_issue_directly() does with
> > __blk_mq_issue_directly() as the request in that case is passed along
> > to the device using queue->mq_ops->queue_rq() while
> > __blk_mq_insert_request() will put the request in ctx->rq_lists[type].
> >
> > This removes the optimized case !q->elevator &&
> > !data.hctx->dispatch_busy, but I am not sure of the actual performance
> > impact yet. We may want to patch
> > blk_mq_sched_insert_request() to handle that case.
>
> The optimization did improve IOPS of single-queue SCSI SSDs a lot; see
>
> commit 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8
> Author: Ming Lei <ming.lei@redhat.com>
> Date:   Tue Jul 10 09:03:31 2018 +0800
>
>     blk-mq: issue directly if hw queue isn't busy in case of 'none'
>
>     In case of the 'none' io scheduler, when the hw queue isn't busy, it
>     isn't necessary to enqueue the request to the sw queue and dequeue it
>     from the sw queue, because the request may be submitted to the hw
>     queue asap without extra cost; meanwhile there shouldn't be many
>     requests in the sw queue, so we don't need to worry about the effect
>     on IO merging.
>
>     There are still some single hw queue SCSI HBAs (HPSA, megaraid_sas, ...)
>     which may connect high performance devices, so 'none' is often required
>     for obtaining good performance.
>
>     This patch improves IOPS and decreases CPU utilization on megaraid_sas,
>     per Kashyap's test.
>
>
> Thanks,
> Ming
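For the history Kashyap refers to in #3, the direct-issue condition in
blk_mq_make_request() evolved roughly as follows. This is a condensed sketch
for comparison, not a literal diff from the tree:

	/* Before commit 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8:
	 * direct issue only for sync I/O on multi-queue devices. */
	} else if (q->nr_hw_queues > 1 && is_sync) {
		blk_mq_try_issue_directly(data.hctx, rq, &cookie);

	/* After that commit (the branch this patch removes): additionally
	 * issue directly when no scheduler is attached and the hw queue
	 * isn't busy -- the case single-queue, high-IOPS HBAs rely on. */
	} else if ((q->nr_hw_queues > 1 && is_sync) ||
		   (!q->elevator && !data.hctx->dispatch_busy)) {
		blk_mq_try_issue_directly(data.hctx, rq, &cookie);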
Damien Le Moal Sept. 19, 2019, 4:13 p.m. UTC | #4
On 2019/09/19 17:48, Kashyap Desai wrote:
>>>> -	} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) {
>>>> +	} else if (plug && q->mq_ops->commit_rqs) {
>>>>  		/*
>>>>  		 * Use plugging if we have a ->commit_rqs() hook as well, as
>>>>  		 * we know the driver uses bd->last in a smart fashion.
>>>> @@ -2020,9 +2019,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
>>>>  			blk_mq_try_issue_directly(data.hctx, same_queue_rq,
>>>>  					&cookie);
>>>>  		}
>>>> -	} else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
>>>> -			!data.hctx->dispatch_busy)) {
>>>> -		blk_mq_try_issue_directly(data.hctx, rq, &cookie);
> Hannes -
> 
> Prior to commit 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8, the check here
> was only (q->nr_hw_queues > 1 && is_sync).
> I am not sure whether the nr_hw_queues check is required at this place,
> but the other part of the check (!q->elevator && !data.hctx->dispatch_busy),
> which qualifies a request for direct dispatch, is required for higher
> performance.
> 
> The recent MegaRaid and MPT HBA Aero series controllers are capable of
> ~3.0M IOPS, and for such high performance over a single hardware queue
> commit 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8 is very important.

Kashyap, Ming,

Thanks for the information. We will restore this case.

> 
> Kashyap
> 
> 
>>>
>>> It may be worth mentioning that blk_mq_sched_insert_request() will do
>>> a direct insert of the request using __blk_mq_insert_request(). But
>>> that insert is slightly different from what
>>> blk_mq_try_issue_directly() does with
>>> __blk_mq_issue_directly() as the request in that case is passed along
>>> to the device using queue->mq_ops->queue_rq() while
>>> __blk_mq_insert_request() will put the request in ctx->rq_lists[type].
>>>
>>> This removes the optimized case !q->elevator &&
>>> !data.hctx->dispatch_busy, but I am not sure of the actual performance
>>> impact yet. We may want to patch
>>> blk_mq_sched_insert_request() to handle that case.
>>
>> The optimization did improve IOPS of single-queue SCSI SSDs a lot; see
>>
>> commit 6ce3dd6eec114930cf2035a8bcb1e80477ed79a8
>> Author: Ming Lei <ming.lei@redhat.com>
>> Date:   Tue Jul 10 09:03:31 2018 +0800
>>
>>     blk-mq: issue directly if hw queue isn't busy in case of 'none'
>>
>>     In case of the 'none' io scheduler, when the hw queue isn't busy, it
>>     isn't necessary to enqueue the request to the sw queue and dequeue it
>>     from the sw queue, because the request may be submitted to the hw
>>     queue asap without extra cost; meanwhile there shouldn't be many
>>     requests in the sw queue, so we don't need to worry about the effect
>>     on IO merging.
>>
>>     There are still some single hw queue SCSI HBAs (HPSA, megaraid_sas, ...)
>>     which may connect high performance devices, so 'none' is often required
>>     for obtaining good performance.
>>
>>     This patch improves IOPS and decreases CPU utilization on megaraid_sas,
>>     per Kashyap's test.
>>
>>
>> Thanks,
>> Ming
>
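One way the restore Damien agrees to in #4 could look, if it were folded into
blk_mq_sched_insert_request() as suggested in #1, is sketched below. This is
purely hypothetical and not a merged patch; in particular,
blk_mq_try_issue_directly() is static to blk-mq.c at this point and would have
to be exposed to blk-mq-sched.c first:

/* Hypothetical: keep the 'none' + idle-hw-queue fast path even after
 * blk_mq_make_request() stops special-casing it. */
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
				 bool run_queue, bool async)
{
	struct request_queue *q = rq->q;
	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
	blk_qc_t cookie;

	if (!q->elevator && !hctx->dispatch_busy) {
		/* Bypass the sw queue and go straight to ->queue_rq(). */
		blk_mq_try_issue_directly(hctx, rq, &cookie);
		return;
	}

	/* ... existing elevator / sw queue insert logic ... */
}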

Patch

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 44ff3c1442a4..faab542e4836 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1931,7 +1931,6 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
 
 static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
-	const int is_sync = op_is_sync(bio->bi_opf);
 	const int is_flush_fua = op_is_flush(bio->bi_opf);
 	struct blk_mq_alloc_data data = { .flags = 0};
 	struct request *rq;
@@ -1977,7 +1976,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		/* bypass scheduler for flush rq */
 		blk_insert_flush(rq);
 		blk_mq_run_hw_queue(data.hctx, true);
-	} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) {
+	} else if (plug && q->mq_ops->commit_rqs) {
 		/*
 		 * Use plugging if we have a ->commit_rqs() hook as well, as
 		 * we know the driver uses bd->last in a smart fashion.
@@ -2020,9 +2019,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 			blk_mq_try_issue_directly(data.hctx, same_queue_rq,
 					&cookie);
 		}
-	} else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
-			!data.hctx->dispatch_busy)) {
-		blk_mq_try_issue_directly(data.hctx, rq, &cookie);
 	} else {
 		blk_mq_sched_insert_request(rq, false, true, true);
 	}