diff mbox series

[RFC] add io_uring support in scsi layer

Message ID CAHsXFKFy+ZVvaCr=H224VGA755k45fAJhz5TaMz+tOP6hNpj1g@mail.gmail.com (mailing list archive)
State Changes Requested
Headers show
Series [RFC] add io_uring support in scsi layer | expand

Commit Message

Kashyap Desai Sept. 3, 2020, 4:14 p.m. UTC
Currently io_uring support is only available in the block layer.
This RFC is to extend support of mq_poll in the scsi layer.

megaraid_sas and mpt3sas driver will be immediate users of this interface.
Both the drivers can use mq_poll only if it has exposed more than one
nr_hw_queues.
Waiting for below changes to enable shared host tag support.

https://patchwork.kernel.org/cover/11724429/

Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
---
 drivers/scsi/scsi_lib.c  | 16 ++++++++++++++++
 include/scsi/scsi_cmnd.h |  1 +
 include/scsi/scsi_host.h |  3 +++
 3 files changed, 20 insertions(+)

Comments

Jens Axboe Sept. 3, 2020, 4:20 p.m. UTC | #1
On 9/3/20 10:14 AM, Kashyap Desai wrote:
> Currently io_uring support is only available in the block layer.
> This RFC is to extend support of mq_poll in the scsi layer.

I think this needs to clarify that io_uring with IOPOLL is not currently
supported, outside of that everything else should work and no extra
support in the driver is needed.

The summary and title makes it sound like it currently doesn't work at
all, which obviously isn't true!

> megaraid_sas and mpt3sas driver will be immediate users of this interface.
> Both the drivers can use mq_poll only if it has exposed more than one
> nr_hw_queues.
> Waiting for below changes to enable shared host tag support.

Just a quick question, do these low level drivers support non-irq mode
for requests? That's a requirement for IOPOLL support to work well, and
I don't think it'd be worthwhile to plumb anything up that _doesn't_
support pure non-IRQ mode. That's identical to the NVMe support, we will
not allow IOPOLL if you don't have explicit non-IRQ support.

Outside of that, no comments on this enablement patch, looks pretty
straight forward and fine to me.
Douglas Gilbert Sept. 3, 2020, 4:42 p.m. UTC | #2
On 2020-09-03 12:14 p.m., Kashyap Desai wrote:
> Currently io_uring support is only available in the block layer.
> This RFC is to extend support of mq_poll in the scsi layer.
> 
> megaraid_sas and mpt3sas driver will be immediate users of this interface.
> Both the drivers can use mq_poll only if it has exposed more than one
> nr_hw_queues.

Perhaps you could add some comments in scsi_host.h about what the
    int (* mq_poll)(struct Scsi_Host *shost, unsigned int queue_num);

callback should do. And you could implement it in the scsi_debug
driver which should have all the other hooks as it is part 15 of
the shared host tag support patchset.

> Waiting for below changes to enable shared host tag support.
> 
> https://patchwork.kernel.org/cover/11724429/

I'm testing this at the moment. So far so good.

Doug Gilbert

> 
> Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
> ---
>   drivers/scsi/scsi_lib.c  | 16 ++++++++++++++++
>   include/scsi/scsi_cmnd.h |  1 +
>   include/scsi/scsi_host.h |  3 +++
>   3 files changed, 20 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index 780cf084e366..7a3c59d2dfcc 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -1802,6 +1802,19 @@ static void scsi_mq_exit_request(struct
> blk_mq_tag_set *set, struct request *rq,
>                                 cmd->sense_buffer);
>   }
> 
> +
> +static int scsi_mq_poll(struct blk_mq_hw_ctx *hctx)
> +{
> +       struct request_queue *q = hctx->queue;
> +       struct scsi_device *sdev = q->queuedata;
> +       struct Scsi_Host *shost = sdev->host;
> +
> +       if (shost->hostt->mq_poll)
> +               return shost->hostt->mq_poll(shost, hctx->queue_num);
> +
> +       return 0;
> +}
> +
>   static int scsi_map_queues(struct blk_mq_tag_set *set)
>   {
>          struct Scsi_Host *shost = container_of(set, struct Scsi_Host, tag_set);
> @@ -1869,6 +1882,7 @@ static const struct blk_mq_ops scsi_mq_ops_no_commit = {
>          .cleanup_rq     = scsi_cleanup_rq,
>          .busy           = scsi_mq_lld_busy,
>          .map_queues     = scsi_map_queues,
> +       .poll           = scsi_mq_poll,
>   };
> 
> 
> @@ -1897,6 +1911,7 @@ static const struct blk_mq_ops scsi_mq_ops = {
>          .cleanup_rq     = scsi_cleanup_rq,
>          .busy           = scsi_mq_lld_busy,
>          .map_queues     = scsi_map_queues,
> +       .poll           = scsi_mq_poll,
>   };
> 
>   struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev)
> @@ -1929,6 +1944,7 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
>          else
>                  tag_set->ops = &scsi_mq_ops_no_commit;
>          tag_set->nr_hw_queues = shost->nr_hw_queues ? : 1;
> +       tag_set->nr_maps = shost->nr_maps ? : 1;
>          tag_set->queue_depth = shost->can_queue;
>          tag_set->cmd_size = cmd_size;
>          tag_set->numa_node = NUMA_NO_NODE;
> diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
> index e76bac4d14c5..5844374a85b1 100644
> --- a/include/scsi/scsi_cmnd.h
> +++ b/include/scsi/scsi_cmnd.h
> @@ -9,6 +9,7 @@
>   #include <linux/types.h>
>   #include <linux/timer.h>
>   #include <linux/scatterlist.h>
> +#include <scsi/scsi_host.h>
>   #include <scsi/scsi_device.h>
>   #include <scsi/scsi_request.h>
> 
> diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
> index 701f178b20ae..d34602c68d0b 100644
> --- a/include/scsi/scsi_host.h
> +++ b/include/scsi/scsi_host.h
> @@ -269,6 +269,8 @@ struct scsi_host_template {
>           * Status: OPTIONAL
>           */
>          int (* map_queues)(struct Scsi_Host *shost);
> +
> +       int (* mq_poll)(struct Scsi_Host *shost, unsigned int queue_num);
> 
>          /*
>           * Check if scatterlists need to be padded for DMA draining.
> @@ -610,6 +612,7 @@ struct Scsi_Host {
>           * the total queue depth is can_queue.
>           */
>          unsigned nr_hw_queues;
> +       unsigned nr_maps;
>          unsigned active_mode:2;
>          unsigned unchecked_isa_dma:1;
>
Kashyap Desai Sept. 3, 2020, 4:44 p.m. UTC | #3
> On 9/3/20 10:14 AM, Kashyap Desai wrote:
> > Currently io_uring support is only available in the block layer.
> > This RFC is to extend support of mq_poll in the scsi layer.
>
> I think this needs to clarify that io_uring with IOPOLL is not currently
> supported, outside of that everything else should work and no extra
> support
> in the driver is needed.
>
> The summary and title makes it sound like it currently doesn't work at
> all,
> which obviously isn't true!

I actually mean io_uring with IOPOLL support. I will fix this.

>
> > megaraid_sas and mpt3sas driver will be immediate users of this
> > interface.
> > Both the drivers can use mq_poll only if it has exposed more than one
> > nr_hw_queues.
> > Waiting for below changes to enable shared host tag support.
>
> Just a quick question, do these low level drivers support non-irq mode for
> requests? That's a requirement for IOPOLL support to work well, and I
> don't
> think it'd be worthwhile to plumb anything up that _doesn't_ support pure
> non-IRQ mode. That's identical to the NVMe support, we will not allow
> IOPOLL if you don't have explicit non-IRQ support.

I guess you mean non-IRQ mode = There will not be any msix vector associated
for poll_queues and h/w can still work in this mode.
If above is correct, yes both the controller can support non-IRQ mode, but
we need support of shared host tag as well for completeness of this feature
in driver.
Both the h/w are single submission queue and multiple reply queue, but using
shared host tagset support we will enable simulated multiple hw queue.

I have megaraid_sas driver patch available and it does identical to what
NVME driver does.  Driver allocates some extra  reply queues and it will be
marked as poll_queue.
This poll_queue will not have associated msix vectors. All the IO completion
on this queue will be done from IOPOLL interface.

I tested megaraid_sas driver having 8 poll_queues and using io_uring
hiprio=1 settings. It can reach 3.2M IOPs and there is *zero* interrupt.
This is what NVME does so I assume this is what you wanted to confirm here.

>
> Outside of that, no comments on this enablement patch, looks pretty
> straight
> forward and fine to me.

Thanks for review.

>
> --
> Jens Axboe
Jens Axboe Sept. 3, 2020, 4:48 p.m. UTC | #4
On 9/3/20 10:44 AM, Kashyap Desai wrote:
>> On 9/3/20 10:14 AM, Kashyap Desai wrote:
>>> Currently io_uring support is only available in the block layer.
>>> This RFC is to extend support of mq_poll in the scsi layer.
>>
>> I think this needs to clarify that io_uring with IOPOLL is not currently
>> supported, outside of that everything else should work and no extra
>> support
>> in the driver is needed.
>>
>> The summary and title makes it sound like it currently doesn't work at
>> all,
>> which obviously isn't true!
> 
> I actually mean io_uring with IOPOLL support. I will fix this.

Great, thanks.

>>> megaraid_sas and mpt3sas driver will be immediate users of this
>>> interface.
>>> Both the drivers can use mq_poll only if it has exposed more than one
>>> nr_hw_queues.
>>> Waiting for below changes to enable shared host tag support.
>>
>> Just a quick question, do these low level drivers support non-irq mode for
>> requests? That's a requirement for IOPOLL support to work well, and I
>> don't
>> think it'd be worthwhile to plumb anything up that _doesn't_ support pure
>> non-IRQ mode. That's identical to the NVMe support, we will not allow
>> IOPOLL if you don't have explicit non-IRQ support.
> 
> I guess you mean non-IRQ mode = There will not be any msix vector associated
> for poll_queues and h/w can still work in this mode.
> If above is correct, yes both the controller can support non-IRQ mode, but
> we need support of shared host tag as well for completeness of this feature
> in driver.
> Both the h/w are single submission queue and multiple reply queue, but using
> shared host tagset support we will enable simulated multiple hw queue.
> 
> I have megaraid_sas driver patch available and it does identical to what
> NVME driver does.  Driver allocates some extra  reply queues and it will be
> marked as poll_queue.
> This poll_queue will not have associated msix vectors. All the IO completion
> on this queue will be done from IOPOLL interface.
> 
> I tested megaraid_sas driver having 8 poll_queues and using io_uring
> hiprio=1 settings. It can reach 3.2M IOPs and there is *zero* interrupt.
> This is what NVME does so I assume this is what you wanted to confirm here.

This is exactly what I meant, just wanted to ensure it wasn't the poor mans
edition that we've seen before, we're IRQs are still triggered and you end
up getting completions from both manual finding via the poll hook, and from
the "normal" side. Because that kind of sucks.

What you have sounds exactly how it should be, both in terms of functionality
and implementation. Thanks for confirming!

Guess I need to take another look at the shared host tag set support, see if
we an get that in for 5.10.
Kashyap Desai Sept. 3, 2020, 4:51 p.m. UTC | #5
> > megaraid_sas and mpt3sas driver will be immediate users of this
> > interface.
> > Both the drivers can use mq_poll only if it has exposed more than one
> > nr_hw_queues.
>
> Perhaps you could add some comments in scsi_host.h about what the
>     int (* mq_poll)(struct Scsi_Host *shost, unsigned int queue_num);
>
> callback should do. And you could implement it in the scsi_debug driver
> which
> should have all the other hooks as it is part 15 of the shared host tag
> support
> patchset.

I will add supporting comment in  scsi_host.h.  I will also add scsi_debug
support and include it in my next patchset.

Kashyap

>
> > Waiting for below changes to enable shared host tag support.
> >
> > https://patchwork.kernel.org/cover/11724429/
>
> I'm testing this at the moment. So far so good.
>
> Doug Gilbert
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 780cf084e366..7a3c59d2dfcc 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1802,6 +1802,19 @@  static void scsi_mq_exit_request(struct
blk_mq_tag_set *set, struct request *rq,
                               cmd->sense_buffer);
 }

+
+static int scsi_mq_poll(struct blk_mq_hw_ctx *hctx)
+{
+       struct request_queue *q = hctx->queue;
+       struct scsi_device *sdev = q->queuedata;
+       struct Scsi_Host *shost = sdev->host;
+
+       if (shost->hostt->mq_poll)
+               return shost->hostt->mq_poll(shost, hctx->queue_num);
+
+       return 0;
+}
+
 static int scsi_map_queues(struct blk_mq_tag_set *set)
 {
        struct Scsi_Host *shost = container_of(set, struct Scsi_Host, tag_set);
@@ -1869,6 +1882,7 @@  static const struct blk_mq_ops scsi_mq_ops_no_commit = {
        .cleanup_rq     = scsi_cleanup_rq,
        .busy           = scsi_mq_lld_busy,
        .map_queues     = scsi_map_queues,
+       .poll           = scsi_mq_poll,
 };


@@ -1897,6 +1911,7 @@  static const struct blk_mq_ops scsi_mq_ops = {
        .cleanup_rq     = scsi_cleanup_rq,
        .busy           = scsi_mq_lld_busy,
        .map_queues     = scsi_map_queues,
+       .poll           = scsi_mq_poll,
 };

 struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev)
@@ -1929,6 +1944,7 @@  int scsi_mq_setup_tags(struct Scsi_Host *shost)
        else
                tag_set->ops = &scsi_mq_ops_no_commit;
        tag_set->nr_hw_queues = shost->nr_hw_queues ? : 1;
+       tag_set->nr_maps = shost->nr_maps ? : 1;
        tag_set->queue_depth = shost->can_queue;
        tag_set->cmd_size = cmd_size;
        tag_set->numa_node = NUMA_NO_NODE;
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index e76bac4d14c5..5844374a85b1 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -9,6 +9,7 @@ 
 #include <linux/types.h>
 #include <linux/timer.h>
 #include <linux/scatterlist.h>
+#include <scsi/scsi_host.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_request.h>

diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 701f178b20ae..d34602c68d0b 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -269,6 +269,8 @@  struct scsi_host_template {
         * Status: OPTIONAL
         */
        int (* map_queues)(struct Scsi_Host *shost);
+
+       int (* mq_poll)(struct Scsi_Host *shost, unsigned int queue_num);

        /*
         * Check if scatterlists need to be padded for DMA draining.
@@ -610,6 +612,7 @@  struct Scsi_Host {
         * the total queue depth is can_queue.
         */
        unsigned nr_hw_queues;
+       unsigned nr_maps;
        unsigned active_mode:2;
        unsigned unchecked_isa_dma:1;