diff mbox series

[v1,3/3] scsi_debug: iouring iopoll support

Message ID 20201015133721.63476-1-kashyap.desai@broadcom.com (mailing list archive)
State Changes Requested
Headers show
Series io_uring iopoll in scsi layer | expand

Commit Message

Kashyap Desai Oct. 15, 2020, 1:37 p.m. UTC
Add support of iouring iopoll interface in scsi_debug.
This feature requires shared hosttag support in kernel and driver.

Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: dgilbert@interlog.com
Cc: linux-block@vger.kernel.org
---
 drivers/scsi/scsi_debug.c | 123 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)

Comments

Kashyap Desai Nov. 13, 2020, 10:50 a.m. UTC | #1
> -----Original Message-----
> From: Kashyap Desai [mailto:kashyap.desai@broadcom.com]
> Sent: Thursday, October 15, 2020 7:07 PM
> To: linux-scsi@vger.kernel.org
> Cc: Kashyap Desai <kashyap.desai@broadcom.com>; dgilbert@interlog.com;
> linux-block@vger.kernel.org
> Subject: [PATCH v1 3/3] scsi_debug: iouring iopoll support
>
> Add support of iouring iopoll interface in scsi_debug.
> This feature requires shared hosttag support in kernel and driver.
>
> Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
> Cc: dgilbert@interlog.com
> Cc: linux-block@vger.kernel.org
> ---
>  drivers/scsi/scsi_debug.c | 123 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 123 insertions(+)


Hi Doug - Any comment/feedback ?

Kashyap
Douglas Gilbert Nov. 15, 2020, 4:04 p.m. UTC | #2
On 2020-11-13 5:50 a.m., Kashyap Desai wrote:
>> -----Original Message-----
>> From: Kashyap Desai [mailto:kashyap.desai@broadcom.com]
>> Sent: Thursday, October 15, 2020 7:07 PM
>> To: linux-scsi@vger.kernel.org
>> Cc: Kashyap Desai <kashyap.desai@broadcom.com>; dgilbert@interlog.com;
>> linux-block@vger.kernel.org
>> Subject: [PATCH v1 3/3] scsi_debug: iouring iopoll support
>>
>> Add support of iouring iopoll interface in scsi_debug.
>> This feature requires shared hosttag support in kernel and driver.
>>
>> Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
>> Cc: dgilbert@interlog.com
>> Cc: linux-block@vger.kernel.org
>> ---
>>   drivers/scsi/scsi_debug.c | 123 ++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 123 insertions(+)
> 
> 
> Hi Doug - Any comment/feedback ?

Hi,
I'm testing this patch and have found an issue which I have taken up
directly with Kashyap. More to follow.

Doug Gilbert
Douglas Gilbert Nov. 24, 2020, 4:38 p.m. UTC | #3
On 2020-10-15 9:37 a.m., Kashyap Desai wrote:
> Add support of iouring iopoll interface in scsi_debug.
> This feature requires shared hosttag support in kernel and driver.

I am continuing to test this patch. There is one fix shown inline below
plus a question near the end.

Doug Gilbert

> Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
> Cc: dgilbert@interlog.com
> Cc: linux-block@vger.kernel.org
> ---
>   drivers/scsi/scsi_debug.c | 123 ++++++++++++++++++++++++++++++++++++++
>   1 file changed, 123 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
> index a87e40aec11f..4d9cc6af588c 100644
> --- a/drivers/scsi/scsi_debug.c
> +++ b/drivers/scsi/scsi_debug.c
> @@ -826,6 +826,7 @@ static int sdeb_zbc_max_open = DEF_ZBC_MAX_OPEN_ZONES;
>   static int sdeb_zbc_nr_conv = DEF_ZBC_NR_CONV_ZONES;
>   
>   static int submit_queues = DEF_SUBMIT_QUEUES;  /* > 1 for multi-queue (mq) */
> +static int poll_queues; /* iouring iopoll interface.*/
>   static struct sdebug_queue *sdebug_q_arr;  /* ptr to array of submit queues */
>   
>   static DEFINE_RWLOCK(atomic_rw);
> @@ -5422,6 +5423,14 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
>   	cmnd->host_scribble = (unsigned char *)sqcp;
>   	sd_dp = sqcp->sd_dp;
>   	spin_unlock_irqrestore(&sqp->qc_lock, iflags);
> +
> +	/* Do not complete IO from default completion path.
> +	 * Let it to be on queue.
> +	 * Completion should happen from mq_poll interface.
> +	 */
> +	if ((sqp - sdebug_q_arr) >= (submit_queues - poll_queues))
> +		return 0;
> +
>   	if (!sd_dp) {
>   		sd_dp = kzalloc(sizeof(*sd_dp), GFP_ATOMIC);
>   		if (!sd_dp) {
> @@ -5604,6 +5613,7 @@ module_param_named(sector_size, sdebug_sector_size, int, S_IRUGO);
>   module_param_named(statistics, sdebug_statistics, bool, S_IRUGO | S_IWUSR);
>   module_param_named(strict, sdebug_strict, bool, S_IRUGO | S_IWUSR);
>   module_param_named(submit_queues, submit_queues, int, S_IRUGO);
> +module_param_named(poll_queues, poll_queues, int, S_IRUGO);
>   module_param_named(tur_ms_to_ready, sdeb_tur_ms_to_ready, int, S_IRUGO);
>   module_param_named(unmap_alignment, sdebug_unmap_alignment, int, S_IRUGO);
>   module_param_named(unmap_granularity, sdebug_unmap_granularity, int, S_IRUGO);
> @@ -5673,6 +5683,7 @@ MODULE_PARM_DESC(sector_size, "logical block size in bytes (def=512)");
>   MODULE_PARM_DESC(statistics, "collect statistics on commands, queues (def=0)");
>   MODULE_PARM_DESC(strict, "stricter checks: reserved field in cdb (def=0)");
>   MODULE_PARM_DESC(submit_queues, "support for block multi-queue (def=1)");
> +MODULE_PARM_DESC(poll_queues, "support for iouring iopoll queues");
>   MODULE_PARM_DESC(tur_ms_to_ready, "TEST UNIT READY millisecs before initial good status (def=0)");
>   MODULE_PARM_DESC(unmap_alignment, "lowest aligned thin provisioning lba (def=0)");
>   MODULE_PARM_DESC(unmap_granularity, "thin provisioning granularity in blocks (def=1)");
> @@ -7140,6 +7151,104 @@ static int resp_not_ready(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
>   	return check_condition_result;
>   }
>   
> +static int sdebug_map_queues(struct Scsi_Host *shost)
> +{
> +	int i, qoff;
> +
> +	if (shost->nr_hw_queues == 1)
> +		return 0;
> +
> +	for (i = 0, qoff = 0; i < HCTX_MAX_TYPES; i++) {
> +		struct blk_mq_queue_map *map = &shost->tag_set.map[i];
> +
> +		map->nr_queues  = 0;
> +
> +		if (i == HCTX_TYPE_DEFAULT)
> +			map->nr_queues = submit_queues - poll_queues;
> +		else if (i == HCTX_TYPE_POLL)
> +			map->nr_queues = poll_queues;
> +
> +		if (!map->nr_queues) {
> +			BUG_ON(i == HCTX_TYPE_DEFAULT);
> +			continue;
> +		}
> +
> +		map->queue_offset = qoff;
> +		blk_mq_map_queues(map);
> +
> +		qoff += map->nr_queues;
> +	}
> +
> +	return 0;
> +
> +}
> +
> +static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
> +{
> +	int qc_idx;
> +	int retiring = 0;
> +	unsigned long iflags;
> +	struct sdebug_queue *sqp;
> +	struct sdebug_queued_cmd *sqcp;
> +	struct scsi_cmnd *scp;
> +	struct sdebug_dev_info *devip;
> +	int num_entries = 0;
> +
> +	sqp = sdebug_q_arr + queue_num;
> +
> +	do {
> +		spin_lock_irqsave(&sqp->qc_lock, iflags);
> +		qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
> +		if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE)))

The above line IMO needs to be:
		if (unlikely((qc_idx < 0) || (qc_idx >= sdebug_max_queue)))

If not, when sdebug_max_queue < SDEBUG_CANQUEUE and there is no request waiting
then "scp is NULL, ..." is reported suggesting there is an error.

> +			goto out;
> +
> +		sqcp = &sqp->qc_arr[qc_idx];
> +		scp = sqcp->a_cmnd;
> +		if (unlikely(scp == NULL)) {
> +			pr_err("scp is NULL, queue_num=%d, qc_idx=%d from %s\n",
> +			       queue_num, qc_idx, __func__);
> +			goto out;
> +		}
> +		devip = (struct sdebug_dev_info *)scp->device->hostdata;
> +		if (likely(devip))
> +			atomic_dec(&devip->num_in_q);
> +		else
> +			pr_err("devip=NULL from %s\n", __func__);
> +		if (unlikely(atomic_read(&retired_max_queue) > 0))
> +			retiring = 1;
> +
> +		sqcp->a_cmnd = NULL;
> +		if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) {
> +			pr_err("Unexpected completion sqp %p queue_num=%d qc_idx=%d from %s\n",
> +				sqp, queue_num, qc_idx, __func__);
> +			goto out;
> +		}
> +
> +		if (unlikely(retiring)) {	/* user has reduced max_queue */
> +			int k, retval;
> +
> +			retval = atomic_read(&retired_max_queue);
> +			if (qc_idx >= retval) {
> +				pr_err("index %d too large\n", retval);
> +				goto out;
> +			}
> +			k = find_last_bit(sqp->in_use_bm, retval);
> +			if ((k < sdebug_max_queue) || (k == retval))
> +				atomic_set(&retired_max_queue, 0);
> +			else
> +				atomic_set(&retired_max_queue, k + 1);
> +		}
> +		spin_unlock_irqrestore(&sqp->qc_lock, iflags);
> +		scp->scsi_done(scp); /* callback to mid level */
> +		num_entries++;
> +	} while (1);
> +
> +out:
> +	spin_unlock_irqrestore(&sqp->qc_lock, iflags);
> +	return num_entries;
> +}
> +
> +
>   static int scsi_debug_queuecommand(struct Scsi_Host *shost,
>   				   struct scsi_cmnd *scp)
>   {
> @@ -7318,6 +7427,8 @@ static struct scsi_host_template sdebug_driver_template = {
>   	.ioctl =		scsi_debug_ioctl,
>   	.queuecommand =		scsi_debug_queuecommand,
>   	.change_queue_depth =	sdebug_change_qdepth,
> +	.map_queues =		sdebug_map_queues,
> +	.mq_poll =		sdebug_blk_mq_poll,
>   	.eh_abort_handler =	scsi_debug_abort,
>   	.eh_device_reset_handler = scsi_debug_device_reset,
>   	.eh_target_reset_handler = scsi_debug_target_reset,
> @@ -7365,6 +7476,18 @@ static int sdebug_driver_probe(struct device *dev)
>   	if (sdebug_host_max_queue)
>   		hpnt->host_tagset = 1;
>   
> +	/* poll queues are possible for nr_hw_queues > 1 */
> +	if (hpnt->nr_hw_queues == 1)
> +		poll_queues = 0;
> +
> +	/* poll queues  */
> +	if (poll_queues >= submit_queues) {

So the above line rules out poll_queues == submit_queues; is that the
intention? If so, a short explanation of why in a comment would be
helpful. Helpful at least to me who would like to document this option.

> +		pr_warn("%s: trim poll_queues to 1\n", my_name);
> +		poll_queues = 1;
> +	}
> +	if (poll_queues)
> +		hpnt->nr_maps = 3;
> +
>   	sdbg_host->shost = hpnt;
>   	*((struct sdebug_host_info **)hpnt->hostdata) = sdbg_host;
>   	if ((hpnt->this_id >= 0) && (sdebug_num_tgts > hpnt->this_id))
>
Kashyap Desai Nov. 30, 2020, 9:06 a.m. UTC | #4
>
> On 2020-10-15 9:37 a.m., Kashyap Desai wrote:
> > Add support of iouring iopoll interface in scsi_debug.
> > This feature requires shared hosttag support in kernel and driver.
>
> I am continuing to test this patch. There is one fix shown inline below
> plus a
> question near the end.

Hi Doug,  I have created add-on patch which includes all your comment. I am
also able to see the issue you reported and below patch fix it.
I will hold V2 revision of the series and I will wait for your Review-by and
Tested-by Tag.

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 4d9cc6af588c..fb328253086d 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -5675,6 +5675,7 @@ MODULE_PARM_DESC(opt_xferlen_exp, "optimal transfer
length granularity exponent
 MODULE_PARM_DESC(opts, "1->noise, 2->medium_err, 4->timeout,
8->recovered_err... (def=0)");
 MODULE_PARM_DESC(per_host_store, "If set, next positive add_host will get
new store (def=0)");
 MODULE_PARM_DESC(physblk_exp, "physical block exponent (def=0)");
+MODULE_PARM_DESC(poll_queues, "support for iouring iopoll queues (1 to
max(submit_queues - 1)");
 MODULE_PARM_DESC(ptype, "SCSI peripheral type(def=0[disk])");
 MODULE_PARM_DESC(random, "If set, uniformly randomize command duration
between 0 and delay_in_ns");
 MODULE_PARM_DESC(removable, "claim to have removable media (def=0)");
@@ -5683,7 +5684,6 @@ MODULE_PARM_DESC(sector_size, "logical block size in
bytes (def=512)");
 MODULE_PARM_DESC(statistics, "collect statistics on commands, queues
(def=0)");
 MODULE_PARM_DESC(strict, "stricter checks: reserved field in cdb (def=0)");
 MODULE_PARM_DESC(submit_queues, "support for block multi-queue (def=1)");
-MODULE_PARM_DESC(poll_queues, "support for iouring iopoll queues");
 MODULE_PARM_DESC(tur_ms_to_ready, "TEST UNIT READY millisecs before initial
good status (def=0)");
 MODULE_PARM_DESC(unmap_alignment, "lowest aligned thin provisioning lba
(def=0)");
 MODULE_PARM_DESC(unmap_granularity, "thin provisioning granularity in
blocks (def=1)");
@@ -7199,7 +7199,7 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost,
unsigned int queue_num)
        do {
                spin_lock_irqsave(&sqp->qc_lock, iflags);
                qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
-               if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE)))
+               if (unlikely((qc_idx < 0) || (qc_idx >= sdebug_max_queue)))
                        goto out;

                sqcp = &sqp->qc_arr[qc_idx];
@@ -7477,10 +7477,17 @@ static int sdebug_driver_probe(struct device *dev)
                hpnt->host_tagset = 1;

        /* poll queues are possible for nr_hw_queues > 1 */
-       if (hpnt->nr_hw_queues == 1)
+       if (hpnt->nr_hw_queues == 1 || (poll_queues < 1)) {
+               pr_warn("%s: trim poll_queues to 0. poll_q/nr_hw = (%d/%d)
\n",
+                        my_name, poll_queues, hpnt->nr_hw_queues);
                poll_queues = 0;
+       }

-       /* poll queues  */
+       /*
+        * Poll queues don't need interrupts, but we need at least one I/O
queue
+        * left over for non-polled I/O.
+        * If condition not met, trim poll_queues to 1 (just for
simplicity).
+        */
        if (poll_queues >= submit_queues) {
                pr_warn("%s: trim poll_queues to 1\n", my_name);
                poll_queues = 1;


> > +	do {
> > +		spin_lock_irqsave(&sqp->qc_lock, iflags);
> > +		qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
> > +		if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE)))
>
> The above line IMO needs to be:
> 		if (unlikely((qc_idx < 0) || (qc_idx >= sdebug_max_queue)))
>
> If not, when sdebug_max_queue < SDEBUG_CANQUEUE and there is no
> request waiting then "scp is NULL, ..." is reported suggesting there is an
> error.

BTW -  Is below piece of code at sdebug_q_cmd_complete() requires similar
change ?
Use sdebug_max_queue instead of SDEBUG_CANQUEUE
        if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE))) {
                pr_err("wild qc_idx=%d\n", qc_idx);
                return;
        }

Kashyap
Douglas Gilbert Nov. 30, 2020, 7:42 p.m. UTC | #5
On 2020-11-30 4:06 a.m., Kashyap Desai wrote:
>>
>> On 2020-10-15 9:37 a.m., Kashyap Desai wrote:
>>> Add support of iouring iopoll interface in scsi_debug.
>>> This feature requires shared hosttag support in kernel and driver.
>>
>> I am continuing to test this patch. There is one fix shown inline below
>> plus a
>> question near the end.
> 
> Hi Doug,  I have created add-on patch which includes all your comment. I am
> also able to see the issue you reported and below patch fix it.
> I will hold V2 revision of the series and I will wait for your Review-by and
> Tested-by Tag.
> 
> diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
> index 4d9cc6af588c..fb328253086d 100644
> --- a/drivers/scsi/scsi_debug.c
> +++ b/drivers/scsi/scsi_debug.c
> @@ -5675,6 +5675,7 @@ MODULE_PARM_DESC(opt_xferlen_exp, "optimal transfer
> length granularity exponent
>   MODULE_PARM_DESC(opts, "1->noise, 2->medium_err, 4->timeout,
> 8->recovered_err... (def=0)");
>   MODULE_PARM_DESC(per_host_store, "If set, next positive add_host will get
> new store (def=0)");
>   MODULE_PARM_DESC(physblk_exp, "physical block exponent (def=0)");
> +MODULE_PARM_DESC(poll_queues, "support for iouring iopoll queues (1 to
> max(submit_queues - 1)");
>   MODULE_PARM_DESC(ptype, "SCSI peripheral type(def=0[disk])");
>   MODULE_PARM_DESC(random, "If set, uniformly randomize command duration
> between 0 and delay_in_ns");
>   MODULE_PARM_DESC(removable, "claim to have removable media (def=0)");
> @@ -5683,7 +5684,6 @@ MODULE_PARM_DESC(sector_size, "logical block size in
> bytes (def=512)");
>   MODULE_PARM_DESC(statistics, "collect statistics on commands, queues
> (def=0)");
>   MODULE_PARM_DESC(strict, "stricter checks: reserved field in cdb (def=0)");
>   MODULE_PARM_DESC(submit_queues, "support for block multi-queue (def=1)");
> -MODULE_PARM_DESC(poll_queues, "support for iouring iopoll queues");
>   MODULE_PARM_DESC(tur_ms_to_ready, "TEST UNIT READY millisecs before initial
> good status (def=0)");
>   MODULE_PARM_DESC(unmap_alignment, "lowest aligned thin provisioning lba
> (def=0)");
>   MODULE_PARM_DESC(unmap_granularity, "thin provisioning granularity in
> blocks (def=1)");
> @@ -7199,7 +7199,7 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost,
> unsigned int queue_num)
>          do {
>                  spin_lock_irqsave(&sqp->qc_lock, iflags);
>                  qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
> -               if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE)))
> +               if (unlikely((qc_idx < 0) || (qc_idx >= sdebug_max_queue)))
>                          goto out;
> 
>                  sqcp = &sqp->qc_arr[qc_idx];
> @@ -7477,10 +7477,17 @@ static int sdebug_driver_probe(struct device *dev)
>                  hpnt->host_tagset = 1;
> 
>          /* poll queues are possible for nr_hw_queues > 1 */
> -       if (hpnt->nr_hw_queues == 1)
> +       if (hpnt->nr_hw_queues == 1 || (poll_queues < 1)) {
> +               pr_warn("%s: trim poll_queues to 0. poll_q/nr_hw = (%d/%d)
> \n",
> +                        my_name, poll_queues, hpnt->nr_hw_queues);
>                  poll_queues = 0;
> +       }
> 
> -       /* poll queues  */
> +       /*
> +        * Poll queues don't need interrupts, but we need at least one I/O
> queue
> +        * left over for non-polled I/O.
> +        * If condition not met, trim poll_queues to 1 (just for
> simplicity).
> +        */
>          if (poll_queues >= submit_queues) {
>                  pr_warn("%s: trim poll_queues to 1\n", my_name);
>                  poll_queues = 1;
> 
>

Kashyap,
I struggled with this patch, first the line wrap, then the last two
patch segments not applying. Could you send me the scsi_debug.c file
attached to an email?

>>> +	do {
>>> +		spin_lock_irqsave(&sqp->qc_lock, iflags);
>>> +		qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
>>> +		if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE)))
>>
>> The above line IMO needs to be:
>> 		if (unlikely((qc_idx < 0) || (qc_idx >= sdebug_max_queue)))
>>
>> If not, when sdebug_max_queue < SDEBUG_CANQUEUE and there is no
>> request waiting then "scp is NULL, ..." is reported suggesting there is an
>> error.
> 
> BTW -  Is below piece of code at sdebug_q_cmd_complete() requires similar
> change ?
> Use sdebug_max_queue instead of SDEBUG_CANQUEUE
>          if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE))) {
>                  pr_err("wild qc_idx=%d\n", qc_idx);
>                  return;
>          }

Yes, I need to look at this. sdebug_max_queue is initialized to
SDEBUG_CANQUEUE but then can be overridden by the invocation line parameters.
Several arrays in structures are sized by SDEBUG_CANQUEUE which will
remain. But most SDEBUG_CANQUEUE uses inside driver functions can probably
be replaced by sdebug_max_queue when I confirm that it is safe. Since
sdebug_max_queue <= SDEBUG_CANQUEUE and the fields in between should
always be zero, the current situation just leads to wasted cycles.

Doug Gilbert
Douglas Gilbert Dec. 1, 2020, 4:56 p.m. UTC | #6
On 2020-11-30 4:06 a.m., Kashyap Desai wrote:
>>
>> On 2020-10-15 9:37 a.m., Kashyap Desai wrote:
>>> Add support of iouring iopoll interface in scsi_debug.
>>> This feature requires shared hosttag support in kernel and driver.
>>
>> I am continuing to test this patch. There is one fix shown inline below
>> plus a
>> question near the end.
> 
> Hi Doug,  I have created add-on patch which includes all your comment. I am
> also able to see the issue you reported and below patch fix it.
> I will hold V2 revision of the series and I will wait for your Review-by and
> Tested-by Tag.

Thanks, that is a good explanation of why poll_queues must be less than
submit_queues.

Acked-by: Douglas Gilbert <dgilbert@interlog.com>
Tested-by: Douglas Gilbert <dgilbert@interlog.com>
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index a87e40aec11f..4d9cc6af588c 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -826,6 +826,7 @@  static int sdeb_zbc_max_open = DEF_ZBC_MAX_OPEN_ZONES;
 static int sdeb_zbc_nr_conv = DEF_ZBC_NR_CONV_ZONES;
 
 static int submit_queues = DEF_SUBMIT_QUEUES;  /* > 1 for multi-queue (mq) */
+static int poll_queues; /* iouring iopoll interface.*/
 static struct sdebug_queue *sdebug_q_arr;  /* ptr to array of submit queues */
 
 static DEFINE_RWLOCK(atomic_rw);
@@ -5422,6 +5423,14 @@  static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 	cmnd->host_scribble = (unsigned char *)sqcp;
 	sd_dp = sqcp->sd_dp;
 	spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+
+	/* Do not complete IO from default completion path.
+	 * Let it to be on queue.
+	 * Completion should happen from mq_poll interface.
+	 */
+	if ((sqp - sdebug_q_arr) >= (submit_queues - poll_queues))
+		return 0;
+
 	if (!sd_dp) {
 		sd_dp = kzalloc(sizeof(*sd_dp), GFP_ATOMIC);
 		if (!sd_dp) {
@@ -5604,6 +5613,7 @@  module_param_named(sector_size, sdebug_sector_size, int, S_IRUGO);
 module_param_named(statistics, sdebug_statistics, bool, S_IRUGO | S_IWUSR);
 module_param_named(strict, sdebug_strict, bool, S_IRUGO | S_IWUSR);
 module_param_named(submit_queues, submit_queues, int, S_IRUGO);
+module_param_named(poll_queues, poll_queues, int, S_IRUGO);
 module_param_named(tur_ms_to_ready, sdeb_tur_ms_to_ready, int, S_IRUGO);
 module_param_named(unmap_alignment, sdebug_unmap_alignment, int, S_IRUGO);
 module_param_named(unmap_granularity, sdebug_unmap_granularity, int, S_IRUGO);
@@ -5673,6 +5683,7 @@  MODULE_PARM_DESC(sector_size, "logical block size in bytes (def=512)");
 MODULE_PARM_DESC(statistics, "collect statistics on commands, queues (def=0)");
 MODULE_PARM_DESC(strict, "stricter checks: reserved field in cdb (def=0)");
 MODULE_PARM_DESC(submit_queues, "support for block multi-queue (def=1)");
+MODULE_PARM_DESC(poll_queues, "support for iouring iopoll queues");
 MODULE_PARM_DESC(tur_ms_to_ready, "TEST UNIT READY millisecs before initial good status (def=0)");
 MODULE_PARM_DESC(unmap_alignment, "lowest aligned thin provisioning lba (def=0)");
 MODULE_PARM_DESC(unmap_granularity, "thin provisioning granularity in blocks (def=1)");
@@ -7140,6 +7151,104 @@  static int resp_not_ready(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
 	return check_condition_result;
 }
 
+static int sdebug_map_queues(struct Scsi_Host *shost)
+{
+	int i, qoff;
+
+	if (shost->nr_hw_queues == 1)
+		return 0;
+
+	for (i = 0, qoff = 0; i < HCTX_MAX_TYPES; i++) {
+		struct blk_mq_queue_map *map = &shost->tag_set.map[i];
+
+		map->nr_queues  = 0;
+
+		if (i == HCTX_TYPE_DEFAULT)
+			map->nr_queues = submit_queues - poll_queues;
+		else if (i == HCTX_TYPE_POLL)
+			map->nr_queues = poll_queues;
+
+		if (!map->nr_queues) {
+			BUG_ON(i == HCTX_TYPE_DEFAULT);
+			continue;
+		}
+
+		map->queue_offset = qoff;
+		blk_mq_map_queues(map);
+
+		qoff += map->nr_queues;
+	}
+
+	return 0;
+
+}
+
+static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
+{
+	int qc_idx;
+	int retiring = 0;
+	unsigned long iflags;
+	struct sdebug_queue *sqp;
+	struct sdebug_queued_cmd *sqcp;
+	struct scsi_cmnd *scp;
+	struct sdebug_dev_info *devip;
+	int num_entries = 0;
+
+	sqp = sdebug_q_arr + queue_num;
+
+	do {
+		spin_lock_irqsave(&sqp->qc_lock, iflags);
+		qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
+		if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE)))
+			goto out;
+
+		sqcp = &sqp->qc_arr[qc_idx];
+		scp = sqcp->a_cmnd;
+		if (unlikely(scp == NULL)) {
+			pr_err("scp is NULL, queue_num=%d, qc_idx=%d from %s\n",
+			       queue_num, qc_idx, __func__);
+			goto out;
+		}
+		devip = (struct sdebug_dev_info *)scp->device->hostdata;
+		if (likely(devip))
+			atomic_dec(&devip->num_in_q);
+		else
+			pr_err("devip=NULL from %s\n", __func__);
+		if (unlikely(atomic_read(&retired_max_queue) > 0))
+			retiring = 1;
+
+		sqcp->a_cmnd = NULL;
+		if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) {
+			pr_err("Unexpected completion sqp %p queue_num=%d qc_idx=%d from %s\n",
+				sqp, queue_num, qc_idx, __func__);
+			goto out;
+		}
+
+		if (unlikely(retiring)) {	/* user has reduced max_queue */
+			int k, retval;
+
+			retval = atomic_read(&retired_max_queue);
+			if (qc_idx >= retval) {
+				pr_err("index %d too large\n", retval);
+				goto out;
+			}
+			k = find_last_bit(sqp->in_use_bm, retval);
+			if ((k < sdebug_max_queue) || (k == retval))
+				atomic_set(&retired_max_queue, 0);
+			else
+				atomic_set(&retired_max_queue, k + 1);
+		}
+		spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+		scp->scsi_done(scp); /* callback to mid level */
+		num_entries++;
+	} while (1);
+
+out:
+	spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+	return num_entries;
+}
+
+
 static int scsi_debug_queuecommand(struct Scsi_Host *shost,
 				   struct scsi_cmnd *scp)
 {
@@ -7318,6 +7427,8 @@  static struct scsi_host_template sdebug_driver_template = {
 	.ioctl =		scsi_debug_ioctl,
 	.queuecommand =		scsi_debug_queuecommand,
 	.change_queue_depth =	sdebug_change_qdepth,
+	.map_queues =		sdebug_map_queues,
+	.mq_poll =		sdebug_blk_mq_poll,
 	.eh_abort_handler =	scsi_debug_abort,
 	.eh_device_reset_handler = scsi_debug_device_reset,
 	.eh_target_reset_handler = scsi_debug_target_reset,
@@ -7365,6 +7476,18 @@  static int sdebug_driver_probe(struct device *dev)
 	if (sdebug_host_max_queue)
 		hpnt->host_tagset = 1;
 
+	/* poll queues are possible for nr_hw_queues > 1 */
+	if (hpnt->nr_hw_queues == 1)
+		poll_queues = 0;
+
+	/* poll queues  */
+	if (poll_queues >= submit_queues) {
+		pr_warn("%s: trim poll_queues to 1\n", my_name);
+		poll_queues = 1;
+	}
+	if (poll_queues)
+		hpnt->nr_maps = 3;
+
 	sdbg_host->shost = hpnt;
 	*((struct sdebug_host_info **)hpnt->hostdata) = sdbg_host;
 	if ((hpnt->this_id >= 0) && (sdebug_num_tgts > hpnt->this_id))