diff mbox series

[V3,15/25] smartpqi: fix host qdepth limit

Message ID 160763254769.26927.9249430312259308888.stgit@brunhilda (mailing list archive)
State Changes Requested
Headers show
Series smartpqi updates | expand

Commit Message

Don Brace Dec. 10, 2020, 8:35 p.m. UTC
From: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>

* Correct scsi-mid-layer sending more requests than
  exposed host Q depth causing firmware ASSERT issue.
  * Add host Qdepth counter.

Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
Reviewed-by: Scott Teel <scott.teel@microchip.com>
Reviewed-by: Kevin Barnett <kevin.barnett@microchip.com>
Signed-off-by: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
Signed-off-by: Don Brace <don.brace@microchip.com>
---
 drivers/scsi/smartpqi/smartpqi.h      |    2 ++
 drivers/scsi/smartpqi/smartpqi_init.c |   19 ++++++++++++++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

Comments

Paul Menzel Dec. 14, 2020, 5:54 p.m. UTC | #1
Dear Don, dear Mahesh,


Am 10.12.20 um 21:35 schrieb Don Brace:
> From: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
> 
> * Correct scsi-mid-layer sending more requests than
>    exposed host Q depth causing firmware ASSERT issue.
>    * Add host Qdepth counter.

This supposedly fixes the regression between Linux 5.4 and 5.9, which we 
reported in [1].

     kernel: smartpqi 0000:89:00.0: controller is offline: status code 
0x6100c
     kernel: smartpqi 0000:89:00.0: controller offline

Thank you for looking into this issue and fixing it. We are going to 
test this.

For easily finding these things in the git history or the WWW, it would 
be great if these log messages could be included (in the future).

Also, that means, that the regression is still present in Linux 5.10, 
released yesterday, and this commit does not apply to these versions.

Mahesh, do you have any idea, what commit caused the regression and why 
the issue started to show up?

James, Martin, how are regressions handled for the SCSI subsystem?

Regarding the diff, personally, I find the commit message much too 
terse. `pqi_scsi_queue_command()` will return `SCSI_MLQUEUE_HOST_BUSY` 
for the case of too many requests. Will that be logged by Linux in some 
log level? In my opinion it points to a performance problem, and should 
be at least logged as a notice or warning.

Can `ctrl_info->scsi_ml_can_queue` be queried somehow maybe in the logs? 
`sudo find /sys -name queue` did not display something interesting.

[1]: https://marc.info/?l=linux-scsi&m=160271263114829&w=2
      "Linux 5.9: smartpqi: controller is offline: status code 0x6100c"

> Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
> Reviewed-by: Scott Teel <scott.teel@microchip.com>
> Reviewed-by: Kevin Barnett <kevin.barnett@microchip.com>
> Signed-off-by: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
> Signed-off-by: Don Brace <don.brace@microchip.com>
> ---
>   drivers/scsi/smartpqi/smartpqi.h      |    2 ++
>   drivers/scsi/smartpqi/smartpqi_init.c |   19 ++++++++++++++++---
>   2 files changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h
> index 0b94c755a74c..c3b103b15924 100644
> --- a/drivers/scsi/smartpqi/smartpqi.h
> +++ b/drivers/scsi/smartpqi/smartpqi.h
> @@ -1345,6 +1345,8 @@ struct pqi_ctrl_info {
>   	struct work_struct ofa_quiesce_work;
>   	u32		ofa_bytes_requested;
>   	u16		ofa_cancel_reason;
> +
> +	atomic_t	total_scmds_outstanding;
>   };

What is the difference between the already existing

     atomic_t scsi_cmds_outstanding;

and the new counter?

     atomic_t	total_scmds_outstanding;

The names are quite similar, so different names or a comment might be 
useful.

>   
>   enum pqi_ctrl_mode {
> diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
> index 082b17e9bd80..4e088f47d95f 100644
> --- a/drivers/scsi/smartpqi/smartpqi_init.c
> +++ b/drivers/scsi/smartpqi/smartpqi_init.c
> @@ -5578,6 +5578,8 @@ static inline bool pqi_is_bypass_eligible_request(struct scsi_cmnd *scmd)
>   void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
>   {
>   	struct pqi_scsi_dev *device;
> +	struct pqi_ctrl_info *ctrl_info;
> +	struct Scsi_Host *shost;
>   
>   	if (!scmd->device) {
>   		set_host_byte(scmd, DID_NO_CONNECT);
> @@ -5590,7 +5592,11 @@ void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
>   		return;
>   	}
>   
> +	shost = scmd->device->host;

The function already has a variable `device`, which is assigned 
“hostdata” though:

     device = scmd->device->hostdata;

This confuses me. Maybe this should be cleaned up in a followup commit, 
and the variable device be reused above in the `shost` assignment.

> +	ctrl_info = shost_to_hba(shost);
> +
>   	atomic_dec(&device->scsi_cmds_outstanding);
> +	atomic_dec(&ctrl_info->total_scmds_outstanding);
>   }
>   
>   static bool pqi_is_parity_write_stream(struct pqi_ctrl_info *ctrl_info,
> @@ -5678,6 +5684,7 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
>   	bool raid_bypassed;
>   
>   	device = scmd->device->hostdata;
> +	ctrl_info = shost_to_hba(shost);
>   
>   	if (!device) {
>   		set_host_byte(scmd, DID_NO_CONNECT);
> @@ -5686,8 +5693,11 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
>   	}
>   
>   	atomic_inc(&device->scsi_cmds_outstanding);
> -
> -	ctrl_info = shost_to_hba(shost);

I believe, style changes (re-ordering) in commits fixing regressions 
make it harder to backport it.

> +	if (atomic_inc_return(&ctrl_info->total_scmds_outstanding) >
> +		ctrl_info->scsi_ml_can_queue) {
> +		rc = SCSI_MLQUEUE_HOST_BUSY;
> +		goto out;
> +	}
>   
>   	if (pqi_ctrl_offline(ctrl_info) || pqi_device_in_remove(device)) {
>   		set_host_byte(scmd, DID_NO_CONNECT);
> @@ -5730,8 +5740,10 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
>   	}
>   
>   out:
> -	if (rc)
> +	if (rc) {
>   		atomic_dec(&device->scsi_cmds_outstanding);
> +		atomic_dec(&ctrl_info->total_scmds_outstanding);
> +	}
>   
>   	return rc;
>   }
> @@ -8054,6 +8066,7 @@ static struct pqi_ctrl_info *pqi_alloc_ctrl_info(int numa_node)
>   
>   	INIT_WORK(&ctrl_info->event_work, pqi_event_worker);
>   	atomic_set(&ctrl_info->num_interrupts, 0);
> +	atomic_set(&ctrl_info->total_scmds_outstanding, 0);
>   
>   	INIT_DELAYED_WORK(&ctrl_info->rescan_work, pqi_rescan_worker);
>   	INIT_DELAYED_WORK(&ctrl_info->update_time_work, pqi_update_time_worker);


Kind regards,

Paul
Don Brace Dec. 15, 2020, 8:23 p.m. UTC | #2
Please see answers below. Hope this helps.

-----Original Message-----
From: Paul Menzel [mailto:pmenzel@molgen.mpg.de] 
Sent: Monday, December 14, 2020 11:54 AM
To: Don Brace - C33706 <Don.Brace@microchip.com>; Kevin Barnett - C33748 <Kevin.Barnett@microchip.com>; Scott Teel - C33730 <Scott.Teel@microchip.com>; Justin Lindley - C33718 <Justin.Lindley@microchip.com>; Scott Benesh - C33703 <Scott.Benesh@microchip.com>; Gerry Morong - C33720 <Gerry.Morong@microchip.com>; Mahesh Rajashekhara - I30583 <Mahesh.Rajashekhara@microchip.com>; hch@infradead.org; joseph.szczypek@hpe.com; POSWALD@suse.com; James E. J. Bottomley <jejb@linux.ibm.com>; Martin K. Petersen <martin.petersen@oracle.com>
Cc: linux-scsi@vger.kernel.org; it+linux-scsi@molgen.mpg.de; Donald Buczek <buczek@molgen.mpg.de>; Greg KH <gregkh@linuxfoundation.org>
Subject: Re: [PATCH V3 15/25] smartpqi: fix host qdepth limit

EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe

Dear Don, dear Mahesh,


Am 10.12.20 um 21:35 schrieb Don Brace:
> From: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
>
> * Correct scsi-mid-layer sending more requests than
>    exposed host Q depth causing firmware ASSERT issue.
>    * Add host Qdepth counter.

This supposedly fixes the regression between Linux 5.4 and 5.9, which we reported in [1].

     kernel: smartpqi 0000:89:00.0: controller is offline: status code 0x6100c
     kernel: smartpqi 0000:89:00.0: controller offline

Thank you for looking into this issue and fixing it. We are going to test this.

For easily finding these things in the git history or the WWW, it would be great if these log messages could be included (in the future).
DON> Thanks for your suggestion. Well add them in the next time.

Also, that means, that the regression is still present in Linux 5.10, released yesterday, and this commit does not apply to these versions.

DON> They have started 5.10-RC7 now. So possibly 5.11 or 5.12 depending when all of the patches are applied. The patch in question is among 28 other patches.

Mahesh, do you have any idea, what commit caused the regression and why the issue started to show up?
DON> The smartpqi driver sets two scsi_host_template member fields: .can_queue and .nr_hw_queues. But we have not yet converted to host_tagset. So the queue_depth becomes nr_hw_queues * can_queue, which is more than the hw can support. That can be verified by looking at scsi_host.h.
        /*
         * In scsi-mq mode, the number of hardware queues supported by the LLD.
         *
         * Note: it is assumed that each hardware queue has a queue depth of
         * can_queue. In other words, the total queue depth per host
         * is nr_hw_queues * can_queue. However, for when host_tagset is set,
         * the total queue depth is can_queue.
         */

So, until we make this change, the queue_depth change prevents the above issue from happening.
Note: you will see better performance and more evenly distributed performance with this patch applied.

James, Martin, how are regressions handled for the SCSI subsystem?

Regarding the diff, personally, I find the commit message much too terse. `pqi_scsi_queue_command()` will return `SCSI_MLQUEUE_HOST_BUSY` for the case of too many requests. Will that be logged by Linux in some log level? In my opinion it points to a performance problem, and should be at least logged as a notice or warning.
DON> We could add a ratelimited print, but we did not want to interrupt the CPU for logging these messages.
Also, you should see better and more even performance.

Can `ctrl_info->scsi_ml_can_queue` be queried somehow maybe in the logs?
`sudo find /sys -name queue` did not display something interesting.
All I find is /sys/class/scsi_host/host<X>/{cmd_per_lun, can_queue}, but not nr_hw_queues, but there is one queue for each CPU.

[1]: https://marc.info/?l=linux-scsi&m=160271263114829&w=2
      "Linux 5.9: smartpqi: controller is offline: status code 0x6100c"

> Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
> Reviewed-by: Scott Teel <scott.teel@microchip.com>
> Reviewed-by: Kevin Barnett <kevin.barnett@microchip.com>
> Signed-off-by: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
> Signed-off-by: Don Brace <don.brace@microchip.com>
> ---
>   drivers/scsi/smartpqi/smartpqi.h      |    2 ++
>   drivers/scsi/smartpqi/smartpqi_init.c |   19 ++++++++++++++++---
>   2 files changed, 18 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/scsi/smartpqi/smartpqi.h 
> b/drivers/scsi/smartpqi/smartpqi.h
> index 0b94c755a74c..c3b103b15924 100644
> --- a/drivers/scsi/smartpqi/smartpqi.h
> +++ b/drivers/scsi/smartpqi/smartpqi.h
> @@ -1345,6 +1345,8 @@ struct pqi_ctrl_info {
>       struct work_struct ofa_quiesce_work;
>       u32             ofa_bytes_requested;
>       u16             ofa_cancel_reason;
> +
> +     atomic_t        total_scmds_outstanding;
>   };

What is the difference between the already existing

     atomic_t scsi_cmds_outstanding;

and the new counter?

     atomic_t   total_scmds_outstanding;

The names are quite similar, so different names or a comment might be useful.
DON> total_scmds_outstanding tracks the queue_depth for the entire driver instance.


>
>   enum pqi_ctrl_mode {
> diff --git a/drivers/scsi/smartpqi/smartpqi_init.c 
> b/drivers/scsi/smartpqi/smartpqi_init.c
> index 082b17e9bd80..4e088f47d95f 100644
> --- a/drivers/scsi/smartpqi/smartpqi_init.c
> +++ b/drivers/scsi/smartpqi/smartpqi_init.c
> @@ -5578,6 +5578,8 @@ static inline bool pqi_is_bypass_eligible_request(struct scsi_cmnd *scmd)
>   void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
>   {
>       struct pqi_scsi_dev *device;
> +     struct pqi_ctrl_info *ctrl_info;
> +     struct Scsi_Host *shost;
>
>       if (!scmd->device) {
>               set_host_byte(scmd, DID_NO_CONNECT); @@ -5590,7 +5592,11 
> @@ void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
>               return;
>       }
>
> +     shost = scmd->device->host;

The function already has a variable `device`, which is assigned “hostdata” though:

     device = scmd->device->hostdata;

This confuses me. Maybe this should be cleaned up in a followup commit, and the variable device be reused above in the `shost` assignment.
DON> host points back to the driver instance of our HW.
DON> hostdata is a driver usable field that points back to our internal device pointer <LUN or HBA>.


> +     ctrl_info = shost_to_hba(shost);
> +
>       atomic_dec(&device->scsi_cmds_outstanding);
> +     atomic_dec(&ctrl_info->total_scmds_outstanding);
>   }
>
>   static bool pqi_is_parity_write_stream(struct pqi_ctrl_info 
> *ctrl_info, @@ -5678,6 +5684,7 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
>       bool raid_bypassed;
>
>       device = scmd->device->hostdata;
> +     ctrl_info = shost_to_hba(shost);
>
>       if (!device) {
>               set_host_byte(scmd, DID_NO_CONNECT); @@ -5686,8 +5693,11 
> @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
>       }
>
>       atomic_inc(&device->scsi_cmds_outstanding);
> -
> -     ctrl_info = shost_to_hba(shost);

I believe, style changes (re-ordering) in commits fixing regressions make it harder to backport it.

> +     if (atomic_inc_return(&ctrl_info->total_scmds_outstanding) >
> +             ctrl_info->scsi_ml_can_queue) {
> +             rc = SCSI_MLQUEUE_HOST_BUSY;
> +             goto out;
> +     }
>
>       if (pqi_ctrl_offline(ctrl_info) || pqi_device_in_remove(device)) {
>               set_host_byte(scmd, DID_NO_CONNECT); @@ -5730,8 +5740,10 
> @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
>       }
>
>   out:
> -     if (rc)
> +     if (rc) {
>               atomic_dec(&device->scsi_cmds_outstanding);
> +             atomic_dec(&ctrl_info->total_scmds_outstanding);
> +     }
>
>       return rc;
>   }
> @@ -8054,6 +8066,7 @@ static struct pqi_ctrl_info 
> *pqi_alloc_ctrl_info(int numa_node)
>
>       INIT_WORK(&ctrl_info->event_work, pqi_event_worker);
>       atomic_set(&ctrl_info->num_interrupts, 0);
> +     atomic_set(&ctrl_info->total_scmds_outstanding, 0);
>
>       INIT_DELAYED_WORK(&ctrl_info->rescan_work, pqi_rescan_worker);
>       INIT_DELAYED_WORK(&ctrl_info->update_time_work, 
> pqi_update_time_worker);


Kind regards,

Paul
Martin Wilck Jan. 7, 2021, 11:43 p.m. UTC | #3
On Tue, 2020-12-15 at 20:23 +0000, Don.Brace@microchip.com wrote:
> Please see answers below. Hope this helps.
> 
> -----Original Message-----
> From: Paul Menzel [mailto:pmenzel@molgen.mpg.de] 
> Sent: Monday, December 14, 2020 11:54 AM
> To: Don Brace - C33706 <Don.Brace@microchip.com>; Kevin Barnett -
> C33748 <Kevin.Barnett@microchip.com>; Scott Teel - C33730 <
> Scott.Teel@microchip.com>; Justin Lindley - C33718 <
> Justin.Lindley@microchip.com>; Scott Benesh - C33703 <
> Scott.Benesh@microchip.com>; Gerry Morong - C33720 <
> Gerry.Morong@microchip.com>; Mahesh Rajashekhara - I30583 <
> Mahesh.Rajashekhara@microchip.com>; hch@infradead.org; 
> joseph.szczypek@hpe.com; POSWALD@suse.com; James E. J. Bottomley <
> jejb@linux.ibm.com>; Martin K. Petersen <martin.petersen@oracle.com>
> Cc: linux-scsi@vger.kernel.org; it+linux-scsi@molgen.mpg.de; Donald
> Buczek <buczek@molgen.mpg.de>; Greg KH <gregkh@linuxfoundation.org>
> Subject: Re: [PATCH V3 15/25] smartpqi: fix host qdepth limit
> 
> EXTERNAL EMAIL: Do not click links or open attachments unless you
> know the content is safe
> 
> Dear Don, dear Mahesh,
> 
> 
> Am 10.12.20 um 21:35 schrieb Don Brace:
> > From: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
> > 
> > * Correct scsi-mid-layer sending more requests than
> >    exposed host Q depth causing firmware ASSERT issue.
> >    * Add host Qdepth counter.
> 
> This supposedly fixes the regression between Linux 5.4 and 5.9, which
> we reported in [1].
> 
>      kernel: smartpqi 0000:89:00.0: controller is offline: status
> code 0x6100c
>      kernel: smartpqi 0000:89:00.0: controller offline
> 
> Thank you for looking into this issue and fixing it. We are going to
> test this.
> 
> For easily finding these things in the git history or the WWW, it
> would be great if these log messages could be included (in the
> future).
> DON> Thanks for your suggestion. Well add them in the next time.
> 
> Also, that means, that the regression is still present in Linux 5.10,
> released yesterday, and this commit does not apply to these versions.
> 
> DON> They have started 5.10-RC7 now. So possibly 5.11 or 5.12
> depending when all of the patches are applied. The patch in question
> is among 28 other patches.
> 
> Mahesh, do you have any idea, what commit caused the regression and
> why the issue started to show up?
> DON> The smartpqi driver sets two scsi_host_template member fields:
> .can_queue and .nr_hw_queues. But we have not yet converted to
> host_tagset. So the queue_depth becomes nr_hw_queues * can_queue,
> which is more than the hw can support. That can be verified by
> looking at scsi_host.h.
>         /*
>          * In scsi-mq mode, the number of hardware queues supported
> by the LLD.
>          *
>          * Note: it is assumed that each hardware queue has a queue
> depth of
>          * can_queue. In other words, the total queue depth per host
>          * is nr_hw_queues * can_queue. However, for when host_tagset
> is set,
>          * the total queue depth is can_queue.
>          */
> 
> So, until we make this change, the queue_depth change prevents the
> above issue from happening.

can_queue and nr_hw_queues have been set like this as long as the
driver existed. Why did Paul observe a regression with 5.9?

And why can't you simply set can_queue to 
(ctrl_info->scsi_ml_can_queue / nr_hw_queues)?

Regards,
Martin
Don Brace Jan. 15, 2021, 9:17 p.m. UTC | #4
-----Original Message-----
From: Martin Wilck [mailto:mwilck@suse.com] 
Subject: Re: [PATCH V3 15/25] smartpqi: fix host qdepth limit

EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe

On Tue, 2020-12-15 at 20:23 +0000, Don.Brace@microchip.com wrote:
> Please see answers below. Hope this helps.
>
> -----Original Message-----
> From: Paul Menzel [mailto:pmenzel@molgen.mpg.de]
> Sent: Monday, December 14, 2020 11:54 AM
> To: Don Brace - C33706 <Don.Brace@microchip.com>; Kevin Barnett -
> C33748 <Kevin.Barnett@microchip.com>; Scott Teel - C33730 < 
> Scott.Teel@microchip.com>; Justin Lindley - C33718 < 
> Justin.Lindley@microchip.com>; Scott Benesh - C33703 < 
> Scott.Benesh@microchip.com>; Gerry Morong - C33720 < 
> Gerry.Morong@microchip.com>; Mahesh Rajashekhara - I30583 < 
> Mahesh.Rajashekhara@microchip.com>; hch@infradead.org; 
> joseph.szczypek@hpe.com; POSWALD@suse.com; James E. J. Bottomley < 
> jejb@linux.ibm.com>; Martin K. Petersen <martin.petersen@oracle.com>
> Cc: linux-scsi@vger.kernel.org; it+linux-scsi@molgen.mpg.de; Donald 
> Buczek <buczek@molgen.mpg.de>; Greg KH <gregkh@linuxfoundation.org>
> Subject: Re: [PATCH V3 15/25] smartpqi: fix host qdepth limit
>
> EXTERNAL EMAIL: Do not click links or open attachments unless you know 
> the content is safe
>
> Dear Don, dear Mahesh,
>
>
> Am 10.12.20 um 21:35 schrieb Don Brace:
> > From: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
> >
> > * Correct scsi-mid-layer sending more requests than
> >    exposed host Q depth causing firmware ASSERT issue.
> >    * Add host Qdepth counter.
>
> This supposedly fixes the regression between Linux 5.4 and 5.9, which 
> we reported in [1].
>
>      kernel: smartpqi 0000:89:00.0: controller is offline: status code 
> 0x6100c
>      kernel: smartpqi 0000:89:00.0: controller offline
>
> Thank you for looking into this issue and fixing it. We are going to 
> test this.
>
> For easily finding these things in the git history or the WWW, it 
> would be great if these log messages could be included (in the 
> future).
> DON> Thanks for your suggestion. Well add them in the next time.
>
> Also, that means, that the regression is still present in Linux 5.10, 
> released yesterday, and this commit does not apply to these versions.
>
> DON> They have started 5.10-RC7 now. So possibly 5.11 or 5.12
> depending when all of the patches are applied. The patch in question 
> is among 28 other patches.
>
> Mahesh, do you have any idea, what commit caused the regression and 
> why the issue started to show up?
> DON> The smartpqi driver sets two scsi_host_template member fields:
> .can_queue and .nr_hw_queues. But we have not yet converted to 
> host_tagset. So the queue_depth becomes nr_hw_queues * can_queue, 
> which is more than the hw can support. That can be verified by looking 
> at scsi_host.h.
>         /*
>          * In scsi-mq mode, the number of hardware queues supported by 
> the LLD.
>          *
>          * Note: it is assumed that each hardware queue has a queue 
> depth of
>          * can_queue. In other words, the total queue depth per host
>          * is nr_hw_queues * can_queue. However, for when host_tagset 
> is set,
>          * the total queue depth is can_queue.
>          */
>
> So, until we make this change, the queue_depth change prevents the 
> above issue from happening.

can_queue and nr_hw_queues have been set like this as long as the driver existed. Why did Paul observe a regression with 5.9?

And why can't you simply set can_queue to (ctrl_info->scsi_ml_can_queue / nr_hw_queues)?

Don: I did this in an internal patch, but this patch seemed to work the best for our driver. HBA performance remained steady when running benchmarks.

Regards,
Martin
John Garry Jan. 19, 2021, 10:33 a.m. UTC | #5
>>
>> Am 10.12.20 um 21:35 schrieb Don Brace:
>>> From: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
>>>
>>> * Correct scsi-mid-layer sending more requests than
>>>     exposed host Q depth causing firmware ASSERT issue.
>>>     * Add host Qdepth counter.
>>
>> This supposedly fixes the regression between Linux 5.4 and 5.9, which
>> we reported in [1].
>>
>>       kernel: smartpqi 0000:89:00.0: controller is offline: status code
>> 0x6100c
>>       kernel: smartpqi 0000:89:00.0: controller offline
>>
>> Thank you for looking into this issue and fixing it. We are going to
>> test this.
>>
>> For easily finding these things in the git history or the WWW, it
>> would be great if these log messages could be included (in the
>> future).
>> DON> Thanks for your suggestion. Well add them in the next time.
>>
>> Also, that means, that the regression is still present in Linux 5.10,
>> released yesterday, and this commit does not apply to these versions.
>>
>> DON> They have started 5.10-RC7 now. So possibly 5.11 or 5.12
>> depending when all of the patches are applied. The patch in question
>> is among 28 other patches.
>>
>> Mahesh, do you have any idea, what commit caused the regression and
>> why the issue started to show up?
>> DON> The smartpqi driver sets two scsi_host_template member fields:
>> .can_queue and .nr_hw_queues. But we have not yet converted to
>> host_tagset. So the queue_depth becomes nr_hw_queues * can_queue,
>> which is more than the hw can support. That can be verified by looking
>> at scsi_host.h.
>>          /*
>>           * In scsi-mq mode, the number of hardware queues supported by
>> the LLD.
>>           *
>>           * Note: it is assumed that each hardware queue has a queue
>> depth of
>>           * can_queue. In other words, the total queue depth per host
>>           * is nr_hw_queues * can_queue. However, for when host_tagset
>> is set,
>>           * the total queue depth is can_queue.
>>           */
>>
>> So, until we make this change, the queue_depth change prevents the
>> above issue from happening.
> 
> can_queue and nr_hw_queues have been set like this as long as the driver existed. Why did Paul observe a regression with 5.9?
> 
> And why can't you simply set can_queue to (ctrl_info->scsi_ml_can_queue / nr_hw_queues)?
> 
> Don: I did this in an internal patch, but this patch seemed to work the best for our driver. HBA performance remained steady when running benchmarks.
> 

I guess that this is a fallout from commit 6eb045e092ef ("scsi:
  core: avoid host-wide host_busy counter for scsi_mq"). But that commit 
is correct.

If .can_queue is set to (ctrl_info->scsi_ml_can_queue / nr_hw_queues), 
then blk-mq can send each hw queue only (ctrl_info->scsi_ml_can_queue / 
nr_hw_queues) commands, while it should be possible to send 
ctrl_info->scsi_ml_can_queue commands.

I think that this can alternatively be solved by setting .host_tagset flag.

Thanks,
John
Martin Wilck Jan. 19, 2021, 2:12 p.m. UTC | #6
On Tue, 2021-01-19 at 10:33 +0000, John Garry wrote:
> > > 
> > > Am 10.12.20 um 21:35 schrieb Don Brace:
> > > > From: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
> > > > 
> > > > * Correct scsi-mid-layer sending more requests than
> > > >     exposed host Q depth causing firmware ASSERT issue.
> > > >     * Add host Qdepth counter.
> > > 
> > > This supposedly fixes the regression between Linux 5.4 and 5.9,
> > > which
> > > we reported in [1].
> > > 
> > >       kernel: smartpqi 0000:89:00.0: controller is offline:
> > > status code
> > > 0x6100c
> > >       kernel: smartpqi 0000:89:00.0: controller offline
> > > 
> > > Thank you for looking into this issue and fixing it. We are going
> > > to
> > > test this.
> > > 
> > > For easily finding these things in the git history or the WWW, it
> > > would be great if these log messages could be included (in the
> > > future).
> > > DON> Thanks for your suggestion. Well add them in the next time.
> > > 
> > > Also, that means, that the regression is still present in Linux
> > > 5.10,
> > > released yesterday, and this commit does not apply to these
> > > versions.
> > > 
> > > DON> They have started 5.10-RC7 now. So possibly 5.11 or 5.12
> > > depending when all of the patches are applied. The patch in
> > > question
> > > is among 28 other patches.
> > > 
> > > Mahesh, do you have any idea, what commit caused the regression
> > > and
> > > why the issue started to show up?
> > > DON> The smartpqi driver sets two scsi_host_template member
> > > fields:
> > > .can_queue and .nr_hw_queues. But we have not yet converted to
> > > host_tagset. So the queue_depth becomes nr_hw_queues * can_queue,
> > > which is more than the hw can support. That can be verified by
> > > looking
> > > at scsi_host.h.
> > >          /*
> > >           * In scsi-mq mode, the number of hardware queues
> > > supported by
> > > the LLD.
> > >           *
> > >           * Note: it is assumed that each hardware queue has a
> > > queue
> > > depth of
> > >           * can_queue. In other words, the total queue depth per
> > > host
> > >           * is nr_hw_queues * can_queue. However, for when
> > > host_tagset
> > > is set,
> > >           * the total queue depth is can_queue.
> > >           */
> > > 
> > > So, until we make this change, the queue_depth change prevents
> > > the
> > > above issue from happening.
> > 
> > can_queue and nr_hw_queues have been set like this as long as the
> > driver existed. Why did Paul observe a regression with 5.9?
> > 
> > And why can't you simply set can_queue to (ctrl_info-
> > >scsi_ml_can_queue / nr_hw_queues)?
> > 
> > Don: I did this in an internal patch, but this patch seemed to work
> > the best for our driver. HBA performance remained steady when
> > running benchmarks.

That was a stupid suggestion on my part. Sorry.

> I guess that this is a fallout from commit 6eb045e092ef ("scsi:
>   core: avoid host-wide host_busy counter for scsi_mq"). But that
> commit 
> is correct.

It would be good if someone (Paul?) could verify whether that commit
actually caused the regression they saw.

Looking at that 6eb045e092ef, I notice this hunk:

 
-       busy = atomic_inc_return(&shost->host_busy) - 1;
        if (atomic_read(&shost->host_blocked) > 0) {
-               if (busy)
+               if (scsi_host_busy(shost) > 0)
                        goto starved;

Before 6eb045e092ef, the busy count was incremented with membarrier
before looking at "host_blocked". The new code does this instead:

@ -1403,6 +1400,8 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
                spin_unlock_irq(shost->host_lock);
        }
 
+       __set_bit(SCMD_STATE_INFLIGHT, &cmd->state);
+

but it happens *after* the "host_blocked" check. Could that perhaps
have caused the regression?

Thanks
Martin
Paul Menzel Jan. 19, 2021, 5:43 p.m. UTC | #7
Dear Martin, dear John, dear Don, dear Linux folks,


Am 19.01.21 um 15:12 schrieb Martin Wilck:
> On Tue, 2021-01-19 at 10:33 +0000, John Garry wrote:
>>>>
>>>> Am 10.12.20 um 21:35 schrieb Don Brace:
>>>>> From: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
>>>>>
>>>>> * Correct scsi-mid-layer sending more requests than
>>>>>      exposed host Q depth causing firmware ASSERT issue.
>>>>>      * Add host Qdepth counter.
>>>>
>>>> This supposedly fixes the regression between Linux 5.4 and 5.9,
>>>> which we reported in [1].
>>>>
>>>>        kernel: smartpqi 0000:89:00.0: controller is offline: status code 0x6100c
>>>>        kernel: smartpqi 0000:89:00.0: controller offline
>>>>
>>>> Thank you for looking into this issue and fixing it. We are going
>>>> to test this.
>>>>
>>>> For easily finding these things in the git history or the WWW, it
>>>> would be great if these log messages could be included (in the
>>>> future).
>>>> DON> Thanks for your suggestion. Well add them in the next time.
>>>>
>>>> Also, that means, that the regression is still present in Linux
>>>> 5.10, released yesterday, and this commit does not apply to these
>>>> versions.
>>>>
>>>> DON> They have started 5.10-RC7 now. So possibly 5.11 or 5.12
>>>> depending when all of the patches are applied. The patch in
>>>> question is among 28 other patches.
>>>>
>>>> Mahesh, do you have any idea, what commit caused the regression
>>>> and why the issue started to show up?
>>>> DON> The smartpqi driver sets two scsi_host_template member
>>>> fields:
>>>> .can_queue and .nr_hw_queues. But we have not yet converted to
>>>> host_tagset. So the queue_depth becomes nr_hw_queues * can_queue,
>>>> which is more than the hw can support. That can be verified by
>>>> looking at scsi_host.h.
>>>>           /*
>>>>            * In scsi-mq mode, the number of hardware queues supported by the LLD.
>>>>            *
>>>>            * Note: it is assumed that each hardware queue has a queue depth of
>>>>            * can_queue. In other words, the total queue depth per host
>>>>            * is nr_hw_queues * can_queue. However, for when host_tagset is set,
>>>>            * the total queue depth is can_queue.
>>>>            */
>>>>
>>>> So, until we make this change, the queue_depth change prevents
>>>> the above issue from happening.
>>>
>>> can_queue and nr_hw_queues have been set like this as long as the
>>> driver existed. Why did Paul observe a regression with 5.9?
>>>
>>> And why can't you simply set can_queue to (ctrl_info-
>>>> scsi_ml_can_queue / nr_hw_queues)?
>>>
>>> Don: I did this in an internal patch, but this patch seemed to work
>>> the best for our driver. HBA performance remained steady when
>>> running benchmarks.
> 
> That was a stupid suggestion on my part. Sorry.
> 
>> I guess that this is a fallout from commit 6eb045e092ef ("scsi:
>>    core: avoid host-wide host_busy counter for scsi_mq"). But that
>> commit is correct.

John, thank you very much for taking the time to point this out. The 
commit showed first up in Linux 5.5-rc1. (The host template flog 
`host_tagset` was introduced in Linux 5.10-rc1.)

> It would be good if someone (Paul?) could verify whether that commit
> actually caused the regression they saw.
> 
> Looking at that 6eb045e092ef, I notice this hunk:
>   
> -       busy = atomic_inc_return(&shost->host_busy) - 1;
>          if (atomic_read(&shost->host_blocked) > 0) {
> -               if (busy)
> +               if (scsi_host_busy(shost) > 0)
>                          goto starved;
> 
> Before 6eb045e092ef, the busy count was incremented with membarrier
> before looking at "host_blocked". The new code does this instead:
> 
> @ -1403,6 +1400,8 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
>                  spin_unlock_irq(shost->host_lock);
>          }
>   
> +       __set_bit(SCMD_STATE_INFLIGHT, &cmd->state);
> +
> 
> but it happens *after* the "host_blocked" check. Could that perhaps
> have caused the regression?

As we only have production systems with this issue, and Don wrote the 
Microchip team was able to reproduce the issue, it’d be great, if Don 
and his team, could test, if commit 6eb045e092ef introduced the regression.

Also, we still need a path forward how to fix this for the Linux 5.10 
series. Due to the issue dragging on for so long, the 5.9 series has 
reached end of life already.


Kind regards,

Paul
Donald Buczek Jan. 20, 2021, 4:42 p.m. UTC | #8
On 19.01.21 15:12, Martin Wilck wrote:
> On Tue, 2021-01-19 at 10:33 +0000, John Garry wrote:
>>>>
>>>> Am 10.12.20 um 21:35 schrieb Don Brace:
>>>>> From: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
>>>>>
>>>>> * Correct scsi-mid-layer sending more requests than
>>>>>      exposed host Q depth causing firmware ASSERT issue.
>>>>>      * Add host Qdepth counter.
>>>>
>>>> This supposedly fixes the regression between Linux 5.4 and 5.9,
>>>> which
>>>> we reported in [1].
>>>>
>>>>        kernel: smartpqi 0000:89:00.0: controller is offline:
>>>> status code
>>>> 0x6100c
>>>>        kernel: smartpqi 0000:89:00.0: controller offline
>>>>
>>>> Thank you for looking into this issue and fixing it. We are going
>>>> to
>>>> test this.
>>>>
>>>> For easily finding these things in the git history or the WWW, it
>>>> would be great if these log messages could be included (in the
>>>> future).
>>>> DON> Thanks for your suggestion. Well add them in the next time.
>>>>
>>>> Also, that means, that the regression is still present in Linux
>>>> 5.10,
>>>> released yesterday, and this commit does not apply to these
>>>> versions.
>>>>
>>>> DON> They have started 5.10-RC7 now. So possibly 5.11 or 5.12
>>>> depending when all of the patches are applied. The patch in
>>>> question
>>>> is among 28 other patches.
>>>>
>>>> Mahesh, do you have any idea, what commit caused the regression
>>>> and
>>>> why the issue started to show up?
>>>> DON> The smartpqi driver sets two scsi_host_template member
>>>> fields:
>>>> .can_queue and .nr_hw_queues. But we have not yet converted to
>>>> host_tagset. So the queue_depth becomes nr_hw_queues * can_queue,
>>>> which is more than the hw can support. That can be verified by
>>>> looking
>>>> at scsi_host.h.
>>>>           /*
>>>>            * In scsi-mq mode, the number of hardware queues
>>>> supported by
>>>> the LLD.
>>>>            *
>>>>            * Note: it is assumed that each hardware queue has a
>>>> queue
>>>> depth of
>>>>            * can_queue. In other words, the total queue depth per
>>>> host
>>>>            * is nr_hw_queues * can_queue. However, for when
>>>> host_tagset
>>>> is set,
>>>>            * the total queue depth is can_queue.
>>>>            */
>>>>
>>>> So, until we make this change, the queue_depth change prevents
>>>> the
>>>> above issue from happening.
>>>
>>> can_queue and nr_hw_queues have been set like this as long as the
>>> driver existed. Why did Paul observe a regression with 5.9?
>>>
>>> And why can't you simply set can_queue to (ctrl_info-
>>>> scsi_ml_can_queue / nr_hw_queues)?
>>>
>>> Don: I did this in an internal patch, but this patch seemed to work
>>> the best for our driver. HBA performance remained steady when
>>> running benchmarks.
> 
> That was a stupid suggestion on my part. Sorry.
> 
>> I guess that this is a fallout from commit 6eb045e092ef ("scsi:
>>    core: avoid host-wide host_busy counter for scsi_mq"). But that
>> commit
>> is correct.
> 
> It would be good if someone (Paul?) could verify whether that commit
> actually caused the regression they saw.

We can reliably trigger the issue with a certain load pattern on a certain hardware.

I've compiled 6eb045e092ef  and got (as with other affected kernels) "controller is offline: status code 0x6100c" after 15 minutes of the test load.
I've compiled 6eb045e092ef^ and the load is running for 3 1/2 hours now.

So you hit it.

> Looking at that 6eb045e092ef, I notice this hunk:
> 
>   
> -       busy = atomic_inc_return(&shost->host_busy) - 1;
>          if (atomic_read(&shost->host_blocked) > 0) {
> -               if (busy)
> +               if (scsi_host_busy(shost) > 0)
>                          goto starved;
> 
> Before 6eb045e092ef, the busy count was incremented with membarrier
> before looking at "host_blocked". The new code does this instead:
> 
> @ -1403,6 +1400,8 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
>                  spin_unlock_irq(shost->host_lock);
>          }
>   
> +       __set_bit(SCMD_STATE_INFLIGHT, &cmd->state);
> +
> 
> but it happens *after* the "host_blocked" check. Could that perhaps
> have caused the regression?

I'm not into this and can't comment on that. But if you need me to test any patch for verification, I'll certainly can do that.

Best
   Donald

>
> 
> Thanks
> Martin
>
Don Brace Jan. 20, 2021, 5:03 p.m. UTC | #9
-----Original Message-----
From: Donald Buczek [mailto:buczek@molgen.mpg.de] 
Subject: Re: [PATCH V3 15/25] smartpqi: fix host qdepth limit

>
> It would be good if someone (Paul?) could verify whether that commit 
> actually caused the regression they saw.

We can reliably trigger the issue with a certain load pattern on a certain hardware.

I've compiled 6eb045e092ef  and got (as with other affected kernels) "controller is offline: status code 0x6100c" after 15 minutes of the test load.
I've compiled 6eb045e092ef^ and the load is running for 3 1/2 hours now.

So you hit it.

Don: good news, I was starting my own testing.
Thanks for your help

> Looking at that 6eb045e092ef, I notice this hunk:
>
>
> -       busy = atomic_inc_return(&shost->host_busy) - 1;
>          if (atomic_read(&shost->host_blocked) > 0) {
> -               if (busy)
> +               if (scsi_host_busy(shost) > 0)
>                          goto starved;
>
> Before 6eb045e092ef, the busy count was incremented with membarrier 
> before looking at "host_blocked". The new code does this instead:
>
> @ -1403,6 +1400,8 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
>                  spin_unlock_irq(shost->host_lock);
>          }
>
> +       __set_bit(SCMD_STATE_INFLIGHT, &cmd->state);
> +
>
> but it happens *after* the "host_blocked" check. Could that perhaps 
> have caused the regression?

I'm not into this and can't comment on that. But if you need me to test any patch for verification, I'll certainly can do that.

Best
   Donald

>
>
> Thanks
> Martin
>
Martin Wilck Jan. 20, 2021, 6:35 p.m. UTC | #10
On Wed, 2021-01-20 at 17:42 +0100, Donald Buczek wrote:
> 
> I'm not into this and can't comment on that. But if you need me to
> test any patch for verification, I'll certainly can do that.

I'll send an *experimental* patch.

Thanks
Martin
Don Brace Feb. 10, 2021, 3:27 p.m. UTC | #11
-----Original Message-----
From: John Garry [mailto:john.garry@huawei.com] 
Subject: Re: [PATCH V3 15/25] smartpqi: fix host qdepth limit


I think that this can alternatively be solved by setting .host_tagset flag.

Thanks,
John

Don: John, can I add a Suggested-by tag for you for my new patch smartpqi-use-host-wide-tagspace?
John Garry Feb. 10, 2021, 3:42 p.m. UTC | #12
On 10/02/2021 15:27, Don.Brace@microchip.com wrote:
> -----Original Message-----
> From: John Garry [mailto:john.garry@huawei.com]
> Subject: Re: [PATCH V3 15/25] smartpqi: fix host qdepth limit
> 
> 
> I think that this can alternatively be solved by setting .host_tagset flag.
> 
> Thanks,
> John
> 
> Don: John, can I add a Suggested-by tag for you for my new patch smartpqi-use-host-wide-tagspace?

I don't mind. And I think that Ming had the same idea.

Thanks,
John
Don Brace Feb. 10, 2021, 4:29 p.m. UTC | #13
-----Original Message-----
From: John Garry [mailto:john.garry@huawei.com] 
Subject: Re: [PATCH V3 15/25] smartpqi: fix host qdepth limit

>
>
> I think that this can alternatively be solved by setting .host_tagset flag.
>
> Thanks,
> John
>
> Don: John, can I add a Suggested-by tag for you for my new patch smartpqi-use-host-wide-tagspace?

I don't mind. And I think that Ming had the same idea.

Thanks,
John

Don: Thanks for reminding me. Ming, can I add your Suggested-by tag?
Paul Menzel March 29, 2021, 9:15 p.m. UTC | #14
Dear Linüx folks,


Am 10.02.21 um 17:29 schrieb Don.Brace@microchip.com:
> -----Original Message-----
> From: John Garry [mailto:john.garry@huawei.com]
> Subject: Re: [PATCH V3 15/25] smartpqi: fix host qdepth limit
> 
>> I think that this can alternatively be solved by setting .host_tagset flag.
>>
>> Thanks,
>> John
>>
>> Don: John, can I add a Suggested-by tag for you for my new patch smartpqi-use-host-wide-tagspace?
> 
> I don't mind. And I think that Ming had the same idea.

> Don: Thanks for reminding me. Ming, can I add your Suggested-by tag?

It looks like, iterations 4 and 5 of the patch series have been posted 
in the meantime. Unfortunately without the reporters and discussion 
participants in Cc. Linux upstream is still broken since version 5.5.


Kind regards,

Paul


[1]: 
https://lore.kernel.org/linux-scsi/161540568064.19430.11157730901022265360.stgit@brunhilda/
[2]: 
https://lore.kernel.org/linux-scsi/161549045434.25025.17473629602756431540.stgit@brunhilda/
Paul Menzel March 29, 2021, 9:16 p.m. UTC | #15
[Resent from correct address.]

Am 29.03.21 um 23:15 schrieb Paul Menzel:
> Dear Linüx folks,
> 
> 
> Am 10.02.21 um 17:29 schrieb Don.Brace@microchip.com:
>> -----Original Message-----
>> From: John Garry [mailto:john.garry@huawei.com]
>> Subject: Re: [PATCH V3 15/25] smartpqi: fix host qdepth limit
>>
>>> I think that this can alternatively be solved by setting .host_tagset 
>>> flag.
>>>
>>> Thanks,
>>> John
>>>
>>> Don: John, can I add a Suggested-by tag for you for my new patch 
>>> smartpqi-use-host-wide-tagspace?
>>
>> I don't mind. And I think that Ming had the same idea.
> 
>> Don: Thanks for reminding me. Ming, can I add your Suggested-by tag?
> 
> It looks like, iterations 4 and 5 of the patch series have been posted 
> in the meantime. Unfortunately without the reporters and discussion 
> participants in Cc. Linux upstream is still broken since version 5.5.
> 
> 
> Kind regards,
> 
> Paul
> 
> 
> [1]: https://lore.kernel.org/linux-scsi/161540568064.19430.11157730901022265360.stgit@brunhilda/
> [2]: https://lore.kernel.org/linux-scsi/161549045434.25025.17473629602756431540.stgit@brunhilda/
Donald Buczek March 30, 2021, 2:37 p.m. UTC | #16
Dear Paul,

On 29.03.21 23:16, Paul Menzel wrote:
> [Resent from correct address.]
> 
> Am 29.03.21 um 23:15 schrieb Paul Menzel:
>> Dear Linüx folks,
>>
>>
>> Am 10.02.21 um 17:29 schrieb Don.Brace@microchip.com:
>>> -----Original Message-----
>>> From: John Garry [mailto:john.garry@huawei.com]
>>> Subject: Re: [PATCH V3 15/25] smartpqi: fix host qdepth limit
>>>
>>>> I think that this can alternatively be solved by setting .host_tagset flag.
>>>>
>>>> Thanks,
>>>> John
>>>>
>>>> Don: John, can I add a Suggested-by tag for you for my new patch smartpqi-use-host-wide-tagspace?
>>>
>>> I don't mind. And I think that Ming had the same idea.
>>
>>> Don: Thanks for reminding me. Ming, can I add your Suggested-by tag?
>>
>> It looks like, iterations 4 and 5 of the patch series have been posted in the meantime. Unfortunately without the reporters and discussion participants in Cc. Linux upstream is still broken since version 5.5.

When "smartpqi: use host wide tagspace" [1] goes into mainline, we can submit it to stable, if nobody else does. This fixes the original problem and we got a patch with the same code change running in our 5.10 kernels.

Best
   Donald

[1]: https://lore.kernel.org/linux-scsi/161549369787.25025.8975999483518581619.stgit@brunhilda/

>>
>>
>> Kind regards,
>>
>> Paul
>>
>>
>> [1]: https://lore.kernel.org/linux-scsi/161540568064.19430.11157730901022265360.stgit@brunhilda/
>> [2]: https://lore.kernel.org/linux-scsi/161549045434.25025.17473629602756431540.stgit@brunhilda/
diff mbox series

Patch

diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h
index 0b94c755a74c..c3b103b15924 100644
--- a/drivers/scsi/smartpqi/smartpqi.h
+++ b/drivers/scsi/smartpqi/smartpqi.h
@@ -1345,6 +1345,8 @@  struct pqi_ctrl_info {
 	struct work_struct ofa_quiesce_work;
 	u32		ofa_bytes_requested;
 	u16		ofa_cancel_reason;
+
+	atomic_t	total_scmds_outstanding;
 };
 
 enum pqi_ctrl_mode {
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 082b17e9bd80..4e088f47d95f 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -5578,6 +5578,8 @@  static inline bool pqi_is_bypass_eligible_request(struct scsi_cmnd *scmd)
 void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
 {
 	struct pqi_scsi_dev *device;
+	struct pqi_ctrl_info *ctrl_info;
+	struct Scsi_Host *shost;
 
 	if (!scmd->device) {
 		set_host_byte(scmd, DID_NO_CONNECT);
@@ -5590,7 +5592,11 @@  void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
 		return;
 	}
 
+	shost = scmd->device->host;
+	ctrl_info = shost_to_hba(shost);
+
 	atomic_dec(&device->scsi_cmds_outstanding);
+	atomic_dec(&ctrl_info->total_scmds_outstanding);
 }
 
 static bool pqi_is_parity_write_stream(struct pqi_ctrl_info *ctrl_info,
@@ -5678,6 +5684,7 @@  static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
 	bool raid_bypassed;
 
 	device = scmd->device->hostdata;
+	ctrl_info = shost_to_hba(shost);
 
 	if (!device) {
 		set_host_byte(scmd, DID_NO_CONNECT);
@@ -5686,8 +5693,11 @@  static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
 	}
 
 	atomic_inc(&device->scsi_cmds_outstanding);
-
-	ctrl_info = shost_to_hba(shost);
+	if (atomic_inc_return(&ctrl_info->total_scmds_outstanding) >
+		ctrl_info->scsi_ml_can_queue) {
+		rc = SCSI_MLQUEUE_HOST_BUSY;
+		goto out;
+	}
 
 	if (pqi_ctrl_offline(ctrl_info) || pqi_device_in_remove(device)) {
 		set_host_byte(scmd, DID_NO_CONNECT);
@@ -5730,8 +5740,10 @@  static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
 	}
 
 out:
-	if (rc)
+	if (rc) {
 		atomic_dec(&device->scsi_cmds_outstanding);
+		atomic_dec(&ctrl_info->total_scmds_outstanding);
+	}
 
 	return rc;
 }
@@ -8054,6 +8066,7 @@  static struct pqi_ctrl_info *pqi_alloc_ctrl_info(int numa_node)
 
 	INIT_WORK(&ctrl_info->event_work, pqi_event_worker);
 	atomic_set(&ctrl_info->num_interrupts, 0);
+	atomic_set(&ctrl_info->total_scmds_outstanding, 0);
 
 	INIT_DELAYED_WORK(&ctrl_info->rescan_work, pqi_rescan_worker);
 	INIT_DELAYED_WORK(&ctrl_info->update_time_work, pqi_update_time_worker);