diff mbox series

[RFC,v3] scsi: ufs: Quiesce all scsi devices before shutdown

Message ID 20200706132218.21171-1-stanley.chu@mediatek.com (mailing list archive)
State New, archived
Headers show
Series [RFC,v3] scsi: ufs: Quiesce all scsi devices before shutdown | expand

Commit Message

Stanley Chu July 6, 2020, 1:22 p.m. UTC
Currently, I/O requests can still be submitted to the UFS device while
UFS is executing its shutdown flow. This may lead to races such as the
scenario below, and the system may eventually crash due to unclocked
register accesses.

To fix this kind of issue, specifically quiesce all SCSI devices
before UFS shutdown to block all I/O requests sent from the block
layer.

Example of racing scenario: While UFS device is runtime-suspended

Thread #1: Executing UFS shutdown flow, e.g.,
           ufshcd_suspend(UFS_SHUTDOWN_PM)
Thread #2: Executing runtime resume flow triggered by I/O request,
           e.g., ufshcd_resume(UFS_RUNTIME_PM)

This breaks the assumption that UFS PM flows cannot run concurrently,
so unexpected racing behavior may happen.

Signed-off-by: Stanley Chu <stanley.chu@mediatek.com>
---
 drivers/scsi/ufs/ufshcd.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

Comments

Stanley Chu July 12, 2020, 1:31 a.m. UTC | #1
Hi Bart, Avri,

May I know if you have any suggestions for this RFC fix?

Much appreciated : )

On Mon, 2020-07-06 at 21:22 +0800, Stanley Chu wrote:
> Currently I/O request could be still submitted to UFS device while
> UFS is working on shutdown flow. This may lead to racing as below
> scenarios and finally system may crash due to unclocked register
> accesses.
> 
> To fix this kind of issues, specifically quiesce all SCSI devices
> before UFS shutdown to block all I/O request sending from block
> layer.
> 
> Example of racing scenario: While UFS device is runtime-suspended
> 
> Thread #1: Executing UFS shutdown flow, e.g.,
>            ufshcd_suspend(UFS_SHUTDOWN_PM)
> Thread #2: Executing runtime resume flow triggered by I/O request,
>            e.g., ufshcd_resume(UFS_RUNTIME_PM)
> 
> This breaks the assumption that UFS PM flows can not be running
> concurrently and some unexpected racing behavior may happen.
> 
> Signed-off-by: Stanley Chu <stanley.chu@mediatek.com>
> ---
>  drivers/scsi/ufs/ufshcd.c | 38 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 38 insertions(+)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 59358bb75014..104173c03492 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -158,6 +158,12 @@ struct ufs_pm_lvl_states ufs_pm_lvl_states[] = {
>  	{UFS_POWERDOWN_PWR_MODE, UIC_LINK_OFF_STATE},
>  };
>  
> +#define ufshcd_scsi_for_each_sdev(fn) \
> +	list_for_each_entry(starget, &hba->host->__targets, siblings) { \
> +		__starget_for_each_device(starget, NULL, \
> +					  fn); \
> +	}
> +
>  static inline enum ufs_dev_pwr_mode
>  ufs_get_pm_lvl_to_dev_pwr_mode(enum ufs_pm_level lvl)
>  {
> @@ -8588,6 +8594,19 @@ int ufshcd_runtime_idle(struct ufs_hba *hba)
>  }
>  EXPORT_SYMBOL(ufshcd_runtime_idle);
>  
> +static void ufshcd_cleanup_queue(struct scsi_device *sdev, void *data)
> +{
> +	if (sdev->request_queue)
> +		blk_cleanup_queue(sdev->request_queue);
> +}
> +
> +static void ufshcd_quiece_sdev(struct scsi_device *sdev, void *data)
> +{
> +	/* Suspended devices are already quiecsed and shall be skipped */
> +	if (!pm_runtime_suspended(&sdev->sdev_gendev))
> +		scsi_device_quiesce(sdev);
> +}
> +
>  /**
>   * ufshcd_shutdown - shutdown routine
>   * @hba: per adapter instance
> @@ -8599,6 +8618,7 @@ EXPORT_SYMBOL(ufshcd_runtime_idle);
>  int ufshcd_shutdown(struct ufs_hba *hba)
>  {
>  	int ret = 0;
> +	struct scsi_target *starget;
>  
>  	if (!hba->is_powered)
>  		goto out;
> @@ -8612,7 +8632,25 @@ int ufshcd_shutdown(struct ufs_hba *hba)
>  			goto out;
>  	}
>  
> +	/*
> +	 * Quiesce all SCSI devices to prevent any non-PM requests sending
> +	 * from block layer during and after shutdown.
> +	 *
> +	 * Here we can not use blk_cleanup_queue() since PM requests
> +	 * (with BLK_MQ_REQ_PREEMPT flag) are still required to be sent
> +	 * through block layer. Therefore SCSI command queued after the
> +	 * scsi_target_quiesce() call returned will block until
> +	 * blk_cleanup_queue() is called.
> +	 *
> +	 * Besides, scsi_target_"un"quiesce (e.g., scsi_target_resume) can
> +	 * be ignored since shutdown is one-way flow.
> +	 */
> +	ufshcd_scsi_for_each_sdev(ufshcd_quiece_sdev);
> +
>  	ret = ufshcd_suspend(hba, UFS_SHUTDOWN_PM);
> +
> +	/* Set queue as dying to not block queueing commands */
> +	ufshcd_scsi_for_each_sdev(ufshcd_cleanup_queue);
>  out:
>  	if (ret)
>  		dev_err(hba->dev, "%s failed, err %d\n", __func__, ret);
Bart Van Assche July 12, 2020, 3:21 a.m. UTC | #2
On 2020-07-06 06:22, Stanley Chu wrote:
> +static void ufshcd_cleanup_queue(struct scsi_device *sdev, void *data)
> +{
> +	if (sdev->request_queue)
> +		blk_cleanup_queue(sdev->request_queue);
> +}

No SCSI LLD should ever call blk_cleanup_queue() directly for
sdev->request_queue. Only the SCSI core should call blk_cleanup_queue()
directly for that queue.

>  int ufshcd_shutdown(struct ufs_hba *hba)
>  {
>  	int ret = 0;
> +	struct scsi_target *starget;
>  
>  	if (!hba->is_powered)
>  		goto out;
> @@ -8612,7 +8632,25 @@ int ufshcd_shutdown(struct ufs_hba *hba)
>  			goto out;
>  	}
>  
> +	/*
> +	 * Quiesce all SCSI devices to prevent any non-PM requests sending
> +	 * from block layer during and after shutdown.
> +	 *
> +	 * Here we can not use blk_cleanup_queue() since PM requests
> +	 * (with BLK_MQ_REQ_PREEMPT flag) are still required to be sent
> +	 * through block layer. Therefore SCSI command queued after the
> +	 * scsi_target_quiesce() call returned will block until
> +	 * blk_cleanup_queue() is called.
> +	 *
> +	 * Besides, scsi_target_"un"quiesce (e.g., scsi_target_resume) can
> +	 * be ignored since shutdown is one-way flow.
> +	 */
> +	ufshcd_scsi_for_each_sdev(ufshcd_quiece_sdev);
> +
>  	ret = ufshcd_suspend(hba, UFS_SHUTDOWN_PM);
> +
> +	/* Set queue as dying to not block queueing commands */
> +	ufshcd_scsi_for_each_sdev(ufshcd_cleanup_queue);
>  out:
>  	if (ret)
>  		dev_err(hba->dev, "%s failed, err %d\n", __func__, ret);
> 

What is the purpose of ufshcd_shutdown()? Why does this function exist?
How about removing the calls to ufshcd_shutdown() and invoking power down
code from inside sd_suspend_common() instead?

Thanks,

Bart.
Stanley Chu July 22, 2020, 9:18 a.m. UTC | #3
Hi Bart,

On Sat, 2020-07-11 at 20:21 -0700, Bart Van Assche wrote:
> On 2020-07-06 06:22, Stanley Chu wrote:
> > +static void ufshcd_cleanup_queue(struct scsi_device *sdev, void *data)
> > +{
> > +	if (sdev->request_queue)
> > +		blk_cleanup_queue(sdev->request_queue);
> > +}
> 
> No SCSI LLD should ever call blk_cleanup_queue() directly for
> sdev->request_queue. Only the SCSI core should call blk_cleanup_queue()
> directly for that queue.

Got it.

So may I focus on fixing the race first by only quiescing all SCSI devices,
without touching blk_cleanup_queue() in the UFS driver, just like v2?


> >  int ufshcd_shutdown(struct ufs_hba *hba)
> >  {
> >  	int ret = 0;
> > +	struct scsi_target *starget;
> >  
> >  	if (!hba->is_powered)
> >  		goto out;
> > @@ -8612,7 +8632,25 @@ int ufshcd_shutdown(struct ufs_hba *hba)
> >  			goto out;
> >  	}
> >  
> > +	/*
> > +	 * Quiesce all SCSI devices to prevent any non-PM requests sending
> > +	 * from block layer during and after shutdown.
> > +	 *
> > +	 * Here we can not use blk_cleanup_queue() since PM requests
> > +	 * (with BLK_MQ_REQ_PREEMPT flag) are still required to be sent
> > +	 * through block layer. Therefore SCSI command queued after the
> > +	 * scsi_target_quiesce() call returned will block until
> > +	 * blk_cleanup_queue() is called.
> > +	 *
> > +	 * Besides, scsi_target_"un"quiesce (e.g., scsi_target_resume) can
> > +	 * be ignored since shutdown is one-way flow.
> > +	 */
> > +	ufshcd_scsi_for_each_sdev(ufshcd_quiece_sdev);
> > +
> >  	ret = ufshcd_suspend(hba, UFS_SHUTDOWN_PM);
> > +
> > +	/* Set queue as dying to not block queueing commands */
> > +	ufshcd_scsi_for_each_sdev(ufshcd_cleanup_queue);
> >  out:
> >  	if (ret)
> >  		dev_err(hba->dev, "%s failed, err %d\n", __func__, ret);
> > 
> 
> What is the purpose of ufshcd_shutdown()? Why does this function exist?
> How about removing the calls to ufshcd_shutdown() and invoking power down
> code from inside sd_suspend_common() instead?

ufshcd_shutdown() configures the following things, which differ from or go
beyond what sd_suspend_common() can do now:

- Set link as OFF state
- Regulator and clock toggling according to required low-power state for
shutdown
- Auto BKOP toggling
- Vendor-specific shutdown flow ...etc.

Therefore the UFS shutdown callback would still be required.

Thanks,
Stanley Chu
Avri Altman July 27, 2020, 10:46 a.m. UTC | #4
> 
> Hi Bart,
> 
> On Sat, 2020-07-11 at 20:21 -0700, Bart Van Assche wrote:
> > On 2020-07-06 06:22, Stanley Chu wrote:
> > > +static void ufshcd_cleanup_queue(struct scsi_device *sdev, void *data)
> > > +{
> > > +   if (sdev->request_queue)
> > > +           blk_cleanup_queue(sdev->request_queue);
> > > +}
> >
> > No SCSI LLD should ever call blk_cleanup_queue() directly for
> > sdev->request_queue. Only the SCSI core should call blk_cleanup_queue()
> > directly for that queue.
> 
> Got it.
> 
> So may I focus on fixing racing first by quiecsing all SCSI devices only
> and do not touch blk_cleanup_queue() in UFS driver, just like v2?
> 
> 
> > >  int ufshcd_shutdown(struct ufs_hba *hba)
> > >  {
> > >     int ret = 0;
> > > +   struct scsi_target *starget;
> > >
> > >     if (!hba->is_powered)
> > >             goto out;
> > > @@ -8612,7 +8632,25 @@ int ufshcd_shutdown(struct ufs_hba *hba)
> > >                     goto out;
> > >     }
> > >
> > > +   /*
> > > +    * Quiesce all SCSI devices to prevent any non-PM requests sending
> > > +    * from block layer during and after shutdown.
> > > +    *
> > > +    * Here we can not use blk_cleanup_queue() since PM requests
> > > +    * (with BLK_MQ_REQ_PREEMPT flag) are still required to be sent
> > > +    * through block layer. Therefore SCSI command queued after the
> > > +    * scsi_target_quiesce() call returned will block until
> > > +    * blk_cleanup_queue() is called.
> > > +    *
> > > +    * Besides, scsi_target_"un"quiesce (e.g., scsi_target_resume) can
> > > +    * be ignored since shutdown is one-way flow.
> > > +    */
> > > +   ufshcd_scsi_for_each_sdev(ufshcd_quiece_sdev);
> > > +
> > >     ret = ufshcd_suspend(hba, UFS_SHUTDOWN_PM);
> > > +
> > > +   /* Set queue as dying to not block queueing commands */
> > > +   ufshcd_scsi_for_each_sdev(ufshcd_cleanup_queue);
> > >  out:
> > >     if (ret)
> > >             dev_err(hba->dev, "%s failed, err %d\n", __func__, ret);
> > >
> >
> > What is the purpose of ufshcd_shutdown()? Why does this function exist?
> > How about removing the calls to ufshcd_shutdown() and invoking power
> down
> > code from inside sd_suspend_common() instead?
> 
> ufshcd_shutdown() configures below things different from or more than
> what sd_suspend_common() can do now,
> 
> - Set link as OFF state
> - Regulator and clock toggling according to required low-power state for
> shutdown
> - Auto BKOP toggling
> - Vendor-specific shutdown flow ...etc.
> 
> Therefore UFS shutdown callback would be still required.
And this is also why each chipset vendor implements its own dev_pm_ops.

Thanks,
Avri

> 
> Thanks,
> Stanley Chu
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 59358bb75014..104173c03492 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -158,6 +158,12 @@  struct ufs_pm_lvl_states ufs_pm_lvl_states[] = {
 	{UFS_POWERDOWN_PWR_MODE, UIC_LINK_OFF_STATE},
 };
 
+#define ufshcd_scsi_for_each_sdev(fn) \
+	list_for_each_entry(starget, &hba->host->__targets, siblings) { \
+		__starget_for_each_device(starget, NULL, \
+					  fn); \
+	}
+
 static inline enum ufs_dev_pwr_mode
 ufs_get_pm_lvl_to_dev_pwr_mode(enum ufs_pm_level lvl)
 {
@@ -8588,6 +8594,19 @@  int ufshcd_runtime_idle(struct ufs_hba *hba)
 }
 EXPORT_SYMBOL(ufshcd_runtime_idle);
 
+static void ufshcd_cleanup_queue(struct scsi_device *sdev, void *data)
+{
+	if (sdev->request_queue)
+		blk_cleanup_queue(sdev->request_queue);
+}
+
+static void ufshcd_quiece_sdev(struct scsi_device *sdev, void *data)
+{
+	/* Suspended devices are already quiecsed and shall be skipped */
+	if (!pm_runtime_suspended(&sdev->sdev_gendev))
+		scsi_device_quiesce(sdev);
+}
+
 /**
  * ufshcd_shutdown - shutdown routine
  * @hba: per adapter instance
@@ -8599,6 +8618,7 @@  EXPORT_SYMBOL(ufshcd_runtime_idle);
 int ufshcd_shutdown(struct ufs_hba *hba)
 {
 	int ret = 0;
+	struct scsi_target *starget;
 
 	if (!hba->is_powered)
 		goto out;
@@ -8612,7 +8632,25 @@  int ufshcd_shutdown(struct ufs_hba *hba)
 			goto out;
 	}
 
+	/*
+	 * Quiesce all SCSI devices to prevent any non-PM requests sending
+	 * from block layer during and after shutdown.
+	 *
+	 * Here we can not use blk_cleanup_queue() since PM requests
+	 * (with BLK_MQ_REQ_PREEMPT flag) are still required to be sent
+	 * through block layer. Therefore SCSI command queued after the
+	 * scsi_target_quiesce() call returned will block until
+	 * blk_cleanup_queue() is called.
+	 *
+	 * Besides, scsi_target_"un"quiesce (e.g., scsi_target_resume) can
+	 * be ignored since shutdown is one-way flow.
+	 */
+	ufshcd_scsi_for_each_sdev(ufshcd_quiece_sdev);
+
 	ret = ufshcd_suspend(hba, UFS_SHUTDOWN_PM);
+
+	/* Set queue as dying to not block queueing commands */
+	ufshcd_scsi_for_each_sdev(ufshcd_cleanup_queue);
 out:
 	if (ret)
 		dev_err(hba->dev, "%s failed, err %d\n", __func__, ret);