diff mbox series

[2/2] scsi: ufs: Do not exit ufshcd_err_handler() unless operational or dead

Message ID 20211002154550.128511-3-adrian.hunter@intel.com (mailing list archive)
State Accepted
Headers show
Series scsi: ufs: Do not exit reset or error functions unless operational | expand

Commit Message

Adrian Hunter Oct. 2, 2021, 3:45 p.m. UTC
Callers of ufshcd_err_handler() expect it to return in an operational
state. However, the code does not check the state before exiting.

Add a check for the state and perform retries until either success or the
maximum number of retries is reached.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
 drivers/scsi/ufs/ufshcd.c | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

Comments

Avri Altman Oct. 3, 2021, 6:47 a.m. UTC | #1
> Callers of ufshcd_err_handler() expect it to return in an operational
> state. However, the code does not check the state before exiting.
> 
> Add a check for the state and perform retries until either success or the
> maximum number of retries is reached.
> 
> Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
> ---
>  drivers/scsi/ufs/ufshcd.c | 30 +++++++++++++++++++++++++-----
>  1 file changed, 25 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 16492779d3a6..33f55ecf43de 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -64,6 +64,9 @@
>  /* maximum number of reset retries before giving up */
>  #define MAX_HOST_RESET_RETRIES 5
> 
> +/* Maximum number of error handler retries before giving up */
> +#define MAX_ERR_HANDLER_RETRIES 5
> +
>  /* Expose the flag value from utp_upiu_query.value */
>  #define MASK_QUERY_UPIU_FLAG_LOC 0xFF
> 
> @@ -6070,12 +6073,14 @@ static bool
> ufshcd_is_pwr_mode_restore_needed(struct ufs_hba *hba)
>  static void ufshcd_err_handler(struct Scsi_Host *host)
>  {
>         struct ufs_hba *hba = shost_priv(host);
> +       int retries = MAX_ERR_HANDLER_RETRIES;
>         unsigned long flags;
> -       bool err_xfer = false;
> -       bool err_tm = false;
> -       int err = 0, pmc_err;
> -       int tag;
> -       bool needs_reset = false, needs_restore = false;
> +       bool needs_restore;
> +       bool needs_reset;
> +       bool err_xfer;
> +       bool err_tm;
> +       int pmc_err;
> +       int tag;
> 
>         down(&hba->host_sem);
>         spin_lock_irqsave(hba->host->host_lock, flags);
> @@ -6093,6 +6098,12 @@ static void ufshcd_err_handler(struct Scsi_Host
> *host)
>         /* Complete requests that have door-bell cleared by h/w */
>         ufshcd_complete_requests(hba);
>         spin_lock_irqsave(hba->host->host_lock, flags);
> +again:
> +       needs_restore = false;
> +       needs_reset = false;
> +       err_xfer = false;
> +       err_tm = false;
> +
>         if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
>                 hba->ufshcd_state = UFSHCD_STATE_RESET;
>         /*
> @@ -6213,6 +6224,8 @@ static void ufshcd_err_handler(struct Scsi_Host
> *host)
>  do_reset:
>         /* Fatal errors need reset */
>         if (needs_reset) {
> +               int err;
> +
>                 hba->force_reset = false;
>                 spin_unlock_irqrestore(hba->host->host_lock, flags);
>                 err = ufshcd_reset_and_restore(hba);
> @@ -6232,6 +6245,13 @@ static void ufshcd_err_handler(struct Scsi_Host
> *host)
>                         dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x
> saved_uic_err 0x%x",
>                             __func__, hba->saved_err, hba->saved_uic_err);
>         }
> +       /* Exit in an operational state or dead */
> +       if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL &&
> +           hba->ufshcd_state != UFSHCD_STATE_ERROR) {
> +               if (--retries)
> +                       goto again;
Why do you need to retry here as well?
ufshcd_reset_and_restore() already exits only if operational or dead?

Thanks,
Avri

> +               hba->ufshcd_state = UFSHCD_STATE_ERROR;
> +       }
>         ufshcd_clear_eh_in_progress(hba);
>         spin_unlock_irqrestore(hba->host->host_lock, flags);
>         ufshcd_err_handling_unprepare(hba);
> --
> 2.25.1
Adrian Hunter Oct. 3, 2021, 7:10 a.m. UTC | #2
On 03/10/2021 09:47, Avri Altman wrote:
>> Callers of ufshcd_err_handler() expect it to return in an operational
>> state. However, the code does not check the state before exiting.
>>
>> Add a check for the state and perform retries until either success or the
>> maximum number of retries is reached.
>>
>> Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
>> ---
>>  drivers/scsi/ufs/ufshcd.c | 30 +++++++++++++++++++++++++-----
>>  1 file changed, 25 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>> index 16492779d3a6..33f55ecf43de 100644
>> --- a/drivers/scsi/ufs/ufshcd.c
>> +++ b/drivers/scsi/ufs/ufshcd.c
>> @@ -64,6 +64,9 @@
>>  /* maximum number of reset retries before giving up */
>>  #define MAX_HOST_RESET_RETRIES 5
>>
>> +/* Maximum number of error handler retries before giving up */
>> +#define MAX_ERR_HANDLER_RETRIES 5
>> +
>>  /* Expose the flag value from utp_upiu_query.value */
>>  #define MASK_QUERY_UPIU_FLAG_LOC 0xFF
>>
>> @@ -6070,12 +6073,14 @@ static bool
>> ufshcd_is_pwr_mode_restore_needed(struct ufs_hba *hba)
>>  static void ufshcd_err_handler(struct Scsi_Host *host)
>>  {
>>         struct ufs_hba *hba = shost_priv(host);
>> +       int retries = MAX_ERR_HANDLER_RETRIES;
>>         unsigned long flags;
>> -       bool err_xfer = false;
>> -       bool err_tm = false;
>> -       int err = 0, pmc_err;
>> -       int tag;
>> -       bool needs_reset = false, needs_restore = false;
>> +       bool needs_restore;
>> +       bool needs_reset;
>> +       bool err_xfer;
>> +       bool err_tm;
>> +       int pmc_err;
>> +       int tag;
>>
>>         down(&hba->host_sem);
>>         spin_lock_irqsave(hba->host->host_lock, flags);
>> @@ -6093,6 +6098,12 @@ static void ufshcd_err_handler(struct Scsi_Host
>> *host)
>>         /* Complete requests that have door-bell cleared by h/w */
>>         ufshcd_complete_requests(hba);
>>         spin_lock_irqsave(hba->host->host_lock, flags);
>> +again:
>> +       needs_restore = false;
>> +       needs_reset = false;
>> +       err_xfer = false;
>> +       err_tm = false;
>> +
>>         if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
>>                 hba->ufshcd_state = UFSHCD_STATE_RESET;
>>         /*
>> @@ -6213,6 +6224,8 @@ static void ufshcd_err_handler(struct Scsi_Host
>> *host)
>>  do_reset:
>>         /* Fatal errors need reset */
>>         if (needs_reset) {
>> +               int err;
>> +
>>                 hba->force_reset = false;
>>                 spin_unlock_irqrestore(hba->host->host_lock, flags);
>>                 err = ufshcd_reset_and_restore(hba);
>> @@ -6232,6 +6245,13 @@ static void ufshcd_err_handler(struct Scsi_Host
>> *host)
>>                         dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x
>> saved_uic_err 0x%x",
>>                             __func__, hba->saved_err, hba->saved_uic_err);
>>         }
>> +       /* Exit in an operational state or dead */
>> +       if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL &&
>> +           hba->ufshcd_state != UFSHCD_STATE_ERROR) {
>> +               if (--retries)
>> +                       goto again;
> Why do you need to retry here as well?

Thanks for looking at this.

It shouldn't hurt to retry bringing the device back to life.  The
alternative is UFSHCD_STATE_ERROR which means dead.

> ufshcd_reset_and_restore() already exits only if operational or dead?

ufshcd_reset_and_restore() isn't the only path.  There are also
ufshcd_quirk_dl_nac_errors() and ufshcd_config_pwr_mode() and in
the future perhaps others.

This seems the right place to ensure that the error handler
guarantees operational (or dead) status.

> 
> Thanks,
> Avri
> 
>> +               hba->ufshcd_state = UFSHCD_STATE_ERROR;
>> +       }
>>         ufshcd_clear_eh_in_progress(hba);
>>         spin_unlock_irqrestore(hba->host->host_lock, flags);
>>         ufshcd_err_handling_unprepare(hba);
>> --
>> 2.25.1
>
Avri Altman Oct. 3, 2021, 7:25 a.m. UTC | #3
> >>                 hba->force_reset = false;
> >>                 spin_unlock_irqrestore(hba->host->host_lock, flags);
> >>                 err = ufshcd_reset_and_restore(hba); @@ -6232,6
> >> +6245,13 @@ static void ufshcd_err_handler(struct Scsi_Host
> >> *host)
> >>                         dev_err_ratelimited(hba->dev, "%s: exit:
> >> saved_err 0x%x saved_uic_err 0x%x",
> >>                             __func__, hba->saved_err, hba->saved_uic_err);
> >>         }
> >> +       /* Exit in an operational state or dead */
> >> +       if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL &&
> >> +           hba->ufshcd_state != UFSHCD_STATE_ERROR) {
> >> +               if (--retries)
> >> +                       goto again;
> > Why do you need to retry here as well?
> 
> Thanks for looking at this.
> 
> It shouldn't hurt to retry bringing the device back to life.  The alternative is
> UFSHCD_STATE_ERROR which means dead.
> 
> > ufshcd_reset_and_restore() already exits only if operational or dead?
> 
> ufshcd_reset_and_restore() isn't the only path.  There are also
> ufshcd_quirk_dl_nac_errors() and ufshcd_config_pwr_mode() and in the
> future perhaps others.
> 
> This seems the right place to ensure that the error handler guarantees
> operational (or dead) status.
OK.  Thanks.
Avri
Avri Altman Oct. 3, 2021, 7:26 a.m. UTC | #4
> 
> Callers of ufshcd_err_handler() expect it to return in an operational
> state. However, the code does not check the state before exiting.
> 
> Add a check for the state and perform retries until either success or the
> maximum number of retries is reached.
> 
> Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Avri Altman <avri.altman@wdc.com>


> ---
>  drivers/scsi/ufs/ufshcd.c | 30 +++++++++++++++++++++++++-----
>  1 file changed, 25 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 16492779d3a6..33f55ecf43de 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -64,6 +64,9 @@
>  /* maximum number of reset retries before giving up */
>  #define MAX_HOST_RESET_RETRIES 5
> 
> +/* Maximum number of error handler retries before giving up */
> +#define MAX_ERR_HANDLER_RETRIES 5
> +
>  /* Expose the flag value from utp_upiu_query.value */
>  #define MASK_QUERY_UPIU_FLAG_LOC 0xFF
> 
> @@ -6070,12 +6073,14 @@ static bool
> ufshcd_is_pwr_mode_restore_needed(struct ufs_hba *hba)
>  static void ufshcd_err_handler(struct Scsi_Host *host)
>  {
>         struct ufs_hba *hba = shost_priv(host);
> +       int retries = MAX_ERR_HANDLER_RETRIES;
>         unsigned long flags;
> -       bool err_xfer = false;
> -       bool err_tm = false;
> -       int err = 0, pmc_err;
> -       int tag;
> -       bool needs_reset = false, needs_restore = false;
> +       bool needs_restore;
> +       bool needs_reset;
> +       bool err_xfer;
> +       bool err_tm;
> +       int pmc_err;
> +       int tag;
> 
>         down(&hba->host_sem);
>         spin_lock_irqsave(hba->host->host_lock, flags);
> @@ -6093,6 +6098,12 @@ static void ufshcd_err_handler(struct Scsi_Host
> *host)
>         /* Complete requests that have door-bell cleared by h/w */
>         ufshcd_complete_requests(hba);
>         spin_lock_irqsave(hba->host->host_lock, flags);
> +again:
> +       needs_restore = false;
> +       needs_reset = false;
> +       err_xfer = false;
> +       err_tm = false;
> +
>         if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
>                 hba->ufshcd_state = UFSHCD_STATE_RESET;
>         /*
> @@ -6213,6 +6224,8 @@ static void ufshcd_err_handler(struct Scsi_Host
> *host)
>  do_reset:
>         /* Fatal errors need reset */
>         if (needs_reset) {
> +               int err;
> +
>                 hba->force_reset = false;
>                 spin_unlock_irqrestore(hba->host->host_lock, flags);
>                 err = ufshcd_reset_and_restore(hba);
> @@ -6232,6 +6245,13 @@ static void ufshcd_err_handler(struct Scsi_Host
> *host)
>                         dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x
> saved_uic_err 0x%x",
>                             __func__, hba->saved_err, hba->saved_uic_err);
>         }
> +       /* Exit in an operational state or dead */
> +       if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL &&
> +           hba->ufshcd_state != UFSHCD_STATE_ERROR) {
> +               if (--retries)
> +                       goto again;
> +               hba->ufshcd_state = UFSHCD_STATE_ERROR;
> +       }
>         ufshcd_clear_eh_in_progress(hba);
>         spin_unlock_irqrestore(hba->host->host_lock, flags);
>         ufshcd_err_handling_unprepare(hba);
> --
> 2.25.1
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 16492779d3a6..33f55ecf43de 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -64,6 +64,9 @@ 
 /* maximum number of reset retries before giving up */
 #define MAX_HOST_RESET_RETRIES 5
 
+/* Maximum number of error handler retries before giving up */
+#define MAX_ERR_HANDLER_RETRIES 5
+
 /* Expose the flag value from utp_upiu_query.value */
 #define MASK_QUERY_UPIU_FLAG_LOC 0xFF
 
@@ -6070,12 +6073,14 @@  static bool ufshcd_is_pwr_mode_restore_needed(struct ufs_hba *hba)
 static void ufshcd_err_handler(struct Scsi_Host *host)
 {
 	struct ufs_hba *hba = shost_priv(host);
+	int retries = MAX_ERR_HANDLER_RETRIES;
 	unsigned long flags;
-	bool err_xfer = false;
-	bool err_tm = false;
-	int err = 0, pmc_err;
-	int tag;
-	bool needs_reset = false, needs_restore = false;
+	bool needs_restore;
+	bool needs_reset;
+	bool err_xfer;
+	bool err_tm;
+	int pmc_err;
+ 	int tag;
 
 	down(&hba->host_sem);
 	spin_lock_irqsave(hba->host->host_lock, flags);
@@ -6093,6 +6098,12 @@  static void ufshcd_err_handler(struct Scsi_Host *host)
 	/* Complete requests that have door-bell cleared by h/w */
 	ufshcd_complete_requests(hba);
 	spin_lock_irqsave(hba->host->host_lock, flags);
+again:
+	needs_restore = false;
+	needs_reset = false;
+	err_xfer = false;
+	err_tm = false;
+
 	if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
 		hba->ufshcd_state = UFSHCD_STATE_RESET;
 	/*
@@ -6213,6 +6224,8 @@  static void ufshcd_err_handler(struct Scsi_Host *host)
 do_reset:
 	/* Fatal errors need reset */
 	if (needs_reset) {
+		int err;
+
 		hba->force_reset = false;
 		spin_unlock_irqrestore(hba->host->host_lock, flags);
 		err = ufshcd_reset_and_restore(hba);
@@ -6232,6 +6245,13 @@  static void ufshcd_err_handler(struct Scsi_Host *host)
 			dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x saved_uic_err 0x%x",
 			    __func__, hba->saved_err, hba->saved_uic_err);
 	}
+	/* Exit in an operational state or dead */
+	if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL &&
+	    hba->ufshcd_state != UFSHCD_STATE_ERROR) {
+		if (--retries)
+			goto again;
+		hba->ufshcd_state = UFSHCD_STATE_ERROR;
+	}
 	ufshcd_clear_eh_in_progress(hba);
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 	ufshcd_err_handling_unprepare(hba);