Message ID | 20211002154550.128511-3-adrian.hunter@intel.com (mailing list archive) |
---|---|
State | Accepted |
Headers | show |
Series | scsi: ufs: Do not exit reset of error functions unless operational | expand |
> Callers of ufshcd_err_handler() expect it to return in an operational > state. However, the code does not check the state before exiting. > > Add a check for the state and perform retries until either success or the > maximum number of retries is reached. > > Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> > --- > drivers/scsi/ufs/ufshcd.c | 30 +++++++++++++++++++++++++----- > 1 file changed, 25 insertions(+), 5 deletions(-) > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c > index 16492779d3a6..33f55ecf43de 100644 > --- a/drivers/scsi/ufs/ufshcd.c > +++ b/drivers/scsi/ufs/ufshcd.c > @@ -64,6 +64,9 @@ > /* maximum number of reset retries before giving up */ > #define MAX_HOST_RESET_RETRIES 5 > > +/* Maximum number of error handler retries before giving up */ > +#define MAX_ERR_HANDLER_RETRIES 5 > + > /* Expose the flag value from utp_upiu_query.value */ > #define MASK_QUERY_UPIU_FLAG_LOC 0xFF > > @@ -6070,12 +6073,14 @@ static bool > ufshcd_is_pwr_mode_restore_needed(struct ufs_hba *hba) > static void ufshcd_err_handler(struct Scsi_Host *host) > { > struct ufs_hba *hba = shost_priv(host); > + int retries = MAX_ERR_HANDLER_RETRIES; > unsigned long flags; > - bool err_xfer = false; > - bool err_tm = false; > - int err = 0, pmc_err; > - int tag; > - bool needs_reset = false, needs_restore = false; > + bool needs_restore; > + bool needs_reset; > + bool err_xfer; > + bool err_tm; > + int pmc_err; > + int tag; > > down(&hba->host_sem); > spin_lock_irqsave(hba->host->host_lock, flags); > @@ -6093,6 +6098,12 @@ static void ufshcd_err_handler(struct Scsi_Host > *host) > /* Complete requests that have door-bell cleared by h/w */ > ufshcd_complete_requests(hba); > spin_lock_irqsave(hba->host->host_lock, flags); > +again: > + needs_restore = false; > + needs_reset = false; > + err_xfer = false; > + err_tm = false; > + > if (hba->ufshcd_state != UFSHCD_STATE_ERROR) > hba->ufshcd_state = UFSHCD_STATE_RESET; > /* > @@ -6213,6 +6224,8 @@ static void 
ufshcd_err_handler(struct Scsi_Host > *host) > do_reset: > /* Fatal errors need reset */ > if (needs_reset) { > + int err; > + > hba->force_reset = false; > spin_unlock_irqrestore(hba->host->host_lock, flags); > err = ufshcd_reset_and_restore(hba); > @@ -6232,6 +6245,13 @@ static void ufshcd_err_handler(struct Scsi_Host > *host) > dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x > saved_uic_err 0x%x", > __func__, hba->saved_err, hba->saved_uic_err); > } > + /* Exit in an operational state or dead */ > + if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL && > + hba->ufshcd_state != UFSHCD_STATE_ERROR) { > + if (--retries) > + goto again; Why do you need to retry here as well? ufshcd_reset_and_restore() already exits only if operational or dead? Thanks, Avri > + hba->ufshcd_state = UFSHCD_STATE_ERROR; > + } > ufshcd_clear_eh_in_progress(hba); > spin_unlock_irqrestore(hba->host->host_lock, flags); > ufshcd_err_handling_unprepare(hba); > -- > 2.25.1
On 03/10/2021 09:47, Avri Altman wrote: >> Callers of ufshcd_err_handler() expect it to return in an operational >> state. However, the code does not check the state before exiting. >> >> Add a check for the state and perform retries until either success or the >> maximum number of retries is reached. >> >> Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> >> --- >> drivers/scsi/ufs/ufshcd.c | 30 +++++++++++++++++++++++++----- >> 1 file changed, 25 insertions(+), 5 deletions(-) >> >> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c >> index 16492779d3a6..33f55ecf43de 100644 >> --- a/drivers/scsi/ufs/ufshcd.c >> +++ b/drivers/scsi/ufs/ufshcd.c >> @@ -64,6 +64,9 @@ >> /* maximum number of reset retries before giving up */ >> #define MAX_HOST_RESET_RETRIES 5 >> >> +/* Maximum number of error handler retries before giving up */ >> +#define MAX_ERR_HANDLER_RETRIES 5 >> + >> /* Expose the flag value from utp_upiu_query.value */ >> #define MASK_QUERY_UPIU_FLAG_LOC 0xFF >> >> @@ -6070,12 +6073,14 @@ static bool >> ufshcd_is_pwr_mode_restore_needed(struct ufs_hba *hba) >> static void ufshcd_err_handler(struct Scsi_Host *host) >> { >> struct ufs_hba *hba = shost_priv(host); >> + int retries = MAX_ERR_HANDLER_RETRIES; >> unsigned long flags; >> - bool err_xfer = false; >> - bool err_tm = false; >> - int err = 0, pmc_err; >> - int tag; >> - bool needs_reset = false, needs_restore = false; >> + bool needs_restore; >> + bool needs_reset; >> + bool err_xfer; >> + bool err_tm; >> + int pmc_err; >> + int tag; >> >> down(&hba->host_sem); >> spin_lock_irqsave(hba->host->host_lock, flags); >> @@ -6093,6 +6098,12 @@ static void ufshcd_err_handler(struct Scsi_Host >> *host) >> /* Complete requests that have door-bell cleared by h/w */ >> ufshcd_complete_requests(hba); >> spin_lock_irqsave(hba->host->host_lock, flags); >> +again: >> + needs_restore = false; >> + needs_reset = false; >> + err_xfer = false; >> + err_tm = false; >> + >> if (hba->ufshcd_state != 
UFSHCD_STATE_ERROR) >> hba->ufshcd_state = UFSHCD_STATE_RESET; >> /* >> @@ -6213,6 +6224,8 @@ static void ufshcd_err_handler(struct Scsi_Host >> *host) >> do_reset: >> /* Fatal errors need reset */ >> if (needs_reset) { >> + int err; >> + >> hba->force_reset = false; >> spin_unlock_irqrestore(hba->host->host_lock, flags); >> err = ufshcd_reset_and_restore(hba); >> @@ -6232,6 +6245,13 @@ static void ufshcd_err_handler(struct Scsi_Host >> *host) >> dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x >> saved_uic_err 0x%x", >> __func__, hba->saved_err, hba->saved_uic_err); >> } >> + /* Exit in an operational state or dead */ >> + if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL && >> + hba->ufshcd_state != UFSHCD_STATE_ERROR) { >> + if (--retries) >> + goto again; > Why do you need to retry here as well? Thanks for looking at this. It shouldn't hurt to retry bringing the device back to life. The alternative is UFSHCD_STATE_ERROR which means dead. > ufshcd_reset_and_restore() already exits only if operational or dead? ufshcd_reset_and_restore() isn't the only path. There are also ufshcd_quirk_dl_nac_errors() and ufshcd_config_pwr_mode() and in the future perhaps others. This seems the right place to ensure that the error handler guarantees operational (or dead) status. > > Thanks, > Avri > >> + hba->ufshcd_state = UFSHCD_STATE_ERROR; >> + } >> ufshcd_clear_eh_in_progress(hba); >> spin_unlock_irqrestore(hba->host->host_lock, flags); >> ufshcd_err_handling_unprepare(hba); >> -- >> 2.25.1 >
> >> hba->force_reset = false; > >> spin_unlock_irqrestore(hba->host->host_lock, flags); > >> err = ufshcd_reset_and_restore(hba); @@ -6232,6 > >> +6245,13 @@ static void ufshcd_err_handler(struct Scsi_Host > >> *host) > >> dev_err_ratelimited(hba->dev, "%s: exit: > >> saved_err 0x%x saved_uic_err 0x%x", > >> __func__, hba->saved_err, hba->saved_uic_err); > >> } > >> + /* Exit in an operational state or dead */ > >> + if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL && > >> + hba->ufshcd_state != UFSHCD_STATE_ERROR) { > >> + if (--retries) > >> + goto again; > > Why do you need to retry here as well? > > Thanks for looking at this. > > It shouldn't hurt to retry bringing the device back to life. The alternative is > UFSHCD_STATE_ERROR which means dead. > > > ufshcd_reset_and_restore() already exists only if operational or dead? > > ufshcd_reset_and_restore() isn't the only path. There are also > ufshcd_quirk_dl_nac_errors() and ufshcd_config_pwr_mode() and in the > future perhaps others. > > This seems the right place to ensure that the error handler guarantees > operational (or dead) status. OK. Thanks. Avri
> > Callers of ufshcd_err_handler() expect it to return in an operational > state. However, the code does not check the state before exiting. > > Add a check for the state and perform retries until either success or the > maximum number of retries is reached. > > Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Reviewed-by: Avri Altman <avri.altman@wdc.com> > --- > drivers/scsi/ufs/ufshcd.c | 30 +++++++++++++++++++++++++----- > 1 file changed, 25 insertions(+), 5 deletions(-) > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c > index 16492779d3a6..33f55ecf43de 100644 > --- a/drivers/scsi/ufs/ufshcd.c > +++ b/drivers/scsi/ufs/ufshcd.c > @@ -64,6 +64,9 @@ > /* maximum number of reset retries before giving up */ > #define MAX_HOST_RESET_RETRIES 5 > > +/* Maximum number of error handler retries before giving up */ > +#define MAX_ERR_HANDLER_RETRIES 5 > + > /* Expose the flag value from utp_upiu_query.value */ > #define MASK_QUERY_UPIU_FLAG_LOC 0xFF > > @@ -6070,12 +6073,14 @@ static bool > ufshcd_is_pwr_mode_restore_needed(struct ufs_hba *hba) > static void ufshcd_err_handler(struct Scsi_Host *host) > { > struct ufs_hba *hba = shost_priv(host); > + int retries = MAX_ERR_HANDLER_RETRIES; > unsigned long flags; > - bool err_xfer = false; > - bool err_tm = false; > - int err = 0, pmc_err; > - int tag; > - bool needs_reset = false, needs_restore = false; > + bool needs_restore; > + bool needs_reset; > + bool err_xfer; > + bool err_tm; > + int pmc_err; > + int tag; > > down(&hba->host_sem); > spin_lock_irqsave(hba->host->host_lock, flags); > @@ -6093,6 +6098,12 @@ static void ufshcd_err_handler(struct Scsi_Host > *host) > /* Complete requests that have door-bell cleared by h/w */ > ufshcd_complete_requests(hba); > spin_lock_irqsave(hba->host->host_lock, flags); > +again: > + needs_restore = false; > + needs_reset = false; > + err_xfer = false; > + err_tm = false; > + > if (hba->ufshcd_state != UFSHCD_STATE_ERROR) > hba->ufshcd_state = 
UFSHCD_STATE_RESET; > /* > @@ -6213,6 +6224,8 @@ static void ufshcd_err_handler(struct Scsi_Host > *host) > do_reset: > /* Fatal errors need reset */ > if (needs_reset) { > + int err; > + > hba->force_reset = false; > spin_unlock_irqrestore(hba->host->host_lock, flags); > err = ufshcd_reset_and_restore(hba); > @@ -6232,6 +6245,13 @@ static void ufshcd_err_handler(struct Scsi_Host > *host) > dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x > saved_uic_err 0x%x", > __func__, hba->saved_err, hba->saved_uic_err); > } > + /* Exit in an operational state or dead */ > + if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL && > + hba->ufshcd_state != UFSHCD_STATE_ERROR) { > + if (--retries) > + goto again; > + hba->ufshcd_state = UFSHCD_STATE_ERROR; > + } > ufshcd_clear_eh_in_progress(hba); > spin_unlock_irqrestore(hba->host->host_lock, flags); > ufshcd_err_handling_unprepare(hba); > -- > 2.25.1
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 16492779d3a6..33f55ecf43de 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -64,6 +64,9 @@ /* maximum number of reset retries before giving up */ #define MAX_HOST_RESET_RETRIES 5 +/* Maximum number of error handler retries before giving up */ +#define MAX_ERR_HANDLER_RETRIES 5 + /* Expose the flag value from utp_upiu_query.value */ #define MASK_QUERY_UPIU_FLAG_LOC 0xFF @@ -6070,12 +6073,14 @@ static bool ufshcd_is_pwr_mode_restore_needed(struct ufs_hba *hba) static void ufshcd_err_handler(struct Scsi_Host *host) { struct ufs_hba *hba = shost_priv(host); + int retries = MAX_ERR_HANDLER_RETRIES; unsigned long flags; - bool err_xfer = false; - bool err_tm = false; - int err = 0, pmc_err; - int tag; - bool needs_reset = false, needs_restore = false; + bool needs_restore; + bool needs_reset; + bool err_xfer; + bool err_tm; + int pmc_err; + int tag; down(&hba->host_sem); spin_lock_irqsave(hba->host->host_lock, flags); @@ -6093,6 +6098,12 @@ static void ufshcd_err_handler(struct Scsi_Host *host) /* Complete requests that have door-bell cleared by h/w */ ufshcd_complete_requests(hba); spin_lock_irqsave(hba->host->host_lock, flags); +again: + needs_restore = false; + needs_reset = false; + err_xfer = false; + err_tm = false; + if (hba->ufshcd_state != UFSHCD_STATE_ERROR) hba->ufshcd_state = UFSHCD_STATE_RESET; /* @@ -6213,6 +6224,8 @@ static void ufshcd_err_handler(struct Scsi_Host *host) do_reset: /* Fatal errors need reset */ if (needs_reset) { + int err; + hba->force_reset = false; spin_unlock_irqrestore(hba->host->host_lock, flags); err = ufshcd_reset_and_restore(hba); @@ -6232,6 +6245,13 @@ static void ufshcd_err_handler(struct Scsi_Host *host) dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x saved_uic_err 0x%x", __func__, hba->saved_err, hba->saved_uic_err); } + /* Exit in an operational state or dead */ + if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL && + 
hba->ufshcd_state != UFSHCD_STATE_ERROR) { + if (--retries) + goto again; + hba->ufshcd_state = UFSHCD_STATE_ERROR; + } ufshcd_clear_eh_in_progress(hba); spin_unlock_irqrestore(hba->host->host_lock, flags); ufshcd_err_handling_unprepare(hba);
Callers of ufshcd_err_handler() expect it to return in an operational state. However, the code does not check the state before exiting. Add a check for the state and perform retries until either success or the maximum number of retries is reached. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> --- drivers/scsi/ufs/ufshcd.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-)