diff mbox series

[RFC,4/4] ufs: Make host controller state change handling more systematic

Message ID 20210619005228.28569-5-bvanassche@acm.org (mailing list archive)
State Superseded
Headers show
Series UFS patches for Linux kernel v5.14 | expand

Commit Message

Bart Van Assche June 19, 2021, 12:52 a.m. UTC
Introduce a function that reports whether or not a controller state change
is allowed instead of duplicating these checks in every context that
changes the host controller state. This patch includes a behavior change:
ufshcd_link_recovery() no longer can change the host controller state from
UFSHCD_STATE_ERROR into UFSHCD_STATE_RESET.

Cc: Can Guo <cang@codeaurora.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/ufs/ufshcd.c | 59 ++++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 23 deletions(-)

Comments

Avri Altman June 21, 2021, 8:55 a.m. UTC | #1
> Introduce a function that reports whether or not a controller state change
> is allowed instead of duplicating these checks in every context that
> changes the host controller state. This patch includes a behavior change:
> ufshcd_link_recovery() no longer can change the host controller state from
> UFSHCD_STATE_ERROR into UFSHCD_STATE_RESET.
> 
> Cc: Can Guo <cang@codeaurora.org>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 59 ++++++++++++++++++++++++---------------
>  1 file changed, 36 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index c213daec20f7..a10c8ec28468 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -4070,16 +4070,32 @@ static int ufshcd_uic_change_pwr_mode(struct
> ufs_hba *hba, u8 mode)
>         return ret;
>  }
> 
> +static bool ufshcd_set_state(struct ufs_hba *hba, enum ufshcd_state
> new_state)
> +{
> +       lockdep_assert_held(hba->host->host_lock);
> +
> +       if (old_state != UFSHCD_STATE_ERROR || new_state ==
> UFSHCD_STATE_ERROR)
old_state ?

Thanks,
Avri

> +               hba->ufshcd_state = new_state;
> +               return true;
> +       }
> +       return false;
> +}
> +
>  int ufshcd_link_recovery(struct ufs_hba *hba)
>  {
>         int ret;
>         unsigned long flags;
> +       bool proceed;
> 
>         spin_lock_irqsave(hba->host->host_lock, flags);
> -       hba->ufshcd_state = UFSHCD_STATE_RESET;
> -       ufshcd_set_eh_in_progress(hba);
> +       proceed = ufshcd_set_state(hba, UFSHCD_STATE_RESET);
> +       if (proceed)
> +               ufshcd_set_eh_in_progress(hba);
>         spin_unlock_irqrestore(hba->host->host_lock, flags);
> 
> +       if (!proceed)
> +               return -EPERM;
> +
>         /* Reset the attached device */
>         ufshcd_device_reset(hba);
> 
> @@ -4087,7 +4103,7 @@ int ufshcd_link_recovery(struct ufs_hba *hba)
> 
>         spin_lock_irqsave(hba->host->host_lock, flags);
>         if (ret)
> -               hba->ufshcd_state = UFSHCD_STATE_ERROR;
> +               ufshcd_set_state(hba, UFSHCD_STATE_ERROR);
>         ufshcd_clear_eh_in_progress(hba);
>         spin_unlock_irqrestore(hba->host->host_lock, flags);
> 
> @@ -5856,15 +5872,17 @@ static inline bool
> ufshcd_is_saved_err_fatal(struct ufs_hba *hba)
>  /* host lock must be held before calling this func */
>  static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba)
>  {
> -       /* handle fatal errors only when link is not in error state */
> -       if (hba->ufshcd_state != UFSHCD_STATE_ERROR) {
> -               if (hba->force_reset || ufshcd_is_link_broken(hba) ||
> -                   ufshcd_is_saved_err_fatal(hba))
> -                       hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_FATAL;
> -               else
> -                       hba->ufshcd_state =
> UFSHCD_STATE_EH_SCHEDULED_NON_FATAL;
> +       bool proceed;
> +
> +       if (hba->force_reset || ufshcd_is_link_broken(hba) ||
> +           ufshcd_is_saved_err_fatal(hba))
> +               proceed = ufshcd_set_state(hba,
> +                                          UFSHCD_STATE_EH_SCHEDULED_FATAL);
> +       else
> +               proceed = ufshcd_set_state(hba,
> +                                          UFSHCD_STATE_EH_SCHEDULED_NON_FATAL);
> +       if (proceed)
>                 queue_work(hba->eh_wq, &hba->eh_work);
> -       }
>  }
> 
>  static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)
> @@ -6017,8 +6035,7 @@ static void ufshcd_err_handler(struct work_struct
> *work)
>         down(&hba->host_sem);
>         spin_lock_irqsave(hba->host->host_lock, flags);
>         if (ufshcd_err_handling_should_stop(hba)) {
> -               if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
> -                       hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> +               ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);
>                 spin_unlock_irqrestore(hba->host->host_lock, flags);
>                 up(&hba->host_sem);
>                 return;
> @@ -6029,8 +6046,7 @@ static void ufshcd_err_handler(struct work_struct
> *work)
>         /* Complete requests that have door-bell cleared by h/w */
>         ufshcd_complete_requests(hba);
>         spin_lock_irqsave(hba->host->host_lock, flags);
> -       if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
> -               hba->ufshcd_state = UFSHCD_STATE_RESET;
> +       ufshcd_set_state(hba, UFSHCD_STATE_RESET);
>         /*
>          * A full reset and restore might have happened after preparation
>          * is finished, double check whether we should stop.
> @@ -6163,8 +6179,7 @@ static void ufshcd_err_handler(struct work_struct
> *work)
> 
>  skip_err_handling:
>         if (!needs_reset) {
> -               if (hba->ufshcd_state == UFSHCD_STATE_RESET)
> -                       hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> +               ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);
>                 if (hba->saved_err || hba->saved_uic_err)
>                         dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x
> saved_uic_err 0x%x",
>                             __func__, hba->saved_err, hba->saved_uic_err);
> @@ -7135,7 +7150,7 @@ static int ufshcd_reset_and_restore(struct ufs_hba
> *hba)
>          */
>         scsi_report_bus_reset(hba->host, 0);
>         if (err) {
> -               hba->ufshcd_state = UFSHCD_STATE_ERROR;
> +               ufshcd_set_state(hba, UFSHCD_STATE_ERROR);
>                 hba->saved_err |= saved_err;
>                 hba->saved_uic_err |= saved_uic_err;
>         }
> @@ -7951,7 +7966,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba,
> bool init_dev_params)
>         unsigned long flags;
>         ktime_t start = ktime_get();
> 
> -       hba->ufshcd_state = UFSHCD_STATE_RESET;
> +       ufshcd_set_state(hba, UFSHCD_STATE_RESET);
> 
>         ret = ufshcd_link_startup(hba);
>         if (ret)
> @@ -8024,10 +8039,8 @@ static int ufshcd_probe_hba(struct ufs_hba *hba,
> bool init_dev_params)
> 
>  out:
>         spin_lock_irqsave(hba->host->host_lock, flags);
> -       if (ret)
> -               hba->ufshcd_state = UFSHCD_STATE_ERROR;
> -       else if (hba->ufshcd_state == UFSHCD_STATE_RESET)
> -               hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> +       ufshcd_set_state(hba, ret ? UFSHCD_STATE_ERROR :
> +                        UFSHCD_STATE_OPERATIONAL);
>         spin_unlock_irqrestore(hba->host->host_lock, flags);
> 
>         trace_ufshcd_init(dev_name(hba->dev), ret,
Bart Van Assche June 21, 2021, 5:38 p.m. UTC | #2
On 6/21/21 1:55 AM, Avri Altman wrote:
>> +static bool ufshcd_set_state(struct ufs_hba *hba, enum ufshcd_state
>> new_state)
>> +{
>> +       lockdep_assert_held(hba->host->host_lock);
>> +
>> +       if (old_state != UFSHCD_STATE_ERROR || new_state ==
>> UFSHCD_STATE_ERROR)
>
> old_state ?

Thanks for having taken a look. This function should look as follows:

static bool ufshcd_set_state(struct ufs_hba *hba, enum ufshcd_state new_state)
{
	lockdep_assert_held(hba->host->host_lock);

	if (hba->ufshcd_state != UFSHCD_STATE_ERROR ||
	    new_state == UFSHCD_STATE_ERROR) {
		hba->ufshcd_state = new_state;
		return true;
	}
	return false;
}
Can Guo June 23, 2021, 8:08 a.m. UTC | #3
Hi Bart,

On 2021-06-19 08:52, Bart Van Assche wrote:
> Introduce a function that reports whether or not a controller state 
> change
> is allowed instead of duplicating these checks in every context that
> changes the host controller state. This patch includes a behavior 
> change:
> ufshcd_link_recovery() no longer can change the host controller state 
> from
> UFSHCD_STATE_ERROR into UFSHCD_STATE_RESET.
> 
> Cc: Can Guo <cang@codeaurora.org>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 59 ++++++++++++++++++++++++---------------
>  1 file changed, 36 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index c213daec20f7..a10c8ec28468 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -4070,16 +4070,32 @@ static int ufshcd_uic_change_pwr_mode(struct
> ufs_hba *hba, u8 mode)
>  	return ret;
>  }
> 
> +static bool ufshcd_set_state(struct ufs_hba *hba, enum ufshcd_state 
> new_state)
> +{
> +	lockdep_assert_held(hba->host->host_lock);
> +
> +	if (old_state != UFSHCD_STATE_ERROR || new_state == 
> UFSHCD_STATE_ERROR)
> +		hba->ufshcd_state = new_state;
> +		return true;
> +	}
> +	return false;
> +}
> +
>  int ufshcd_link_recovery(struct ufs_hba *hba)
>  {
>  	int ret;
>  	unsigned long flags;
> +	bool proceed;
> 
>  	spin_lock_irqsave(hba->host->host_lock, flags);
> -	hba->ufshcd_state = UFSHCD_STATE_RESET;
> -	ufshcd_set_eh_in_progress(hba);
> +	proceed = ufshcd_set_state(hba, UFSHCD_STATE_RESET);
> +	if (proceed)
> +		ufshcd_set_eh_in_progress(hba);
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
> 
> +	if (!proceed)
> +		return -EPERM;
> +
>  	/* Reset the attached device */
>  	ufshcd_device_reset(hba);
> 
> @@ -4087,7 +4103,7 @@ int ufshcd_link_recovery(struct ufs_hba *hba)

I want to remove this function since it is only used by ufs-mediatek.c
through ufshcd_vops_resume(). Althrough it does not race with error
handling as of now, by removing it we can reduce one more caller of
full reset. I will ask Stanley help comment on this.

> 
>  	spin_lock_irqsave(hba->host->host_lock, flags);
>  	if (ret)
> -		hba->ufshcd_state = UFSHCD_STATE_ERROR;
> +		ufshcd_set_state(hba, UFSHCD_STATE_ERROR);
>  	ufshcd_clear_eh_in_progress(hba);
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
> 
> @@ -5856,15 +5872,17 @@ static inline bool
> ufshcd_is_saved_err_fatal(struct ufs_hba *hba)
>  /* host lock must be held before calling this func */
>  static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba)
>  {
> -	/* handle fatal errors only when link is not in error state */
> -	if (hba->ufshcd_state != UFSHCD_STATE_ERROR) {
> -		if (hba->force_reset || ufshcd_is_link_broken(hba) ||
> -		    ufshcd_is_saved_err_fatal(hba))
> -			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_FATAL;
> -		else
> -			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_NON_FATAL;
> +	bool proceed;
> +
> +	if (hba->force_reset || ufshcd_is_link_broken(hba) ||
> +	    ufshcd_is_saved_err_fatal(hba))
> +		proceed = ufshcd_set_state(hba,
> +					   UFSHCD_STATE_EH_SCHEDULED_FATAL);
> +	else
> +		proceed = ufshcd_set_state(hba,
> +					   UFSHCD_STATE_EH_SCHEDULED_NON_FATAL);
> +	if (proceed)
>  		queue_work(hba->eh_wq, &hba->eh_work);
> -	}
>  }
> 
>  static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)
> @@ -6017,8 +6035,7 @@ static void ufshcd_err_handler(struct work_struct 
> *work)
>  	down(&hba->host_sem);
>  	spin_lock_irqsave(hba->host->host_lock, flags);
>  	if (ufshcd_err_handling_should_stop(hba)) {
> -		if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
> -			hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> +		ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);
>  		spin_unlock_irqrestore(hba->host->host_lock, flags);
>  		up(&hba->host_sem);
>  		return;
> @@ -6029,8 +6046,7 @@ static void ufshcd_err_handler(struct work_struct 
> *work)
>  	/* Complete requests that have door-bell cleared by h/w */
>  	ufshcd_complete_requests(hba);
>  	spin_lock_irqsave(hba->host->host_lock, flags);
> -	if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
> -		hba->ufshcd_state = UFSHCD_STATE_RESET;
> +	ufshcd_set_state(hba, UFSHCD_STATE_RESET);
>  	/*
>  	 * A full reset and restore might have happened after preparation
>  	 * is finished, double check whether we should stop.
> @@ -6163,8 +6179,7 @@ static void ufshcd_err_handler(struct work_struct 
> *work)
> 
>  skip_err_handling:
>  	if (!needs_reset) {
> -		if (hba->ufshcd_state == UFSHCD_STATE_RESET)
> -			hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> +		ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);
>  		if (hba->saved_err || hba->saved_uic_err)
>  			dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x 
> saved_uic_err 0x%x",
>  			    __func__, hba->saved_err, hba->saved_uic_err);
> @@ -7135,7 +7150,7 @@ static int ufshcd_reset_and_restore(struct 
> ufs_hba *hba)
>  	 */
>  	scsi_report_bus_reset(hba->host, 0);
>  	if (err) {
> -		hba->ufshcd_state = UFSHCD_STATE_ERROR;
> +		ufshcd_set_state(hba, UFSHCD_STATE_ERROR);
>  		hba->saved_err |= saved_err;
>  		hba->saved_uic_err |= saved_uic_err;
>  	}
> @@ -7951,7 +7966,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba,
> bool init_dev_params)
>  	unsigned long flags;
>  	ktime_t start = ktime_get();
> 
> -	hba->ufshcd_state = UFSHCD_STATE_RESET;
> +	ufshcd_set_state(hba, UFSHCD_STATE_RESET);

1. We don't hold host lock here, but ufshcd_set_state() is expecting it.

2. Here we need to override the state to RESET anyways even state is 
ERROR,
because in ufshcd_reset_and_restore(), ufshcd_probe_hba() can be called 
5
times (retries == 5, see below code snippet). Otherwise, say on the 2nd
retry of ufshcd_probe_hba(), even if ufshcd_probe_hba() pass through 
without
an error (ret == 0), the ufshcd_set_state() (down below) cannot set the
state back to OPERATIONAL.

7042 	do {
7043 		/* Reset the attached device */
7044 		ufshcd_vops_device_reset(hba);
7045
7046 		err = ufshcd_host_reset_and_restore(hba);
7047 	} while (err && --retries);

Thanks,

Can Guo.

> 
>  	ret = ufshcd_link_startup(hba);
>  	if (ret)
> @@ -8024,10 +8039,8 @@ static int ufshcd_probe_hba(struct ufs_hba
> *hba, bool init_dev_params)
> 
>  out:
>  	spin_lock_irqsave(hba->host->host_lock, flags);
> -	if (ret)
> -		hba->ufshcd_state = UFSHCD_STATE_ERROR;
> -	else if (hba->ufshcd_state == UFSHCD_STATE_RESET)
> -		hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> +	ufshcd_set_state(hba, ret ? UFSHCD_STATE_ERROR :
> +			 UFSHCD_STATE_OPERATIONAL);
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
> 
>  	trace_ufshcd_init(dev_name(hba->dev), ret,
Can Guo June 23, 2021, 12:02 p.m. UTC | #4
Hi Bart,

On 2021-06-19 08:52, Bart Van Assche wrote:
> Introduce a function that reports whether or not a controller state 
> change
> is allowed instead of duplicating these checks in every context that
> changes the host controller state. This patch includes a behavior 
> change:
> ufshcd_link_recovery() no longer can change the host controller state 
> from
> UFSHCD_STATE_ERROR into UFSHCD_STATE_RESET.
> 
> Cc: Can Guo <cang@codeaurora.org>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 59 ++++++++++++++++++++++++---------------
>  1 file changed, 36 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index c213daec20f7..a10c8ec28468 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -4070,16 +4070,32 @@ static int ufshcd_uic_change_pwr_mode(struct
> ufs_hba *hba, u8 mode)
>  	return ret;
>  }
> 
> +static bool ufshcd_set_state(struct ufs_hba *hba, enum ufshcd_state 
> new_state)
> +{
> +	lockdep_assert_held(hba->host->host_lock);
> +
> +	if (old_state != UFSHCD_STATE_ERROR || new_state == 
> UFSHCD_STATE_ERROR)
> +		hba->ufshcd_state = new_state;
> +		return true;
> +	}
> +	return false;
> +}
> +
>  int ufshcd_link_recovery(struct ufs_hba *hba)
>  {
>  	int ret;
>  	unsigned long flags;
> +	bool proceed;
> 
>  	spin_lock_irqsave(hba->host->host_lock, flags);
> -	hba->ufshcd_state = UFSHCD_STATE_RESET;
> -	ufshcd_set_eh_in_progress(hba);
> +	proceed = ufshcd_set_state(hba, UFSHCD_STATE_RESET);
> +	if (proceed)
> +		ufshcd_set_eh_in_progress(hba);
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
> 
> +	if (!proceed)
> +		return -EPERM;
> +
>  	/* Reset the attached device */
>  	ufshcd_device_reset(hba);
> 
> @@ -4087,7 +4103,7 @@ int ufshcd_link_recovery(struct ufs_hba *hba)
> 
>  	spin_lock_irqsave(hba->host->host_lock, flags);
>  	if (ret)
> -		hba->ufshcd_state = UFSHCD_STATE_ERROR;
> +		ufshcd_set_state(hba, UFSHCD_STATE_ERROR);
>  	ufshcd_clear_eh_in_progress(hba);
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
> 
> @@ -5856,15 +5872,17 @@ static inline bool
> ufshcd_is_saved_err_fatal(struct ufs_hba *hba)
>  /* host lock must be held before calling this func */
>  static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba)
>  {
> -	/* handle fatal errors only when link is not in error state */
> -	if (hba->ufshcd_state != UFSHCD_STATE_ERROR) {
> -		if (hba->force_reset || ufshcd_is_link_broken(hba) ||
> -		    ufshcd_is_saved_err_fatal(hba))
> -			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_FATAL;
> -		else
> -			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_NON_FATAL;
> +	bool proceed;
> +
> +	if (hba->force_reset || ufshcd_is_link_broken(hba) ||
> +	    ufshcd_is_saved_err_fatal(hba))
> +		proceed = ufshcd_set_state(hba,
> +					   UFSHCD_STATE_EH_SCHEDULED_FATAL);
> +	else
> +		proceed = ufshcd_set_state(hba,
> +					   UFSHCD_STATE_EH_SCHEDULED_NON_FATAL);
> +	if (proceed)
>  		queue_work(hba->eh_wq, &hba->eh_work);
> -	}
>  }
> 
>  static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)
> @@ -6017,8 +6035,7 @@ static void ufshcd_err_handler(struct work_struct 
> *work)
>  	down(&hba->host_sem);
>  	spin_lock_irqsave(hba->host->host_lock, flags);
>  	if (ufshcd_err_handling_should_stop(hba)) {
> -		if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
> -			hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> +		ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);
>  		spin_unlock_irqrestore(hba->host->host_lock, flags);
>  		up(&hba->host_sem);
>  		return;
> @@ -6029,8 +6046,7 @@ static void ufshcd_err_handler(struct work_struct 
> *work)
>  	/* Complete requests that have door-bell cleared by h/w */
>  	ufshcd_complete_requests(hba);
>  	spin_lock_irqsave(hba->host->host_lock, flags);
> -	if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
> -		hba->ufshcd_state = UFSHCD_STATE_RESET;
> +	ufshcd_set_state(hba, UFSHCD_STATE_RESET);
>  	/*
>  	 * A full reset and restore might have happened after preparation
>  	 * is finished, double check whether we should stop.
> @@ -6163,8 +6179,7 @@ static void ufshcd_err_handler(struct work_struct 
> *work)
> 
>  skip_err_handling:
>  	if (!needs_reset) {
> -		if (hba->ufshcd_state == UFSHCD_STATE_RESET)
> -			hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;

State transition to OPERATIONAL should only be from RESET, this
is to make sure that we don't miss any new error (fatal or non-fatal)
occurs while error handling is running.

> +		ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);
>  		if (hba->saved_err || hba->saved_uic_err)
>  			dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x 
> saved_uic_err 0x%x",
>  			    __func__, hba->saved_err, hba->saved_uic_err);
> @@ -7135,7 +7150,7 @@ static int ufshcd_reset_and_restore(struct 
> ufs_hba *hba)
>  	 */
>  	scsi_report_bus_reset(hba->host, 0);
>  	if (err) {
> -		hba->ufshcd_state = UFSHCD_STATE_ERROR;
> +		ufshcd_set_state(hba, UFSHCD_STATE_ERROR);
>  		hba->saved_err |= saved_err;
>  		hba->saved_uic_err |= saved_uic_err;
>  	}
> @@ -7951,7 +7966,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba,
> bool init_dev_params)
>  	unsigned long flags;
>  	ktime_t start = ktime_get();
> 
> -	hba->ufshcd_state = UFSHCD_STATE_RESET;
> +	ufshcd_set_state(hba, UFSHCD_STATE_RESET);
> 
>  	ret = ufshcd_link_startup(hba);
>  	if (ret)
> @@ -8024,10 +8039,8 @@ static int ufshcd_probe_hba(struct ufs_hba
> *hba, bool init_dev_params)
> 
>  out:
>  	spin_lock_irqsave(hba->host->host_lock, flags);
> -	if (ret)
> -		hba->ufshcd_state = UFSHCD_STATE_ERROR;
> -	else if (hba->ufshcd_state == UFSHCD_STATE_RESET)
> -		hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;

State transition to OPERATIONAL should only be from RESET, this
is to make sure that we don't miss any new error (fatal or non-fatal)
occurs during re-probe.

Thanks,

Can Guo.

> +	ufshcd_set_state(hba, ret ? UFSHCD_STATE_ERROR :
> +			 UFSHCD_STATE_OPERATIONAL);
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
> 
>  	trace_ufshcd_init(dev_name(hba->dev), ret,
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index c213daec20f7..a10c8ec28468 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -4070,16 +4070,32 @@  static int ufshcd_uic_change_pwr_mode(struct ufs_hba *hba, u8 mode)
 	return ret;
 }
 
+static bool ufshcd_set_state(struct ufs_hba *hba, enum ufshcd_state new_state)
+{
+	lockdep_assert_held(hba->host->host_lock);
+
+	if (old_state != UFSHCD_STATE_ERROR || new_state == UFSHCD_STATE_ERROR)
+		hba->ufshcd_state = new_state;
+		return true;
+	}
+	return false;
+}
+
 int ufshcd_link_recovery(struct ufs_hba *hba)
 {
 	int ret;
 	unsigned long flags;
+	bool proceed;
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
-	hba->ufshcd_state = UFSHCD_STATE_RESET;
-	ufshcd_set_eh_in_progress(hba);
+	proceed = ufshcd_set_state(hba, UFSHCD_STATE_RESET);
+	if (proceed)
+		ufshcd_set_eh_in_progress(hba);
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
+	if (!proceed)
+		return -EPERM;
+
 	/* Reset the attached device */
 	ufshcd_device_reset(hba);
 
@@ -4087,7 +4103,7 @@  int ufshcd_link_recovery(struct ufs_hba *hba)
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	if (ret)
-		hba->ufshcd_state = UFSHCD_STATE_ERROR;
+		ufshcd_set_state(hba, UFSHCD_STATE_ERROR);
 	ufshcd_clear_eh_in_progress(hba);
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
@@ -5856,15 +5872,17 @@  static inline bool ufshcd_is_saved_err_fatal(struct ufs_hba *hba)
 /* host lock must be held before calling this func */
 static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba)
 {
-	/* handle fatal errors only when link is not in error state */
-	if (hba->ufshcd_state != UFSHCD_STATE_ERROR) {
-		if (hba->force_reset || ufshcd_is_link_broken(hba) ||
-		    ufshcd_is_saved_err_fatal(hba))
-			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_FATAL;
-		else
-			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_NON_FATAL;
+	bool proceed;
+
+	if (hba->force_reset || ufshcd_is_link_broken(hba) ||
+	    ufshcd_is_saved_err_fatal(hba))
+		proceed = ufshcd_set_state(hba,
+					   UFSHCD_STATE_EH_SCHEDULED_FATAL);
+	else
+		proceed = ufshcd_set_state(hba,
+					   UFSHCD_STATE_EH_SCHEDULED_NON_FATAL);
+	if (proceed)
 		queue_work(hba->eh_wq, &hba->eh_work);
-	}
 }
 
 static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)
@@ -6017,8 +6035,7 @@  static void ufshcd_err_handler(struct work_struct *work)
 	down(&hba->host_sem);
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	if (ufshcd_err_handling_should_stop(hba)) {
-		if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
-			hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+		ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);
 		spin_unlock_irqrestore(hba->host->host_lock, flags);
 		up(&hba->host_sem);
 		return;
@@ -6029,8 +6046,7 @@  static void ufshcd_err_handler(struct work_struct *work)
 	/* Complete requests that have door-bell cleared by h/w */
 	ufshcd_complete_requests(hba);
 	spin_lock_irqsave(hba->host->host_lock, flags);
-	if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
-		hba->ufshcd_state = UFSHCD_STATE_RESET;
+	ufshcd_set_state(hba, UFSHCD_STATE_RESET);
 	/*
 	 * A full reset and restore might have happened after preparation
 	 * is finished, double check whether we should stop.
@@ -6163,8 +6179,7 @@  static void ufshcd_err_handler(struct work_struct *work)
 
 skip_err_handling:
 	if (!needs_reset) {
-		if (hba->ufshcd_state == UFSHCD_STATE_RESET)
-			hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+		ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);
 		if (hba->saved_err || hba->saved_uic_err)
 			dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x saved_uic_err 0x%x",
 			    __func__, hba->saved_err, hba->saved_uic_err);
@@ -7135,7 +7150,7 @@  static int ufshcd_reset_and_restore(struct ufs_hba *hba)
 	 */
 	scsi_report_bus_reset(hba->host, 0);
 	if (err) {
-		hba->ufshcd_state = UFSHCD_STATE_ERROR;
+		ufshcd_set_state(hba, UFSHCD_STATE_ERROR);
 		hba->saved_err |= saved_err;
 		hba->saved_uic_err |= saved_uic_err;
 	}
@@ -7951,7 +7966,7 @@  static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params)
 	unsigned long flags;
 	ktime_t start = ktime_get();
 
-	hba->ufshcd_state = UFSHCD_STATE_RESET;
+	ufshcd_set_state(hba, UFSHCD_STATE_RESET);
 
 	ret = ufshcd_link_startup(hba);
 	if (ret)
@@ -8024,10 +8039,8 @@  static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params)
 
 out:
 	spin_lock_irqsave(hba->host->host_lock, flags);
-	if (ret)
-		hba->ufshcd_state = UFSHCD_STATE_ERROR;
-	else if (hba->ufshcd_state == UFSHCD_STATE_RESET)
-		hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+	ufshcd_set_state(hba, ret ? UFSHCD_STATE_ERROR :
+			 UFSHCD_STATE_OPERATIONAL);
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
 	trace_ufshcd_init(dev_name(hba->dev), ret,