
[v5,4/4] ufs: Simplify the clock scaling mechanism implementation

Message ID 20191112173743.141503-5-bvanassche@acm.org (mailing list archive)
State Superseded
Series Simplify and optimize the UFS driver

Commit Message

Bart Van Assche Nov. 12, 2019, 5:37 p.m. UTC
Scaling the clock is only safe while no commands are in progress. Use
blk_mq_{un,}freeze_queue() to block submission of new commands and to
wait for ongoing commands to complete. Remove "_scsi" from the name of
the functions that block and unblock requests since these functions
now block both SCSI commands and non-SCSI commands.

Cc: Bean Huo <beanhuo@micron.com>
Cc: Avri Altman <avri.altman@wdc.com>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/ufs/ufshcd.c | 141 +++++++++++++-------------------------
 drivers/scsi/ufs/ufshcd.h |   3 -
 2 files changed, 46 insertions(+), 98 deletions(-)

Comments

Can Guo Nov. 13, 2019, 12:11 a.m. UTC | #1
On 2019-11-13 01:37, Bart Van Assche wrote:
> Scaling the clock is only safe while no commands are in progress. Use
> blk_mq_{un,}freeze_queue() to block submission of new commands and to
> wait for ongoing commands to complete. Remove "_scsi" from the name of
> the functions that block and unblock requests since these functions
> now block both SCSI commands and non-SCSI commands.
> 
> Cc: Bean Huo <beanhuo@micron.com>
> Cc: Avri Altman <avri.altman@wdc.com>
> Cc: Stanley Chu <stanley.chu@mediatek.com>
> Cc: Tomas Winkler <tomas.winkler@intel.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 141 +++++++++++++-------------------------
>  drivers/scsi/ufs/ufshcd.h |   3 -
>  2 files changed, 46 insertions(+), 98 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 810b997f55fc..717ba24a2d40 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -290,16 +290,50 @@ static inline void ufshcd_disable_irq(struct 
> ufs_hba *hba)
>  	}
>  }
> 
> -static void ufshcd_scsi_unblock_requests(struct ufs_hba *hba)
> +static void ufshcd_unblock_requests(struct ufs_hba *hba)
>  {
> -	if (atomic_dec_and_test(&hba->scsi_block_reqs_cnt))
> -		scsi_unblock_requests(hba->host);
> +	struct scsi_device *sdev;
> +
> +	blk_mq_unfreeze_queue(hba->tmf_queue);
> +	blk_mq_unfreeze_queue(hba->cmd_queue);
> +	shost_for_each_device(sdev, hba->host)
> +		blk_mq_unfreeze_queue(sdev->request_queue);
>  }
> 
> -static void ufshcd_scsi_block_requests(struct ufs_hba *hba)
> +static int ufshcd_block_requests(struct ufs_hba *hba, unsigned long 
> timeout)
>  {
> -	if (atomic_inc_return(&hba->scsi_block_reqs_cnt) == 1)
> -		scsi_block_requests(hba->host);
> +	struct scsi_device *sdev;
> +	unsigned long deadline = jiffies + timeout;
> +
> +	if (timeout == ULONG_MAX) {
> +		shost_for_each_device(sdev, hba->host)
> +			blk_mq_freeze_queue(sdev->request_queue);
> +		blk_mq_freeze_queue(hba->cmd_queue);
> +		blk_mq_freeze_queue(hba->tmf_queue);
> +		return 0;
> +	}
> +
> +	shost_for_each_device(sdev, hba->host)
> +		blk_freeze_queue_start(sdev->request_queue);
> +	blk_freeze_queue_start(hba->cmd_queue);
> +	blk_freeze_queue_start(hba->tmf_queue);
> +	shost_for_each_device(sdev, hba->host) {
> +		if (blk_mq_freeze_queue_wait_timeout(sdev->request_queue,
> +				max_t(long, 0, deadline - jiffies)) <= 0) {
> +			goto err;
> +		}
> +	}
> +	if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue,
> +				max_t(long, 0, deadline - jiffies)) <= 0)
> +		goto err;
> +	if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue,
> +				max_t(long, 0, deadline - jiffies)) <= 0)
> +		goto err;
> +	return 0;
> +
> +err:
> +	ufshcd_unblock_requests(hba);
> +	return -ETIMEDOUT;
>  }
> 
>  static void ufshcd_add_cmd_upiu_trace(struct ufs_hba *hba, unsigned 
> int tag,
> @@ -971,65 +1005,6 @@ static bool
> ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
>  	return false;
>  }
> 
> -static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
> -					u64 wait_timeout_us)
> -{
> -	unsigned long flags;
> -	int ret = 0;
> -	u32 tm_doorbell;
> -	u32 tr_doorbell;
> -	bool timeout = false, do_last_check = false;
> -	ktime_t start;
> -
> -	ufshcd_hold(hba, false);
> -	spin_lock_irqsave(hba->host->host_lock, flags);
> -	/*
> -	 * Wait for all the outstanding tasks/transfer requests.
> -	 * Verify by checking the doorbell registers are clear.
> -	 */
> -	start = ktime_get();
> -	do {
> -		if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
> -			ret = -EBUSY;
> -			goto out;
> -		}
> -
> -		tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
> -		tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
> -		if (!tm_doorbell && !tr_doorbell) {
> -			timeout = false;
> -			break;
> -		} else if (do_last_check) {
> -			break;
> -		}
> -
> -		spin_unlock_irqrestore(hba->host->host_lock, flags);
> -		schedule();
> -		if (ktime_to_us(ktime_sub(ktime_get(), start)) >
> -		    wait_timeout_us) {
> -			timeout = true;
> -			/*
> -			 * We might have scheduled out for long time so make
> -			 * sure to check if doorbells are cleared by this time
> -			 * or not.
> -			 */
> -			do_last_check = true;
> -		}
> -		spin_lock_irqsave(hba->host->host_lock, flags);
> -	} while (tm_doorbell || tr_doorbell);
> -
> -	if (timeout) {
> -		dev_err(hba->dev,
> -			"%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n",
> -			__func__, tm_doorbell, tr_doorbell);
> -		ret = -EBUSY;
> -	}
> -out:
> -	spin_unlock_irqrestore(hba->host->host_lock, flags);
> -	ufshcd_release(hba);
> -	return ret;
> -}
> -
>  /**
>   * ufshcd_scale_gear - scale up/down UFS gear
>   * @hba: per adapter instance
> @@ -1079,27 +1054,16 @@ static int ufshcd_scale_gear(struct ufs_hba
> *hba, bool scale_up)
> 
>  static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
>  {
> -	#define DOORBELL_CLR_TOUT_US		(1000 * 1000) /* 1 sec */
> -	int ret = 0;
>  	/*
>  	 * make sure that there are no outstanding requests when
>  	 * clock scaling is in progress
>  	 */
> -	ufshcd_scsi_block_requests(hba);
> -	down_write(&hba->clk_scaling_lock);
> -	if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
> -		ret = -EBUSY;
> -		up_write(&hba->clk_scaling_lock);
> -		ufshcd_scsi_unblock_requests(hba);
> -	}
> -
> -	return ret;
> +	return ufshcd_block_requests(hba, HZ);
>  }
> 
>  static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba)
>  {
> -	up_write(&hba->clk_scaling_lock);
> -	ufshcd_scsi_unblock_requests(hba);
> +	ufshcd_unblock_requests(hba);
>  }
> 
>  /**
> @@ -1471,7 +1435,7 @@ static void ufshcd_ungate_work(struct work_struct 
> *work)
>  		hba->clk_gating.is_suspended = false;
>  	}
>  unblock_reqs:
> -	ufshcd_scsi_unblock_requests(hba);
> +	ufshcd_unblock_requests(hba);
>  }
> 
>  /**
> @@ -1528,7 +1492,7 @@ int ufshcd_hold(struct ufs_hba *hba, bool async)
>  		 */
>  		/* fallthrough */
>  	case CLKS_OFF:
> -		ufshcd_scsi_block_requests(hba);
> +		ufshcd_block_requests(hba, ULONG_MAX);

Hi Bart,

ufshcd_hold(async == true) is used in the ufshcd_queuecommand() path because
ufshcd_queuecommand() can be entered in atomic context.
Thus ufshcd_block_requests() here carries the same risk of scheduling
while atomic.
FYI, it is not easy to hit the above scenario in small-scale testing.
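
For reference, blk_mq_freeze_queue() boils down to the following (condensed
from block/blk-mq.c; a sketch, not the exact source), and both steps can
sleep, which is the scheduling-while-atomic risk mentioned above:

/* Condensed paraphrase of block/blk-mq.c, for illustration only. */
void blk_mq_freeze_queue(struct request_queue *q)
{
	blk_freeze_queue_start(q);	/* takes q->mq_freeze_lock, a mutex */
	blk_mq_freeze_queue_wait(q);	/* wait_event() until the queue has no users */
}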

Best Regards,
Can Guo.

>  		hba->clk_gating.state = REQ_CLKS_ON;
>  		trace_ufshcd_clk_gating(dev_name(hba->dev),
>  					hba->clk_gating.state);
> @@ -2395,9 +2359,6 @@ static int ufshcd_queuecommand(struct Scsi_Host
> *host, struct scsi_cmnd *cmd)
>  		BUG();
>  	}
> 
> -	if (!down_read_trylock(&hba->clk_scaling_lock))
> -		return SCSI_MLQUEUE_HOST_BUSY;
> -
>  	spin_lock_irqsave(hba->host->host_lock, flags);
>  	switch (hba->ufshcd_state) {
>  	case UFSHCD_STATE_OPERATIONAL:
> @@ -2463,7 +2424,6 @@ static int ufshcd_queuecommand(struct Scsi_Host
> *host, struct scsi_cmnd *cmd)
>  out_unlock:
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
>  out:
> -	up_read(&hba->clk_scaling_lock);
>  	return err;
>  }
> 
> @@ -2617,8 +2577,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba 
> *hba,
>  	struct completion wait;
>  	unsigned long flags;
> 
> -	down_read(&hba->clk_scaling_lock);
> -
>  	/*
>  	 * Get free slot, sleep if slots are unavailable.
>  	 * Even though we use wait_event() which sleeps indefinitely,
> @@ -2654,7 +2612,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba 
> *hba,
> 
>  out_put_tag:
>  	blk_put_request(req);
> -	up_read(&hba->clk_scaling_lock);
>  	return err;
>  }
> 
> @@ -5342,7 +5299,7 @@ static void ufshcd_err_handler(struct work_struct 
> *work)
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
>  	if (starget)
>  		scsi_target_unblock(&starget->dev, SDEV_RUNNING);
> -	ufshcd_scsi_unblock_requests(hba);
> +	scsi_unblock_requests(hba->host);
>  	ufshcd_release(hba);
>  	pm_runtime_put_sync(hba->dev);
>  }
> @@ -5466,7 +5423,7 @@ static void ufshcd_check_errors(struct ufs_hba 
> *hba)
>  		/* handle fatal errors only when link is functional */
>  		if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
>  			/* block commands from scsi mid-layer */
> -			ufshcd_scsi_block_requests(hba);
> +			scsi_block_requests(hba->host);
> 
>  			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED;
> 
> @@ -5772,8 +5729,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct
> ufs_hba *hba,
>  	unsigned long flags;
>  	u32 upiu_flags;
> 
> -	down_read(&hba->clk_scaling_lock);
> -
>  	req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
>  	if (IS_ERR(req))
>  		return PTR_ERR(req);
> @@ -5853,7 +5808,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct
> ufs_hba *hba,
>  	}
> 
>  	blk_put_request(req);
> -	up_read(&hba->clk_scaling_lock);
>  	return err;
>  }
> 
> @@ -8322,8 +8276,6 @@ int ufshcd_init(struct ufs_hba *hba, void
> __iomem *mmio_base, unsigned int irq)
>  	/* Initialize mutex for device management commands */
>  	mutex_init(&hba->dev_cmd.lock);
> 
> -	init_rwsem(&hba->clk_scaling_lock);
> -
>  	ufshcd_init_clk_gating(hba);
> 
>  	ufshcd_init_clk_scaling(hba);
> @@ -8410,7 +8362,6 @@ int ufshcd_init(struct ufs_hba *hba, void
> __iomem *mmio_base, unsigned int irq)
> 
>  	/* Hold auto suspend until async scan completes */
>  	pm_runtime_get_sync(dev);
> -	atomic_set(&hba->scsi_block_reqs_cnt, 0);
>  	/*
>  	 * We are assuming that device wasn't put in sleep/power-down
>  	 * state exclusively during the boot stage before kernel.
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> index 5865e16f53a6..c44bfcdb26a3 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -520,7 +520,6 @@ struct ufs_stats {
>   * @urgent_bkops_lvl: keeps track of urgent bkops level for device
>   * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level 
> for
>   *  device is known or not.
> - * @scsi_block_reqs_cnt: reference counting for scsi block requests
>   */
>  struct ufs_hba {
>  	void __iomem *mmio_base;
> @@ -724,9 +723,7 @@ struct ufs_hba {
>  	enum bkops_status urgent_bkops_lvl;
>  	bool is_urgent_bkops_lvl_checked;
> 
> -	struct rw_semaphore clk_scaling_lock;
>  	struct ufs_desc_size desc_size;
> -	atomic_t scsi_block_reqs_cnt;
> 
>  	struct device		bsg_dev;
>  	struct request_queue	*bsg_queue;
Bart Van Assche Nov. 13, 2019, 12:55 a.m. UTC | #2
On 11/12/19 4:11 PM, cang@codeaurora.org wrote:
> On 2019-11-13 01:37, Bart Van Assche wrote:
>> @@ -1528,7 +1492,7 @@ int ufshcd_hold(struct ufs_hba *hba, bool async)
>>           */
>>          /* fallthrough */
>>      case CLKS_OFF:
>> -        ufshcd_scsi_block_requests(hba);
>> +        ufshcd_block_requests(hba, ULONG_MAX);
> 
> ufshcd_hold(async == true) is used in ufshcd_queuecommand() path because
> ufshcd_queuecommand() can be entered under atomic contexts.
> Thus ufshcd_block_requests() here has the same risk causing scheduling 
> while atomic.
> FYI, it is not easy to hit above scenario in small scale of test.

Hi Bean,

How about replacing patch 4/4 with the attached patch?

Thanks,

Bart.
Bean Huo Nov. 13, 2019, 4:03 p.m. UTC | #3
Hi, Bart

I think you are asking for a comment from Can. As for me, the attached patch is better.
Removing ufshcd_wait_for_doorbell_clr() and, instead of polling the doorbell register,
using the block layer's blk_mq_{un,}freeze_queue() looks good. I tested your v5 patches
and haven't found a problem yet.

Since my available platform doesn't support dynamic clock scaling (I think only Qcom
UFS controllers currently support this feature), we need Can Guo to give the final
confirmation.

Thanks,

//Bean

    

Bart Van Assche Nov. 14, 2019, 4:11 p.m. UTC | #4
On 11/13/19 8:03 AM, Bean Huo (beanhuo) wrote:
> I think, you are asking for comment from Can.  As for me, attached patch is better.
> Removing ufshcd_wait_for_doorbell_clr(), instead of reading doorbell register, Now
> using block layer blk_mq_{un,}freeze_queue(), looks good. I tested your V5 patches,
> didn't find problem yet.
> 
> Since my available platform doesn't support dynamic clk scaling, I think, now seems only
> Qcom UFS controllers support this feature. So we need Can Guo to finally confirm it.

Hi Can,

Do you agree with this patch series if patch 4 is replaced by the patch 
attached to my previous e-mail? The entire (revised) series is available 
at https://github.com/bvanassche/linux/tree/ufs-for-next.

Thanks,

Bart.
Can Guo Nov. 15, 2019, 6:01 a.m. UTC | #5
On 2019-11-15 00:11, Bart Van Assche wrote:
> On 11/13/19 8:03 AM, Bean Huo (beanhuo) wrote:
>> I think, you are asking for comment from Can.  As for me, attached 
>> patch is better.
>> Removing ufshcd_wait_for_doorbell_clr(), instead of reading doorbell 
>> register, Now
>> using block layer blk_mq_{un,}freeze_queue(), looks good. I tested 
>> your V5 patches,
>> didn't find problem yet.
>> 
>> Since my available platform doesn't support dynamic clk scaling, I 
>> think, now seems only
>> Qcom UFS controllers support this feature. So we need Can Guo to 
>> finally confirm it.
> 
> Hi Can,
> 
> Do you agree with this patch series if patch 4 is replaced by the
> patch attached to my previous e-mail? The entire (revised) series is
> available at https://github.com/bvanassche/linux/tree/ufs-for-next.
> 
> Thanks,
> 
> Bart.

Hi Bart,

After ufshcd_clock_scaling_prepare() returns (without error), all request
queues are frozen. If a failure happens after this point (say the power mode
change command fails) and the error handler kicks off, we need to send dev
commands (in ufshcd_probe_hba()) to bring UFS back to functionality.
However, as hba->cmd_queue is frozen, dev commands cannot be sent and the
error handler will fail.
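
For reference, those dev commands would be issued through roughly the
following chain (simplified), which is where the freeze would block:

ufshcd_err_handler()
  -> ufshcd_reset_and_restore()
    -> ufshcd_host_reset_and_restore()
      -> ufshcd_probe_hba()
        -> ufshcd_exec_dev_cmd()                    (NOP OUT, query requests, ...)
          -> blk_get_request(hba->cmd_queue, ...)   /* blocks while cmd_queue is frozen */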

Thanks,

Can Guo.
Bart Van Assche Nov. 15, 2019, 4:23 p.m. UTC | #6
On 11/14/19 10:01 PM, Can Guo wrote:
> After ufshcd_clock_scaling_prepare() returns(no error), all request queues are frozen. If failure
> happens(say power mode change command fails) after this point and error handler kicks off,
> we need to send dev commands(in ufshcd_probe_hba()) to bring UFS back to functionality.
> However, as the hba->cmd_queue is frozen, dev commands cannot be sent, the error handler shall fail.

Hi Can,

I think that's easy to address. How about replacing patch 4/4 with the patch below?

Thanks,

Bart.


Subject: [PATCH] ufs: Simplify the clock scaling mechanism implementation

Scaling the clock is only safe while no commands are in progress. Use
blk_mq_{un,}freeze_queue() to block submission of new commands and to
wait for ongoing commands to complete. This patch removes a semaphore
down and up operation pair from the hot path.

Cc: Bean Huo <beanhuo@micron.com>
Cc: Avri Altman <avri.altman@wdc.com>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
  drivers/scsi/ufs/ufshcd.c | 115 +++++++++++---------------------------
  drivers/scsi/ufs/ufshcd.h |   1 -
  2 files changed, 33 insertions(+), 83 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 5314e8bfeeb6..343f9b746b70 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -971,65 +971,6 @@ static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
  	return false;
  }

-static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
-					u64 wait_timeout_us)
-{
-	unsigned long flags;
-	int ret = 0;
-	u32 tm_doorbell;
-	u32 tr_doorbell;
-	bool timeout = false, do_last_check = false;
-	ktime_t start;
-
-	ufshcd_hold(hba, false);
-	spin_lock_irqsave(hba->host->host_lock, flags);
-	/*
-	 * Wait for all the outstanding tasks/transfer requests.
-	 * Verify by checking the doorbell registers are clear.
-	 */
-	start = ktime_get();
-	do {
-		if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
-			ret = -EBUSY;
-			goto out;
-		}
-
-		tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
-		tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
-		if (!tm_doorbell && !tr_doorbell) {
-			timeout = false;
-			break;
-		} else if (do_last_check) {
-			break;
-		}
-
-		spin_unlock_irqrestore(hba->host->host_lock, flags);
-		schedule();
-		if (ktime_to_us(ktime_sub(ktime_get(), start)) >
-		    wait_timeout_us) {
-			timeout = true;
-			/*
-			 * We might have scheduled out for long time so make
-			 * sure to check if doorbells are cleared by this time
-			 * or not.
-			 */
-			do_last_check = true;
-		}
-		spin_lock_irqsave(hba->host->host_lock, flags);
-	} while (tm_doorbell || tr_doorbell);
-
-	if (timeout) {
-		dev_err(hba->dev,
-			"%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n",
-			__func__, tm_doorbell, tr_doorbell);
-		ret = -EBUSY;
-	}
-out:
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
-	ufshcd_release(hba);
-	return ret;
-}
-
  /**
   * ufshcd_scale_gear - scale up/down UFS gear
   * @hba: per adapter instance
@@ -1079,27 +1020,49 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)

  static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
  {
-	#define DOORBELL_CLR_TOUT_US		(1000 * 1000) /* 1 sec */
-	int ret = 0;
+	unsigned long deadline = jiffies + HZ;
+	struct scsi_device *sdev;
+	int res = 0;
+
  	/*
  	 * make sure that there are no outstanding requests when
  	 * clock scaling is in progress
  	 */
-	ufshcd_scsi_block_requests(hba);
-	down_write(&hba->clk_scaling_lock);
-	if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
-		ret = -EBUSY;
-		up_write(&hba->clk_scaling_lock);
-		ufshcd_scsi_unblock_requests(hba);
+	shost_for_each_device(sdev, hba->host)
+		blk_freeze_queue_start(sdev->request_queue);
+	blk_freeze_queue_start(hba->cmd_queue);
+	blk_freeze_queue_start(hba->tmf_queue);
+	shost_for_each_device(sdev, hba->host) {
+		if (blk_mq_freeze_queue_wait_timeout(sdev->request_queue,
+				max_t(long, 0, deadline - jiffies)) <= 0) {
+			goto err;
+		}
  	}
+	if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue,
+				max_t(long, 0, deadline - jiffies)) <= 0)
+		goto err;
+	if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue,
+				max_t(long, 0, deadline - jiffies)) <= 0)
+		goto err;

-	return ret;
+out:
+	blk_mq_unfreeze_queue(hba->tmf_queue);
+	blk_mq_unfreeze_queue(hba->cmd_queue);
+	return res;
+
+err:
+	shost_for_each_device(sdev, hba->host)
+		blk_mq_unfreeze_queue(sdev->request_queue);
+	res = -ETIMEDOUT;
+	goto out;
  }

  static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba)
  {
-	up_write(&hba->clk_scaling_lock);
-	ufshcd_scsi_unblock_requests(hba);
+	struct scsi_device *sdev;
+
+	shost_for_each_device(sdev, hba->host)
+		blk_mq_unfreeze_queue(sdev->request_queue);
  }

  /**
@@ -2394,9 +2357,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
  		BUG();
  	}

-	if (!down_read_trylock(&hba->clk_scaling_lock))
-		return SCSI_MLQUEUE_HOST_BUSY;
-
  	spin_lock_irqsave(hba->host->host_lock, flags);
  	switch (hba->ufshcd_state) {
  	case UFSHCD_STATE_OPERATIONAL:
@@ -2462,7 +2422,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
  out_unlock:
  	spin_unlock_irqrestore(hba->host->host_lock, flags);
  out:
-	up_read(&hba->clk_scaling_lock);
  	return err;
  }

@@ -2616,8 +2575,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
  	struct completion wait;
  	unsigned long flags;

-	down_read(&hba->clk_scaling_lock);
-
  	/*
  	 * Get free slot, sleep if slots are unavailable.
  	 * Even though we use wait_event() which sleeps indefinitely,
@@ -2653,7 +2610,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,

  out_put_tag:
  	blk_put_request(req);
-	up_read(&hba->clk_scaling_lock);
  	return err;
  }

@@ -5771,8 +5727,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
  	unsigned long flags;
  	u32 upiu_flags;

-	down_read(&hba->clk_scaling_lock);
-
  	req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
  	if (IS_ERR(req))
  		return PTR_ERR(req);
@@ -5854,7 +5808,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
  	}

  	blk_put_request(req);
-	up_read(&hba->clk_scaling_lock);
  	return err;
  }

@@ -8323,8 +8276,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
  	/* Initialize mutex for device management commands */
  	mutex_init(&hba->dev_cmd.lock);

-	init_rwsem(&hba->clk_scaling_lock);
-
  	ufshcd_init_clk_gating(hba);

  	ufshcd_init_clk_scaling(hba);
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 5865e16f53a6..5ebb920ae874 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -724,7 +724,6 @@ struct ufs_hba {
  	enum bkops_status urgent_bkops_lvl;
  	bool is_urgent_bkops_lvl_checked;

-	struct rw_semaphore clk_scaling_lock;
  	struct ufs_desc_size desc_size;
  	atomic_t scsi_block_reqs_cnt;
Can Guo Nov. 18, 2019, 3:49 a.m. UTC | #7
On 2019-11-16 00:23, Bart Van Assche wrote:
> On 11/14/19 10:01 PM, Can Guo wrote:
>> After ufshcd_clock_scaling_prepare() returns(no error), all request 
>> queues are frozen. If failure
>> happens(say power mode change command fails) after this point and 
>> error handler kicks off,
>> we need to send dev commands(in ufshcd_probe_hba()) to bring UFS back 
>> to functionality.
>> However, as the hba->cmd_queue is frozen, dev commands cannot be sent, 
>> the error handler shall fail.
> 
> Hi Can,
> 
> I think that's easy to address. How about replacing patch 4/4 with the
> patch below?
> 
> Thanks,
> 
> Bart.
> 
> 
> Subject: [PATCH] ufs: Simplify the clock scaling mechanism 
> implementation
> 
> Scaling the clock is only safe while no commands are in progress. Use
> blk_mq_{un,}freeze_queue() to block submission of new commands and to
> wait for ongoing commands to complete. This patch removes a semaphore
> down and up operation pair from the hot path.
> 
> Cc: Bean Huo <beanhuo@micron.com>
> Cc: Avri Altman <avri.altman@wdc.com>
> Cc: Stanley Chu <stanley.chu@mediatek.com>
> Cc: Tomas Winkler <tomas.winkler@intel.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 115 +++++++++++---------------------------
>  drivers/scsi/ufs/ufshcd.h |   1 -
>  2 files changed, 33 insertions(+), 83 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 5314e8bfeeb6..343f9b746b70 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -971,65 +971,6 @@ static bool
> ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
>  	return false;
>  }
> 
> -static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
> -					u64 wait_timeout_us)
> -{
> -	unsigned long flags;
> -	int ret = 0;
> -	u32 tm_doorbell;
> -	u32 tr_doorbell;
> -	bool timeout = false, do_last_check = false;
> -	ktime_t start;
> -
> -	ufshcd_hold(hba, false);
> -	spin_lock_irqsave(hba->host->host_lock, flags);
> -	/*
> -	 * Wait for all the outstanding tasks/transfer requests.
> -	 * Verify by checking the doorbell registers are clear.
> -	 */
> -	start = ktime_get();
> -	do {
> -		if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
> -			ret = -EBUSY;
> -			goto out;
> -		}
> -
> -		tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
> -		tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
> -		if (!tm_doorbell && !tr_doorbell) {
> -			timeout = false;
> -			break;
> -		} else if (do_last_check) {
> -			break;
> -		}
> -
> -		spin_unlock_irqrestore(hba->host->host_lock, flags);
> -		schedule();
> -		if (ktime_to_us(ktime_sub(ktime_get(), start)) >
> -		    wait_timeout_us) {
> -			timeout = true;
> -			/*
> -			 * We might have scheduled out for long time so make
> -			 * sure to check if doorbells are cleared by this time
> -			 * or not.
> -			 */
> -			do_last_check = true;
> -		}
> -		spin_lock_irqsave(hba->host->host_lock, flags);
> -	} while (tm_doorbell || tr_doorbell);
> -
> -	if (timeout) {
> -		dev_err(hba->dev,
> -			"%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n",
> -			__func__, tm_doorbell, tr_doorbell);
> -		ret = -EBUSY;
> -	}
> -out:
> -	spin_unlock_irqrestore(hba->host->host_lock, flags);
> -	ufshcd_release(hba);
> -	return ret;
> -}
> -
>  /**
>   * ufshcd_scale_gear - scale up/down UFS gear
>   * @hba: per adapter instance
> @@ -1079,27 +1020,49 @@ static int ufshcd_scale_gear(struct ufs_hba
> *hba, bool scale_up)
> 
>  static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
>  {
> -	#define DOORBELL_CLR_TOUT_US		(1000 * 1000) /* 1 sec */
> -	int ret = 0;
> +	unsigned long deadline = jiffies + HZ;
> +	struct scsi_device *sdev;
> +	int res = 0;
> +
>  	/*
>  	 * make sure that there are no outstanding requests when
>  	 * clock scaling is in progress
>  	 */
> -	ufshcd_scsi_block_requests(hba);
> -	down_write(&hba->clk_scaling_lock);
> -	if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
> -		ret = -EBUSY;
> -		up_write(&hba->clk_scaling_lock);
> -		ufshcd_scsi_unblock_requests(hba);
> +	shost_for_each_device(sdev, hba->host)
> +		blk_freeze_queue_start(sdev->request_queue);
> +	blk_freeze_queue_start(hba->cmd_queue);
> +	blk_freeze_queue_start(hba->tmf_queue);
> +	shost_for_each_device(sdev, hba->host) {
> +		if (blk_mq_freeze_queue_wait_timeout(sdev->request_queue,
> +				max_t(long, 0, deadline - jiffies)) <= 0) {
> +			goto err;
> +		}
>  	}
> +	if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue,
> +				max_t(long, 0, deadline - jiffies)) <= 0)
> +		goto err;
> +	if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue,
> +				max_t(long, 0, deadline - jiffies)) <= 0)
> +		goto err;
> 
> -	return ret;
> +out:
> +	blk_mq_unfreeze_queue(hba->tmf_queue);
> +	blk_mq_unfreeze_queue(hba->cmd_queue);

Hi Bart,

After this point, dev commands can be sent on hba->cmd_queue.
But there are multiple entries in ufs-sysfs.c through which users can send
dev commands, so the clock scaling sequence loses its protection from being
disturbed.
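
For example (chain simplified), a descriptor read from sysfs ends up on that
same queue:

ufs_sysfs_read_desc_param()                  (drivers/scsi/ufs/ufs-sysfs.c)
  -> ufshcd_read_desc_param()
    -> ufshcd_query_descriptor_retry()
      -> ufshcd_exec_dev_cmd()               issues the dev command on hba->cmd_queue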

Best Regards,
Can Guo.

> +	return res;
> +
> +err:
> +	shost_for_each_device(sdev, hba->host)
> +		blk_mq_unfreeze_queue(sdev->request_queue);
> +	res = -ETIMEDOUT;
> +	goto out;
>  }
> 
>  static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba)
>  {
> -	up_write(&hba->clk_scaling_lock);
> -	ufshcd_scsi_unblock_requests(hba);
> +	struct scsi_device *sdev;
> +
> +	shost_for_each_device(sdev, hba->host)
> +		blk_mq_unfreeze_queue(sdev->request_queue);
>  }
> 
>  /**
> @@ -2394,9 +2357,6 @@ static int ufshcd_queuecommand(struct Scsi_Host
> *host, struct scsi_cmnd *cmd)
>  		BUG();
>  	}
> 
> -	if (!down_read_trylock(&hba->clk_scaling_lock))
> -		return SCSI_MLQUEUE_HOST_BUSY;
> -
>  	spin_lock_irqsave(hba->host->host_lock, flags);
>  	switch (hba->ufshcd_state) {
>  	case UFSHCD_STATE_OPERATIONAL:
> @@ -2462,7 +2422,6 @@ static int ufshcd_queuecommand(struct Scsi_Host
> *host, struct scsi_cmnd *cmd)
>  out_unlock:
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
>  out:
> -	up_read(&hba->clk_scaling_lock);
>  	return err;
>  }
> 
> @@ -2616,8 +2575,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba 
> *hba,
>  	struct completion wait;
>  	unsigned long flags;
> 
> -	down_read(&hba->clk_scaling_lock);
> -
>  	/*
>  	 * Get free slot, sleep if slots are unavailable.
>  	 * Even though we use wait_event() which sleeps indefinitely,
> @@ -2653,7 +2610,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba 
> *hba,
> 
>  out_put_tag:
>  	blk_put_request(req);
> -	up_read(&hba->clk_scaling_lock);
>  	return err;
>  }
> 
> @@ -5771,8 +5727,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct
> ufs_hba *hba,
>  	unsigned long flags;
>  	u32 upiu_flags;
> 
> -	down_read(&hba->clk_scaling_lock);
> -
>  	req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
>  	if (IS_ERR(req))
>  		return PTR_ERR(req);
> @@ -5854,7 +5808,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct
> ufs_hba *hba,
>  	}
> 
>  	blk_put_request(req);
> -	up_read(&hba->clk_scaling_lock);
>  	return err;
>  }
> 
> @@ -8323,8 +8276,6 @@ int ufshcd_init(struct ufs_hba *hba, void
> __iomem *mmio_base, unsigned int irq)
>  	/* Initialize mutex for device management commands */
>  	mutex_init(&hba->dev_cmd.lock);
> 
> -	init_rwsem(&hba->clk_scaling_lock);
> -
>  	ufshcd_init_clk_gating(hba);
> 
>  	ufshcd_init_clk_scaling(hba);
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> index 5865e16f53a6..5ebb920ae874 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -724,7 +724,6 @@ struct ufs_hba {
>  	enum bkops_status urgent_bkops_lvl;
>  	bool is_urgent_bkops_lvl_checked;
> 
> -	struct rw_semaphore clk_scaling_lock;
>  	struct ufs_desc_size desc_size;
>  	atomic_t scsi_block_reqs_cnt;
Bart Van Assche Nov. 18, 2019, 5:49 p.m. UTC | #8
On 11/17/19 7:49 PM, cang@codeaurora.org wrote:
> After this point, dev commands can be sent on hba->cmd_queue.
> But there are multiple entries which users can send dev commands through in ufs-sysfs.c.
> So the clock scaling sequence lost its protection from being disturbed.

Hi Can,

My understanding of the current implementation (Martin's 5.5/scsi-queue branch)
is that hba->clk_scaling_lock serializes clock scaling and submission of SCSI
and device commands but not the submission of TMFs. I think that the following
call chain allows user space to submit TMFs while clock scaling is in progress:

submit UPIU_TRANSACTION_TASK_REQ on an UFS BSG queue
-> ufs_bsg_request()
   -> ufshcd_exec_raw_upiu_cmd(msgcode = UPIU_TRANSACTION_TASK_REQ)
       -> __ufshcd_issue_tm_cmd()

Is that a feature or is that rather unintended behavior?

Anyway, can you have a look at the patch below and verify whether it preserves
existing behavior?

Thank you,

Bart.


Subject: [PATCH] ufs: Simplify the clock scaling mechanism implementation

Scaling the clock is only safe while no commands are in progress. The
current clock scaling implementation uses hba->clk_scaling_lock to
serialize clock scaling against the following three functions:
* ufshcd_queuecommand()          (uses sdev->request_queue)
* ufshcd_exec_dev_cmd()          (uses hba->cmd_queue)
* ufshcd_issue_devman_upiu_cmd() (uses hba->cmd_queue)

__ufshcd_issue_tm_cmd(), which uses hba->tmf_queue, is not serialized
against clock scaling.

Use blk_mq_{un,}freeze_queue() to block submission of new commands and to
wait for ongoing commands to complete. This patch removes a semaphore
down and up operation pair from the hot path.

Cc: Bean Huo <beanhuo@micron.com>
Cc: Avri Altman <avri.altman@wdc.com>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
  drivers/scsi/ufs/ufshcd.c | 116 +++++++++++---------------------------
  drivers/scsi/ufs/ufshcd.h |   1 -
  2 files changed, 34 insertions(+), 83 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 5314e8bfeeb6..46950cc87dc5 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -971,65 +971,6 @@ static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
  	return false;
  }

-static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
-					u64 wait_timeout_us)
-{
-	unsigned long flags;
-	int ret = 0;
-	u32 tm_doorbell;
-	u32 tr_doorbell;
-	bool timeout = false, do_last_check = false;
-	ktime_t start;
-
-	ufshcd_hold(hba, false);
-	spin_lock_irqsave(hba->host->host_lock, flags);
-	/*
-	 * Wait for all the outstanding tasks/transfer requests.
-	 * Verify by checking the doorbell registers are clear.
-	 */
-	start = ktime_get();
-	do {
-		if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
-			ret = -EBUSY;
-			goto out;
-		}
-
-		tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
-		tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
-		if (!tm_doorbell && !tr_doorbell) {
-			timeout = false;
-			break;
-		} else if (do_last_check) {
-			break;
-		}
-
-		spin_unlock_irqrestore(hba->host->host_lock, flags);
-		schedule();
-		if (ktime_to_us(ktime_sub(ktime_get(), start)) >
-		    wait_timeout_us) {
-			timeout = true;
-			/*
-			 * We might have scheduled out for long time so make
-			 * sure to check if doorbells are cleared by this time
-			 * or not.
-			 */
-			do_last_check = true;
-		}
-		spin_lock_irqsave(hba->host->host_lock, flags);
-	} while (tm_doorbell || tr_doorbell);
-
-	if (timeout) {
-		dev_err(hba->dev,
-			"%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n",
-			__func__, tm_doorbell, tr_doorbell);
-		ret = -EBUSY;
-	}
-out:
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
-	ufshcd_release(hba);
-	return ret;
-}
-
  /**
   * ufshcd_scale_gear - scale up/down UFS gear
   * @hba: per adapter instance
@@ -1079,27 +1020,50 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)

  static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
  {
-	#define DOORBELL_CLR_TOUT_US		(1000 * 1000) /* 1 sec */
-	int ret = 0;
+	unsigned long deadline = jiffies + HZ;
+	struct scsi_device *sdev;
+	int res = 0;
+
  	/*
  	 * make sure that there are no outstanding requests when
  	 * clock scaling is in progress
  	 */
-	ufshcd_scsi_block_requests(hba);
-	down_write(&hba->clk_scaling_lock);
-	if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
-		ret = -EBUSY;
-		up_write(&hba->clk_scaling_lock);
-		ufshcd_scsi_unblock_requests(hba);
+	shost_for_each_device(sdev, hba->host)
+		blk_freeze_queue_start(sdev->request_queue);
+	blk_freeze_queue_start(hba->cmd_queue);
+	blk_freeze_queue_start(hba->tmf_queue);
+	shost_for_each_device(sdev, hba->host) {
+		if (blk_mq_freeze_queue_wait_timeout(sdev->request_queue,
+				max_t(long, 0, deadline - jiffies)) <= 0) {
+			goto err;
+		}
  	}
+	if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue,
+				max_t(long, 0, deadline - jiffies)) <= 0)
+		goto err;
+	if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue,
+				max_t(long, 0, deadline - jiffies)) <= 0)
+		goto err;

-	return ret;
+out:
+	blk_mq_unfreeze_queue(hba->tmf_queue);
+	return res;
+
+err:
+	blk_mq_unfreeze_queue(hba->cmd_queue);
+	shost_for_each_device(sdev, hba->host)
+		blk_mq_unfreeze_queue(sdev->request_queue);
+	res = -ETIMEDOUT;
+	goto out;
  }

  static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba)
  {
-	up_write(&hba->clk_scaling_lock);
-	ufshcd_scsi_unblock_requests(hba);
+	struct scsi_device *sdev;
+
+	blk_mq_unfreeze_queue(hba->cmd_queue);
+	shost_for_each_device(sdev, hba->host)
+		blk_mq_unfreeze_queue(sdev->request_queue);
  }

  /**
@@ -2394,9 +2358,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
  		BUG();
  	}

-	if (!down_read_trylock(&hba->clk_scaling_lock))
-		return SCSI_MLQUEUE_HOST_BUSY;
-
  	spin_lock_irqsave(hba->host->host_lock, flags);
  	switch (hba->ufshcd_state) {
  	case UFSHCD_STATE_OPERATIONAL:
@@ -2462,7 +2423,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
  out_unlock:
  	spin_unlock_irqrestore(hba->host->host_lock, flags);
  out:
-	up_read(&hba->clk_scaling_lock);
  	return err;
  }

@@ -2616,8 +2576,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
  	struct completion wait;
  	unsigned long flags;

-	down_read(&hba->clk_scaling_lock);
-
  	/*
  	 * Get free slot, sleep if slots are unavailable.
  	 * Even though we use wait_event() which sleeps indefinitely,
@@ -2653,7 +2611,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,

  out_put_tag:
  	blk_put_request(req);
-	up_read(&hba->clk_scaling_lock);
  	return err;
  }

@@ -5771,8 +5728,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
  	unsigned long flags;
  	u32 upiu_flags;

-	down_read(&hba->clk_scaling_lock);
-
  	req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
  	if (IS_ERR(req))
  		return PTR_ERR(req);
@@ -5854,7 +5809,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
  	}

  	blk_put_request(req);
-	up_read(&hba->clk_scaling_lock);
  	return err;
  }

@@ -8323,8 +8277,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
  	/* Initialize mutex for device management commands */
  	mutex_init(&hba->dev_cmd.lock);

-	init_rwsem(&hba->clk_scaling_lock);
-
  	ufshcd_init_clk_gating(hba);

  	ufshcd_init_clk_scaling(hba);
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 5865e16f53a6..5ebb920ae874 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -724,7 +724,6 @@ struct ufs_hba {
  	enum bkops_status urgent_bkops_lvl;
  	bool is_urgent_bkops_lvl_checked;

-	struct rw_semaphore clk_scaling_lock;
  	struct ufs_desc_size desc_size;
  	atomic_t scsi_block_reqs_cnt;
Bart Van Assche Nov. 18, 2019, 6:13 p.m. UTC | #9
On 11/14/19 10:01 PM, Can Guo wrote:
> On 2019-11-15 00:11, Bart Van Assche wrote:
>> On 11/13/19 8:03 AM, Bean Huo (beanhuo) wrote:
>>> I think, you are asking for comment from Can.  As for me, attached 
>>> patch is better.
>>> Removing ufshcd_wait_for_doorbell_clr(), instead of reading doorbell 
>>> register, Now
>>> using block layer blk_mq_{un,}freeze_queue(), looks good. I tested 
>>> your V5 patches,
>>> didn't find problem yet.
>>>
>>> Since my available platform doesn't support dynamic clk scaling, I 
>>> think, now seems only
>>> Qcom UFS controllers support this feature. So we need Can Guo to 
>>> finally confirm it.
>>
>> Do you agree with this patch series if patch 4 is replaced by the
>> patch attached to my previous e-mail? The entire (revised) series is
>> available at https://github.com/bvanassche/linux/tree/ufs-for-next.
>
> After ufshcd_clock_scaling_prepare() returns(no error), all request 
> queues are frozen. If failure
> happens(say power mode change command fails) after this point and error 
> handler kicks off,
> we need to send dev commands(in ufshcd_probe_hba()) to bring UFS back to 
> functionality.
> However, as the hba->cmd_queue is frozen, dev commands cannot be sent, 
> the error handler shall fail.

Hi Can,

My understanding of the current UFS driver code is that
ufshcd_clock_scaling_prepare() waits for ongoing commands to finish but
not for SCSI error handling to finish. Would you agree with changing
that behavior such that ufshcd_clock_scaling_prepare() returns an error
code if SCSI error handling is in progress? Do you agree that once that
change has been made it is fine to invoke blk_freeze_queue_start()
for all three types of block layer request queues (SCSI commands, device
management commands and TMFs)? Do you agree that this would fix the
issue that it is possible today to submit TMFs from user space
through the BSG queue while clock scaling is in progress?
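
A minimal sketch of that check (hypothetical, not taken from the posted
patches):

static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
{
	if (ufshcd_eh_in_progress(hba))
		return -EBUSY;	/* skip scaling while SCSI EH is running */

	/* ... freeze the SCSI, device management and TMF queues ... */
	return 0;
}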

Thanks,

Bart.
Can Guo Nov. 19, 2019, 5:33 a.m. UTC | #10
On 2019-11-19 02:13, Bart Van Assche wrote:
> On 11/14/19 10:01 PM, Can Guo wrote:
>> On 2019-11-15 00:11, Bart Van Assche wrote:
>>> On 11/13/19 8:03 AM, Bean Huo (beanhuo) wrote:
>>>> I think, you are asking for comment from Can.  As for me, attached 
>>>> patch is better.
>>>> Removing ufshcd_wait_for_doorbell_clr(), instead of reading doorbell 
>>>> register, Now
>>>> using block layer blk_mq_{un,}freeze_queue(), looks good. I tested 
>>>> your V5 patches,
>>>> didn't find problem yet.
>>>> 
>>>> Since my available platform doesn't support dynamic clk scaling, I 
>>>> think, now seems only
>>>> Qcom UFS controllers support this feature. So we need Can Guo to 
>>>> finally confirm it.
>>> 
>>> Do you agree with this patch series if patch 4 is replaced by the
>>> patch attached to my previous e-mail? The entire (revised) series is
>>> available at https://github.com/bvanassche/linux/tree/ufs-for-next.
>> 
>> After ufshcd_clock_scaling_prepare() returns(no error), all request 
>> queues are frozen. If failure
>> happens(say power mode change command fails) after this point and 
>> error handler kicks off,
>> we need to send dev commands(in ufshcd_probe_hba()) to bring UFS back 
>> to functionality.
>> However, as the hba->cmd_queue is frozen, dev commands cannot be sent, 
>> the error handler shall fail.
> 
> Hi Can,
> 
> My understanding of the current UFS driver code is that
> ufshcd_clock_scaling_prepare() waits for ongoing commands to finish
> but not for SCSI error handling to finish. Would you agree with
> changing that behavior such that ufshcd_clock_scaling_prepare()
> returns an error
> code if SCSI error handling is in progress? Do you agree that once
> that change has been made that it is fine to invoke
> blk_freeze_queue_start() for all three types of block layer request
> queues (SCSI commands, device management commands and TMFs)? Do you
> agree that this would fix the issue that it is possible today to
> submit TMFs from user space using through the BSG queue while clock
> scaling is in progress?
> 
> Thanks,
> 
> Bart.

Hi Bart,

Your understanding is correct.

> Would you agree with changing that behavior such that 
> ufshcd_clock_scaling_prepare()
> returns an error code if SCSI error handling is in progress?

I believe we already have the check of ufshcd_eh_in_progress() in
ufshcd_devfreq_target() before calling ufshcd_devfreq_scale().
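
That check looks roughly like this (paraphrased excerpt from
ufshcd_devfreq_target(), not the full function):

static int ufshcd_devfreq_target(struct device *dev, unsigned long *freq,
				 u32 flags)
{
	struct ufs_hba *hba = dev_get_drvdata(dev);
	unsigned long irq_flags;

	if (!ufshcd_is_clkscaling_supported(hba))
		return -EINVAL;

	spin_lock_irqsave(hba->host->host_lock, irq_flags);
	if (ufshcd_eh_in_progress(hba)) {
		spin_unlock_irqrestore(hba->host->host_lock, irq_flags);
		return 0;	/* no scaling while error handling is in progress */
	}
	spin_unlock_irqrestore(hba->host->host_lock, irq_flags);

	/* ... decide scale_up vs. scale_down and call ufshcd_devfreq_scale() ... */
	return 0;
}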

> Do you agree that once that change has been made that it is fine to 
> invoke
> blk_freeze_queue_start() for all three types of block layer request
> queues (SCSI commands, device management commands and TMFs)?

I agree. As the error handling work always runs asynchronously in the current
code base, it is fine to freeze all the queues.
Any error handling work scheduled during scaling would eventually be unblocked
when hba->cmd_queue is unfrozen after scaling returns or fails.

Sorry for the confusion. My previous comment was based on the case where we
need to do hibern8 enter/exit in vendor ops (see
https://lore.kernel.org/patchwork/patch/1143579/): if hibern8 enter/exit fails
during scaling, we need to call ufshcd_link_recovery() to recover everything
in a SYNC way. And as hba->cmd_queue is frozen, ufshcd_link_recovery() would
hang when it needs to send device management commands. In this case we need to
make sure hba->cmd_queue is NOT frozen.
So it seems contradictory.

FYI, even with the patch https://lore.kernel.org/patchwork/patch/1143579/,
the current implementation also has this problem. We have another change that
addresses it by modifying ufshcd_exec_dev_cmd() as below.

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index c2a2ff2..81a000e 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -4068,6 +4068,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
         int tag;
         struct completion wait;
         unsigned long flags;
+       bool has_read_lock = false;

@@ -4075,8 +4076,10 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,

-       down_read(&hba->lock);
+       if (!ufshcd_eh_in_progress(hba)) {
+               down_read(&hba->lock);
+               has_read_lock = true;
+       }

         /*
          * Get free slot, sleep if slots are unavailable.
@@ -4110,7 +4113,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
  out_put_tag:
         ufshcd_put_dev_cmd_tag(hba, tag);
         wake_up(&hba->dev_cmd.tag_wq);
-       up_read(&hba->lock);
+       if (has_read_lock)
+               up_read(&hba->lock);
         return err;
  }


Aside from the functionality, we need to take this change carefully as it may
affect performance.
With the old implementation, when devfreq decides to scale up, we can make
sure that all requests sent to the UFS device after this point go through the
highest UFS gear. Scaling up happens right after the doorbell is cleared;
there is no need to wait until the requests are freed from the block layer.
And 1 second is long enough for the HW to clear the doorbell.

In the new implementation of ufshcd_clock_scaling_prepare(), after
blk_freeze_queue_start() we call blk_mq_freeze_queue_wait_timeout() to wait
for 1 second. In addition to those requests which have already been queued to
the HW doorbell, more requests will be dispatched within that second, through
the lowest gear.
My first impression is that this might be a bit lazy, and I am not sure how
much it may affect the benchmarks.

And if the request queues are heavily loaded (many bios are waiting for a
free tag to become a request), is 1 second long enough to freeze all these
request queues? If not, performance would be affected if scaling up fails
frequently.
As per my understanding, a request queue becomes frozen only after all the
existing requests and bios (which are already waiting for a free tag on the
queue) have been completed and freed from the block layer.
Please correct me if I am wrong.

Whereas with the old implementation, when devfreq decides to scale up,
scaling can almost always happen, except for unexpected HW issues.

Best Regards,
Can Guo
Bart Van Assche Nov. 19, 2019, 11:16 p.m. UTC | #11
On 11/18/19 9:33 PM, cang@codeaurora.org wrote:
> Aside the functionality, we need to take this change carefully as it may 
> relate with performance.
> With the old implementation, when devfreq decides to scale up, we can 
> make sure that all requests
> sent to UFS device after this point will go through the highest UFS 
> Gear. Scaling up will happen
> right after doorbell is cleared, not even need to wait till the requests 
> are freed from block layer.
> And 1 sec is long enough to clean out the doorbell by HW.
> 
> In the new implementation of ufshcd_clock_scaling_prepare(), after 
> blk_freeze_queue_start(), we call
> blk_mq_freeze_queue_wait_timeout() to wait for 1 sec. In addition to 
> those requests which have already
> been queued to HW doorbell, more requests will be dispatched within 1 
> sec, through the lowest Gear.
> My first impression is that it might be a bit lazy and I am not sure how 
> much it may affect the benchmarks.
> 
> And if the request queues are heavily loaded(many bios are waiting for a 
> free tag to become a request),
> is 1 sec long enough to freeze all these request queues? If no, 
> performance would be affected if scaling up
> fails frequently.
> As per my understanding, the request queue will become frozen only after 
> all the existing requests and
> bios(which are already waiting for a free tag on the queue) to be 
> completed and freed from block layer.
> Please correct me if I am wrong.
> 
> While, in the old implementation, when devfreq decides to scale up, 
> scaling can always
> happen for majority chances, except for those unexpected HW issues.

Hi Can,

I agree with the description above of the current behavior of the UFS 
driver. But I do not agree with the interpretation of the behavior of 
the changes I proposed. The implementation in the block layer of request 
queue freezing is as follows:

void blk_freeze_queue_start(struct request_queue *q)
{
	mutex_lock(&q->mq_freeze_lock);
	if (++q->mq_freeze_depth == 1) {
		percpu_ref_kill(&q->q_usage_counter);
		mutex_unlock(&q->mq_freeze_lock);
		if (queue_is_mq(q))
			blk_mq_run_hw_queues(q, false);
	} else {
		mutex_unlock(&q->mq_freeze_lock);
	}
}

As long as calls from other threads to 
blk_mq_freeze_queue()/blk_mq_unfreeze_queue() are balanced, after 
blk_freeze_queue_start() returns it is guaranteed that 
q->q_usage_counter is in __PERCPU_REF_DEAD mode. Calls to 
blk_get_request() etc. block in that mode until the request queue is 
unfrozen. See also blk_queue_enter().

In other words, calling blk_freeze_queue() from inside 
ufshcd_clock_scaling_prepare() should preserve the current behavior, 
namely that ufshcd_clock_scaling_prepare() waits for ongoing requests to 
finish and also that submission of new requests is postponed until clock 
scaling has finished. Do you agree with this analysis?

Thanks,

Bart.
Can Guo Nov. 20, 2019, 3:38 a.m. UTC | #12
On 2019-11-20 07:16, Bart Van Assche wrote:
> On 11/18/19 9:33 PM, cang@codeaurora.org wrote:
>> Aside the functionality, we need to take this change carefully as it 
>> may relate with performance.
>> With the old implementation, when devfreq decides to scale up, we can 
>> make sure that all requests
>> sent to UFS device after this point will go through the highest UFS 
>> Gear. Scaling up will happen
>> right after doorbell is cleared, not even need to wait till the 
>> requests are freed from block layer.
>> And 1 sec is long enough to clean out the doorbell by HW.
>> 
>> In the new implementation of ufshcd_clock_scaling_prepare(), after 
>> blk_freeze_queue_start(), we call
>> blk_mq_freeze_queue_wait_timeout() to wait for 1 sec. In addition to 
>> those requests which have already
>> been queued to HW doorbell, more requests will be dispatched within 1 
>> sec, through the lowest Gear.
>> My first impression is that it might be a bit lazy and I am not sure 
>> how much it may affect the benchmarks.
>> 
>> And if the request queues are heavily loaded(many bios are waiting for 
>> a free tag to become a request),
>> is 1 sec long enough to freeze all these request queues? If no, 
>> performance would be affected if scaling up
>> fails frequently.
>> As per my understanding, the request queue will become frozen only 
>> after all the existing requests and
>> bios(which are already waiting for a free tag on the queue) to be 
>> completed and freed from block layer.
>> Please correct me if I am wrong.
>> 
>> While, in the old implementation, when devfreq decides to scale up, 
>> scaling can always
>> happen for majority chances, except for those unexpected HW issues.
> 
> Hi Can,
> 
> I agree with the description above of the current behavior of the UFS
> driver. But I do not agree with the interpretation of the behavior of
> the changes I proposed. The implementation in the block layer of
> request queue freezing is as follows:
> 
> void blk_freeze_queue_start(struct request_queue *q)
> {
> 	mutex_lock(&q->mq_freeze_lock);
> 	if (++q->mq_freeze_depth == 1) {
> 		percpu_ref_kill(&q->q_usage_counter);
> 		mutex_unlock(&q->mq_freeze_lock);
> 		if (queue_is_mq(q))
> 			blk_mq_run_hw_queues(q, false);
> 	} else {
> 		mutex_unlock(&q->mq_freeze_lock);
> 	}
> }
> 
> As long as calls from other threads to
> blk_mq_freeze_queue()/blk_mq_unfreeze_queue() are balanced, after
> blk_freeze_queue_start() returns it is guaranteed that
> q->q_usage_counter is in __PERCPU_REF_DEAD mode. Calls to
> blk_get_request() etc. block in that mode until the request queue is
> unfrozen. See also blk_queue_enter().
> 
> In other words, calling blk_freeze_queue() from inside
> ufshcd_clock_scaling_prepare() should preserve the current behavior,
> namely that ufshcd_clock_scaling_prepare() waits for ongoing requests
> to finish and also that submission of new requests is postponed until
> clock scaling has finished. Do you agree with this analysis?
> 
> Thanks,
> 
> Bart.

Hi Bart,

I am still not quite clear about the blk_freeze_queue() part.

>> In the new implementation of ufshcd_clock_scaling_prepare(), after 
>> blk_freeze_queue_start(), we call
>> blk_mq_freeze_queue_wait_timeout() to wait for 1 sec. In addition to 
>> those requests which have already
>> been queued to HW doorbell, more requests will be dispatched within 1 
>> sec, through the lowest Gear.
>> My first impression is that it might be a bit lazy and I am not sure 
>> how much it may affect the benchmarks.

First of all, regarding the above lines, do you agree that there can be extra
latency in scaling up/down compared with the old implementation?

I can understand that after the queue is frozen, all calls to
blk_get_request() are blocked and no submission can be made after this point,
because blk_queue_enter() waits on q->mq_freeze_wq.

<--code snippet-->
wait_event(q->mq_freeze_wq,
            (atomic_read(&q->mq_freeze_depth) == 0 &&
<--code snippet-->

>> And if the request queues are heavily loaded(many bios are waiting for 
>> a free tag to become a request),
>> is 1 sec long enough to freeze all these request queue?

But here I meant those bios which, before we call blk_freeze_queue_start(),
were sent through submit_bio() and have already entered blk_mq_get_request()
and returned from blk_queue_enter_live(). These bios are waiting for a free
tag (in blk_mq_get_tag(), when the queue is full).
Can the request queue become frozen before these bios are handled?

void blk_mq_freeze_queue_wait(struct request_queue *q)
{
  	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait);

Per my understanding, these bios have already incremented q->q_usage_counter.
And q->q_usage_counter will only become zero when all of the requests in the
queue and these bios (after they get free tags and are turned into requests)
have been completed by the block layer, i.e. after blk_queue_exit() is called
in __blk_mq_free_request() for all of them. Please correct me if I am wrong.

Thanks,

Can Guo.
Bart Van Assche Nov. 20, 2019, 5:58 p.m. UTC | #13
On 11/19/19 7:38 PM, cang@codeaurora.org wrote:
> On 2019-11-20 07:16, Bart Van Assche wrote:
>> On 11/18/19 9:33 PM, cang@codeaurora.org wrote:
[ ... ]
> 
> I am still not quite clear about the blk_freeze_queue() part.
> 
>>> In the new implementation of ufshcd_clock_scaling_prepare(), after 
>>> blk_freeze_queue_start(), we call blk_mq_freeze_queue_wait_timeout()
>>> to wait for 1 sec. In addition to those requests which have already
>>> been queued to HW doorbell, more requests will be dispatched within 1 
>>> sec, through the lowest Gear. My first impression is that it might be
>>> a bit lazy and I am not sure how much it may affect the benchmarks.
> 
> First of all, reg above lines, do you agree that there can be latency in 
> scaling up/down
> comparing with the old implementation?
> 
> I can understand that after queue is frozen, all calls to blk_get_request()
> are blocked, no submission can be made after this point, due to
> blk_queue_enter() shall wait on q->mq_freeze_wq.
> 
> <--code snippet-->
> wait_event(q->mq_freeze_wq,
>             (atomic_read(&q->mq_freeze_depth) == 0 &&
> <--code snippet-->
> 
>>> And if the request queues are heavily loaded(many bios are waiting 
>>> for a free tag to become a request),
>>> is 1 sec long enough to freeze all these request queue?
> 
> But here I meant those bios, before we call blk_freeze_queue_start(), 
> sent through
> submit_bio() calls which have already entered blk_mq_get_request() and 
> already
> returned from the blk_queue_enter_live(). These bios are waiting for a 
> free tag
> (waiting on func blk_mq_get_tag() when queue is full).
> Shall the request queue be frozen before these bios are handled?
> 
> void blk_mq_freeze_queue_wait(struct request_queue *q)
> {
>       wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
> }
> EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait);
> 
> Per my understanding, these bios have already incremented 
> q->q_usage_counter, and q->q_usage_counter will only drop back to zero 
> once all of the requests in the queue and all of these bios (after they 
> get free tags and are turned into requests) have been completed by the 
> block layer, i.e. after blk_queue_exit() has been called in 
> __blk_mq_free_request() for each of them. Please correct me if I am wrong.

Hi Can,

Please have another look at the request queue freezing mechanism in the 
block layer. If blk_queue_enter() sleeps in wait_event(), that implies 
either that the percpu_ref_tryget_live() call failed or that it 
succeeded and was followed by a percpu_ref_put() call. Ignoring 
concurrent q_usage_counter changes, in both cases q->q_usage_counter 
has the same value as before blk_queue_enter() started.

In other words, there shouldn't be a latency difference between the 
current and the proposed approach since neither approach waits for 
completion of bios or SCSI commands that are queued after clock scaling 
started.
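
For completeness: the reason the percpu_ref_tryget_live() call fails is 
that blk_freeze_queue_start() kills the percpu ref as soon as the 
freeze depth goes from 0 to 1. Roughly (a sketch, not verbatim kernel 
code; how mq_freeze_depth is protected differs between kernel versions 
and is glossed over here, and the _sketch name is just a label):

/* Rough sketch of blk_freeze_queue_start(). */
void blk_freeze_queue_start_sketch(struct request_queue *q)
{
	if (atomic_inc_return(&q->mq_freeze_depth) == 1) {
		/* From now on percpu_ref_tryget_live() fails for new
		 * submitters, so blk_queue_enter() makes them sleep. */
		percpu_ref_kill(&q->q_usage_counter);
		/* Kick the hw queues so already-allocated requests get
		 * dispatched and the queue can drain. */
		if (queue_is_mq(q))
			blk_mq_run_hw_queues(q, false);
	}
}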

Thanks,

Bart.
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 810b997f55fc..717ba24a2d40 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -290,16 +290,50 @@  static inline void ufshcd_disable_irq(struct ufs_hba *hba)
 	}
 }
 
-static void ufshcd_scsi_unblock_requests(struct ufs_hba *hba)
+static void ufshcd_unblock_requests(struct ufs_hba *hba)
 {
-	if (atomic_dec_and_test(&hba->scsi_block_reqs_cnt))
-		scsi_unblock_requests(hba->host);
+	struct scsi_device *sdev;
+
+	blk_mq_unfreeze_queue(hba->tmf_queue);
+	blk_mq_unfreeze_queue(hba->cmd_queue);
+	shost_for_each_device(sdev, hba->host)
+		blk_mq_unfreeze_queue(sdev->request_queue);
 }
 
-static void ufshcd_scsi_block_requests(struct ufs_hba *hba)
+static int ufshcd_block_requests(struct ufs_hba *hba, unsigned long timeout)
 {
-	if (atomic_inc_return(&hba->scsi_block_reqs_cnt) == 1)
-		scsi_block_requests(hba->host);
+	struct scsi_device *sdev;
+	unsigned long deadline = jiffies + timeout;
+
+	if (timeout == ULONG_MAX) {
+		shost_for_each_device(sdev, hba->host)
+			blk_mq_freeze_queue(sdev->request_queue);
+		blk_mq_freeze_queue(hba->cmd_queue);
+		blk_mq_freeze_queue(hba->tmf_queue);
+		return 0;
+	}
+
+	shost_for_each_device(sdev, hba->host)
+		blk_freeze_queue_start(sdev->request_queue);
+	blk_freeze_queue_start(hba->cmd_queue);
+	blk_freeze_queue_start(hba->tmf_queue);
+	shost_for_each_device(sdev, hba->host) {
+		if (blk_mq_freeze_queue_wait_timeout(sdev->request_queue,
+				max_t(long, 0, deadline - jiffies)) <= 0) {
+			goto err;
+		}
+	}
+	if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue,
+				max_t(long, 0, deadline - jiffies)) <= 0)
+		goto err;
+	if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue,
+				max_t(long, 0, deadline - jiffies)) <= 0)
+		goto err;
+	return 0;
+
+err:
+	ufshcd_unblock_requests(hba);
+	return -ETIMEDOUT;
 }
 
 static void ufshcd_add_cmd_upiu_trace(struct ufs_hba *hba, unsigned int tag,
@@ -971,65 +1005,6 @@  static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
 	return false;
 }
 
-static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
-					u64 wait_timeout_us)
-{
-	unsigned long flags;
-	int ret = 0;
-	u32 tm_doorbell;
-	u32 tr_doorbell;
-	bool timeout = false, do_last_check = false;
-	ktime_t start;
-
-	ufshcd_hold(hba, false);
-	spin_lock_irqsave(hba->host->host_lock, flags);
-	/*
-	 * Wait for all the outstanding tasks/transfer requests.
-	 * Verify by checking the doorbell registers are clear.
-	 */
-	start = ktime_get();
-	do {
-		if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
-			ret = -EBUSY;
-			goto out;
-		}
-
-		tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
-		tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
-		if (!tm_doorbell && !tr_doorbell) {
-			timeout = false;
-			break;
-		} else if (do_last_check) {
-			break;
-		}
-
-		spin_unlock_irqrestore(hba->host->host_lock, flags);
-		schedule();
-		if (ktime_to_us(ktime_sub(ktime_get(), start)) >
-		    wait_timeout_us) {
-			timeout = true;
-			/*
-			 * We might have scheduled out for long time so make
-			 * sure to check if doorbells are cleared by this time
-			 * or not.
-			 */
-			do_last_check = true;
-		}
-		spin_lock_irqsave(hba->host->host_lock, flags);
-	} while (tm_doorbell || tr_doorbell);
-
-	if (timeout) {
-		dev_err(hba->dev,
-			"%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n",
-			__func__, tm_doorbell, tr_doorbell);
-		ret = -EBUSY;
-	}
-out:
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
-	ufshcd_release(hba);
-	return ret;
-}
-
 /**
  * ufshcd_scale_gear - scale up/down UFS gear
  * @hba: per adapter instance
@@ -1079,27 +1054,16 @@  static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)
 
 static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
 {
-	#define DOORBELL_CLR_TOUT_US		(1000 * 1000) /* 1 sec */
-	int ret = 0;
 	/*
 	 * make sure that there are no outstanding requests when
 	 * clock scaling is in progress
 	 */
-	ufshcd_scsi_block_requests(hba);
-	down_write(&hba->clk_scaling_lock);
-	if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
-		ret = -EBUSY;
-		up_write(&hba->clk_scaling_lock);
-		ufshcd_scsi_unblock_requests(hba);
-	}
-
-	return ret;
+	return ufshcd_block_requests(hba, HZ);
 }
 
 static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba)
 {
-	up_write(&hba->clk_scaling_lock);
-	ufshcd_scsi_unblock_requests(hba);
+	ufshcd_unblock_requests(hba);
 }
 
 /**
@@ -1471,7 +1435,7 @@  static void ufshcd_ungate_work(struct work_struct *work)
 		hba->clk_gating.is_suspended = false;
 	}
 unblock_reqs:
-	ufshcd_scsi_unblock_requests(hba);
+	ufshcd_unblock_requests(hba);
 }
 
 /**
@@ -1528,7 +1492,7 @@  int ufshcd_hold(struct ufs_hba *hba, bool async)
 		 */
 		/* fallthrough */
 	case CLKS_OFF:
-		ufshcd_scsi_block_requests(hba);
+		ufshcd_block_requests(hba, ULONG_MAX);
 		hba->clk_gating.state = REQ_CLKS_ON;
 		trace_ufshcd_clk_gating(dev_name(hba->dev),
 					hba->clk_gating.state);
@@ -2395,9 +2359,6 @@  static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 		BUG();
 	}
 
-	if (!down_read_trylock(&hba->clk_scaling_lock))
-		return SCSI_MLQUEUE_HOST_BUSY;
-
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	switch (hba->ufshcd_state) {
 	case UFSHCD_STATE_OPERATIONAL:
@@ -2463,7 +2424,6 @@  static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 out_unlock:
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 out:
-	up_read(&hba->clk_scaling_lock);
 	return err;
 }
 
@@ -2617,8 +2577,6 @@  static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 	struct completion wait;
 	unsigned long flags;
 
-	down_read(&hba->clk_scaling_lock);
-
 	/*
 	 * Get free slot, sleep if slots are unavailable.
 	 * Even though we use wait_event() which sleeps indefinitely,
@@ -2654,7 +2612,6 @@  static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 
 out_put_tag:
 	blk_put_request(req);
-	up_read(&hba->clk_scaling_lock);
 	return err;
 }
 
@@ -5342,7 +5299,7 @@  static void ufshcd_err_handler(struct work_struct *work)
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 	if (starget)
 		scsi_target_unblock(&starget->dev, SDEV_RUNNING);
-	ufshcd_scsi_unblock_requests(hba);
+	scsi_unblock_requests(hba->host);
 	ufshcd_release(hba);
 	pm_runtime_put_sync(hba->dev);
 }
@@ -5466,7 +5423,7 @@  static void ufshcd_check_errors(struct ufs_hba *hba)
 		/* handle fatal errors only when link is functional */
 		if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
 			/* block commands from scsi mid-layer */
-			ufshcd_scsi_block_requests(hba);
+			scsi_block_requests(hba->host);
 
 			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED;
 
@@ -5772,8 +5729,6 @@  static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 	unsigned long flags;
 	u32 upiu_flags;
 
-	down_read(&hba->clk_scaling_lock);
-
 	req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
@@ -5853,7 +5808,6 @@  static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 	}
 
 	blk_put_request(req);
-	up_read(&hba->clk_scaling_lock);
 	return err;
 }
 
@@ -8322,8 +8276,6 @@  int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	/* Initialize mutex for device management commands */
 	mutex_init(&hba->dev_cmd.lock);
 
-	init_rwsem(&hba->clk_scaling_lock);
-
 	ufshcd_init_clk_gating(hba);
 
 	ufshcd_init_clk_scaling(hba);
@@ -8410,7 +8362,6 @@  int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 
 	/* Hold auto suspend until async scan completes */
 	pm_runtime_get_sync(dev);
-	atomic_set(&hba->scsi_block_reqs_cnt, 0);
 	/*
 	 * We are assuming that device wasn't put in sleep/power-down
 	 * state exclusively during the boot stage before kernel.
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 5865e16f53a6..c44bfcdb26a3 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -520,7 +520,6 @@  struct ufs_stats {
  * @urgent_bkops_lvl: keeps track of urgent bkops level for device
  * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
  *  device is known or not.
- * @scsi_block_reqs_cnt: reference counting for scsi block requests
  */
 struct ufs_hba {
 	void __iomem *mmio_base;
@@ -724,9 +723,7 @@  struct ufs_hba {
 	enum bkops_status urgent_bkops_lvl;
 	bool is_urgent_bkops_lvl_checked;
 
-	struct rw_semaphore clk_scaling_lock;
 	struct ufs_desc_size desc_size;
-	atomic_t scsi_block_reqs_cnt;
 
 	struct device		bsg_dev;
 	struct request_queue	*bsg_queue;