[v6,4/4] ufs: Simplify the clock scaling mechanism implementation

Message ID 20191121220850.16195-5-bvanassche@acm.org (mailing list archive)
State: Changes Requested
Series: Simplify and optimize the UFS driver

Commit Message

Bart Van Assche Nov. 21, 2019, 10:08 p.m. UTC
Scaling the clock is only safe while no commands are in progress. The
current clock scaling implementation uses hba->clk_scaling_lock to
serialize clock scaling against the following three functions:
* ufshcd_queuecommand()          (uses sdev->request_queue)
* ufshcd_exec_dev_cmd()          (uses hba->cmd_queue)
* ufshcd_issue_devman_upiu_cmd() (uses hba->cmd_queue)

__ufshcd_issue_tm_cmd(), which uses hba->tmf_queue, is not yet serialized
against clock scaling.

Use blk_mq_{un,}freeze_queue() to block submission of new commands and to
wait for ongoing commands to complete. This patch removes a semaphore
down and up operation pair from the hot path. This patch fixes a bug by
disallowing the submission of TMFs from user space, via the call chain
below, while clock scaling is in progress:

submit UPIU_TRANSACTION_TASK_REQ on an UFS BSG queue
-> ufs_bsg_request()
  -> ufshcd_exec_raw_upiu_cmd(msgcode = UPIU_TRANSACTION_TASK_REQ)
    -> ufshcd_exec_raw_upiu_cmd()
      -> __ufshcd_issue_tm_cmd()

Cc: Can Guo <cang@codeaurora.org>
Cc: Bean Huo <beanhuo@micron.com>
Cc: Avri Altman <avri.altman@wdc.com>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/ufs/ufshcd.c | 124 ++++++++++++--------------------------
 drivers/scsi/ufs/ufshcd.h |   1 -
 2 files changed, 40 insertions(+), 85 deletions(-)
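
For readers less familiar with the blk-mq freeze API, the pattern the patch
relies on can be sketched as follows (illustration only; the helper name and
the scale callback are hypothetical, and the real code in the diff below
starts the freeze on all queues first and only then waits on each of them
against a shared deadline):

/*
 * Sketch (not part of the patch): quiesce one request queue within a
 * deadline, run a callback while no requests are in flight, then resume.
 */
static int quiesce_scale_resume(struct request_queue *q,
				int (*scale)(void *arg), void *arg)
{
	unsigned long deadline = jiffies + HZ;	/* 1 second budget */
	int ret;

	blk_freeze_queue_start(q);	/* block new submissions immediately */
	/* Bounded wait for requests already in flight to complete. */
	if (blk_mq_freeze_queue_wait_timeout(q,
			max_t(long, 0, deadline - jiffies)) <= 0) {
		blk_mq_unfreeze_queue(q);
		return -ETIMEDOUT;
	}
	ret = scale(arg);		/* the queue is drained here */
	blk_mq_unfreeze_queue(q);	/* resume request processing */
	return ret;
}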

Comments

Can Guo Nov. 25, 2019, 7:38 a.m. UTC | #1
On 2019-11-22 06:08, Bart Van Assche wrote:
> Scaling the clock is only safe while no commands are in progress. The
> current clock scaling implementation uses hba->clk_scaling_lock to
> serialize clock scaling against the following three functions:
> * ufshcd_queuecommand()          (uses sdev->request_queue)
> * ufshcd_exec_dev_cmd()          (uses hba->cmd_queue)
> * ufshcd_issue_devman_upiu_cmd() (uses hba->cmd_queue)
> 
> __ufshcd_issue_tm_cmd(), which uses hba->tmf_queue, is not yet 
> serialized
> against clock scaling.
> 
> Use blk_mq_{un,}freeze_queue() to block submission of new commands and 
> to
> wait for ongoing commands to complete. This patch removes a semaphore
> down and up operation pair from the hot path. This patch fixes a bug by
> disallowing that TMFs are submitted as follows from user space while
> clock scaling is in progress:
> 
> submit UPIU_TRANSACTION_TASK_REQ on an UFS BSG queue
> -> ufs_bsg_request()
>   -> ufshcd_exec_raw_upiu_cmd(msgcode = UPIU_TRANSACTION_TASK_REQ)
>     -> ufshcd_exec_raw_upiu_cmd()
>       -> __ufshcd_issue_tm_cmd()
> 
> Cc: Can Guo <cang@codeaurora.org>
> Cc: Bean Huo <beanhuo@micron.com>
> Cc: Avri Altman <avri.altman@wdc.com>
> Cc: Stanley Chu <stanley.chu@mediatek.com>
> Cc: Tomas Winkler <tomas.winkler@intel.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 124 ++++++++++++--------------------------
>  drivers/scsi/ufs/ufshcd.h |   1 -
>  2 files changed, 40 insertions(+), 85 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 99337e0b54f6..472d164f6ae6 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -971,65 +971,6 @@ static bool
> ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
>  	return false;
>  }
> 
> -static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
> -					u64 wait_timeout_us)
> -{
> -	unsigned long flags;
> -	int ret = 0;
> -	u32 tm_doorbell;
> -	u32 tr_doorbell;
> -	bool timeout = false, do_last_check = false;
> -	ktime_t start;
> -
> -	ufshcd_hold(hba, false);
> -	spin_lock_irqsave(hba->host->host_lock, flags);
> -	/*
> -	 * Wait for all the outstanding tasks/transfer requests.
> -	 * Verify by checking the doorbell registers are clear.
> -	 */
> -	start = ktime_get();
> -	do {
> -		if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
> -			ret = -EBUSY;
> -			goto out;
> -		}
> -
> -		tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
> -		tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
> -		if (!tm_doorbell && !tr_doorbell) {
> -			timeout = false;
> -			break;
> -		} else if (do_last_check) {
> -			break;
> -		}
> -
> -		spin_unlock_irqrestore(hba->host->host_lock, flags);
> -		schedule();
> -		if (ktime_to_us(ktime_sub(ktime_get(), start)) >
> -		    wait_timeout_us) {
> -			timeout = true;
> -			/*
> -			 * We might have scheduled out for long time so make
> -			 * sure to check if doorbells are cleared by this time
> -			 * or not.
> -			 */
> -			do_last_check = true;
> -		}
> -		spin_lock_irqsave(hba->host->host_lock, flags);
> -	} while (tm_doorbell || tr_doorbell);
> -
> -	if (timeout) {
> -		dev_err(hba->dev,
> -			"%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n",
> -			__func__, tm_doorbell, tr_doorbell);
> -		ret = -EBUSY;
> -	}
> -out:
> -	spin_unlock_irqrestore(hba->host->host_lock, flags);
> -	ufshcd_release(hba);
> -	return ret;
> -}
> -
>  /**
>   * ufshcd_scale_gear - scale up/down UFS gear
>   * @hba: per adapter instance
> @@ -1079,27 +1020,54 @@ static int ufshcd_scale_gear(struct ufs_hba
> *hba, bool scale_up)
> 
>  static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
>  {
> -	#define DOORBELL_CLR_TOUT_US		(1000 * 1000) /* 1 sec */
> -	int ret = 0;
> +	unsigned long deadline = jiffies + HZ;
> +	struct scsi_device *sdev;
> +	int res = 0;
> +
>  	/*
> -	 * make sure that there are no outstanding requests when
> -	 * clock scaling is in progress
> +	 * Make sure that no requests are outstanding while clock scaling is 
> in
> +	 * progress. If SCSI error recovery would start while this function 
> is
> +	 * in progress then
> +	 * blk_mq_freeze_queue_wait_timeout(sdev->request_queue) will either
> +	 * wait until error handling has finished or will time out.
>  	 */
> -	ufshcd_scsi_block_requests(hba);
> -	down_write(&hba->clk_scaling_lock);
> -	if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
> -		ret = -EBUSY;
> -		up_write(&hba->clk_scaling_lock);
> -		ufshcd_scsi_unblock_requests(hba);
> +	shost_for_each_device(sdev, hba->host)
> +		blk_freeze_queue_start(sdev->request_queue);
> +	blk_freeze_queue_start(hba->cmd_queue);
> +	blk_freeze_queue_start(hba->tmf_queue);
> +	shost_for_each_device(sdev, hba->host) {
> +		if (blk_mq_freeze_queue_wait_timeout(sdev->request_queue,
> +				max_t(long, 0, deadline - jiffies)) <= 0) {
> +			goto err;
> +		}
>  	}
> +	if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue,
> +				max_t(long, 0, deadline - jiffies)) <= 0)
> +		goto err;
> +	if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue,
> +				max_t(long, 0, deadline - jiffies)) <= 0)
> +		goto err;
> 
> -	return ret;
> +out:
> +	return res;
> +
> +err:
> +	blk_mq_unfreeze_queue(hba->tmf_queue);
> +	blk_mq_unfreeze_queue(hba->cmd_queue);
> +	shost_for_each_device(sdev, hba->host)
> +		blk_mq_unfreeze_queue(sdev->request_queue);
> +	res = -ETIMEDOUT;
> +	goto out;
>  }
> 
>  static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba)
>  {
> -	up_write(&hba->clk_scaling_lock);
> -	ufshcd_scsi_unblock_requests(hba);
> +	struct scsi_device *sdev;
> +
> +	blk_mq_unfreeze_queue(hba->tmf_queue);
> +	blk_mq_unfreeze_queue(hba->cmd_queue);
> +	shost_for_each_device(sdev, hba->host)
> +		blk_mq_unfreeze_queue(sdev->request_queue);
>  }
> 
>  /**
> @@ -2394,9 +2362,6 @@ static int ufshcd_queuecommand(struct Scsi_Host
> *host, struct scsi_cmnd *cmd)
>  		BUG();
>  	}
> 
> -	if (!down_read_trylock(&hba->clk_scaling_lock))
> -		return SCSI_MLQUEUE_HOST_BUSY;
> -
>  	spin_lock_irqsave(hba->host->host_lock, flags);
>  	switch (hba->ufshcd_state) {
>  	case UFSHCD_STATE_OPERATIONAL:
> @@ -2462,7 +2427,6 @@ static int ufshcd_queuecommand(struct Scsi_Host
> *host, struct scsi_cmnd *cmd)
>  out_unlock:
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
>  out:
> -	up_read(&hba->clk_scaling_lock);
>  	return err;
>  }
> 
> @@ -2616,8 +2580,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba 
> *hba,
>  	struct completion wait;
>  	unsigned long flags;
> 
> -	down_read(&hba->clk_scaling_lock);
> -
>  	/*
>  	 * Get free slot, sleep if slots are unavailable.
>  	 * Even though we use wait_event() which sleeps indefinitely,
> @@ -2653,7 +2615,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba 
> *hba,
> 
>  out_put_tag:
>  	blk_put_request(req);
> -	up_read(&hba->clk_scaling_lock);
>  	return err;
>  }
> 
> @@ -5845,8 +5806,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct
> ufs_hba *hba,
>  	unsigned long flags;
>  	u32 upiu_flags;
> 
> -	down_read(&hba->clk_scaling_lock);
> -
>  	req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
>  	if (IS_ERR(req))
>  		return PTR_ERR(req);
> @@ -5928,7 +5887,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct
> ufs_hba *hba,
>  	}
> 
>  	blk_put_request(req);
> -	up_read(&hba->clk_scaling_lock);
>  	return err;
>  }
> 
> @@ -8397,8 +8355,6 @@ int ufshcd_init(struct ufs_hba *hba, void
> __iomem *mmio_base, unsigned int irq)
>  	/* Initialize mutex for device management commands */
>  	mutex_init(&hba->dev_cmd.lock);
> 
> -	init_rwsem(&hba->clk_scaling_lock);
> -
>  	ufshcd_init_clk_gating(hba);
> 
>  	ufshcd_init_clk_scaling(hba);
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> index 53bfe175342c..bbdef1153257 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -724,7 +724,6 @@ struct ufs_hba {
>  	enum bkops_status urgent_bkops_lvl;
>  	bool is_urgent_bkops_lvl_checked;
> 
> -	struct rw_semaphore clk_scaling_lock;
>  	struct ufs_desc_size desc_size;
>  	atomic_t scsi_block_reqs_cnt;

Hi Bart,

Sorry to bring back the old discussion we had on V5 of this series.
I spent some time understanding the mq freeze queue implementation, but I
still have the same concern regarding your new implementation of clock
scaling prepare/unprepare.

I attached two logs I collected on my setups (the kernel is 5.4).

In log1.txt, I pretty much ported your changes to our code base and copied
a large file to partition /dev/sde1 (mounted to /mnt). The debug logs I
added show that -ETIMEDOUT is returned from ufshcd_devfreq_scale(),
although scaling UP/DOWN succeeds some of the time.

To make it more controllable, I made a new sysfs entry, namely
freeze_sde_queue, so that I can freeze the queue of /dev/sde whenever I do
"echo 1 > freeze_sde_queue". The handler calls blk_freeze_queue_start(),
then calls blk_mq_freeze_queue_wait_timeout() to wait for 1 sec. In the
end, blk_mq_unfreeze_queue() is called whether or not the timeout
happened.
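
In sketch form, the hook looks like this (simplified; the attribute wiring
and the test_sdev lookup are placeholders, not the code actually used):

static ssize_t freeze_sde_queue_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf, size_t count)
{
	struct ufs_hba *hba = dev_get_drvdata(dev);
	/* test_sdev stands in for however the sde scsi_device was found */
	struct request_queue *q = hba->test_sdev->request_queue;

	blk_freeze_queue_start(q);
	/* Wait up to 1 sec for the queue usage counter to drop to zero. */
	if (blk_mq_freeze_queue_wait_timeout(q, HZ) <= 0)
		dev_info(dev, "waiting timed out! unfreeze it\n");
	/* Unfreeze whether or not the timeout happened. */
	blk_mq_unfreeze_queue(q);
	return count;
}
static DEVICE_ATTR_WO(freeze_sde_queue);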

I also added a flag, called enable_print, to struct request_queue, and set
it after I call blk_freeze_queue_start(). When this flag is set, the block
layer prints logs from blk_mq_start_request() and blk_mq_get_request().
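
The print hook itself is just a guarded pr_info() in the block layer, e.g.
(enable_print being the test-only flag mentioned above, not an upstream
field):

	/* inside blk_mq_start_request(), once the request is started */
	if (rq->q->enable_print)
		pr_info("blk_mq_start_request: Issue rq tag #%d, internal-tag #%d\n",
			rq->tag, rq->internal_tag);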

I tried this multiple times while copying a large file to partition
/dev/sde1 (mounted to /mnt).

In log2.txt, there are still wait timeouts, and you can see that after
blk_freeze_queue_start() is called for the request queue of /dev/sde,
requests can still be sent down to the UFS driver. These requests are the
"latency" that I mentioned in our previous discussion. I know these are
not "new" requests that reached the block layer after the freeze started,
but ones that were already in the queue before the freeze started.
However, this is where the new implementation differs from the old one.
When scaling up should happen, these requests are still sent through the
lowest HS Gear, while when scaling down should happen, they are sent
through the highest HS Gear. The former may cause a performance issue
while the latter may cause a power penalty; this is our major concern.

Regards,

Can Guo
/ # mkdir mnt
/ # mount -t ext2 /dev/sde1 /mnt
[  198.754458] EXT4-fs (sde1): mounting ext2 file system using the ext4 subsystem
[  198.822427] ufshcd-qcom ufshc: __ufshcd_suspend_clkscaling: suspend scaling
[  198.975093] ufshcd-qcom ufshc: __ufshcd_suspend_clkscaling: suspend scaling
[  199.059635] EXT4-fs (sde1): mounted filesystem without journal. Opts: (null)
/ # dd if=/dev/zero of=/mnt/out.bin bs=4k count=2000000
[  220.138645] ufshcd-qcom ufshc: __ufshcd_suspend_clkscaling: suspend scaling
[  224.594479] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  225.626135] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return 0
[  229.359427] ufshcd-qcom ufshc: __ufshcd_suspend_clkscaling: suspend scaling
[  229.438562] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling DOWN
[  229.587416] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling DOWN return 0
[  230.962677] ufshcd-qcom ufshc: __ufshcd_suspend_clkscaling: suspend scaling
[  244.959885] ufshcd-qcom ufshc: __ufshcd_suspend_clkscaling: suspend scaling
[  258.791583] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  259.978750] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  260.130802] devfreq devfreq0: dvfs failed with (-110) error
[  260.304187] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  261.446437] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  261.666333] devfreq devfreq0: dvfs failed with (-110) error
[  261.890562] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  263.107531] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  263.322333] devfreq devfreq0: dvfs failed with (-110) error
[  263.546395] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  264.742125] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  264.910260] devfreq devfreq0: dvfs failed with (-110) error
[  265.082708] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  266.243177] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  266.394197] devfreq devfreq0: dvfs failed with (-110) error
[  266.563729] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  267.858395] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  268.014208] devfreq devfreq0: dvfs failed with (-110) error
[  268.182677] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  269.315166] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  269.470760] devfreq devfreq0: dvfs failed with (-110) error
[  269.638635] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  270.786989] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  271.005979] devfreq devfreq0: dvfs failed with (-110) error
[  271.230572] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  272.387104] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  272.612770] devfreq devfreq0: dvfs failed with (-110) error
[  272.834979] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  274.051291] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  274.278187] devfreq devfreq0: dvfs failed with (-110) error
[  274.494604] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  276.125739] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  276.333937] devfreq devfreq0: dvfs failed with (-110) error
[  276.506343] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  277.652718] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  277.877906] devfreq devfreq0: dvfs failed with (-110) error
[  278.102343] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  279.299031] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  279.518197] devfreq devfreq0: dvfs failed with (-110) error
[  279.783635] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  280.931010] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  281.198197] devfreq devfreq0: dvfs failed with (-110) error
[  281.379187] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  282.595187] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  282.818166] devfreq devfreq0: dvfs failed with (-110) error
[  283.046208] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  284.290968] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  284.509979] devfreq devfreq0: dvfs failed with (-110) error
[  284.770520] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  285.986916] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  286.274187] devfreq devfreq0: dvfs failed with (-110) error
[  286.494343] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  287.682947] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  287.914156] devfreq devfreq0: dvfs failed with (-110) error
[  288.179000] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  289.314833] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  289.598145] devfreq devfreq0: dvfs failed with (-110) error
[  289.818312] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  291.042885] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  291.265937] devfreq devfreq0: dvfs failed with (-110) error
[  291.486270] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  292.706885] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  292.929937] devfreq devfreq0: dvfs failed with (-110) error
[  293.174541] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  294.338906] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  294.625927] devfreq devfreq0: dvfs failed with (-110) error
[  294.818562] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  295.970729] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  296.118187] devfreq devfreq0: dvfs failed with (-110) error
[  296.290531] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  297.442687] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  297.590166] devfreq devfreq0: dvfs failed with (-110) error
[  297.758583] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  298.914927] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return -110
[  299.058166] devfreq devfreq0: dvfs failed with (-110) error
[  299.226500] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  299.396447] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return 0
[  299.602895] ufshcd-qcom ufshc: __ufshcd_suspend_clkscaling: suspend scaling
[  299.672531] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling DOWN
[  299.986218] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling DOWN return 0
[  300.270437] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP
[  301.474229] ufshcd-qcom ufshc: ufshcd_devfreq_target: scaling UP return 0
/ # mkdir mnt
/ # mount -t ext2 /dev/sde1 /mnt
[  181.163458] EXT4-fs (sde1): mounting ext2 file system using the ext4 subsystem
[  181.319166] EXT4-fs (sde1): mounted filesystem without journal. Opts: (null)
/ # dd if=/dev/zero of=/mnt/out.bin bs=4k count=1000000 &
/ # cd sys/devices/platform/soc/ufshc/
/sys/devices/platform/soc/ufshc # echo 1 > freeze_sde_queue
[  223.523229] ufshcd-qcom ufshc: before start, queue hctx queued 235
[  223.650625] ufshcd-qcom ufshc: freeze queue of sdev 0:0:0:4 start
[  223.778802] ufshcd-qcom ufshc: after start, queue hctx queued 235
[  223.910354] ufshcd-qcom ufshc: queue forzen, queue usage counter: 0
[  224.038729] ufshcd-qcom ufshc: after queue frozen, queue hctx queued 235
[  224.178625] ufshcd-qcom ufshc: unfreeze it
[  224.274364] ufshcd-qcom ufshc: queue unforzen
/sys/devices/platform/soc/ufshc # echo 1 > freeze_sde_queue
[  227.492604] ufshcd-qcom ufshc: before start, queue hctx queued 235
[  227.626635] ufshcd-qcom ufshc: freeze queue of sdev 0:0:0:4 start
[  227.722291] ufshcd-qcom ufshc: after start, queue hctx queued 236
[  227.850156] ufshcd-qcom ufshc: queue forzen, queue usage counter: 0
[  227.978604] ufshcd-qcom ufshc: after queue frozen, queue hctx queued 236
[  228.118583] ufshcd-qcom ufshc: unfreeze it
[  228.210250] ufshcd-qcom ufshc: queue unforzen
/sys/devices/platform/soc/ufshc # echo 1 > freeze_sde_queue
[  234.104083] ufshcd-qcom ufshc: before start, queue hctx queued 236
[  234.242031] ufshcd-qcom ufshc: freeze queue of sdev 0:0:0:4 start
[  234.366291] ufshcd-qcom ufshc: after start, queue hctx queued 236
[  234.494114] ufshcd-qcom ufshc: queue forzen, queue usage counter: 0
[  234.626104] ufshcd-qcom ufshc: after queue frozen, queue hctx queued 236
[  234.762020] ufshcd-qcom ufshc: unfreeze it
[  234.850927] ufshcd-qcom ufshc: queue unforzen
/sys/devices/platform/soc/ufshc # echo 1 > freeze_sde_queue
[  236.615562] ufshcd-qcom ufshc: before start, queue hctx queued 236
[  236.746041] ufshcd-qcom ufshc: freeze queue of sdev 0:0:0:4 start
[  236.870291] ufshcd-qcom ufshc: after start, queue hctx queued 236
[  237.002156] ufshcd-qcom ufshc: queue forzen, queue usage counter: 0
[  237.130604] ufshcd-qcom ufshc: after queue frozen, queue hctx queued 236
[  237.266041] ufshcd-qcom ufshc: unfreeze it
[  237.354208] ufshcd-qcom ufshc: queue unforzen
/sys/devices/platform/soc/ufshc # echo 1 > freeze_sde_queue
[  240.903010] ufshcd-qcom ufshc: before start, queue hctx queued 318
[  241.042052] ufshcd-qcom ufshc: freeze queue of sdev 0:0:0:4 start
[  241.170322] ufshcd-qcom ufshc: after start, queue hctx queued 318
[  241.236625] blk_mq_start_request: Issue rq tag #6, internal-tag #62 on queue 0:0:0:4
[  241.311802] blk_mq_start_request: Issue rq tag #7, internal-tag #63 on queue 0:0:0:4
[  241.386489] blk_mq_start_request: Issue rq tag #8, internal-tag #0 on queue 0:0:0:4
[  241.462031] blk_mq_start_request: Issue rq tag #9, internal-tag #1 on queue 0:0:0:4
[  241.547739] blk_mq_start_request: Issue rq tag #10, internal-tag #2 on queue 0:0:0:4
[  241.635062] blk_mq_start_request: Issue rq tag #11, internal-tag #3 on queue 0:0:0:4
[  241.722114] blk_mq_start_request: Issue rq tag #12, internal-tag #4 on queue 0:0:0:4
[  241.808489] blk_mq_start_request: Issue rq tag #13, internal-tag #5 on queue 0:0:0:4
[  241.895708] blk_mq_start_request: Issue rq tag #14, internal-tag #6 on queue 0:0:0:4
[  241.975239] blk_mq_get_request: Create new rq tag #-1, internal-tag #32 on queue 0:0:0:4
[  242.052218] blk_mq_start_request: Issue rq tag #15, internal-tag #7 on queue 0:0:0:4
[  242.126250] blk_mq_start_request: Issue rq tag #16, internal-tag #8 on queue 0:0:0:4
[  242.200583] blk_mq_start_request: Issue rq tag #17, internal-tag #9 on queue 0:0:0:4
[  242.274572] blk_mq_start_request: Issue rq tag #18, internal-tag #10 on queue 0:0:0:4
[  242.352375] blk_mq_start_request: Issue rq tag #19, internal-tag #11 on queue 0:0:0:4
[  242.426781] blk_mq_start_request: Issue rq tag #20, internal-tag #12 on queue 0:0:0:4
[  242.508093] blk_mq_start_request: Issue rq tag #21, internal-tag #13 on queue 0:0:0:4
[  242.595218] blk_mq_start_request: Issue rq tag #22, internal-tag #14 on queue 0:0:0:4
[  242.682479] blk_mq_start_request: Issue rq tag #23, internal-tag #20 on queue 0:0:0:4
[  242.768677] blk_mq_start_request: Issue rq tag #29, internal-tag #15 on queue 0:0:0:4
[  242.855843] blk_mq_start_request: Issue rq tag #31, internal-tag #21 on queue 0:0:0:4
[  242.930010] ufshcd-qcom ufshc: waiting timed out! queue usage counter: 41, hctx queued 318, unfreeze it
[  243.024562] blk_mq_start_request: Issue rq tag #24, internal-tag #22 on queue 0:0:0:4
[  243.099458] blk_mq_start_request: Issue rq tag #25, internal-tag #23 on queue 0:0:0:4
[  243.174427] blk_mq_start_request: Issue rq tag #26, internal-tag #24 on queue 0:0:0:4
[  243.248020] blk_mq_start_request: Issue rq tag #27, internal-tag #25 on queue 0:0:0:4
[  243.324031] blk_mq_start_request: Issue rq tag #28, internal-tag #26 on queue 0:0:0:4
[  243.398229] blk_mq_start_request: Issue rq tag #30, internal-tag #27 on queue 0:0:0:4
[  243.472364] blk_mq_start_request: Issue rq tag #0, internal-tag #28 on queue 0:0:0:4
[  243.555427] blk_mq_start_request: Issue rq tag #1, internal-tag #32 on queue 0:0:0:4
[  243.713864] ufshcd-qcom ufshc: queue unforzen
/sys/devices/platform/soc/ufshc # echo 1 > freeze_sde_queue
[  252.571864] ufshcd-qcom ufshc: before start, queue hctx queued 449
[  252.702343] ufshcd-qcom ufshc: freeze queue of sdev 0:0:0:4 start
[  252.870291] ufshcd-qcom ufshc: after start, queue hctx queued 452
[  252.936531] blk_mq_start_request: Issue rq tag #12, internal-tag #60 on queue 0:0:0:4
[  253.012052] blk_mq_start_request: Issue rq tag #13, internal-tag #61 on queue 0:0:0:4
[  253.087562] blk_mq_start_request: Issue rq tag #14, internal-tag #62 on queue 0:0:0:4
[  253.162125] blk_mq_start_request: Issue rq tag #15, internal-tag #63 on queue 0:0:0:4
[  253.243677] blk_mq_start_request: Issue rq tag #24, internal-tag #2 on queue 0:0:0:4
[  253.331062] blk_mq_start_request: Issue rq tag #16, internal-tag #9 on queue 0:0:0:4
[  253.419395] blk_mq_start_request: Issue rq tag #17, internal-tag #10 on queue 0:0:0:4
[  253.506552] blk_mq_start_request: Issue rq tag #18, internal-tag #11 on queue 0:0:0:4
[  253.593833] blk_mq_start_request: Issue rq tag #25, internal-tag #48 on queue 0:0:0:4
[  253.680479] blk_mq_start_request: Issue rq tag #26, internal-tag #49 on queue 0:0:0:4
[  253.767750] blk_mq_start_request: Issue rq tag #29, internal-tag #50 on queue 0:0:0:4
[  253.855885] blk_mq_start_request: Issue rq tag #31, internal-tag #51 on queue 0:0:0:4
[  253.943510] blk_mq_start_request: Issue rq tag #19, internal-tag #12 on queue 0:0:0:4
[  254.030812] blk_mq_start_request: Issue rq tag #20, internal-tag #26 on queue 0:0:0:4
[  254.118500] blk_mq_start_request: Issue rq tag #21, internal-tag #27 on queue 0:0:0:4
[  254.204281] blk_mq_start_request: Issue rq tag #22, internal-tag #13 on queue 0:0:0:4
[  254.291479] blk_mq_start_request: Issue rq tag #23, internal-tag #14 on queue 0:0:0:4
[  254.379531] blk_mq_start_request: Issue rq tag #27, internal-tag #15 on queue 0:0:0:4
[  254.467291] blk_mq_start_request: Issue rq tag #28, internal-tag #52 on queue 0:0:0:4
[  254.554489] blk_mq_start_request: Issue rq tag #30, internal-tag #53 on queue 0:0:0:4
[  254.641791] blk_mq_start_request: Issue rq tag #0, internal-tag #54 on queue 0:0:0:4
[  254.728041] blk_mq_start_request: Issue rq tag #1, internal-tag #8 on queue 0:0:0:4
[  254.815572] blk_mq_start_request: Issue rq tag #2, internal-tag #3 on queue 0:0:0:4
[  254.903697] blk_mq_start_request: Issue rq tag #3, internal-tag #4 on queue 0:0:0:4
[  254.991114] blk_mq_start_request: Issue rq tag #4, internal-tag #5 on queue 0:0:0:4
[  255.062916] ufshcd-qcom ufshc: waiting timed out! queue usage counter: 34, hctx queued 452, unfreeze it
[  255.156281] blk_mq_start_request: Issue rq tag #5, internal-tag #6 on queue 0:0:0:4
[  255.228458] blk_mq_start_request: Issue rq tag #6, internal-tag #7 on queue 0:0:0:4
[  255.401906] ufshcd-qcom ufshc: queue unforzen
/sys/devices/platform/soc/ufshc # echo 1 > freeze_sde_queue
[  269.744395] ufshcd-qcom ufshc: before start, queue hctx queued 661
[  269.883302] ufshcd-qcom ufshc: freeze queue of sdev 0:0:0:4 start
[  270.014958] ufshcd-qcom ufshc: after start, queue hctx queued 661
[  270.146031] blk_mq_start_request: Issue rq tag #22, internal-tag #4 on queue 0:0:0:4
[  270.286281] blk_mq_start_request: Issue rq tag #27, internal-tag #52 on queue 0:0:0:4
[  270.359208] blk_mq_start_request: Issue rq tag #23, internal-tag #53 on queue 0:0:0:4
[  270.434458] blk_mq_start_request: Issue rq tag #28, internal-tag #14 on queue 0:0:0:4
[  270.506604] blk_mq_start_request: Issue rq tag #30, internal-tag #40 on queue 0:0:0:4
[  270.585781] blk_mq_start_request: Issue rq tag #0, internal-tag #47 on queue 0:0:0:4
[  270.658250] blk_mq_start_request: Issue rq tag #1, internal-tag #32 on queue 0:0:0:4
[  270.731364] blk_mq_start_request: Issue rq tag #2, internal-tag #33 on queue 0:0:0:4
[  270.866937] blk_mq_start_request: Issue rq tag #3, internal-tag #34 on queue 0:0:0:4
[  271.006010] blk_mq_start_request: Issue rq tag #4, internal-tag #54 on queue 0:0:0:4
[  271.076541] blk_mq_start_request: Issue rq tag #5, internal-tag #16 on queue 0:0:0:4
[  271.155729] blk_mq_start_request: Issue rq tag #6, internal-tag #17 on queue 0:0:0:4
[  271.230083] blk_mq_start_request: Issue rq tag #7, internal-tag #18 on queue 0:0:0:4
[  271.304635] blk_mq_start_request: Issue rq tag #8, internal-tag #19 on queue 0:0:0:4
[  271.379385] blk_mq_start_request: Issue rq tag #9, internal-tag #20 on queue 0:0:0:4
[  271.514822] blk_mq_start_request: Issue rq tag #10, internal-tag #35 on queue 0:0:0:4
[  271.586781] blk_mq_start_request: Issue rq tag #11, internal-tag #55 on queue 0:0:0:4
[  271.724593] blk_mq_start_request: Issue rq tag #12, internal-tag #36 on queue 0:0:0:4
[  271.798916] blk_mq_start_request: Issue rq tag #13, internal-tag #37 on queue 0:0:0:4
[  271.870697] blk_mq_start_request: Issue rq tag #14, internal-tag #38 on queue 0:0:0:4
[  271.943812] ufshcd-qcom ufshc: waiting timed out! queue usage counter: 42, hctx queued 661, unfreeze it
[  272.040614] blk_mq_start_request: Issue rq tag #16, internal-tag #56 on queue 0:0:0:4
[  272.115145] blk_mq_start_request: Issue rq tag #24, internal-tag #57 on queue 0:0:0:4
[  272.187260] blk_mq_start_request: Issue rq tag #15, internal-tag #58 on queue 0:0:0:4
[  272.323072] blk_mq_start_request: Issue rq tag #17, internal-tag #0 on queue 0:0:0:4
[  272.393875] blk_mq_start_request: Issue rq tag #18, internal-tag #21 on queue 0:0:0:4
[  272.466552] blk_mq_start_request: Issue rq tag #19, internal-tag #59 on queue 0:0:0:4
[  272.540156] blk_mq_get_request: Create new rq tag #-1, internal-tag #23 on queue 0:0:0:4
[  272.617875] blk_mq_start_request: Issue rq tag #25, internal-tag #60 on queue 0:0:0:4
[  272.691135] blk_mq_start_request: Issue rq tag #26, internal-tag #22 on queue 0:0:0:4
[  272.766687] blk_mq_start_request: Issue rq tag #20, internal-tag #23 on queue 0:0:0:4
[  273.002062] ufshcd-qcom ufshc: queue unforzen
/sys/devices/platform/soc/ufshc # echo 1 > freeze_sde_queue
[  275.350739] ufshcd-qcom ufshc: before start, queue hctx queued 723
[  275.486645] ufshcd-qcom ufshc: freeze queue of sdev 0:0:0:4 start
[  275.618437] ufshcd-qcom ufshc: after start, queue hctx queued 723
[  275.746500] blk_mq_start_request: Issue rq tag #27, internal-tag #31 on queue 0:0:0:4
[  275.882062] blk_mq_start_request: Issue rq tag #22, internal-tag #16 on queue 0:0:0:4
[  275.955062] blk_mq_start_request: Issue rq tag #28, internal-tag #17 on queue 0:0:0:4
[  276.035187] blk_mq_start_request: Issue rq tag #30, internal-tag #18 on queue 0:0:0:4
[  276.109812] blk_mq_start_request: Issue rq tag #23, internal-tag #54 on queue 0:0:0:4
[  276.184437] blk_mq_start_request: Issue rq tag #0, internal-tag #8 on queue 0:0:0:4
[  276.256041] blk_mq_start_request: Issue rq tag #1, internal-tag #9 on queue 0:0:0:4
[  276.390437] blk_mq_start_request: Issue rq tag #2, internal-tag #10 on queue 0:0:0:4
[  276.462760] blk_mq_start_request: Issue rq tag #3, internal-tag #11 on queue 0:0:0:4
[  276.540416] blk_mq_start_request: Issue rq tag #4, internal-tag #19 on queue 0:0:0:4
[  276.614635] blk_mq_start_request: Issue rq tag #5, internal-tag #20 on queue 0:0:0:4
[  276.687593] blk_mq_start_request: Issue rq tag #6, internal-tag #12 on queue 0:0:0:4
[  276.758479] blk_mq_start_request: Issue rq tag #7, internal-tag #13 on queue 0:0:0:4
[  276.831520] blk_mq_start_request: Issue rq tag #8, internal-tag #14 on queue 0:0:0:4
[  276.906458] blk_mq_start_request: Issue rq tag #9, internal-tag #15 on queue 0:0:0:4
[  276.979562] blk_mq_start_request: Issue rq tag #10, internal-tag #55 on queue 0:0:0:4
[  277.115333] blk_mq_start_request: Issue rq tag #11, internal-tag #56 on queue 0:0:0:4
[  277.243791] blk_mq_start_request: Issue rq tag #12, internal-tag #0 on queue 0:0:0:4
[  277.316218] blk_mq_start_request: Issue rq tag #13, internal-tag #35 on queue 0:0:0:4
[  277.391656] blk_mq_start_request: Issue rq tag #16, internal-tag #36 on queue 0:0:0:4
[  277.465708] blk_mq_start_request: Issue rq tag #14, internal-tag #37 on queue 0:0:0:4
[  277.544125] blk_mq_start_request: Issue rq tag #24, internal-tag #38 on queue 0:0:0:4
[  277.619552] blk_mq_start_request: Issue rq tag #15, internal-tag #57 on queue 0:0:0:4
[  277.694541] blk_mq_start_request: Issue rq tag #17, internal-tag #21 on queue 0:0:0:4
[  277.767677] ufshcd-qcom ufshc: waiting timed out! queue usage counter: 36, hctx queued 723, unfreeze it
[  277.920041] blk_mq_start_request: Issue rq tag #18, internal-tag #58 on queue 0:0:0:4
[  277.992354] blk_mq_start_request: Issue rq tag #19, internal-tag #59 on queue 0:0:0:4
[  278.067250] blk_mq_get_request: Create new rq tag #-1, internal-tag #22 on queue 0:0:0:4
[  278.202760] blk_mq_start_request: Issue rq tag #25, internal-tag #22 on queue 0:0:0:4
[  278.320447] ufshcd-qcom ufshc: queue unforzen
/sys/devices/platform/soc/ufshc # echo 1 > freeze_sde_queue
[  281.112052] ufshcd-qcom ufshc: before start, queue hctx queued 730
[  281.266468] ufshcd-qcom ufshc: freeze queue of sdev 0:0:0:4 start
[  281.374364] ufshcd-qcom ufshc: after start, queue hctx queued 730
[  281.502104] ufshcd-qcom ufshc: queue forzen, queue usage counter: 0
[  281.638125] ufshcd-qcom ufshc: after queue frozen, queue hctx queued 730
[  281.770010] ufshcd-qcom ufshc: unfreeze it
[  281.862187] ufshcd-qcom ufshc: queue unforzen
/sys/devices/platform/soc/ufshc #
Bart Van Assche Nov. 26, 2019, 1:05 a.m. UTC | #2
On 11/24/19 11:38 PM, cang@codeaurora.org wrote:
> Sorry to bring back the old discussion we had on V5 of this series here.
> I spent some time understanding the mq freeze queue implementation but I
> still have the same concern regards the your new implementation of clock
> scaling prepare/unprepare.
> 
> I attached two logs I collected on my setups(the kernel is 5.4).
> 
> In log1.txt, I pretty much ported your changes to our code base and copy
> a large file to partition /dev/sde1(mounted to /mnt). You can see the
> debug logs I added show that the -ETIMEDOUT returned from ufshcd_devfreq_scale(),
> although scaling UP/DOWN succeed some times.
> 
> To make it more controllable, I made a new sysfs entry, namely freeze_sde_queue,
> so that I can freeze the queue of /dev/sde whenever I do "echo 1 > freeze_sde_queue".
> After call blk_freeze_queue_start(), call blk_mq_freeze_queue_wait_timeout() to wait for 1 sec.
> In the end, blk_mq_unfreeze_queue() would be called no matter timeout happened or not.
> 
> I also added a flag, called enable_print, to request_queue, and set it after I call
> blk_freeze_queue_start(). Block layer, when this flag is set, will print logs from
> blk_mq_start_request() and blk_mq_get_request().
> 
> I tried multiple times during copy large a file to partition /dev/sde1(mounted to /mnt).
> 
> In log2.txt, there are still wait timeout, and you can see after blk_freeze_queue_start()
> is called for request queue of /dev/sde, there still can be requests sent down to UFS driver
> and these requests are the "latency" that I mentioned in our previous discussion. I know
> these requests are not "new" requests reach block layer after freeze starts,  but the ones
> in the queue before freeze starts. However, they are the difference from the old implementation.
> When scaling up should happen, these requests are still sent through the lowest HS Gear,
> while when scaling down should happen, these requests are sent through the highest HS Gear.
> The former may cause performance issue while the later may cause power penalty, this is our
> major concern.

Hi Can,

Thank you for having taken the time to run these tests and to share their
output. If I understood you correctly, the patch I posted works but
introduces a performance regression, namely that it takes longer to suspend
request processing for an UFS host. How about replacing patch 4/4 with the
patch below?

Thanks,

Bart.

Subject: [PATCH] ufs: Make clock scaling more robust

Scaling the clock is only safe while no commands are in progress. The
current clock scaling implementation uses hba->clk_scaling_lock to
serialize clock scaling against the following three functions:
* ufshcd_queuecommand()          (uses sdev->request_queue)
* ufshcd_exec_dev_cmd()          (uses hba->cmd_queue)
* ufshcd_issue_devman_upiu_cmd() (uses hba->cmd_queue)

__ufshcd_issue_tm_cmd(), which uses hba->tmf_queue, is not yet serialized
against clock scaling. This patch disallows the submission of TMFs from
user space, via the call chain below, while clock scaling is in progress:

submit UPIU_TRANSACTION_TASK_REQ on an UFS BSG queue
-> ufs_bsg_request()
   -> ufshcd_exec_raw_upiu_cmd(msgcode = UPIU_TRANSACTION_TASK_REQ)
     -> ufshcd_exec_raw_upiu_cmd()
       -> __ufshcd_issue_tm_cmd()

Cc: Can Guo <cang@codeaurora.org>
Cc: Bean Huo <beanhuo@micron.com>
Cc: Avri Altman <avri.altman@wdc.com>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
  drivers/scsi/ufs/ufshcd.c | 36 +++++++++++++++++++++++++-----------
  1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 99337e0b54f6..631c588f279a 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -972,14 +972,13 @@ static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
  }

  static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
-					u64 wait_timeout_us)
+					unsigned long deadline)
  {
  	unsigned long flags;
  	int ret = 0;
  	u32 tm_doorbell;
  	u32 tr_doorbell;
  	bool timeout = false, do_last_check = false;
-	ktime_t start;

  	ufshcd_hold(hba, false);
  	spin_lock_irqsave(hba->host->host_lock, flags);
@@ -987,7 +986,6 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
  	 * Wait for all the outstanding tasks/transfer requests.
  	 * Verify by checking the doorbell registers are clear.
  	 */
-	start = ktime_get();
  	do {
  		if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
  			ret = -EBUSY;
@@ -1005,8 +1003,7 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,

  		spin_unlock_irqrestore(hba->host->host_lock, flags);
  		schedule();
-		if (ktime_to_us(ktime_sub(ktime_get(), start)) >
-		    wait_timeout_us) {
+		if (time_after(jiffies, deadline)) {
  			timeout = true;
  			/*
  			 * We might have scheduled out for long time so make
@@ -1079,26 +1076,43 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)

  static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
  {
-	#define DOORBELL_CLR_TOUT_US		(1000 * 1000) /* 1 sec */
+	unsigned long deadline = jiffies + HZ /* 1 sec */;
  	int ret = 0;
  	/*
  	 * make sure that there are no outstanding requests when
  	 * clock scaling is in progress
  	 */
  	ufshcd_scsi_block_requests(hba);
+	blk_freeze_queue_start(hba->cmd_queue);
+	blk_freeze_queue_start(hba->tmf_queue);
+	if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue,
+				max_t(long, 1, deadline - jiffies)) <= 0)
+		goto unblock;
+	if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue,
+				max_t(long, 1, deadline - jiffies)) <= 0)
+		goto unblock;
  	down_write(&hba->clk_scaling_lock);
-	if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
-		ret = -EBUSY;
-		up_write(&hba->clk_scaling_lock);
-		ufshcd_scsi_unblock_requests(hba);
-	}
+	if (ufshcd_wait_for_doorbell_clr(hba, deadline))
+		goto up_write;

+out:
  	return ret;
+
+up_write:
+	up_write(&hba->clk_scaling_lock);
+unblock:
+	blk_mq_unfreeze_queue(hba->tmf_queue);
+	blk_mq_unfreeze_queue(hba->cmd_queue);
+	ufshcd_scsi_unblock_requests(hba);
+	ret = -EBUSY;
+	goto out;
  }

  static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba)
  {
  	up_write(&hba->clk_scaling_lock);
+	blk_mq_unfreeze_queue(hba->tmf_queue);
+	blk_mq_unfreeze_queue(hba->cmd_queue);
  	ufshcd_scsi_unblock_requests(hba);
  }
Douglas Gilbert Nov. 26, 2019, 5 a.m. UTC | #3
On 2019-11-26 12:05 p.m., Bart Van Assche wrote:
> On 11/24/19 11:38 PM, cang@codeaurora.org wrote:
>> Sorry to bring back the old discussion we had on V5 of this series here.
>> I spent some time understanding the mq freeze queue implementation but I
>> still have the same concern regards the your new implementation of clock
>> scaling prepare/unprepare.
>>
>> I attached two logs I collected on my setups(the kernel is 5.4).
>>
>> In log1.txt, I pretty much ported your changes to our code base and copy
>> a large file to partition /dev/sde1(mounted to /mnt). You can see the
>> debug logs I added show that the -ETIMEDOUT returned from ufshcd_devfreq_scale(),
>> although scaling UP/DOWN succeed some times.
>>
>> To make it more controllable, I made a new sysfs entry, namely freeze_sde_queue,
>> so that I can freeze the queue of /dev/sde whenever I do "echo 1 > 
>> freeze_sde_queue".
>> After call blk_freeze_queue_start(), call blk_mq_freeze_queue_wait_timeout() 
>> to wait for 1 sec.
>> In the end, blk_mq_unfreeze_queue() would be called no matter timeout happened 
>> or not.
>>
>> I also added a flag, called enable_print, to request_queue, and set it after I 
>> call
>> blk_freeze_queue_start(). Block layer, when this flag is set, will print logs 
>> from
>> blk_mq_start_request() and blk_mq_get_request().
>>
>> I tried multiple times during copy large a file to partition /dev/sde1(mounted 
>> to /mnt).
>>
>> In log2.txt, there are still wait timeout, and you can see after 
>> blk_freeze_queue_start()
>> is called for request queue of /dev/sde, there still can be requests sent down 
>> to UFS driver
>> and these requests are the "latency" that I mentioned in our previous 
>> discussion. I know
>> these requests are not "new" requests reach block layer after freeze starts,  
>> but the ones
>> in the queue before freeze starts. However, they are the difference from the 
>> old implementation.
>> When scaling up should happen, these requests are still sent through the 
>> lowest HS Gear,
>> while when scaling down should happen, these requests are sent through the 
>> highest HS Gear.
>> The former may cause performance issue while the later may cause power 
>> penalty, this is our
>> major concern.
> 
> Hi Can,
> 
> Thank you for having taken the time to run these tests and to share the output
> of these tests. If I understood you correctly the patch I posted works but
> introduces a performance regression, namely that it takes longer to suspend
> request processing for an UFS host. How about replacing patch 4/4 with the
> patch below?
> 
> Thanks,
> 
> Bart.
> 
> Subject: [PATCH] ufs: Make clock scaling more robust
> 
> Scaling the clock is only safe while no commands are in progress. The
> current clock scaling implementation uses hba->clk_scaling_lock to
> serialize clock scaling against the following three functions:
> * ufshcd_queuecommand()          (uses sdev->request_queue)
> * ufshcd_exec_dev_cmd()          (uses hba->cmd_queue)
> * ufshcd_issue_devman_upiu_cmd() (uses hba->cmd_queue)
> 
> __ufshcd_issue_tm_cmd(), which uses hba->tmf_queue, is not yet serialized
> against clock scaling. Disallowing that TMFs are submitted as follows from
> user space while clock scaling is in progress:
> 
> submit UPIU_TRANSACTION_TASK_REQ on an UFS BSG queue
> -> ufs_bsg_request()
>    -> ufshcd_exec_raw_upiu_cmd(msgcode = UPIU_TRANSACTION_TASK_REQ)
>      -> ufshcd_exec_raw_upiu_cmd()
>        -> __ufshcd_issue_tm_cmd()
> 
> Cc: Can Guo <cang@codeaurora.org>
> Cc: Bean Huo <beanhuo@micron.com>
> Cc: Avri Altman <avri.altman@wdc.com>
> Cc: Stanley Chu <stanley.chu@mediatek.com>
> Cc: Tomas Winkler <tomas.winkler@intel.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>   drivers/scsi/ufs/ufshcd.c | 36 +++++++++++++++++++++++++-----------
>   1 file changed, 25 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 99337e0b54f6..631c588f279a 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -972,14 +972,13 @@ static bool ufshcd_is_devfreq_scaling_required(struct 
> ufs_hba *hba,
>   }
> 
>   static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
> -                    u64 wait_timeout_us)
> +                    unsigned long deadline)
>   {
>       unsigned long flags;
>       int ret = 0;
>       u32 tm_doorbell;
>       u32 tr_doorbell;
>       bool timeout = false, do_last_check = false;
> -    ktime_t start;
> 
>       ufshcd_hold(hba, false);
>       spin_lock_irqsave(hba->host->host_lock, flags);
> @@ -987,7 +986,6 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
>        * Wait for all the outstanding tasks/transfer requests.
>        * Verify by checking the doorbell registers are clear.
>        */
> -    start = ktime_get();

Rather than throw away the high-precision clock and resort to jiffies,
perhaps you might try the middle road, for example ktime_get_coarse().

Doug Gilbert
Bart Van Assche Nov. 26, 2019, 5:16 a.m. UTC | #4
On 2019-11-25 21:00, Douglas Gilbert wrote:
> On 2019-11-26 12:05 p.m., Bart Van Assche wrote:
>> -    start = ktime_get();
> 
> Rather than throw away the high precision clock and resort to jiffies,
> perhaps
> you might try the middle road. For example: ktime_get_coarse().

Hi Doug,

Since HZ >= 100, a jiffy is at most 10 ms, so I think that 1/HZ is more
than accurate enough for a one-second timeout.

Bart.
Douglas Gilbert Nov. 26, 2019, 10:09 a.m. UTC | #5
On 2019-11-26 4:16 p.m., Bart Van Assche wrote:
> On 2019-11-25 21:00, Douglas Gilbert wrote:
>> On 2019-11-26 12:05 p.m., Bart Van Assche wrote:
>>> -    start = ktime_get();
>>
>> Rather than throw away the high precision clock and resort to jiffies,
>> perhaps
>> you might try the middle road. For example: ktime_get_coarse().
> 
> Hi Doug,
> 
> Since HZ >= 100, I think that 1/HZ is more than accurate enough for a
> one second timeout.

For command timeouts, yes.

I was thinking more about instrumenting an LLD to time individual SCSI
commands or to get accurate averages over many similar commands (e.g. READs
and WRITEs). Those times could then be compared to similar measurements
done from user space via the sd or sg driver. The sg driver times commands
starting from each blk_execute_rq_nowait() call to its corresponding
callback. In the current sg driver the unit of that timing is jiffies,
which is too coarse to be useful, especially with flash-based or faster
storage. Hence the sg driver rewrite uses ktime_get() and friends. So far
I haven't seen the need to use ktime_get_coarse(), but one reviewer
suggested it.
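
In sketch form, that kind of measurement looks like this (illustrative
only, not the sg driver's actual code):

	ktime_t start = ktime_get();

	/* ... blk_execute_rq_nowait(...); the second timestamp is taken
	 * in the corresponding completion callback ... */

	s64 elapsed_us = ktime_to_us(ktime_sub(ktime_get(), start));
	pr_debug("command completed in %lld us\n", elapsed_us);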

Doug Gilbert
Can Guo Dec. 2, 2019, 5:39 a.m. UTC | #6
On 2019-11-26 09:05, Bart Van Assche wrote:
> On 11/24/19 11:38 PM, cang@codeaurora.org wrote:
>> Sorry to bring back the old discussion we had on V5 of this series 
>> here.
>> I spent some time understanding the mq freeze queue implementation but 
>> I
>> still have the same concern regards the your new implementation of 
>> clock
>> scaling prepare/unprepare.
>> 
>> I attached two logs I collected on my setups(the kernel is 5.4).
>> 
>> In log1.txt, I pretty much ported your changes to our code base and 
>> copy
>> a large file to partition /dev/sde1(mounted to /mnt). You can see the
>> debug logs I added show that the -ETIMEDOUT returned from 
>> ufshcd_devfreq_scale(),
>> although scaling UP/DOWN succeed some times.
>> 
>> To make it more controllable, I made a new sysfs entry, namely 
>> freeze_sde_queue,
>> so that I can freeze the queue of /dev/sde whenever I do "echo 1 > 
>> freeze_sde_queue".
>> After call blk_freeze_queue_start(), call 
>> blk_mq_freeze_queue_wait_timeout() to wait for 1 sec.
>> In the end, blk_mq_unfreeze_queue() would be called no matter timeout 
>> happened or not.
>> 
>> I also added a flag, called enable_print, to request_queue, and set it 
>> after I call
>> blk_freeze_queue_start(). Block layer, when this flag is set, will 
>> print logs from
>> blk_mq_start_request() and blk_mq_get_request().
>> 
>> I tried multiple times during copy large a file to partition 
>> /dev/sde1(mounted to /mnt).
>> 
>> In log2.txt, there are still wait timeout, and you can see after 
>> blk_freeze_queue_start()
>> is called for request queue of /dev/sde, there still can be requests 
>> sent down to UFS driver
>> and these requests are the "latency" that I mentioned in our previous 
>> discussion. I know
>> these requests are not "new" requests reach block layer after freeze 
>> starts,  but the ones
>> in the queue before freeze starts. However, they are the difference 
>> from the old implementation.
>> When scaling up should happen, these requests are still sent through 
>> the lowest HS Gear,
>> while when scaling down should happen, these requests are sent through 
>> the highest HS Gear.
>> The former may cause performance issue while the later may cause power 
>> penalty, this is our
>> major concern.
> 
> Hi Can,
> 
> Thank you for having taken the time to run these tests and to share the 
> output
> of these tests. If I understood you correctly the patch I posted works 
> but
> introduces a performance regression, namely that it takes longer to 
> suspend
> request processing for an UFS host. How about replacing patch 4/4 with 
> the
> patch below?
> 
> Thanks,
> 
> Bart.
> 
> Subject: [PATCH] ufs: Make clock scaling more robust
> 
> Scaling the clock is only safe while no commands are in progress. The
> current clock scaling implementation uses hba->clk_scaling_lock to
> serialize clock scaling against the following three functions:
> * ufshcd_queuecommand()          (uses sdev->request_queue)
> * ufshcd_exec_dev_cmd()          (uses hba->cmd_queue)
> * ufshcd_issue_devman_upiu_cmd() (uses hba->cmd_queue)
> 
> __ufshcd_issue_tm_cmd(), which uses hba->tmf_queue, is not yet 
> serialized
> against clock scaling. Disallowing that TMFs are submitted as follows 
> from
> user space while clock scaling is in progress:
> 
> submit UPIU_TRANSACTION_TASK_REQ on an UFS BSG queue
> -> ufs_bsg_request()
>   -> ufshcd_exec_raw_upiu_cmd(msgcode = UPIU_TRANSACTION_TASK_REQ)
>     -> ufshcd_exec_raw_upiu_cmd()
>       -> __ufshcd_issue_tm_cmd()
> 
> Cc: Can Guo <cang@codeaurora.org>
> Cc: Bean Huo <beanhuo@micron.com>
> Cc: Avri Altman <avri.altman@wdc.com>
> Cc: Stanley Chu <stanley.chu@mediatek.com>
> Cc: Tomas Winkler <tomas.winkler@intel.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 36 +++++++++++++++++++++++++-----------
>  1 file changed, 25 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 99337e0b54f6..631c588f279a 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -972,14 +972,13 @@ static bool
> ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
>  }
> 
>  static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
> -					u64 wait_timeout_us)
> +					unsigned long deadline)
>  {
>  	unsigned long flags;
>  	int ret = 0;
>  	u32 tm_doorbell;
>  	u32 tr_doorbell;
>  	bool timeout = false, do_last_check = false;
> -	ktime_t start;
> 
>  	ufshcd_hold(hba, false);
>  	spin_lock_irqsave(hba->host->host_lock, flags);
> @@ -987,7 +986,6 @@ static int ufshcd_wait_for_doorbell_clr(struct 
> ufs_hba *hba,
>  	 * Wait for all the outstanding tasks/transfer requests.
>  	 * Verify by checking the doorbell registers are clear.
>  	 */
> -	start = ktime_get();
>  	do {
>  		if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
>  			ret = -EBUSY;
> @@ -1005,8 +1003,7 @@ static int ufshcd_wait_for_doorbell_clr(struct
> ufs_hba *hba,
> 
>  		spin_unlock_irqrestore(hba->host->host_lock, flags);
>  		schedule();
> -		if (ktime_to_us(ktime_sub(ktime_get(), start)) >
> -		    wait_timeout_us) {
> +		if (time_after(jiffies, deadline)) {
>  			timeout = true;
>  			/*
>  			 * We might have scheduled out for long time so make
> @@ -1079,26 +1076,43 @@ static int ufshcd_scale_gear(struct ufs_hba
> *hba, bool scale_up)
> 
>  static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
>  {
> -	#define DOORBELL_CLR_TOUT_US		(1000 * 1000) /* 1 sec */
> +	unsigned long deadline = jiffies + HZ /* 1 sec */;
>  	int ret = 0;
>  	/*
>  	 * make sure that there are no outstanding requests when
>  	 * clock scaling is in progress
>  	 */
>  	ufshcd_scsi_block_requests(hba);
> +	blk_freeze_queue_start(hba->cmd_queue);
> +	blk_freeze_queue_start(hba->tmf_queue);
> +	if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue,
> +				max_t(long, 1, deadline - jiffies)) <= 0)
> +		goto unblock;
> +	if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue,
> +				max_t(long, 1, deadline - jiffies)) <= 0)
> +		goto unblock;
>  	down_write(&hba->clk_scaling_lock);

Hi Bart,

It looks better, but there is a race condition here. Consider the sequence
below: thread A takes the clk_scaling_lock and then waits in
blk_get_request(), while thread B has frozen the queue and is waiting for
the lock.

Thread A
     Call ufshcd_exec_dev_cmd() or ufshcd_issue_devman_upiu_cmd()
     [a] down_write(hba->clk_scaling_lock)
     [d] blk_get_request()

Thread B
     Call ufshcd_clock_scaling_prepare()
     [b] blk_freeze_queue_start(hba->cmd_queue)
     [c] blk_mq_freeze_queue_wait_timeout(hba->cmd_queue) returns > 0
     [e] down_write(hba->clk_scaling_lock)

At this point thread A sleeps in blk_get_request() on the frozen queue
while still holding clk_scaling_lock, and thread B sleeps in down_write()
waiting for that same lock: a deadlock.

BTW, I see no need to freeze hba->cmd_queue in scaling_prepare.
I went through our previous discussions and you mentioned that freezing
hba->cmd_queue can serialize scaling and the err handler.
However, it is not necessary, and it is not 100% true. We already have the
ufshcd_eh_in_progress() check in ufshcd_devfreq_target() before calling
ufshcd_devfreq_scale(). If you think this is not enough (the err handler
may kick off after this check), having hba->cmd_queue frozen in
scaling_prepare() does not mean the err handler has finished either,
because device management commands are only used in certain steps of the
err handler. Actually, with the original design, we don't see any problems
caused by concurrency between scaling and the err handler, and if such
concurrency really happens, scaling simply fails.

Thanks,

Can Guo.

> -	if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
> -		ret = -EBUSY;
> -		up_write(&hba->clk_scaling_lock);
> -		ufshcd_scsi_unblock_requests(hba);
> -	}
> +	if (ufshcd_wait_for_doorbell_clr(hba, deadline))
> +		goto up_write;
> 
> +out:
>  	return ret;
> +
> +up_write:
> +	up_write(&hba->clk_scaling_lock);
> +unblock:
> +	blk_mq_unfreeze_queue(hba->tmf_queue);
> +	blk_mq_unfreeze_queue(hba->cmd_queue);
> +	ufshcd_scsi_unblock_requests(hba);
> +	ret = -EBUSY;
> +	goto out;
>  }
> 
>  static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba)
>  {
>  	up_write(&hba->clk_scaling_lock);
> +	blk_mq_unfreeze_queue(hba->tmf_queue);
> +	blk_mq_unfreeze_queue(hba->cmd_queue);
>  	ufshcd_scsi_unblock_requests(hba);
>  }
Bart Van Assche Dec. 3, 2019, 4:16 p.m. UTC | #7
On 12/1/19 9:39 PM, cang@codeaurora.org wrote:
> On 2019-11-26 09:05, Bart Van Assche wrote:
>>  static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
>>  {
>> -    #define DOORBELL_CLR_TOUT_US        (1000 * 1000) /* 1 sec */
>> +    unsigned long deadline = jiffies + HZ /* 1 sec */;
>>      int ret = 0;
>>      /*
>>       * make sure that there are no outstanding requests when
>>       * clock scaling is in progress
>>       */
>>      ufshcd_scsi_block_requests(hba);
>> +    blk_freeze_queue_start(hba->cmd_queue);
>> +    blk_freeze_queue_start(hba->tmf_queue);
>> +    if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue,
>> +                max_t(long, 1, deadline - jiffies)) <= 0)
>> +        goto unblock;
>> +    if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue,
>> +                max_t(long, 1, deadline - jiffies)) <= 0)
>> +        goto unblock;
>>      down_write(&hba->clk_scaling_lock);
> 
> Hi Bart,
> 
> It looks better, but there is a race condition here. Consider below 
> sequence,
> thread A takes the clk_scaling_lock and waiting on blk_get_request(), while
> thread B has frozen the queue and waiting for the lock.
> 
> Thread A
>      Call ufshcd_exec_dev_cmd() or ufshcd_issue_devman_upiu_cmd()
>      [a] down_write(hba->clk_scaling_lock)
>      [d] blk_get_request()
> 
> Thread B
>      Call ufshcd_clock_scaling_prepare()
>      [b] blk_freeze_queue_start(hba->cmd_queue)
>      [c] blk_mq_freeze_queue_wait_timeout(hba->cmd_queue) returns > 0
>      [e] down_write(hba->clk_scaling_lock)
> 
> BTW, I see no needs to freeze the hba->cmd_queue in scaling_prepare.
> I went through our previous discussions and you mentioned freezing 
> hba->cmd_queue
> can serialize scaling and err handler.
> However, it is not necessary and not 100% true. We've already have 
> ufshcd_eh_in_progress()
> check in ufshcd_devfreq_target() before call ufshcd_devfreq_scale().
> If you think this is not enough(err handler may kick off after this 
> check), having
> hba->cmd_queue frozen in scaling_prepare() does not mean the err handler 
> is finished either,
> because device management commands are only used in certain steps during 
> err handler.
> Actually, with the original design, we don't see any problems caused by
> concurrency of scaling and err handler, and if the concurrency really 
> happens,
> scaling would just fail.

Hi Can,

That's a good catch. When I repost this patch series I will leave out 
the freeze and unfreeze operations for hba->cmd_queue.
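
For concreteness, ufshcd_clock_scaling_prepare() would then look roughly
like the sketch below (untested, just to illustrate the plan):

static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
{
	unsigned long deadline = jiffies + HZ;	/* 1 sec */

	ufshcd_scsi_block_requests(hba);
	blk_freeze_queue_start(hba->tmf_queue);
	if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue,
				max_t(long, 1, deadline - jiffies)) <= 0)
		goto unblock;
	down_write(&hba->clk_scaling_lock);
	if (ufshcd_wait_for_doorbell_clr(hba, deadline)) {
		up_write(&hba->clk_scaling_lock);
		goto unblock;
	}
	return 0;

unblock:
	blk_mq_unfreeze_queue(hba->tmf_queue);
	ufshcd_scsi_unblock_requests(hba);
	return -EBUSY;
}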

Bart.

Patch

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 99337e0b54f6..472d164f6ae6 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -971,65 +971,6 @@  static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
 	return false;
 }
 
-static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
-					u64 wait_timeout_us)
-{
-	unsigned long flags;
-	int ret = 0;
-	u32 tm_doorbell;
-	u32 tr_doorbell;
-	bool timeout = false, do_last_check = false;
-	ktime_t start;
-
-	ufshcd_hold(hba, false);
-	spin_lock_irqsave(hba->host->host_lock, flags);
-	/*
-	 * Wait for all the outstanding tasks/transfer requests.
-	 * Verify by checking the doorbell registers are clear.
-	 */
-	start = ktime_get();
-	do {
-		if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
-			ret = -EBUSY;
-			goto out;
-		}
-
-		tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
-		tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
-		if (!tm_doorbell && !tr_doorbell) {
-			timeout = false;
-			break;
-		} else if (do_last_check) {
-			break;
-		}
-
-		spin_unlock_irqrestore(hba->host->host_lock, flags);
-		schedule();
-		if (ktime_to_us(ktime_sub(ktime_get(), start)) >
-		    wait_timeout_us) {
-			timeout = true;
-			/*
-			 * We might have scheduled out for long time so make
-			 * sure to check if doorbells are cleared by this time
-			 * or not.
-			 */
-			do_last_check = true;
-		}
-		spin_lock_irqsave(hba->host->host_lock, flags);
-	} while (tm_doorbell || tr_doorbell);
-
-	if (timeout) {
-		dev_err(hba->dev,
-			"%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n",
-			__func__, tm_doorbell, tr_doorbell);
-		ret = -EBUSY;
-	}
-out:
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
-	ufshcd_release(hba);
-	return ret;
-}
-
 /**
  * ufshcd_scale_gear - scale up/down UFS gear
  * @hba: per adapter instance
@@ -1079,27 +1020,54 @@  static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)
 
 static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
 {
-	#define DOORBELL_CLR_TOUT_US		(1000 * 1000) /* 1 sec */
-	int ret = 0;
+	unsigned long deadline = jiffies + HZ;
+	struct scsi_device *sdev;
+	int res = 0;
+
 	/*
-	 * make sure that there are no outstanding requests when
-	 * clock scaling is in progress
+	 * Make sure that no requests are outstanding while clock scaling is in
+	 * progress. If SCSI error recovery would start while this function is
+	 * in progress then
+	 * blk_mq_freeze_queue_wait_timeout(sdev->request_queue) will either
+	 * wait until error handling has finished or will time out.
 	 */
-	ufshcd_scsi_block_requests(hba);
-	down_write(&hba->clk_scaling_lock);
-	if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
-		ret = -EBUSY;
-		up_write(&hba->clk_scaling_lock);
-		ufshcd_scsi_unblock_requests(hba);
+	shost_for_each_device(sdev, hba->host)
+		blk_freeze_queue_start(sdev->request_queue);
+	blk_freeze_queue_start(hba->cmd_queue);
+	blk_freeze_queue_start(hba->tmf_queue);
+	shost_for_each_device(sdev, hba->host) {
+		if (blk_mq_freeze_queue_wait_timeout(sdev->request_queue,
+				max_t(long, 0, deadline - jiffies)) <= 0) {
+			goto err;
+		}
 	}
+	if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue,
+				max_t(long, 0, deadline - jiffies)) <= 0)
+		goto err;
+	if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue,
+				max_t(long, 0, deadline - jiffies)) <= 0)
+		goto err;
 
-	return ret;
+out:
+	return res;
+
+err:
+	blk_mq_unfreeze_queue(hba->tmf_queue);
+	blk_mq_unfreeze_queue(hba->cmd_queue);
+	shost_for_each_device(sdev, hba->host)
+		blk_mq_unfreeze_queue(sdev->request_queue);
+	res = -ETIMEDOUT;
+	goto out;
 }
 
 static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba)
 {
-	up_write(&hba->clk_scaling_lock);
-	ufshcd_scsi_unblock_requests(hba);
+	struct scsi_device *sdev;
+
+	blk_mq_unfreeze_queue(hba->tmf_queue);
+	blk_mq_unfreeze_queue(hba->cmd_queue);
+	shost_for_each_device(sdev, hba->host)
+		blk_mq_unfreeze_queue(sdev->request_queue);
 }
 
 /**
@@ -2394,9 +2362,6 @@  static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 		BUG();
 	}
 
-	if (!down_read_trylock(&hba->clk_scaling_lock))
-		return SCSI_MLQUEUE_HOST_BUSY;
-
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	switch (hba->ufshcd_state) {
 	case UFSHCD_STATE_OPERATIONAL:
@@ -2462,7 +2427,6 @@  static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 out_unlock:
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 out:
-	up_read(&hba->clk_scaling_lock);
 	return err;
 }
 
@@ -2616,8 +2580,6 @@  static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 	struct completion wait;
 	unsigned long flags;
 
-	down_read(&hba->clk_scaling_lock);
-
 	/*
 	 * Get free slot, sleep if slots are unavailable.
 	 * Even though we use wait_event() which sleeps indefinitely,
@@ -2653,7 +2615,6 @@  static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 
 out_put_tag:
 	blk_put_request(req);
-	up_read(&hba->clk_scaling_lock);
 	return err;
 }
 
@@ -5845,8 +5806,6 @@  static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 	unsigned long flags;
 	u32 upiu_flags;
 
-	down_read(&hba->clk_scaling_lock);
-
 	req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
@@ -5928,7 +5887,6 @@  static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 	}
 
 	blk_put_request(req);
-	up_read(&hba->clk_scaling_lock);
 	return err;
 }
 
@@ -8397,8 +8355,6 @@  int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	/* Initialize mutex for device management commands */
 	mutex_init(&hba->dev_cmd.lock);
 
-	init_rwsem(&hba->clk_scaling_lock);
-
 	ufshcd_init_clk_gating(hba);
 
 	ufshcd_init_clk_scaling(hba);
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 53bfe175342c..bbdef1153257 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -724,7 +724,6 @@  struct ufs_hba {
 	enum bkops_status urgent_bkops_lvl;
 	bool is_urgent_bkops_lvl_checked;
 
-	struct rw_semaphore clk_scaling_lock;
 	struct ufs_desc_size desc_size;
 	atomic_t scsi_block_reqs_cnt;