diff mbox series

[v2,18/20] scsi: ufs: Optimize the command queueing code

Message ID 20211119195743.2817-19-bvanassche@acm.org (mailing list archive)
State Superseded
Headers show
Series UFS patches for kernel v5.17 | expand

Commit Message

Bart Van Assche Nov. 19, 2021, 7:57 p.m. UTC
Remove the clock scaling lock from ufshcd_queuecommand() since it is a
performance bottleneck. Freeze request queues instead of polling the
doorbell registers to wait until pending commands have completed.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/ufs/ufshcd.c | 124 +++++++++++++-------------------------
 drivers/scsi/ufs/ufshcd.h |   1 +
 2 files changed, 44 insertions(+), 81 deletions(-)

Comments

Asutosh Das (asd) Nov. 22, 2021, 5:46 p.m. UTC | #1
On 11/19/2021 11:57 AM, Bart Van Assche wrote:
> Remove the clock scaling lock from ufshcd_queuecommand() since it is a
> performance bottleneck. Freeze request queues instead of polling the
> doorbell registers to wait until pending commands have completed.
> 
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>   drivers/scsi/ufs/ufshcd.c | 124 +++++++++++++-------------------------
>   drivers/scsi/ufs/ufshcd.h |   1 +
>   2 files changed, 44 insertions(+), 81 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index a6d3f71c6b00..9cf4a22f1950 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -1070,65 +1070,6 @@ static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
>   	return false;
>   }
>   
[...]
>   /**
>    * ufshcd_scale_gear - scale up/down UFS gear
>    * @hba: per adapter instance
> @@ -1176,37 +1117,63 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)
>   
>   static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
>   {
> -	#define DOORBELL_CLR_TOUT_US		(1000 * 1000) /* 1 sec */
> -	int ret = 0;
> +	struct scsi_device *sdev;
> +
>   	/*
> -	 * make sure that there are no outstanding requests when
> -	 * clock scaling is in progress
> +	 * Make sure that no commands are in progress while the clock frequency
> +	 * is being modified.
> +	 *
> +	 * Since ufshcd_exec_dev_cmd() and ufshcd_issue_devman_upiu_cmd() lock
> +	 * the clk_scaling_lock before calling blk_get_request(), lock
> +	 * clk_scaling_lock before freezing the request queues to prevent lock
> +	 * inversion.
>   	 */
> -	ufshcd_scsi_block_requests(hba);
>   	down_write(&hba->clk_scaling_lock);
> -
> -	if (!hba->clk_scaling.is_allowed ||
> -	    ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
> -		ret = -EBUSY;
> -		up_write(&hba->clk_scaling_lock);
> -		ufshcd_scsi_unblock_requests(hba);
> -		goto out;
> -	}
> -
> +	if (!hba->clk_scaling.is_allowed)
> +		goto busy;
> +	blk_freeze_queue_start(hba->tmf_queue);
> +	blk_freeze_queue_start(hba->cmd_queue);
> +	shost_for_each_device(sdev, hba->host)
> +		blk_freeze_queue_start(sdev->request_queue);
This would still issue the requests present in the queue before freezing,
and that's a concern.
> +	/*
> +	 * Calling synchronize_rcu_expedited() reduces the time required to
> +	 * freeze request queues from milliseconds to microseconds.
> +	 */
> +	synchronize_rcu_expedited();
> +	shost_for_each_device(sdev, hba->host)
> +		if (blk_mq_freeze_queue_wait_timeout(sdev->request_queue, HZ)
> +		    <= 0)
> +			goto unfreeze;
> +	if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue, HZ) <= 0 ||
> +	    blk_mq_freeze_queue_wait_timeout(hba->tmf_queue, HZ / 10) <= 0)
> +		goto unfreeze;
>   	/* let's not get into low power until clock scaling is completed */
>   	ufshcd_hold(hba, false);
> +	return 0;
>   
> -out:
> -	return ret;
> +unfreeze:
> +	shost_for_each_device(sdev, hba->host)
> +		blk_mq_unfreeze_queue(sdev->request_queue);
> +	blk_mq_unfreeze_queue(hba->cmd_queue);
> +	blk_mq_unfreeze_queue(hba->tmf_queue);
> +
> +busy:
> +	up_write(&hba->clk_scaling_lock);
> +	return -EBUSY;
>   }
>   
>   static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, bool writelock)
>   {
> +	struct scsi_device *sdev;
> +
> +	shost_for_each_device(sdev, hba->host)
> +		blk_mq_unfreeze_queue(sdev->request_queue);
> +	blk_mq_unfreeze_queue(hba->cmd_queue);
> +	blk_mq_unfreeze_queue(hba->tmf_queue);
>   	if (writelock)
>   		up_write(&hba->clk_scaling_lock);
>   	else
>   		up_read(&hba->clk_scaling_lock);
> -	ufshcd_scsi_unblock_requests(hba);
>   	ufshcd_release(hba);
>   }
>   
> @@ -2699,9 +2666,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
>   
>   	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
>   
> -	if (!down_read_trylock(&hba->clk_scaling_lock))
> -		return SCSI_MLQUEUE_HOST_BUSY;
> -
>   	/*
>   	 * Allows the UFS error handler to wait for prior ufshcd_queuecommand()
>   	 * calls.
> @@ -2790,8 +2754,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
>   out:
>   	rcu_read_unlock();
>   
> -	up_read(&hba->clk_scaling_lock);
> -
>   	if (ufs_trigger_eh()) {
>   		unsigned long flags;
>   
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> index e9bc07c69a80..7ec463c97d64 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -778,6 +778,7 @@ struct ufs_hba_monitor {
>    * @clk_list_head: UFS host controller clocks list node head
>    * @pwr_info: holds current power mode
>    * @max_pwr_info: keeps the device max valid pwm
> + * @clk_scaling_lock: used to serialize device commands and clock scaling
>    * @desc_size: descriptor sizes reported by device
>    * @urgent_bkops_lvl: keeps track of urgent bkops level for device
>    * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
>
Bart Van Assche Nov. 22, 2021, 6:13 p.m. UTC | #2
On 11/22/21 9:46 AM, Asutosh Das (asd) wrote:
> On 11/19/2021 11:57 AM, Bart Van Assche wrote:
>> +    blk_freeze_queue_start(hba->tmf_queue);
>> +    blk_freeze_queue_start(hba->cmd_queue);
>> +    shost_for_each_device(sdev, hba->host)
>> +        blk_freeze_queue_start(sdev->request_queue);
>
> This would still issue the requests present in the queue before freezing 
> and that's a concern.

Isn't that exactly what the existing code is doing since the existing 
code waits until both doorbell registers are zero? See also 
ufshcd_wait_for_doorbell_clr().

Thanks,

Bart.
Asutosh Das (asd) Nov. 22, 2021, 11:02 p.m. UTC | #3
On 11/22/2021 10:13 AM, Bart Van Assche wrote:
> On 11/22/21 9:46 AM, Asutosh Das (asd) wrote:
>> On 11/19/2021 11:57 AM, Bart Van Assche wrote:
>>> +    blk_freeze_queue_start(hba->tmf_queue);
>>> +    blk_freeze_queue_start(hba->cmd_queue);
>>> +    shost_for_each_device(sdev, hba->host)
>>> +        blk_freeze_queue_start(sdev->request_queue);
>>
>> This would still issue the requests present in the queue before 
>> freezing and that's a concern.
> 
> Isn't that exactly what the existing code is doing since the existing 
> code waits until both doorbell registers are zero? See also 
> ufshcd_wait_for_doorbell_clr().
> 
> Thanks,
> 
> Bart.
The current code waits for the already-issued requests to complete. It 
doesn't issue the yet-to-be-issued requests. Wouldn't freezing the queue 
issue the pending requests in the context of scaling_{up/down}?
If yes, I don't think the current code is doing that.

-asd
Bart Van Assche Nov. 22, 2021, 11:48 p.m. UTC | #4
On 11/22/21 3:02 PM, Asutosh Das (asd) wrote:
> Current code waits for the already issued requests to complete. It 
> doesn't issue the yet-to-be issued requests. Wouldn't freezing the queue 
> issue the requests in the context of scaling_{up/down}?
> If yes, I don't think the current code is doing that.

Hi Asutosh,

How about the patch below that preserves most of the existing code for
preparing for clock scaling?

Thanks,

Bart.


Subject: [PATCH] scsi: ufs: Optimize the command queueing code

Remove the clock scaling lock from ufshcd_queuecommand() since it is a
performance bottleneck. Instead, use synchronize_rcu_expedited() to wait
for ongoing ufshcd_queuecommand() calls.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
  drivers/scsi/ufs/ufshcd.c | 12 +++++++-----
  drivers/scsi/ufs/ufshcd.h |  1 +
  2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 5d214456bf82..1d929c28efaf 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -1196,6 +1196,13 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
  	/* let's not get into low power until clock scaling is completed */
  	ufshcd_hold(hba, false);

+	/*
+	 * Wait for ongoing ufshcd_queuecommand() calls. Calling
+	 * synchronize_rcu_expedited() instead of synchronize_rcu() reduces the
+	 * waiting time from milliseconds to microseconds.
+	 */
+	synchronize_rcu_expedited();
+
  out:
  	return ret;
  }
@@ -2699,9 +2706,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)

  	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);

-	if (!down_read_trylock(&hba->clk_scaling_lock))
-		return SCSI_MLQUEUE_HOST_BUSY;
-
  	/*
  	 * Allows the UFS error handler to wait for prior ufshcd_queuecommand()
  	 * calls.
@@ -2790,8 +2794,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
  out:
  	rcu_read_unlock();

-	up_read(&hba->clk_scaling_lock);
-
  	if (ufs_trigger_eh()) {
  		unsigned long flags;

diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index c13ae56fbff8..695bede14dac 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -777,6 +777,7 @@ struct ufs_hba_monitor {
   * @clk_list_head: UFS host controller clocks list node head
   * @pwr_info: holds current power mode
   * @max_pwr_info: keeps the device max valid pwm
+ * @clk_scaling_lock: used to serialize device commands and clock scaling
   * @desc_size: descriptor sizes reported by device
   * @urgent_bkops_lvl: keeps track of urgent bkops level for device
   * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
Asutosh Das (asd) Nov. 23, 2021, 6:24 p.m. UTC | #5
On 11/22/2021 3:48 PM, Bart Van Assche wrote:
> On 11/22/21 3:02 PM, Asutosh Das (asd) wrote:
>> Current code waits for the already issued requests to complete. It 
>> doesn't issue the yet-to-be issued requests. Wouldn't freezing the 
>> queue issue the requests in the context of scaling_{up/down}?
>> If yes, I don't think the current code is doing that.
> 
> Hi Asutosh,
> 
> How about the patch below that preserves most of the existing code for
> preparing for clock scaling?
> 
> Thanks,
> 
> Bart.
> 
Hi Bart,
This looks good to me. Please push a change and I can test it out.

-asd

> 
> Subject: [PATCH] scsi: ufs: Optimize the command queueing code
> 
> Remove the clock scaling lock from ufshcd_queuecommand() since it is a
> performance bottleneck. Instead, use synchronize_rcu_expedited() to wait
> for ongoing ufshcd_queuecommand() calls.
> 
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>   drivers/scsi/ufs/ufshcd.c | 12 +++++++-----
>   drivers/scsi/ufs/ufshcd.h |  1 +
>   2 files changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 5d214456bf82..1d929c28efaf 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -1196,6 +1196,13 @@ static int ufshcd_clock_scaling_prepare(struct 
> ufs_hba *hba)
>       /* let's not get into low power until clock scaling is completed */
>       ufshcd_hold(hba, false);
> 
> +    /*
> +     * Wait for ongoing ufshcd_queuecommand() calls. Calling
> +     * synchronize_rcu_expedited() instead of synchronize_rcu() reduces 
> the
> +     * waiting time from milliseconds to microseconds.
> +     */
> +    synchronize_rcu_expedited();
> +
>   out:
>       return ret;
>   }
> @@ -2699,9 +2706,6 @@ static int ufshcd_queuecommand(struct Scsi_Host 
> *host, struct scsi_cmnd *cmd)
> 
>       WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
> 
> -    if (!down_read_trylock(&hba->clk_scaling_lock))
> -        return SCSI_MLQUEUE_HOST_BUSY;
> -
>       /*
>        * Allows the UFS error handler to wait for prior 
> ufshcd_queuecommand()
>        * calls.
> @@ -2790,8 +2794,6 @@ static int ufshcd_queuecommand(struct Scsi_Host 
> *host, struct scsi_cmnd *cmd)
>   out:
>       rcu_read_unlock();
> 
> -    up_read(&hba->clk_scaling_lock);
> -
>       if (ufs_trigger_eh()) {
>           unsigned long flags;
> 
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> index c13ae56fbff8..695bede14dac 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -777,6 +777,7 @@ struct ufs_hba_monitor {
>    * @clk_list_head: UFS host controller clocks list node head
>    * @pwr_info: holds current power mode
>    * @max_pwr_info: keeps the device max valid pwm
> + * @clk_scaling_lock: used to serialize device commands and clock scaling
>    * @desc_size: descriptor sizes reported by device
>    * @urgent_bkops_lvl: keeps track of urgent bkops level for device
>    * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level 
> for
Bart Van Assche Dec. 1, 2021, 6:33 p.m. UTC | #6
On 11/23/21 10:24 AM, Asutosh Das (asd) wrote:
> This looks good to me. Please push a change and I can test it out.

Thanks for having taken a look. v3 of this patch series is available at
https://github.com/bvanassche/linux/tree/ufs-for-next.

Bart.
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index a6d3f71c6b00..9cf4a22f1950 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -1070,65 +1070,6 @@  static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
 	return false;
 }
 
-static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
-					u64 wait_timeout_us)
-{
-	unsigned long flags;
-	int ret = 0;
-	u32 tm_doorbell;
-	u32 tr_doorbell;
-	bool timeout = false, do_last_check = false;
-	ktime_t start;
-
-	ufshcd_hold(hba, false);
-	spin_lock_irqsave(hba->host->host_lock, flags);
-	/*
-	 * Wait for all the outstanding tasks/transfer requests.
-	 * Verify by checking the doorbell registers are clear.
-	 */
-	start = ktime_get();
-	do {
-		if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
-			ret = -EBUSY;
-			goto out;
-		}
-
-		tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
-		tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
-		if (!tm_doorbell && !tr_doorbell) {
-			timeout = false;
-			break;
-		} else if (do_last_check) {
-			break;
-		}
-
-		spin_unlock_irqrestore(hba->host->host_lock, flags);
-		schedule();
-		if (ktime_to_us(ktime_sub(ktime_get(), start)) >
-		    wait_timeout_us) {
-			timeout = true;
-			/*
-			 * We might have scheduled out for long time so make
-			 * sure to check if doorbells are cleared by this time
-			 * or not.
-			 */
-			do_last_check = true;
-		}
-		spin_lock_irqsave(hba->host->host_lock, flags);
-	} while (tm_doorbell || tr_doorbell);
-
-	if (timeout) {
-		dev_err(hba->dev,
-			"%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n",
-			__func__, tm_doorbell, tr_doorbell);
-		ret = -EBUSY;
-	}
-out:
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
-	ufshcd_release(hba);
-	return ret;
-}
-
 /**
  * ufshcd_scale_gear - scale up/down UFS gear
  * @hba: per adapter instance
@@ -1176,37 +1117,63 @@  static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)
 
 static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
 {
-	#define DOORBELL_CLR_TOUT_US		(1000 * 1000) /* 1 sec */
-	int ret = 0;
+	struct scsi_device *sdev;
+
 	/*
-	 * make sure that there are no outstanding requests when
-	 * clock scaling is in progress
+	 * Make sure that no commands are in progress while the clock frequency
+	 * is being modified.
+	 *
+	 * Since ufshcd_exec_dev_cmd() and ufshcd_issue_devman_upiu_cmd() lock
+	 * the clk_scaling_lock before calling blk_get_request(), lock
+	 * clk_scaling_lock before freezing the request queues to prevent lock
+	 * inversion.
 	 */
-	ufshcd_scsi_block_requests(hba);
 	down_write(&hba->clk_scaling_lock);
-
-	if (!hba->clk_scaling.is_allowed ||
-	    ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
-		ret = -EBUSY;
-		up_write(&hba->clk_scaling_lock);
-		ufshcd_scsi_unblock_requests(hba);
-		goto out;
-	}
-
+	if (!hba->clk_scaling.is_allowed)
+		goto busy;
+	blk_freeze_queue_start(hba->tmf_queue);
+	blk_freeze_queue_start(hba->cmd_queue);
+	shost_for_each_device(sdev, hba->host)
+		blk_freeze_queue_start(sdev->request_queue);
+	/*
+	 * Calling synchronize_rcu_expedited() reduces the time required to
+	 * freeze request queues from milliseconds to microseconds.
+	 */
+	synchronize_rcu_expedited();
+	shost_for_each_device(sdev, hba->host)
+		if (blk_mq_freeze_queue_wait_timeout(sdev->request_queue, HZ)
+		    <= 0)
+			goto unfreeze;
+	if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue, HZ) <= 0 ||
+	    blk_mq_freeze_queue_wait_timeout(hba->tmf_queue, HZ / 10) <= 0)
+		goto unfreeze;
 	/* let's not get into low power until clock scaling is completed */
 	ufshcd_hold(hba, false);
+	return 0;
 
-out:
-	return ret;
+unfreeze:
+	shost_for_each_device(sdev, hba->host)
+		blk_mq_unfreeze_queue(sdev->request_queue);
+	blk_mq_unfreeze_queue(hba->cmd_queue);
+	blk_mq_unfreeze_queue(hba->tmf_queue);
+
+busy:
+	up_write(&hba->clk_scaling_lock);
+	return -EBUSY;
 }
 
 static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, bool writelock)
 {
+	struct scsi_device *sdev;
+
+	shost_for_each_device(sdev, hba->host)
+		blk_mq_unfreeze_queue(sdev->request_queue);
+	blk_mq_unfreeze_queue(hba->cmd_queue);
+	blk_mq_unfreeze_queue(hba->tmf_queue);
 	if (writelock)
 		up_write(&hba->clk_scaling_lock);
 	else
 		up_read(&hba->clk_scaling_lock);
-	ufshcd_scsi_unblock_requests(hba);
 	ufshcd_release(hba);
 }
 
@@ -2699,9 +2666,6 @@  static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 
 	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
 
-	if (!down_read_trylock(&hba->clk_scaling_lock))
-		return SCSI_MLQUEUE_HOST_BUSY;
-
 	/*
 	 * Allows the UFS error handler to wait for prior ufshcd_queuecommand()
 	 * calls.
@@ -2790,8 +2754,6 @@  static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 out:
 	rcu_read_unlock();
 
-	up_read(&hba->clk_scaling_lock);
-
 	if (ufs_trigger_eh()) {
 		unsigned long flags;
 
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index e9bc07c69a80..7ec463c97d64 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -778,6 +778,7 @@  struct ufs_hba_monitor {
  * @clk_list_head: UFS host controller clocks list node head
  * @pwr_info: holds current power mode
  * @max_pwr_info: keeps the device max valid pwm
+ * @clk_scaling_lock: used to serialize device commands and clock scaling
  * @desc_size: descriptor sizes reported by device
  * @urgent_bkops_lvl: keeps track of urgent bkops level for device
  * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for