diff mbox series

[v9,17/17] scsi: ufs: Inform the block layer about write ordering

Message ID 20230816195447.3703954-18-bvanassche@acm.org (mailing list archive)
State New, archived
Headers show
Series Improve performance for zoned UFS devices | expand

Commit Message

Bart Van Assche Aug. 16, 2023, 7:53 p.m. UTC
From the UFSHCI 4.0 specification, about the legacy (single queue) mode:
"The host controller always process transfer requests in-order according
to the order submitted to the list. In case of multiple commands with
single doorbell register ringing (batch mode), The dispatch order for
these transfer requests by host controller will base on their index in
the List. A transfer request with lower index value will be executed
before a transfer request with higher index value."

From the UFSHCI 4.0 specification, about the MCQ mode:
"Command Submission
1. Host SW writes an Entry to SQ
2. Host SW updates SQ doorbell tail pointer

Command Processing
3. After fetching the Entry, Host Controller updates SQ doorbell head
   pointer
4. Host controller sends COMMAND UPIU to UFS device"

In other words, for both legacy and MCQ mode, UFS controllers are
required to forward commands to the UFS device in the order these
commands have been received from the host.

Notes:
- For legacy mode this is only correct if the host submits one
  command at a time. The UFS driver does this.
- Also in legacy mode, the command order is not preserved if
  auto-hibernation is enabled in the UFS controller. Hence, enable
  zone write locking if auto-hibernation is enabled.

This patch improves performance as follows on my test setup:
- With the mq-deadline scheduler: 2.5x more IOPS for small writes.
- When not using an I/O scheduler compared to using mq-deadline with
  zone locking: 4x more IOPS for small writes.

Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Can Guo <quic_cang@quicinc.com>
Cc: Avri Altman <avri.altman@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Damien Le Moal <dlemoal@kernel.org>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/ufs/core/ufshcd.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

Comments

Bao D. Nguyen Aug. 17, 2023, 7 p.m. UTC | #1
On 8/16/2023 12:53 PM, Bart Van Assche wrote:
>  From the UFSHCI 4.0 specification, about the legacy (single queue) mode:
> "The host controller always process transfer requests in-order according
> to the order submitted to the list. In case of multiple commands with
> single doorbell register ringing (batch mode), The dispatch order for
> these transfer requests by host controller will base on their index in
> the List. A transfer request with lower index value will be executed
> before a transfer request with higher index value."
> 
>  From the UFSHCI 4.0 specification, about the MCQ mode:
> "Command Submission
> 1. Host SW writes an Entry to SQ
> 2. Host SW updates SQ doorbell tail pointer
> 
> Command Processing
> 3. After fetching the Entry, Host Controller updates SQ doorbell head
>     pointer
> 4. Host controller sends COMMAND UPIU to UFS device"
> 
> In other words, for both legacy and MCQ mode, UFS controllers are
> required to forward commands to the UFS device in the order these
> commands have been received from the host.
> 
> Notes:
> - For legacy mode this is only correct if the host submits one
>    command at a time. The UFS driver does this.
> - Also in legacy mode, the command order is not preserved if
>    auto-hibernation is enabled in the UFS controller. Hence, enable
>    zone write locking if auto-hibernation is enabled.
> 
> This patch improves performance as follows on my test setup:
> - With the mq-deadline scheduler: 2.5x more IOPS for small writes.
> - When not using an I/O scheduler compared to using mq-deadline with
>    zone locking: 4x more IOPS for small writes.
> 
> Cc: Martin K. Petersen <martin.petersen@oracle.com>
> Cc: Can Guo <quic_cang@quicinc.com>
> Cc: Avri Altman <avri.altman@wdc.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Damien Le Moal <dlemoal@kernel.org>
> Cc: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>   drivers/ufs/core/ufshcd.c | 22 ++++++++++++++++++++--
>   1 file changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
> index 37d430d20939..7f5049a66cca 100644
> --- a/drivers/ufs/core/ufshcd.c
> +++ b/drivers/ufs/core/ufshcd.c
> @@ -4356,6 +4356,19 @@ static int ufshcd_update_preserves_write_order(struct ufs_hba *hba,
>   				return -EPERM;
>   		}
>   	}
> +	shost_for_each_device(sdev, hba->host)
> +		blk_freeze_queue_start(sdev->request_queue);
> +	shost_for_each_device(sdev, hba->host) {
> +		struct request_queue *q = sdev->request_queue;
> +
> +		blk_mq_freeze_queue_wait(q);
> +		q->limits.driver_preserves_write_order = preserves_write_order;
> +		blk_queue_required_elevator_features(q,
> +			preserves_write_order ? 0 : ELEVATOR_F_ZBD_SEQ_WRITE);
> +		if (q->disk)
> +			disk_set_zoned(q->disk, q->limits.zoned);
> +		blk_mq_unfreeze_queue(q);
> +	}
>   
>   	return 0;
>   }
> @@ -4393,7 +4406,8 @@ int ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit)
>   
>   	if (!is_mcq_enabled(hba) && !prev_state && new_state) {
>   		/*
> -		 * Auto-hibernation will be enabled for legacy UFSHCI mode.
> +		 * Auto-hibernation will be enabled for legacy UFSHCI mode. Tell
> +		 * the block layer that write requests may be reordered.
>   		 */
>   		ret = ufshcd_update_preserves_write_order(hba, false);
>   		if (ret)
> @@ -4409,7 +4423,8 @@ int ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit)
>   	}
>   	if (!is_mcq_enabled(hba) && prev_state && !new_state) {
>   		/*
> -		 * Auto-hibernation has been disabled.
> +		 * Auto-hibernation has been disabled. Tell the block layer that
> +		 * the order of write requests is preserved.
>   		 */
>   		ret = ufshcd_update_preserves_write_order(hba, true);
>   		WARN_ON_ONCE(ret);
> @@ -5187,6 +5202,9 @@ static int ufshcd_slave_configure(struct scsi_device *sdev)
>   
>   	ufshcd_hpb_configure(hba, sdev);
>   
> +	q->limits.driver_preserves_write_order =
> +		!ufshcd_is_auto_hibern8_supported(hba) ||
> +		FIELD_GET(UFSHCI_AHIBERN8_TIMER_MASK, hba->ahit) == 0;
In legacy SDB mode with auto-hibernation enabled, the default setting 
for the driver_preserves_write_order = false.

Using the default setting, it may be missing this check that is part of 
the ufshcd_auto_hibern8_update()->ufshcd_update_preserves_write_order().

Auto-hibernation should not be enabled by default unless these 
conditions are met?
	if (blk_queue_is_zoned(q) && !q->elevator)
		return -EPERM;


>   	blk_queue_update_dma_pad(q, PRDT_DATA_BYTE_COUNT_PAD - 1);
>   	if (hba->quirks & UFSHCD_QUIRK_4KB_DMA_ALIGNMENT)
>   		blk_queue_update_dma_alignment(q, SZ_4K - 1);
Bart Van Assche Aug. 17, 2023, 7:34 p.m. UTC | #2
On 8/17/23 12:00, Bao D. Nguyen wrote:
> In legacy SDB mode with auto-hibernation enabled, the default setting for the
> driver_preserves_write_order = false.
 >
> Using the default setting, it may be missing this check that is part of the ufshcd_auto_hibern8_update()->ufshcd_update_preserves_write_order().

If auto-hibernation is enabled by the host driver, driver_preserves_write_order
is set by the following code in ufshcd_slave_configure():

	q->limits.driver_preserves_write_order =
		!ufshcd_is_auto_hibern8_supported(hba) ||
		FIELD_GET(UFSHCI_AHIBERN8_TIMER_MASK, hba->ahit) == 0;

Does this answer your question?

Thanks,

Bart.
Bao D. Nguyen Aug. 17, 2023, 9:47 p.m. UTC | #3
On 8/17/2023 12:34 PM, Bart Van Assche wrote:
> On 8/17/23 12:00, Bao D. Nguyen wrote:
>> In legacy SDB mode with auto-hibernation enabled, the default setting 
>> for the
>> driver_preserves_write_order = false.
>  >
>> Using the default setting, it may be missing this check that is part 
>> of the 
>> ufshcd_auto_hibern8_update()->ufshcd_update_preserves_write_order().
> 
> If auto-hibernation is enabled by the host driver, 
> driver_preserves_write_order
> is set by the following code in ufshcd_slave_configure():
> 
>      q->limits.driver_preserves_write_order =
>          !ufshcd_is_auto_hibern8_supported(hba) ||
>          FIELD_GET(UFSHCI_AHIBERN8_TIMER_MASK, hba->ahit) == 0;
> 
> Does this answer your question?
Hi Bart,
My concern is that in the ufshcd_update_preserves_write_order() you have 
this logic:

	if (!preserves_write_order) {
		shost_for_each_device(sdev, hba->host) {
			struct request_queue *q = sdev->request_queue;
			/*...*/
			if (blk_queue_is_zoned(q) && !q->elevator)
				return -EPERM;
		}
	}

The above logic is only invoked when ufshcd_auto_hibern8_update() is 
called. During initialization, ufshcd_auto_hibern8_update() is not 
called. Therefore, you may have SDB mode with auto hibernate enabled -> 
preserves_write_order = false, and (blk_queue_is_zoned(q) && 
!q->elevator) is true. Is that a problem? If you called 
ufshcd_auto_hibern8_update() during ufs probe, you would have returned 
-EPERM and not enable auto-hibernation in that case.

Thanks,
Bao


> 
> Thanks,
> 
> Bart.
Bart Van Assche Aug. 17, 2023, 10:05 p.m. UTC | #4
On 8/17/23 14:47, Bao D. Nguyen wrote:
> During initialization, ufshcd_auto_hibern8_update() is not called. Therefore,
> you may have SDB mode with auto hibernate enabled -> preserves_write_order = false, [ ... ]

Hi Bao,

ufshcd_slave_configure() is called before any SCSI commands are submitted to a
logical unit. ufshcd_slave_configure() sets 'preserves_write_order' depending on
the value of hba->ahit. Does this answer your question?

Thanks,

Bart.
Bao D. Nguyen Aug. 18, 2023, 12:19 a.m. UTC | #5
On 8/17/2023 3:05 PM, Bart Van Assche wrote:
> On 8/17/23 14:47, Bao D. Nguyen wrote:
>> During initialization, ufshcd_auto_hibern8_update() is not called. 
>> Therefore,
>> you may have SDB mode with auto hibernate enabled -> 
>> preserves_write_order = false, [ ... ]
> 
> Hi Bao,
> 
> ufshcd_slave_configure() is called before any SCSI commands are 
> submitted to a
> logical unit. ufshcd_slave_configure() sets 'preserves_write_order' 
> depending on
> the value of hba->ahit. Does this answer your question?

Sorry Bart. Not yet :-) Please let me try to explain myself again.

For example, in SDB mode, after the probe and you want to enable 
auto-hibern8, you would call ufshcd_auto_hibern8_update() which then calls
ufshcd_update_preserves_write_order(). Before auto-hibern8 is enabled, 
you would check this condition:
	if (blk_queue_is_zoned(q) && !q->elevator)

In other words, auto-hibern8 is enabled only if the above condition false.

However, the during a normal operation, the ufshcd_auto_hibern8_update() 
may not get called at all, and auto-hibern8 can be enabled in SDB mode 
as part of ufs init. Would that be a problem to have auto-hibern8 
enabled without checking whether the above condition is false?

Thanks
Bao

> 
> Thanks,
> 
> Bart.
Bart Van Assche Aug. 18, 2023, 5:56 p.m. UTC | #6
On 8/17/23 17:19, Bao D. Nguyen wrote:
> For example, in SDB mode, after the probe and you want to enable auto-hibern8, you would call ufshcd_auto_hibern8_update() which then calls
> ufshcd_update_preserves_write_order(). Before auto-hibern8 is enabled, you would check this condition:
>      if (blk_queue_is_zoned(q) && !q->elevator)
> 
> In other words, auto-hibern8 is enabled only if the above condition false.
> 
> However, the during a normal operation, the ufshcd_auto_hibern8_update() may not get called at all, and auto-hibern8 can be enabled in SDB mode as part of ufs init. Would that be a problem to have auto-hibern8 enabled without checking whether the above condition is false?

Hi Bao,

Probing the host controller happens as follows:
* The host controller probing function is called, e.g. ufs_qcom_probe().
* ufshcd_alloc_host() and ufshcd_init() are called directly or indirectly
   by the host controller probe function.
* ufshcd_init() calls scsi_add_host() and async_schedule(ufshcd_async_scan, hba).

Once ufshcd_async_scan() is called asynchronously, it performs the
following actions:
* ufs_probe_hba() is called. This function calls ufshcd_link_startup()
   indirectly. That function invokes the link_startup_notify vop if it has
   been defined. Some host drivers set hba->ahit from inside that vop.
* scsi_scan_host() is called and calls scsi_alloc_sdev() indirectly.
* scsi_alloc_sdev() calls blk_mq_init_queue() and shost->hostt->slave_alloc().
* scsi_add_lun() calls sdev->host->hostt->slave_configure().
* scsi_add_lun() calls scsi_sysfs_add_sdev(). This indirectly causes sd_probe()
   to be called asynchronously.
* sd_probe() calls sd_revalidate_disk(). This results in an indirect call of
   disk_set_zoned(). Additionally, the ELEVATOR_F_ZBD_SEQ_WRITE flag is set by
   the sd driver if q->limits.driver_preserves_write_order has not been set.
* Still from inside sd_probe(), device_add_disk() is called and a scheduler
   is selected based on the value of q->required_elevator_features.

There are two ways in which host drivers initialize auto-hibernation:
* Either by setting hba->ahit before ufshcd_init() is called.
* Or by calling ufshcd_auto_hibern8_update() from the link_startup_notify
   vop.

I think the above shows that the zoned model is queried *after* the above methods
for enabling auto-hibernation have completed and hence that blk_queue_is_zoned()
evaluates to false if a host driver calls ufshcd_auto_hibern8_update() from
inside the link_startup_notify vop.

If auto-hibernation is enabled from inside the host driver then that should
happen before sd_probe() is called. sd_probe() will use the value of
q->limits.driver_preserves_write_order that has been set by
ufshcd_slave_configure() so there is no risk for inconsistencies between the
auto-hibernation configuration and the request queue configuration.

Bart.
diff mbox series

Patch

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 37d430d20939..7f5049a66cca 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -4356,6 +4356,19 @@  static int ufshcd_update_preserves_write_order(struct ufs_hba *hba,
 				return -EPERM;
 		}
 	}
+	shost_for_each_device(sdev, hba->host)
+		blk_freeze_queue_start(sdev->request_queue);
+	shost_for_each_device(sdev, hba->host) {
+		struct request_queue *q = sdev->request_queue;
+
+		blk_mq_freeze_queue_wait(q);
+		q->limits.driver_preserves_write_order = preserves_write_order;
+		blk_queue_required_elevator_features(q,
+			preserves_write_order ? 0 : ELEVATOR_F_ZBD_SEQ_WRITE);
+		if (q->disk)
+			disk_set_zoned(q->disk, q->limits.zoned);
+		blk_mq_unfreeze_queue(q);
+	}
 
 	return 0;
 }
@@ -4393,7 +4406,8 @@  int ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit)
 
 	if (!is_mcq_enabled(hba) && !prev_state && new_state) {
 		/*
-		 * Auto-hibernation will be enabled for legacy UFSHCI mode.
+		 * Auto-hibernation will be enabled for legacy UFSHCI mode. Tell
+		 * the block layer that write requests may be reordered.
 		 */
 		ret = ufshcd_update_preserves_write_order(hba, false);
 		if (ret)
@@ -4409,7 +4423,8 @@  int ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit)
 	}
 	if (!is_mcq_enabled(hba) && prev_state && !new_state) {
 		/*
-		 * Auto-hibernation has been disabled.
+		 * Auto-hibernation has been disabled. Tell the block layer that
+		 * the order of write requests is preserved.
 		 */
 		ret = ufshcd_update_preserves_write_order(hba, true);
 		WARN_ON_ONCE(ret);
@@ -5187,6 +5202,9 @@  static int ufshcd_slave_configure(struct scsi_device *sdev)
 
 	ufshcd_hpb_configure(hba, sdev);
 
+	q->limits.driver_preserves_write_order =
+		!ufshcd_is_auto_hibern8_supported(hba) ||
+		FIELD_GET(UFSHCI_AHIBERN8_TIMER_MASK, hba->ahit) == 0;
 	blk_queue_update_dma_pad(q, PRDT_DATA_BYTE_COUNT_PAD - 1);
 	if (hba->quirks & UFSHCD_QUIRK_4KB_DMA_ALIGNMENT)
 		blk_queue_update_dma_alignment(q, SZ_4K - 1);