diff mbox series

[v15,01/19] block: Introduce more member variables related to zone write locking

Message ID 20231114211804.1449162-2-bvanassche@acm.org (mailing list archive)
State Changes Requested
Headers show
Series Improve write performance for zoned UFS devices​ | expand

Commit Message

Bart Van Assche Nov. 14, 2023, 9:16 p.m. UTC
Many but not all storage controllers require serialization of zoned writes.
Introduce two new request queue limit member variables related to write
serialization. 'driver_preserves_write_order' allows block drivers to
indicate that the order of write commands is preserved and hence that
serialization of writes per zone is not required. 'use_zone_write_lock' is
set by disk_set_zoned() if and only if the block device has zones and if
the block driver does not preserve the order of write requests.

Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Nitesh Shetty <nj.shetty@samsung.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/blk-settings.c   | 15 +++++++++++++++
 block/blk-zoned.c      |  1 +
 include/linux/blkdev.h | 10 ++++++++++
 3 files changed, 26 insertions(+)

Comments

Damien Le Moal Nov. 19, 2023, 11:29 p.m. UTC | #1
On 11/15/23 06:16, Bart Van Assche wrote:
> Many but not all storage controllers require serialization of zoned writes.
> Introduce two new request queue limit member variables related to write
> serialization. 'driver_preserves_write_order' allows block drivers to
> indicate that the order of write commands is preserved and hence that
> serialization of writes per zone is not required. 'use_zone_write_lock' is
> set by disk_set_zoned() if and only if the block device has zones and if
> the block driver does not preserve the order of write requests.
> 
> Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
> Reviewed-by: Hannes Reinecke <hare@suse.de>
> Reviewed-by: Nitesh Shetty <nj.shetty@samsung.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  block/blk-settings.c   | 15 +++++++++++++++
>  block/blk-zoned.c      |  1 +
>  include/linux/blkdev.h | 10 ++++++++++
>  3 files changed, 26 insertions(+)
> 
> diff --git a/block/blk-settings.c b/block/blk-settings.c
> index 0046b447268f..4c776c08f190 100644
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -56,6 +56,8 @@ void blk_set_default_limits(struct queue_limits *lim)
>  	lim->alignment_offset = 0;
>  	lim->io_opt = 0;
>  	lim->misaligned = 0;
> +	lim->driver_preserves_write_order = false;
> +	lim->use_zone_write_lock = false;
>  	lim->zoned = BLK_ZONED_NONE;
>  	lim->zone_write_granularity = 0;
>  	lim->dma_alignment = 511;
> @@ -82,6 +84,8 @@ void blk_set_stacking_limits(struct queue_limits *lim)
>  	lim->max_dev_sectors = UINT_MAX;
>  	lim->max_write_zeroes_sectors = UINT_MAX;
>  	lim->max_zone_append_sectors = UINT_MAX;
> +	/* Request-based stacking drivers do not reorder requests. */

Rereading this patch, I do not think this statement is correct. I seriously
doubt that multipath will preserve write command order in all cases...

> +	lim->driver_preserves_write_order = true;

... so it is likely much safer to set the default to "false" as that is the
default for all requests in general.

>  }
>  EXPORT_SYMBOL(blk_set_stacking_limits);
>  
> @@ -685,6 +689,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
>  						   b->max_secure_erase_sectors);
>  	t->zone_write_granularity = max(t->zone_write_granularity,
>  					b->zone_write_granularity);
> +	t->driver_preserves_write_order = t->driver_preserves_write_order &&
> +		b->driver_preserves_write_order;
> +	t->use_zone_write_lock = t->use_zone_write_lock ||
> +		b->use_zone_write_lock;

Very minor nit: splitting the line after the equal would make this more readable.

>  	t->zoned = max(t->zoned, b->zoned);
>  	return ret;
>  }
> @@ -949,6 +957,13 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
>  	}
>  
>  	q->limits.zoned = model;
> +	/*
> +	 * Use the zone write lock only for zoned block devices and only if
> +	 * the block driver does not preserve the order of write commands.
> +	 */
> +	q->limits.use_zone_write_lock = model != BLK_ZONED_NONE &&
> +		!q->limits.driver_preserves_write_order;
> +
>  	if (model != BLK_ZONED_NONE) {
>  		/*
>  		 * Set the zone write granularity to the device logical block
> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
> index 619ee41a51cc..112620985bff 100644
> --- a/block/blk-zoned.c
> +++ b/block/blk-zoned.c
> @@ -631,6 +631,7 @@ void disk_clear_zone_settings(struct gendisk *disk)
>  	q->limits.chunk_sectors = 0;
>  	q->limits.zone_write_granularity = 0;
>  	q->limits.max_zone_append_sectors = 0;
> +	q->limits.use_zone_write_lock = false;
>  
>  	blk_mq_unfreeze_queue(q);
>  }
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 51fa7ffdee83..2d452f5a36c8 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -316,6 +316,16 @@ struct queue_limits {
>  	unsigned char		misaligned;
>  	unsigned char		discard_misaligned;
>  	unsigned char		raid_partial_stripes_expensive;
> +	/*
> +	 * Whether or not the block driver preserves the order of write
> +	 * requests. Set by the block driver.
> +	 */
> +	bool			driver_preserves_write_order;
> +	/*
> +	 * Whether or not zone write locking should be used. Set by
> +	 * disk_set_zoned().
> +	 */
> +	bool			use_zone_write_lock;
>  	enum blk_zoned_model	zoned;
>  
>  	/*
Bart Van Assche Nov. 20, 2023, 8:44 p.m. UTC | #2
On 11/19/23 15:29, Damien Le Moal wrote:
> On 11/15/23 06:16, Bart Van Assche wrote:
>> @@ -82,6 +84,8 @@ void blk_set_stacking_limits(struct queue_limits *lim)
>>   	lim->max_dev_sectors = UINT_MAX;
>>   	lim->max_write_zeroes_sectors = UINT_MAX;
>>   	lim->max_zone_append_sectors = UINT_MAX;
>> +	/* Request-based stacking drivers do not reorder requests. */
> 
> Rereading this patch, I do not think this statement is correct. I seriously
> doubt that multipath will preserve write command order in all cases...
> 
>> +	lim->driver_preserves_write_order = true;
> 
> ... so it is likely much safer to set the default to "false" as that is the
> default for all requests in general.

How about applying this (untested) patch on top of this patch series?

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 4c776c08f190..aba1972e9767 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -84,8 +84,6 @@ void blk_set_stacking_limits(struct queue_limits *lim)
  	lim->max_dev_sectors = UINT_MAX;
  	lim->max_write_zeroes_sectors = UINT_MAX;
  	lim->max_zone_append_sectors = UINT_MAX;
-	/* Request-based stacking drivers do not reorder requests. */
-	lim->driver_preserves_write_order = true;
  }
  EXPORT_SYMBOL(blk_set_stacking_limits);

diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 2d3e186ca87e..cb9abe4bd065 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -147,6 +147,11 @@ static int linear_report_zones(struct dm_target *ti,
  #define linear_report_zones NULL
  #endif

+static void linear_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+	limits->driver_preserves_write_order = true;
+}
+
  static int linear_iterate_devices(struct dm_target *ti,
  				  iterate_devices_callout_fn fn, void *data)
  {
@@ -208,6 +213,7 @@ static struct target_type linear_target = {
  	.map    = linear_map,
  	.status = linear_status,
  	.prepare_ioctl = linear_prepare_ioctl,
+	.io_hints = linear_io_hints,
  	.iterate_devices = linear_iterate_devices,
  	.direct_access = linear_dax_direct_access,
  	.dax_zero_page_range = linear_dax_zero_page_range,

>> @@ -685,6 +689,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
>>   						   b->max_secure_erase_sectors);
>>   	t->zone_write_granularity = max(t->zone_write_granularity,
>>   					b->zone_write_granularity);
>> +	t->driver_preserves_write_order = t->driver_preserves_write_order &&
>> +		b->driver_preserves_write_order;
>> +	t->use_zone_write_lock = t->use_zone_write_lock ||
>> +		b->use_zone_write_lock;
> 
> Very minor nit: splitting the line after the equal would make this more readable.

Hmm ... I have often seen other reviewers asking to maximize the use of each
source code line as much as reasonably possible.

Thanks,

Bart.
Damien Le Moal Nov. 20, 2023, 11:02 p.m. UTC | #3
On 11/21/23 05:44, Bart Van Assche wrote:
> On 11/19/23 15:29, Damien Le Moal wrote:
>> On 11/15/23 06:16, Bart Van Assche wrote:
>>> @@ -82,6 +84,8 @@ void blk_set_stacking_limits(struct queue_limits *lim)
>>>   	lim->max_dev_sectors = UINT_MAX;
>>>   	lim->max_write_zeroes_sectors = UINT_MAX;
>>>   	lim->max_zone_append_sectors = UINT_MAX;
>>> +	/* Request-based stacking drivers do not reorder requests. */
>>
>> Rereading this patch, I do not think this statement is correct. I seriously
>> doubt that multipath will preserve write command order in all cases...
>>
>>> +	lim->driver_preserves_write_order = true;
>>
>> ... so it is likely much safer to set the default to "false" as that is the
>> default for all requests in general.
> 
> How about applying this (untested) patch on top of this patch series?
> 
> diff --git a/block/blk-settings.c b/block/blk-settings.c
> index 4c776c08f190..aba1972e9767 100644
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -84,8 +84,6 @@ void blk_set_stacking_limits(struct queue_limits *lim)
>   	lim->max_dev_sectors = UINT_MAX;
>   	lim->max_write_zeroes_sectors = UINT_MAX;
>   	lim->max_zone_append_sectors = UINT_MAX;
> -	/* Request-based stacking drivers do not reorder requests. */
> -	lim->driver_preserves_write_order = true;
>   }
>   EXPORT_SYMBOL(blk_set_stacking_limits);
> 
> diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
> index 2d3e186ca87e..cb9abe4bd065 100644
> --- a/drivers/md/dm-linear.c
> +++ b/drivers/md/dm-linear.c
> @@ -147,6 +147,11 @@ static int linear_report_zones(struct dm_target *ti,
>   #define linear_report_zones NULL
>   #endif
> 
> +static void linear_io_hints(struct dm_target *ti, struct queue_limits *limits)
> +{
> +	limits->driver_preserves_write_order = true;
> +}

Hmm, but does dm-linear preserve write order ? I am not convinced. And what
about dm-flakey, dm-error and dm-crypt ? All of these also support zoned
devices. I do not think that we can say that any of these preserve write order.

> +
>   static int linear_iterate_devices(struct dm_target *ti,
>   				  iterate_devices_callout_fn fn, void *data)
>   {
> @@ -208,6 +213,7 @@ static struct target_type linear_target = {
>   	.map    = linear_map,
>   	.status = linear_status,
>   	.prepare_ioctl = linear_prepare_ioctl,
> +	.io_hints = linear_io_hints,
>   	.iterate_devices = linear_iterate_devices,
>   	.direct_access = linear_dax_direct_access,
>   	.dax_zero_page_range = linear_dax_zero_page_range,
> 
>>> @@ -685,6 +689,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
>>>   						   b->max_secure_erase_sectors);
>>>   	t->zone_write_granularity = max(t->zone_write_granularity,
>>>   					b->zone_write_granularity);
>>> +	t->driver_preserves_write_order = t->driver_preserves_write_order &&
>>> +		b->driver_preserves_write_order;
>>> +	t->use_zone_write_lock = t->use_zone_write_lock ||
>>> +		b->use_zone_write_lock;
>>
>> Very minor nit: splitting the line after the equal would make this more readable.
> 
> Hmm ... I have often seen other reviewers asking to maximize the use of each
> source code line as much as reasonably possible.

As I said, very minor nit :) Feel free to ignore.

> 
> Thanks,
> 
> Bart.
>
Bart Van Assche Nov. 20, 2023, 11:58 p.m. UTC | #4
On 11/20/23 15:02, Damien Le Moal wrote:
> On 11/21/23 05:44, Bart Van Assche wrote:
>> How about applying this (untested) patch on top of this patch series?
>>
>> diff --git a/block/blk-settings.c b/block/blk-settings.c
>> index 4c776c08f190..aba1972e9767 100644
>> --- a/block/blk-settings.c
>> +++ b/block/blk-settings.c
>> @@ -84,8 +84,6 @@ void blk_set_stacking_limits(struct queue_limits *lim)
>>    	lim->max_dev_sectors = UINT_MAX;
>>    	lim->max_write_zeroes_sectors = UINT_MAX;
>>    	lim->max_zone_append_sectors = UINT_MAX;
>> -	/* Request-based stacking drivers do not reorder requests. */
>> -	lim->driver_preserves_write_order = true;
>>    }
>>    EXPORT_SYMBOL(blk_set_stacking_limits);
>>
>> diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
>> index 2d3e186ca87e..cb9abe4bd065 100644
>> --- a/drivers/md/dm-linear.c
>> +++ b/drivers/md/dm-linear.c
>> @@ -147,6 +147,11 @@ static int linear_report_zones(struct dm_target *ti,
>>    #define linear_report_zones NULL
>>    #endif
>>
>> +static void linear_io_hints(struct dm_target *ti, struct queue_limits *limits)
>> +{
>> +	limits->driver_preserves_write_order = true;
>> +}
> 
> Hmm, but does dm-linear preserve write order ? I am not convinced. And what
> about dm-flakey, dm-error and dm-crypt ? All of these also support zoned
> devices. I do not think that we can say that any of these preserve write order.

Hi Damien,

I propose to keep any changes for files in the drivers/md/ directory for
later. This patch series is already big enough. Additionally, I don't
need the dm changes myself since Android does does not use dm-linear nor
dm-verity to access a zoned logical unit. All we need to know right now
is that the approach of this patch series can be extended to dm drivers.

Thanks,

Bart.
Damien Le Moal Nov. 21, 2023, 1:21 a.m. UTC | #5
On 11/21/23 08:58, Bart Van Assche wrote:
> On 11/20/23 15:02, Damien Le Moal wrote:
>> On 11/21/23 05:44, Bart Van Assche wrote:
>>> How about applying this (untested) patch on top of this patch series?
>>>
>>> diff --git a/block/blk-settings.c b/block/blk-settings.c
>>> index 4c776c08f190..aba1972e9767 100644
>>> --- a/block/blk-settings.c
>>> +++ b/block/blk-settings.c
>>> @@ -84,8 +84,6 @@ void blk_set_stacking_limits(struct queue_limits *lim)
>>>    	lim->max_dev_sectors = UINT_MAX;
>>>    	lim->max_write_zeroes_sectors = UINT_MAX;
>>>    	lim->max_zone_append_sectors = UINT_MAX;
>>> -	/* Request-based stacking drivers do not reorder requests. */
>>> -	lim->driver_preserves_write_order = true;
>>>    }
>>>    EXPORT_SYMBOL(blk_set_stacking_limits);
>>>
>>> diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
>>> index 2d3e186ca87e..cb9abe4bd065 100644
>>> --- a/drivers/md/dm-linear.c
>>> +++ b/drivers/md/dm-linear.c
>>> @@ -147,6 +147,11 @@ static int linear_report_zones(struct dm_target *ti,
>>>    #define linear_report_zones NULL
>>>    #endif
>>>
>>> +static void linear_io_hints(struct dm_target *ti, struct queue_limits *limits)
>>> +{
>>> +	limits->driver_preserves_write_order = true;
>>> +}
>>
>> Hmm, but does dm-linear preserve write order ? I am not convinced. And what
>> about dm-flakey, dm-error and dm-crypt ? All of these also support zoned
>> devices. I do not think that we can say that any of these preserve write order.
> 
> Hi Damien,
> 
> I propose to keep any changes for files in the drivers/md/ directory for
> later. This patch series is already big enough. Additionally, I don't
> need the dm changes myself since Android does does not use dm-linear nor
> dm-verity to access a zoned logical unit. All we need to know right now
> is that the approach of this patch series can be extended to dm drivers.

Agree. For now, dm will keep working as usual using the zone write locking. We
can optimize that later as needed and if possible. So initializing the limits
driver_preserves_write_order to false (default) is the way to go.
Damien Le Moal Nov. 21, 2023, 2:12 a.m. UTC | #6
On 11/21/23 10:21, Damien Le Moal wrote:
> On 11/21/23 08:58, Bart Van Assche wrote:
>> On 11/20/23 15:02, Damien Le Moal wrote:
>>> On 11/21/23 05:44, Bart Van Assche wrote:
>>>> How about applying this (untested) patch on top of this patch series?
>>>>
>>>> diff --git a/block/blk-settings.c b/block/blk-settings.c
>>>> index 4c776c08f190..aba1972e9767 100644
>>>> --- a/block/blk-settings.c
>>>> +++ b/block/blk-settings.c
>>>> @@ -84,8 +84,6 @@ void blk_set_stacking_limits(struct queue_limits *lim)
>>>>    	lim->max_dev_sectors = UINT_MAX;
>>>>    	lim->max_write_zeroes_sectors = UINT_MAX;
>>>>    	lim->max_zone_append_sectors = UINT_MAX;
>>>> -	/* Request-based stacking drivers do not reorder requests. */
>>>> -	lim->driver_preserves_write_order = true;
>>>>    }
>>>>    EXPORT_SYMBOL(blk_set_stacking_limits);
>>>>
>>>> diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
>>>> index 2d3e186ca87e..cb9abe4bd065 100644
>>>> --- a/drivers/md/dm-linear.c
>>>> +++ b/drivers/md/dm-linear.c
>>>> @@ -147,6 +147,11 @@ static int linear_report_zones(struct dm_target *ti,
>>>>    #define linear_report_zones NULL
>>>>    #endif
>>>>
>>>> +static void linear_io_hints(struct dm_target *ti, struct queue_limits *limits)
>>>> +{
>>>> +	limits->driver_preserves_write_order = true;
>>>> +}
>>>
>>> Hmm, but does dm-linear preserve write order ? I am not convinced. And what
>>> about dm-flakey, dm-error and dm-crypt ? All of these also support zoned
>>> devices. I do not think that we can say that any of these preserve write order.
>>
>> Hi Damien,
>>
>> I propose to keep any changes for files in the drivers/md/ directory for
>> later. This patch series is already big enough. Additionally, I don't
>> need the dm changes myself since Android does does not use dm-linear nor
>> dm-verity to access a zoned logical unit. All we need to know right now
>> is that the approach of this patch series can be extended to dm drivers.
> 
> Agree. For now, dm will keep working as usual using the zone write locking. We
> can optimize that later as needed and if possible. So initializing the limits
> driver_preserves_write_order to false (default) is the way to go.

Actually, I do not think it matters since DM devices do not have an IO
scheduler... So I do not think any optimization is really needed at all. The use
of the zone write locking or not based on driver_preserves_write_order will be
at the lowest physical device level only. So for BIO based DM, we should not
need to do anything at all.
diff mbox series

Patch

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 0046b447268f..4c776c08f190 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -56,6 +56,8 @@  void blk_set_default_limits(struct queue_limits *lim)
 	lim->alignment_offset = 0;
 	lim->io_opt = 0;
 	lim->misaligned = 0;
+	lim->driver_preserves_write_order = false;
+	lim->use_zone_write_lock = false;
 	lim->zoned = BLK_ZONED_NONE;
 	lim->zone_write_granularity = 0;
 	lim->dma_alignment = 511;
@@ -82,6 +84,8 @@  void blk_set_stacking_limits(struct queue_limits *lim)
 	lim->max_dev_sectors = UINT_MAX;
 	lim->max_write_zeroes_sectors = UINT_MAX;
 	lim->max_zone_append_sectors = UINT_MAX;
+	/* Request-based stacking drivers do not reorder requests. */
+	lim->driver_preserves_write_order = true;
 }
 EXPORT_SYMBOL(blk_set_stacking_limits);
 
@@ -685,6 +689,10 @@  int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 						   b->max_secure_erase_sectors);
 	t->zone_write_granularity = max(t->zone_write_granularity,
 					b->zone_write_granularity);
+	t->driver_preserves_write_order = t->driver_preserves_write_order &&
+		b->driver_preserves_write_order;
+	t->use_zone_write_lock = t->use_zone_write_lock ||
+		b->use_zone_write_lock;
 	t->zoned = max(t->zoned, b->zoned);
 	return ret;
 }
@@ -949,6 +957,13 @@  void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
 	}
 
 	q->limits.zoned = model;
+	/*
+	 * Use the zone write lock only for zoned block devices and only if
+	 * the block driver does not preserve the order of write commands.
+	 */
+	q->limits.use_zone_write_lock = model != BLK_ZONED_NONE &&
+		!q->limits.driver_preserves_write_order;
+
 	if (model != BLK_ZONED_NONE) {
 		/*
 		 * Set the zone write granularity to the device logical block
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 619ee41a51cc..112620985bff 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -631,6 +631,7 @@  void disk_clear_zone_settings(struct gendisk *disk)
 	q->limits.chunk_sectors = 0;
 	q->limits.zone_write_granularity = 0;
 	q->limits.max_zone_append_sectors = 0;
+	q->limits.use_zone_write_lock = false;
 
 	blk_mq_unfreeze_queue(q);
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 51fa7ffdee83..2d452f5a36c8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -316,6 +316,16 @@  struct queue_limits {
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
 	unsigned char		raid_partial_stripes_expensive;
+	/*
+	 * Whether or not the block driver preserves the order of write
+	 * requests. Set by the block driver.
+	 */
+	bool			driver_preserves_write_order;
+	/*
+	 * Whether or not zone write locking should be used. Set by
+	 * disk_set_zoned().
+	 */
+	bool			use_zone_write_lock;
 	enum blk_zoned_model	zoned;
 
 	/*