diff mbox series

[v5,1/7] block: Introduce the flag QUEUE_FLAG_NO_ZONE_WRITE_LOCK

Message ID 20230731221458.437440-2-bvanassche@acm.org (mailing list archive)
State New, archived
Headers show
Series Improve the performance for zoned UFS devices | expand

Commit Message

Bart Van Assche July 31, 2023, 10:14 p.m. UTC
Writes in sequential write required zones must happen at the write
pointer. Even if the submitter of the write commands (e.g. a filesystem)
submits writes for sequential write required zones in order, the block
layer or the storage controller may reorder these write commands.

The zone locking mechanism in the mq-deadline I/O scheduler serializes
write commands for sequential zones. Some but not all storage controllers
require this serialization. Introduce a new request queue flag to allow
block drivers to indicate that they preserve the order of write commands
and thus do not require serialization of writes per zone.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Damien Le Moal <dlemoal@kernel.org>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 include/linux/blkdev.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

Comments

Damien Le Moal Aug. 2, 2023, 9:12 a.m. UTC | #1
On 8/1/23 07:14, Bart Van Assche wrote:
> Writes in sequential write required zones must happen at the write
> pointer. Even if the submitter of the write commands (e.g. a filesystem)
> submits writes for sequential write required zones in order, the block
> layer or the storage controller may reorder these write commands.
> 
> The zone locking mechanism in the mq-deadline I/O scheduler serializes
> write commands for sequential zones. Some but not all storage controllers
> require this serialization. Introduce a new request queue flag to allow
> block drivers to indicate that they preserve the order of write commands
> and thus do not require serialization of writes per zone.
> 
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Damien Le Moal <dlemoal@kernel.org>
> Cc: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>

Looks OK.
Very minor nit below.

Reviewed-by: Damien Le Moal <dlemoal@kernel.org>

> ---
>  include/linux/blkdev.h | 10 ++++++++++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 2f5371b8482c..de5e05cc34fa 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -534,6 +534,11 @@ struct request_queue {
>  #define QUEUE_FLAG_NONROT	6	/* non-rotational device (SSD) */
>  #define QUEUE_FLAG_VIRT		QUEUE_FLAG_NONROT /* paravirt device */
>  #define QUEUE_FLAG_IO_STAT	7	/* do disk/partitions IO accounting */
> +/*
> + * Do not serialize sequential writes (REQ_OP_WRITE, REQ_OP_WRITE_ZEROES) sent
> + * to a sequential write required zone (BLK_ZONE_TYPE_SEQWRITE_REQ).
> + */

I would be very explicit here, for this to be clear to people who are not
familiar with zone device write operation handling. Something like:

/*
 * The device supports not using the zone write locking mechanism to serialize
 * write operations (REQ_OP_WRITE, REQ_OP_WRITE_ZEROES) issued to a sequential
 * write required zone (BLK_ZONE_TYPE_SEQWRITE_REQ).
 */

> +#define QUEUE_FLAG_NO_ZONE_WRITE_LOCK 8
>  #define QUEUE_FLAG_NOXMERGES	9	/* No extended merges */
>  #define QUEUE_FLAG_ADD_RANDOM	10	/* Contributes to random pool */
>  #define QUEUE_FLAG_SYNCHRONOUS	11	/* always completes in submit context */
> @@ -597,6 +602,11 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
>  #define blk_queue_skip_tagset_quiesce(q) \
>  	test_bit(QUEUE_FLAG_SKIP_TAGSET_QUIESCE, &(q)->queue_flags)
>  
> +static inline bool blk_queue_no_zone_write_lock(struct request_queue *q)
> +{
> +	return test_bit(QUEUE_FLAG_NO_ZONE_WRITE_LOCK, &q->queue_flags);
> +}
> +
>  extern void blk_set_pm_only(struct request_queue *q);
>  extern void blk_clear_pm_only(struct request_queue *q);
>
diff mbox series

Patch

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2f5371b8482c..de5e05cc34fa 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -534,6 +534,11 @@  struct request_queue {
 #define QUEUE_FLAG_NONROT	6	/* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT		QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT	7	/* do disk/partitions IO accounting */
+/*
+ * Do not serialize sequential writes (REQ_OP_WRITE, REQ_OP_WRITE_ZEROES) sent
+ * to a sequential write required zone (BLK_ZONE_TYPE_SEQWRITE_REQ).
+ */
+#define QUEUE_FLAG_NO_ZONE_WRITE_LOCK 8
 #define QUEUE_FLAG_NOXMERGES	9	/* No extended merges */
 #define QUEUE_FLAG_ADD_RANDOM	10	/* Contributes to random pool */
 #define QUEUE_FLAG_SYNCHRONOUS	11	/* always completes in submit context */
@@ -597,6 +602,11 @@  bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_skip_tagset_quiesce(q) \
 	test_bit(QUEUE_FLAG_SKIP_TAGSET_QUIESCE, &(q)->queue_flags)
 
+static inline bool blk_queue_no_zone_write_lock(struct request_queue *q)
+{
+	return test_bit(QUEUE_FLAG_NO_ZONE_WRITE_LOCK, &q->queue_flags);
+}
+
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);