[v3,1/6] block: Introduce the flag REQ_NO_ZONE_WRITE_LOCK

Message ID 20230726005742.303865-2-bvanassche@acm.org
State New, archived
Series Improve the performance of F2FS on zoned UFS

Commit Message

Bart Van Assche July 26, 2023, 12:57 a.m. UTC
Not all software that supports zoned storage allocates and submits zoned
writes in LBA order per zone. Introduce the REQ_NO_ZONE_WRITE_LOCK flag so
that submitters of zoned writes can indicate that their writes are
allocated and submitted in LBA order per zone.
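
For illustration, a minimal sketch (not part of this patch) of how a
submitter that already writes sequentially per zone could set this flag;
the helper function and its call site are hypothetical:

    #include <linux/bio.h>
    #include <linux/blkdev.h>

    /*
     * Hypothetical helper. The caller guarantees that writes to each
     * zone are allocated and submitted in LBA order.
     */
    static void submit_seq_zoned_write(struct block_device *bdev,
                                       struct page *page, sector_t sector)
    {
            /* REQ_OP_WRITE plus the flag introduced by this patch. */
            struct bio *bio = bio_alloc(bdev, 1,
                            REQ_OP_WRITE | REQ_NO_ZONE_WRITE_LOCK,
                            GFP_NOIO);

            bio->bi_iter.bi_sector = sector;
            __bio_add_page(bio, page, PAGE_SIZE, 0);
            submit_bio(bio);
    }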

Cc: Christoph Hellwig <hch@lst.de>
Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/blk-flush.c         | 3 ++-
 include/linux/blk_types.h | 8 ++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

Comments

Damien Le Moal July 26, 2023, 8:37 a.m. UTC | #1
On 7/26/23 09:57, Bart Van Assche wrote:
> Not all software that supports zoned storage allocates and submits zoned
> writes in LBA order per zone. Introduce the REQ_NO_ZONE_WRITE_LOCK flag so
> that submitters of zoned writes can indicate that their writes are
> allocated and submitted in LBA order per zone.

That is absolutely NOT true. *ALL* in-kernel and application software that
supports zoned block devices issues writes sequentially. Otherwise, it is
considered buggy. The justification for zone write locking is to preserve
sequential write streams in the presence of storage adapters that do not
preserve command ordering, or of low-level drivers that requeue write
commands. If both of these conditions are false (e.g. UFS), with the help of
proper SCSI requeue handling, zone write locking can be disabled.
REQ_NO_ZONE_WRITE_LOCK indicates that a write request does not need zone
write locking when the underlying device signals that it can preserve
command ordering. So we also need a queue flag for that...
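
For illustration, a sketch of the intended semantics (hypothetical code:
QUEUE_FLAG_NO_ZONE_WRITE_LOCK is introduced by a separate patch in this
series, and blk_rq_is_seq_zoned_write() is an assumed helper). Zone write
locking would only be skipped when both the submitter and the device opt in:

    #include <linux/blk-mq.h>

    static bool blk_rq_needs_zone_write_lock(struct request *rq)
    {
            /* Only writes to sequential zones ever need the lock. */
            if (!blk_rq_is_seq_zoned_write(rq))     /* assumed helper */
                    return false;

            /*
             * Skip the lock only if the submitter promises in-order
             * submission per zone (request flag) and the device and
             * driver preserve command ordering (queue flag).
             */
            if ((rq->cmd_flags & REQ_NO_ZONE_WRITE_LOCK) &&
                test_bit(QUEUE_FLAG_NO_ZONE_WRITE_LOCK,
                         &rq->q->queue_flags))
                    return false;

            return true;
    }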

> 
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> Cc: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  block/blk-flush.c         | 3 ++-
>  include/linux/blk_types.h | 8 ++++++++
>  2 files changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/block/blk-flush.c b/block/blk-flush.c
> index e73dc22d05c1..038350ed7cce 100644
> --- a/block/blk-flush.c
> +++ b/block/blk-flush.c
> @@ -336,7 +336,8 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
>  		flush_rq->internal_tag = first_rq->internal_tag;
>  
>  	flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
> -	flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK);
> +	flush_rq->cmd_flags |= flags &
> +		(REQ_FAILFAST_MASK | REQ_NO_ZONE_WRITE_LOCK | REQ_DRV);

Why? I do not see why this change is necessary. If it is, the commit message
should mention it and this change should probably be its own patch.

>  	flush_rq->rq_flags |= RQF_FLUSH_SEQ;
>  	flush_rq->end_io = flush_end_io;
>  	/*
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 0bad62cca3d0..68f7934d8fa6 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -416,6 +416,12 @@ enum req_flag_bits {
>  	__REQ_PREFLUSH,		/* request for cache flush */
>  	__REQ_RAHEAD,		/* read ahead, can fail anytime */
>  	__REQ_BACKGROUND,	/* background IO */
> +	/*
> +	 * Do not use zone write locking. Setting this flag is only safe if
> +	 * the request submitter allocates and submits requests in LBA order per
> +	 * zone.

...if the request submitter submits write requests sequentially per zone.

> +	 */
> +	__REQ_NO_ZONE_WRITE_LOCK,
>  	__REQ_NOWAIT,           /* Don't wait if request will block */
>  	__REQ_POLLED,		/* caller polls for completion using bio_poll */
>  	__REQ_ALLOC_CACHE,	/* allocate IO from cache if available */
> @@ -448,6 +454,8 @@ enum req_flag_bits {
>  #define REQ_PREFLUSH	(__force blk_opf_t)(1ULL << __REQ_PREFLUSH)
>  #define REQ_RAHEAD	(__force blk_opf_t)(1ULL << __REQ_RAHEAD)
>  #define REQ_BACKGROUND	(__force blk_opf_t)(1ULL << __REQ_BACKGROUND)
> +#define REQ_NO_ZONE_WRITE_LOCK	\
> +			(__force blk_opf_t)(1ULL << __REQ_NO_ZONE_WRITE_LOCK)
>  #define REQ_NOWAIT	(__force blk_opf_t)(1ULL << __REQ_NOWAIT)
>  #define REQ_POLLED	(__force blk_opf_t)(1ULL << __REQ_POLLED)
>  #define REQ_ALLOC_CACHE	(__force blk_opf_t)(1ULL << __REQ_ALLOC_CACHE)
Bart Van Assche July 26, 2023, 2:58 p.m. UTC | #2
On 7/26/23 01:37, Damien Le Moal wrote:
> REQ_NO_ZONE_WRITE_LOCK indicates that a write request does not need zone
> write locking when the underlying device signals that it can preserve
> command ordering. So we also need a queue flag for that...

My apologies: I accidentally left out the patch "block: Introduce the 
flag QUEUE_FLAG_NO_ZONE_WRITE_LOCK" from this series. I will repost this 
series.

Bart.

Patch

diff --git a/block/blk-flush.c b/block/blk-flush.c
index e73dc22d05c1..038350ed7cce 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -336,7 +336,8 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
 		flush_rq->internal_tag = first_rq->internal_tag;
 
 	flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
-	flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK);
+	flush_rq->cmd_flags |= flags &
+		(REQ_FAILFAST_MASK | REQ_NO_ZONE_WRITE_LOCK | REQ_DRV);
 	flush_rq->rq_flags |= RQF_FLUSH_SEQ;
 	flush_rq->end_io = flush_end_io;
 	/*
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0bad62cca3d0..68f7934d8fa6 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -416,6 +416,12 @@ enum req_flag_bits {
 	__REQ_PREFLUSH,		/* request for cache flush */
 	__REQ_RAHEAD,		/* read ahead, can fail anytime */
 	__REQ_BACKGROUND,	/* background IO */
+	/*
+	 * Do not use zone write locking. Setting this flag is only safe if
+	 * the request submitter allocates and submits requests in LBA order per
+	 * zone.
+	 */
+	__REQ_NO_ZONE_WRITE_LOCK,
 	__REQ_NOWAIT,           /* Don't wait if request will block */
 	__REQ_POLLED,		/* caller polls for completion using bio_poll */
 	__REQ_ALLOC_CACHE,	/* allocate IO from cache if available */
@@ -448,6 +454,8 @@ enum req_flag_bits {
 #define REQ_PREFLUSH	(__force blk_opf_t)(1ULL << __REQ_PREFLUSH)
 #define REQ_RAHEAD	(__force blk_opf_t)(1ULL << __REQ_RAHEAD)
 #define REQ_BACKGROUND	(__force blk_opf_t)(1ULL << __REQ_BACKGROUND)
+#define REQ_NO_ZONE_WRITE_LOCK	\
+			(__force blk_opf_t)(1ULL << __REQ_NO_ZONE_WRITE_LOCK)
 #define REQ_NOWAIT	(__force blk_opf_t)(1ULL << __REQ_NOWAIT)
 #define REQ_POLLED	(__force blk_opf_t)(1ULL << __REQ_POLLED)
 #define REQ_ALLOC_CACHE	(__force blk_opf_t)(1ULL << __REQ_ALLOC_CACHE)