
[v4,2/7] block: Introduce the flag REQ_NO_ZONE_WRITE_LOCK

Message ID 20230726193440.1655149-3-bvanassche@acm.org (mailing list archive)
State New, archived
Series Improve the performance of F2FS on zoned UFS

Commit Message

Bart Van Assche July 26, 2023, 7:34 p.m. UTC
Not all software that supports zoned storage allocates and submits zoned
writes in LBA order per zone. Introduce the REQ_NO_ZONE_WRITE_LOCK flag such
that submitters of zoned writes can indicate that zoned writes are
allocated and submitted in LBA order per zone.
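
For illustration only (not part of this patch; the function names below are
hypothetical), a bio-based submitter that already guarantees sequential
writes per zone could opt out of zone write locking roughly as follows:

#include <linux/bio.h>
#include <linux/blkdev.h>

/*
 * Sketch only. Setting REQ_NO_ZONE_WRITE_LOCK is only safe if the caller
 * guarantees that, within each zone, writes are allocated and submitted in
 * increasing sector order.
 */
static void example_write_end_io(struct bio *bio)
{
	/* Error handling elided for brevity. */
	bio_put(bio);
}

static void example_submit_seq_zoned_write(struct block_device *bdev,
					   struct page *page, sector_t sector)
{
	struct bio *bio = bio_alloc(bdev, 1,
			REQ_OP_WRITE | REQ_NO_ZONE_WRITE_LOCK, GFP_NOIO);

	bio->bi_iter.bi_sector = sector;
	__bio_add_page(bio, page, PAGE_SIZE, 0);
	bio->bi_end_io = example_write_end_io;
	submit_bio(bio);
}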

Introduce the blk_no_zone_write_lock() function to make it easy to test
whether both QUEUE_FLAG_NO_ZONE_WRITE_LOCK and REQ_NO_ZONE_WRITE_LOCK
are set.
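
To illustrate the intended semantics of the new helper (a sketch only, not
code added by this patch), a consumer in the zoned write path could bypass
zone write locking like this:

#include <linux/blk-mq.h>

/*
 * Sketch only: skip the zone write lock when both the queue and the request
 * opt out via the new flags; otherwise keep the existing behavior for
 * sequential zoned writes.
 */
static bool example_needs_zone_write_lock(struct request *rq)
{
	if (blk_no_zone_write_lock(rq))
		return false;

	return blk_rq_is_seq_zoned_write(rq);
}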

Make flush requests inherit the REQ_NO_ZONE_WRITE_LOCK flag from the
request they are derived from.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Damien Le Moal <dlemoal@kernel.org>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/blk-flush.c         |  3 ++-
 include/linux/blk-mq.h    | 11 +++++++++++
 include/linux/blk_types.h |  8 ++++++++
 3 files changed, 21 insertions(+), 1 deletion(-)

Comments

Damien Le Moal July 27, 2023, 12:03 a.m. UTC | #1
On 7/27/23 04:34, Bart Van Assche wrote:
> Not all software that supports zoned storage allocates and submits zoned
> writes in LBA order per zone. Introduce the REQ_NO_ZONE_WRITE_LOCK flag such
> that submitters of zoned writes can indicate that zoned writes are
> allocated and submitted in LBA order per zone.

This really needs more explanation... "Not all software that supports zoned
storage allocates and submits zoned writes in LBA order per zone" is very
misleading. I already commented on this in the last round. All writers to zones
are supposed to write sequentially. But while we can trust in-kernel users
to do that correctly, we cannot trust applications to do the same. Hence this flag.

> Introduce the blk_no_zone_write_lock() function to make it easy to test
> whether both QUEUE_FLAG_NO_ZONE_WRITE_LOCK and REQ_NO_ZONE_WRITE_LOCK
> are set.

As mentioned in patch 1, I think it would be a lot better to squash this patch
with the previous one, since the queue flag and the req flag work together and
are useless alone.

> 
> Make flush requests inherit the REQ_NO_ZONE_WRITE_LOCK flag from the
> request they are derived from.

This really needs to be moved to its own patch. But I still fail to understand
why flush needs to be touched. If flush commands are being subjected to zone
write locking, then that's a bug, I think. Or am I missing something?

> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Damien Le Moal <dlemoal@kernel.org>
> Cc: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  block/blk-flush.c         |  3 ++-
>  include/linux/blk-mq.h    | 11 +++++++++++
>  include/linux/blk_types.h |  8 ++++++++
>  3 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/block/blk-flush.c b/block/blk-flush.c
> index e73dc22d05c1..038350ed7cce 100644
> --- a/block/blk-flush.c
> +++ b/block/blk-flush.c
> @@ -336,7 +336,8 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
>  		flush_rq->internal_tag = first_rq->internal_tag;
>  
>  	flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
> -	flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK);
> +	flush_rq->cmd_flags |= flags &
> +		(REQ_FAILFAST_MASK | REQ_NO_ZONE_WRITE_LOCK | REQ_DRV);

See above. Let's move this to another patch to better explain why this change
is needed.

>  	flush_rq->rq_flags |= RQF_FLUSH_SEQ;
>  	flush_rq->end_io = flush_end_io;
>  	/*
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index 01e8c31db665..dc8ec26ba2d0 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -1170,6 +1170,12 @@ static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
>  		blk_rq_zone_is_seq(rq);
>  }
>  
> +static inline bool blk_no_zone_write_lock(struct request *rq)
> +{
> +	return blk_queue_no_zone_write_lock(rq->q) &&
> +		rq->cmd_flags & REQ_NO_ZONE_WRITE_LOCK;
> +}
> +
>  bool blk_req_needs_zone_write_lock(struct request *rq);
>  bool blk_req_zone_write_trylock(struct request *rq);
>  void __blk_req_zone_write_lock(struct request *rq);
> @@ -1205,6 +1211,11 @@ static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
>  	return false;
>  }
>  
> +static inline bool blk_no_zone_write_lock(struct request *rq)
> +{
> +	return true;
> +}
> +
>  static inline bool blk_req_needs_zone_write_lock(struct request *rq)
>  {
>  	return false;
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 0bad62cca3d0..68f7934d8fa6 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -416,6 +416,12 @@ enum req_flag_bits {
>  	__REQ_PREFLUSH,		/* request for cache flush */
>  	__REQ_RAHEAD,		/* read ahead, can fail anytime */
>  	__REQ_BACKGROUND,	/* background IO */
> +	/*
> +	 * Do not use zone write locking. Setting this flag is only safe if
> +	 * the request submitter allocates and submit requests in LBA order per
> +	 * zone.

"submit requests" -> "submits write requests"
LBA -> sector (as requests use the sector unit, not the LBA unit)

> +	 */
> +	__REQ_NO_ZONE_WRITE_LOCK,
>  	__REQ_NOWAIT,           /* Don't wait if request will block */
>  	__REQ_POLLED,		/* caller polls for completion using bio_poll */
>  	__REQ_ALLOC_CACHE,	/* allocate IO from cache if available */
> @@ -448,6 +454,8 @@ enum req_flag_bits {
>  #define REQ_PREFLUSH	(__force blk_opf_t)(1ULL << __REQ_PREFLUSH)
>  #define REQ_RAHEAD	(__force blk_opf_t)(1ULL << __REQ_RAHEAD)
>  #define REQ_BACKGROUND	(__force blk_opf_t)(1ULL << __REQ_BACKGROUND)
> +#define REQ_NO_ZONE_WRITE_LOCK	\
> +			(__force blk_opf_t)(1ULL << __REQ_NO_ZONE_WRITE_LOCK)
>  #define REQ_NOWAIT	(__force blk_opf_t)(1ULL << __REQ_NOWAIT)
>  #define REQ_POLLED	(__force blk_opf_t)(1ULL << __REQ_POLLED)
>  #define REQ_ALLOC_CACHE	(__force blk_opf_t)(1ULL << __REQ_ALLOC_CACHE)

Patch

diff --git a/block/blk-flush.c b/block/blk-flush.c
index e73dc22d05c1..038350ed7cce 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -336,7 +336,8 @@  static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
 		flush_rq->internal_tag = first_rq->internal_tag;
 
 	flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
-	flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK);
+	flush_rq->cmd_flags |= flags &
+		(REQ_FAILFAST_MASK | REQ_NO_ZONE_WRITE_LOCK | REQ_DRV);
 	flush_rq->rq_flags |= RQF_FLUSH_SEQ;
 	flush_rq->end_io = flush_end_io;
 	/*
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 01e8c31db665..dc8ec26ba2d0 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -1170,6 +1170,12 @@  static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
 		blk_rq_zone_is_seq(rq);
 }
 
+static inline bool blk_no_zone_write_lock(struct request *rq)
+{
+	return blk_queue_no_zone_write_lock(rq->q) &&
+		rq->cmd_flags & REQ_NO_ZONE_WRITE_LOCK;
+}
+
 bool blk_req_needs_zone_write_lock(struct request *rq);
 bool blk_req_zone_write_trylock(struct request *rq);
 void __blk_req_zone_write_lock(struct request *rq);
@@ -1205,6 +1211,11 @@  static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
 	return false;
 }
 
+static inline bool blk_no_zone_write_lock(struct request *rq)
+{
+	return true;
+}
+
 static inline bool blk_req_needs_zone_write_lock(struct request *rq)
 {
 	return false;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0bad62cca3d0..68f7934d8fa6 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -416,6 +416,12 @@  enum req_flag_bits {
 	__REQ_PREFLUSH,		/* request for cache flush */
 	__REQ_RAHEAD,		/* read ahead, can fail anytime */
 	__REQ_BACKGROUND,	/* background IO */
+	/*
+	 * Do not use zone write locking. Setting this flag is only safe if
+	 * the request submitter allocates and submit requests in LBA order per
+	 * zone.
+	 */
+	__REQ_NO_ZONE_WRITE_LOCK,
 	__REQ_NOWAIT,           /* Don't wait if request will block */
 	__REQ_POLLED,		/* caller polls for completion using bio_poll */
 	__REQ_ALLOC_CACHE,	/* allocate IO from cache if available */
@@ -448,6 +454,8 @@  enum req_flag_bits {
 #define REQ_PREFLUSH	(__force blk_opf_t)(1ULL << __REQ_PREFLUSH)
 #define REQ_RAHEAD	(__force blk_opf_t)(1ULL << __REQ_RAHEAD)
 #define REQ_BACKGROUND	(__force blk_opf_t)(1ULL << __REQ_BACKGROUND)
+#define REQ_NO_ZONE_WRITE_LOCK	\
+			(__force blk_opf_t)(1ULL << __REQ_NO_ZONE_WRITE_LOCK)
 #define REQ_NOWAIT	(__force blk_opf_t)(1ULL << __REQ_NOWAIT)
 #define REQ_POLLED	(__force blk_opf_t)(1ULL << __REQ_POLLED)
 #define REQ_ALLOC_CACHE	(__force blk_opf_t)(1ULL << __REQ_ALLOC_CACHE)