diff mbox series

[v2,2/4] blk-flush: split queues for preflush and postflush requests

Message ID 20230725130102.3030032-3-chengming.zhou@linux.dev (mailing list archive)
State New, archived
Headers show
Series blk-flush: optimize non-postflush requests | expand

Commit Message

Chengming Zhou July 25, 2023, 1:01 p.m. UTC
From: Chengming Zhou <zhouchengming@bytedance.com>

We don't need to replace rq->end_io to make it return to the flush state
machine if it doesn't need post-flush.

The previous approach [1] we take is to move blk_rq_init_flush() to
REQ_FSEQ_DATA stage and only replace rq->end_io if it needs post-flush.
Otherwise, it can be completed like a normal request and doesn't need to
return to the flush state machine.

But this way adds more magic to the already way-too-magic flush sequence.
Christoph suggested that we can kill the flush sequence entirely, and
just split the flush_queue into a preflush and a postflush queue.

The reason we need separate queues for preflush and postflush requests
is that flush_end_io() must handle them differently: it ends postflush
requests, but requeues preflush requests for dispatch.

This patch is just in preparation for the following patches, no
functional changes intended.

[1] https://lore.kernel.org/lkml/20230710133308.GB23157@lst.de/

Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
 block/blk-flush.c | 50 +++++++++++++++++++++++++++++++++++------------
 block/blk.h       |  3 ++-
 2 files changed, 40 insertions(+), 13 deletions(-)

Comments

Christoph Hellwig July 31, 2023, 6:15 a.m. UTC | #1
> -	list_for_each_entry_safe(rq, n, running, queuelist) {
> +	list_for_each_entry_safe(rq, n, preflush_running, queuelist) {
> +		unsigned int seq = blk_flush_cur_seq(rq);
> +
> +		BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
> +		blk_flush_complete_seq(rq, fq, seq, error);
> +	}
> +
> +	list_for_each_entry_safe(rq, n, postflush_running, queuelist) {
>  		unsigned int seq = blk_flush_cur_seq(rq);
>  
>  		BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);

Shouldn't the BUG_ON be split into one that only checks for PREFLUSH and
one only for POSTFLUSH?

> +	if (fq->flush_pending_idx != fq->flush_running_idx)
> +		return;
> +
> +	if (!list_empty(preflush_pending))
> +		first_rq = list_first_entry(preflush_pending, struct request, queuelist);
> +	else if (!list_empty(postflush_pending))
> +		first_rq = list_first_entry(postflush_pending, struct request, queuelist);
> +	else
>  		return;

Hmm, I don't think both lists can be empty here?

I'd simplify this and avoid the overly long lines as:

	first_rq = list_first_entry_or_null(preflush_pending, struct request,
					    queuelist);
	if (!first_rq)
		first_rq = list_first_entry_or_null(postflush_pending,
						    struct request, queuelist);
Chengming Zhou July 31, 2023, 2:15 p.m. UTC | #2
On 2023/7/31 14:15, Christoph Hellwig wrote:
>> -	list_for_each_entry_safe(rq, n, running, queuelist) {
>> +	list_for_each_entry_safe(rq, n, preflush_running, queuelist) {
>> +		unsigned int seq = blk_flush_cur_seq(rq);
>> +
>> +		BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
>> +		blk_flush_complete_seq(rq, fq, seq, error);
>> +	}
>> +
>> +	list_for_each_entry_safe(rq, n, postflush_running, queuelist) {
>>  		unsigned int seq = blk_flush_cur_seq(rq);
>>  
>>  		BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
> 
> Shouldn't the BUG_ON be split into one that only checks for PREFLUSH and
> one only for POSTFLUSH?

Ah yes, will fix it.

> 
>> +	if (fq->flush_pending_idx != fq->flush_running_idx)
>> +		return;
>> +
>> +	if (!list_empty(preflush_pending))
>> +		first_rq = list_first_entry(preflush_pending, struct request, queuelist);
>> +	else if (!list_empty(postflush_pending))
>> +		first_rq = list_first_entry(postflush_pending, struct request, queuelist);
>> +	else
>>  		return;
> 
> Hmm, I don't think both lists can be empty here?

Yes, both lists cannot be empty here, because we check
fq->flush_pending_since != 0 beforehand.

> 
> I'd simplify this and avoid the overly long lines as:
> 
> 	first_rq = list_first_entry_or_null(preflush_pending, struct request,
> 					    queuelist);
> 	if (!first_rq)
> 		first_rq = list_first_entry_or_null(postflush_pending,
> 						    struct request, queuelist);
> 

This is better, will change it.

Thanks.
diff mbox series

Patch

diff --git a/block/blk-flush.c b/block/blk-flush.c
index fc25228f7bb1..4993c3c3b502 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -165,7 +165,7 @@  static void blk_flush_complete_seq(struct request *rq,
 				   unsigned int seq, blk_status_t error)
 {
 	struct request_queue *q = rq->q;
-	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
+	struct list_head *pending;
 
 	BUG_ON(rq->flush.seq & seq);
 	rq->flush.seq |= seq;
@@ -177,9 +177,9 @@  static void blk_flush_complete_seq(struct request *rq,
 
 	switch (seq) {
 	case REQ_FSEQ_PREFLUSH:
-	case REQ_FSEQ_POSTFLUSH:
+		pending = &fq->preflush_queue[fq->flush_pending_idx];
 		/* queue for flush */
-		if (list_empty(pending))
+		if (!fq->flush_pending_since)
 			fq->flush_pending_since = jiffies;
 		list_move_tail(&rq->queuelist, pending);
 		break;
@@ -192,6 +192,14 @@  static void blk_flush_complete_seq(struct request *rq,
 		blk_mq_kick_requeue_list(q);
 		break;
 
+	case REQ_FSEQ_POSTFLUSH:
+		pending = &fq->postflush_queue[fq->flush_pending_idx];
+		/* queue for flush */
+		if (!fq->flush_pending_since)
+			fq->flush_pending_since = jiffies;
+		list_move_tail(&rq->queuelist, pending);
+		break;
+
 	case REQ_FSEQ_DONE:
 		/*
 		 * @rq was previously adjusted by blk_insert_flush() for
@@ -215,7 +223,7 @@  static enum rq_end_io_ret flush_end_io(struct request *flush_rq,
 				       blk_status_t error)
 {
 	struct request_queue *q = flush_rq->q;
-	struct list_head *running;
+	struct list_head *preflush_running, *postflush_running;
 	struct request *rq, *n;
 	unsigned long flags = 0;
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
@@ -248,14 +256,22 @@  static enum rq_end_io_ret flush_end_io(struct request *flush_rq,
 		flush_rq->internal_tag = BLK_MQ_NO_TAG;
 	}
 
-	running = &fq->flush_queue[fq->flush_running_idx];
+	preflush_running = &fq->preflush_queue[fq->flush_running_idx];
+	postflush_running = &fq->postflush_queue[fq->flush_running_idx];
 	BUG_ON(fq->flush_pending_idx == fq->flush_running_idx);
 
 	/* account completion of the flush request */
 	fq->flush_running_idx ^= 1;
 
 	/* and push the waiting requests to the next stage */
-	list_for_each_entry_safe(rq, n, running, queuelist) {
+	list_for_each_entry_safe(rq, n, preflush_running, queuelist) {
+		unsigned int seq = blk_flush_cur_seq(rq);
+
+		BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
+		blk_flush_complete_seq(rq, fq, seq, error);
+	}
+
+	list_for_each_entry_safe(rq, n, postflush_running, queuelist) {
 		unsigned int seq = blk_flush_cur_seq(rq);
 
 		BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
@@ -285,13 +301,20 @@  bool is_flush_rq(struct request *rq)
  */
 static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 {
-	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
-	struct request *first_rq =
-		list_first_entry(pending, struct request, queuelist);
+	struct list_head *preflush_pending = &fq->preflush_queue[fq->flush_pending_idx];
+	struct list_head *postflush_pending = &fq->postflush_queue[fq->flush_pending_idx];
+	struct request *first_rq = NULL;
 	struct request *flush_rq = fq->flush_rq;
 
 	/* C1 described at the top of this file */
-	if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending))
+	if (fq->flush_pending_idx != fq->flush_running_idx)
+		return;
+
+	if (!list_empty(preflush_pending))
+		first_rq = list_first_entry(preflush_pending, struct request, queuelist);
+	else if (!list_empty(postflush_pending))
+		first_rq = list_first_entry(postflush_pending, struct request, queuelist);
+	else
 		return;
 
 	/* C2 and C3 */
@@ -305,6 +328,7 @@  static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 	 * different from running_idx, which means flush is in flight.
 	 */
 	fq->flush_pending_idx ^= 1;
+	fq->flush_pending_since = 0;
 
 	blk_rq_init(q, flush_rq);
 
@@ -496,8 +520,10 @@  struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
 	if (!fq->flush_rq)
 		goto fail_rq;
 
-	INIT_LIST_HEAD(&fq->flush_queue[0]);
-	INIT_LIST_HEAD(&fq->flush_queue[1]);
+	INIT_LIST_HEAD(&fq->preflush_queue[0]);
+	INIT_LIST_HEAD(&fq->preflush_queue[1]);
+	INIT_LIST_HEAD(&fq->postflush_queue[0]);
+	INIT_LIST_HEAD(&fq->postflush_queue[1]);
 
 	return fq;
 
diff --git a/block/blk.h b/block/blk.h
index 686712e13835..1a11675152ac 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -20,7 +20,8 @@  struct blk_flush_queue {
 	unsigned int		flush_running_idx:1;
 	blk_status_t 		rq_status;
 	unsigned long		flush_pending_since;
-	struct list_head	flush_queue[2];
+	struct list_head	preflush_queue[2];
+	struct list_head	postflush_queue[2];
 	unsigned long		flush_data_in_flight;
 	struct request		*flush_rq;
 };