
[v2,08/13] blk-mq: simplify flush check in blk_mq_dispatch_rq_list

Message ID 20230104142259.2673013-9-shikemeng@huaweicloud.com (mailing list archive)
State New, archived
Series A few bugfix and cleanup patches for blk-mq

Commit Message

Kemeng Shi Jan. 4, 2023, 2:22 p.m. UTC
For the busy errors BLK_STS_RESOURCE/BLK_STS_DEV_RESOURCE, the request
is always added back to the list, so needs_resource cannot be true and
ret cannot be BLK_STS_DEV_RESOURCE when the list is empty. Remove these
dead checks.
If the list is empty, we only need to send the extra flush when the
error happened on the last request in the list, and that error is kept
in ret. So send the extra flush when ret is not BLK_STS_OK rather than
when errors is non-zero; this avoids an unnecessary flush when the
error happened on a request in the middle of the list.

Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
---
 block/blk-mq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

Comments

Christoph Hellwig Jan. 8, 2023, 6:06 p.m. UTC | #1
I think we need to come up with a clear rule on when commit_rqs
needs to be called, and follow that.  In this case I'd be confused
if there was any case where we'd need to call it when the list is empty.
Kemeng Shi Jan. 9, 2023, 2:27 a.m. UTC | #2
Hi Christoph, thank you so much for the review.
on 1/9/2023 2:06 AM, Christoph Hellwig wrote:
> I think we need to come up with a clear rule on when commit_rqs
> needs to be called, and follow that.  In this case I'd be confused
> if there was any case where we'd need to call it when the list is empty.
> 
After we queue request(s) to a driver queue, we need to notify the driver
that no more requests are coming for that queue; otherwise the driver may
keep waiting for the last request to arrive and an IO hang could happen.
Normally we give this notification by setting .last in the struct
blk_mq_queue_data that carries the last request .rq. The extra commit is
only needed when this .last information is lost (see the comment on
commit_rqs in struct blk_mq_ops).

The information is lost when an error happens while sending the request
that has .last set, or when an error happens in the middle of the list so
that the request with .last set is never sent at all.
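
To illustrate, here is a minimal driver-side sketch of how .last and
commit_rqs pair up (a made-up driver; my_ring_add() and
my_ring_doorbell() are hypothetical helpers, not real kernel APIs):

	static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
					const struct blk_mq_queue_data *bd)
	{
		struct my_ring *ring = hctx->driver_data;

		/* Busy: blk-mq re-adds the request, doorbell not rung. */
		if (!my_ring_add(ring, bd->rq))
			return BLK_STS_RESOURCE;
		/* Kick the hardware only once the whole batch is queued. */
		if (bd->last)
			my_ring_doorbell(ring);
		return BLK_STS_OK;
	}

	/* Called by blk-mq when the .last hint was lost or never sent. */
	static void my_commit_rqs(struct blk_mq_hw_ctx *hctx)
	{
		my_ring_doorbell(hctx->driver_data);
	}

If my_queue_rq() fails on the request that carries bd->last, the
doorbell is never rung; that is exactly the case the extra commit_rqs
call covers.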
Christoph Hellwig Jan. 10, 2023, 8:09 a.m. UTC | #3
On Mon, Jan 09, 2023 at 10:27:33AM +0800, Kemeng Shi wrote:
> After we queue request(s) to a driver queue, we need to notify the driver
> that no more requests are coming for that queue; otherwise the driver may
> keep waiting for the last request to arrive and an IO hang could happen.

Yes.

> Normally we give this notification by setting .last in the struct
> blk_mq_queue_data that carries the last request .rq. The extra commit is
> only needed when this .last information is lost (see the comment on
> commit_rqs in struct blk_mq_ops).
> 
> The information is lost when an error happens while sending the request
> that has .last set, or when an error happens in the middle of the list so
> that the request with .last set is never sent at all.

Yes. So the rule is:

 1) did not queue everything initially scheduled to queue

OR

 2) the last attempt to queue a request failed

I think we need to find a way to clearly document that, and then
make all callers match it.

For most this becomes a

	if (ret || !list_empty(list))

or even just

	if (ret)

as an error is often the only way to break out of the submission
loop.

I wonder if we need to split the queued clearing from blk_mq_commit_rqs
and just clear it in the existing callers, so that we can use that
helper for all commits, nicely hiding the ->commit_rqs presence
check, and then move that call to where it is needed directly.  Something
like this untested patch (which needs to be split up), which also
makes sure we trace these calls consistently:

---
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c5cf0dbca1db8d..436ca56a0b7172 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2001,6 +2001,15 @@ static void blk_mq_release_budgets(struct request_queue *q,
 	}
 }
 
+static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int queued,
+			      bool from_schedule)
+{
+	if (queued && hctx->queue->mq_ops->commit_rqs) {
+		trace_block_unplug(hctx->queue, queued, !from_schedule);
+		hctx->queue->mq_ops->commit_rqs(hctx);
+	}
+}
+
 /*
  * Returns true if we did some work AND can potentially do more.
  */
@@ -2082,12 +2091,9 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
 	if (!list_empty(&zone_list))
 		list_splice_tail_init(&zone_list, list);
 
-	/* If we didn't flush the entire list, we could have told the driver
-	 * there was more coming, but that turned out to be a lie.
-	 */
-	if ((!list_empty(list) || errors || needs_resource ||
-	     ret == BLK_STS_DEV_RESOURCE) && q->mq_ops->commit_rqs && queued)
-		q->mq_ops->commit_rqs(hctx);
+	if (!list_empty(list) || ret)
+		blk_mq_commit_rqs(hctx, queued, false);
+
 	/*
 	 * Any items that need requeuing? Stuff them into hctx->dispatch,
 	 * that is where we will continue on next queue run.
@@ -2548,16 +2554,6 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 	spin_unlock(&ctx->lock);
 }
 
-static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int *queued,
-			      bool from_schedule)
-{
-	if (hctx->queue->mq_ops->commit_rqs) {
-		trace_block_unplug(hctx->queue, *queued, !from_schedule);
-		hctx->queue->mq_ops->commit_rqs(hctx);
-	}
-	*queued = 0;
-}
-
 static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
 		unsigned int nr_segs)
 {
@@ -2684,17 +2680,17 @@ static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
 static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule)
 {
 	struct blk_mq_hw_ctx *hctx = NULL;
+	blk_status_t ret = BLK_STS_OK;
 	struct request *rq;
 	int queued = 0;
-	int errors = 0;
 
 	while ((rq = rq_list_pop(&plug->mq_list))) {
 		bool last = rq_list_empty(plug->mq_list);
-		blk_status_t ret;
 
 		if (hctx != rq->mq_hctx) {
 			if (hctx)
-				blk_mq_commit_rqs(hctx, &queued, from_schedule);
+				blk_mq_commit_rqs(hctx, queued, from_schedule);
+			queued = 0;
 			hctx = rq->mq_hctx;
 		}
 
@@ -2706,21 +2702,15 @@ static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule)
 		case BLK_STS_RESOURCE:
 		case BLK_STS_DEV_RESOURCE:
 			blk_mq_request_bypass_insert(rq, false, true);
-			blk_mq_commit_rqs(hctx, &queued, from_schedule);
-			return;
+			goto out;
 		default:
 			blk_mq_end_request(rq, ret);
-			errors++;
 			break;
 		}
 	}
-
-	/*
-	 * If we didn't flush the entire list, we could have told the driver
-	 * there was more coming, but that turned out to be a lie.
-	 */
-	if (errors)
-		blk_mq_commit_rqs(hctx, &queued, from_schedule);
+out:
+	if (ret)
+		blk_mq_commit_rqs(hctx, queued, from_schedule);
 }
 
 static void __blk_mq_flush_plug_list(struct request_queue *q,
@@ -2804,37 +2794,33 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
 		struct list_head *list)
 {
+	blk_status_t ret = BLK_STS_OK;
+	struct request *rq;
 	int queued = 0;
-	int errors = 0;
-
-	while (!list_empty(list)) {
-		blk_status_t ret;
-		struct request *rq = list_first_entry(list, struct request,
-				queuelist);
+	bool last;
 
+	while ((rq = list_first_entry_or_null(list, struct request,
+			queuelist))) {
 		list_del_init(&rq->queuelist);
-		ret = blk_mq_request_issue_directly(rq, list_empty(list));
-		if (ret != BLK_STS_OK) {
-			errors++;
-			if (ret == BLK_STS_RESOURCE ||
-					ret == BLK_STS_DEV_RESOURCE) {
-				blk_mq_request_bypass_insert(rq, false,
-							list_empty(list));
-				break;
-			}
-			blk_mq_end_request(rq, ret);
-		} else
+		last = list_empty(list);
+
+		ret = blk_mq_request_issue_directly(rq, last);
+		switch (ret) {
+		case BLK_STS_OK:
 			queued++;
+			break;
+		case BLK_STS_RESOURCE:
+		case BLK_STS_DEV_RESOURCE:
+			blk_mq_request_bypass_insert(rq, false, last);
+			goto out;
+		default:
+			blk_mq_end_request(rq, ret);
+			break;
+		}
 	}
-
-	/*
-	 * If we didn't flush the entire list, we could have told
-	 * the driver there was more coming, but that turned out to
-	 * be a lie.
-	 */
-	if ((!list_empty(list) || errors) &&
-	     hctx->queue->mq_ops->commit_rqs && queued)
-		hctx->queue->mq_ops->commit_rqs(hctx);
+out:
+	if (ret)
+		blk_mq_commit_rqs(hctx, queued, false);
 }
 
 static bool blk_mq_attempt_bio_merge(struct request_queue *q,
Kemeng Shi Jan. 10, 2023, 12:35 p.m. UTC | #4
on 1/10/2023 4:09 PM, Christoph Hellwig wrote:
> On Mon, Jan 09, 2023 at 10:27:33AM +0800, Kemeng Shi wrote:
>> After we queue request(s) to a driver queue, we need to notify the driver
>> that no more requests are coming for that queue; otherwise the driver may
>> keep waiting for the last request to arrive and an IO hang could happen.
> 
> Yes.
> 
>> Normally we give this notification by setting .last in the struct
>> blk_mq_queue_data that carries the last request .rq. The extra commit is
>> only needed when this .last information is lost (see the comment on
>> commit_rqs in struct blk_mq_ops).
>>
>> The information is lost when an error happens while sending the request
>> that has .last set, or when an error happens in the middle of the list so
>> that the request with .last set is never sent at all.
> 
> Yes. So the rule is:
> 
>  1) did not queue everything initially scheduled to queue
> 
> OR
> 
>  2) the last attempt to queue a request failed
> 
> I think we need to find a way to clearly document that, and then
> make all callers match it.
> For most this becomes a
> 
> 	if (ret || !list_empty(list))
> 
> or even just
> 
> 	if (ret)
> 
> as an error is often the only way to break out of the submission
> loop.
> 
> I wonder if we need to split the queued clearing from blk_mq_commit_rqs
> and just clear it in the existing callers, so that we can use that
> helper for all commits, nicely hiding the ->commit_rqs presence
> check, and then move that call to where it is needed directly.  Something
> like this untested patch (which needs to be split up), which also
> makes sure we trace these calls consistently:
Yes, using the helper also makes the queued check consistent. Currently,
most code only calls commit_rqs if any request was queued; the one
exception is blk_mq_plug_issue_direct, which calls commit_rqs without
checking queued.
Besides, we can document the rule above blk_mq_commit_rqs so that any
future caller can notice it and match it.
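For instance, a rough sketch of possible comment wording above the
helper (an illustration, not final text):

	/*
	 * blk_mq_commit_rqs() notifies the driver that no more requests
	 * are coming.  It must be called after queuing requests when we
	 * either did not queue everything initially scheduled to the
	 * hardware queue, or when the last attempt to queue a request
	 * failed -- in both cases the driver either never got, or failed
	 * to handle, a request with bd->last set, so it may still be
	 * waiting for more.
	 */
	static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int queued,
				      bool from_schedule)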
I will send the next version based on the suggested helper.
Thanks.

Patch

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a9e88037550b..c543c14fdb47 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2085,8 +2085,8 @@  bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
 	/* If we didn't flush the entire list, we could have told the driver
 	 * there was more coming, but that turned out to be a lie.
 	 */
-	if ((!list_empty(list) || errors || needs_resource ||
-	     ret == BLK_STS_DEV_RESOURCE) && q->mq_ops->commit_rqs && queued)
+	if ((!list_empty(list) || ret != BLK_STS_OK) &&
+	     q->mq_ops->commit_rqs && queued)
 		q->mq_ops->commit_rqs(hctx);
 	/*
 	 * Any items that need requeuing? Stuff them into hctx->dispatch,