
blk-mq: make sure active queue usage is held for bio_integrity_prep()

Message ID 20231108080504.2144952-1-ming.lei@redhat.com (mailing list archive)
State New, archived
Series blk-mq: make sure active queue usage is held for bio_integrity_prep()

Commit Message

Ming Lei Nov. 8, 2023, 8:05 a.m. UTC
blk_integrity_unregister() can come in while the queue usage counter
isn't held for a bio that already has its integrity data prepared, so
the request may be completed by calling profile->complete_fn after the
profile is gone, leading to a kernel panic.

Another constraint is that bio_integrity_prep() needs to be called
before bio merge.

Fix the issue by:

- call bio_integrity_prep() with one queue usage counter grabbed reliably

- call bio_integrity_prep() before bio merge

Fixes: 900e080752025f00 ("block: move queue enter logic into blk_mq_submit_bio()")
Reported-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq.c | 71 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 23 deletions(-)

Comments

Christoph Hellwig Nov. 9, 2023, 7:30 a.m. UTC | #1
> +++ b/block/blk-mq.c
> @@ -2858,11 +2858,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
>  	};
>  	struct request *rq;
>  
> -	if (unlikely(bio_queue_enter(bio)))
> -		return NULL;
> -
>  	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
> -		goto queue_exit;
> +		return NULL;
>  
>  	rq_qos_throttle(q, bio);

For clarity splitting this out might be a bit more readable.

> +static inline struct request *blk_mq_cached_req(const struct request_queue *q,
> +		const struct blk_plug *plug)
> +{
> +	if (plug) {
> +		struct request *rq = rq_list_peek(&plug->cached_rq);
> +
> +		if (rq && rq->q == q)
> +			return rq;
> +	}
> +	return NULL;
> +}

Not sure this helper adds much value over just open coding it.
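Open coded it is only a handful of lines anyway, roughly what the full
version further down ends up doing:

	if (plug) {
		rq = rq_list_peek(&plug->cached_rq);
		if (rq && rq->q != q)
			rq = NULL;
	}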

> +/* return true if this bio needs to be handled by allocating a new request */
> +static inline bool blk_mq_try_cached_rq(struct request *rq,
>  		struct blk_plug *plug, struct bio **bio, unsigned int nsegs)

The polarity here is a bit weird, I'd expect a true return if the
request can be used and a naming implying that.

> +	rq = blk_mq_cached_req(q, plug);
> +	if (rq) {
> +		/* the cached request already holds a q_usage_counter reference */
> +		if (!bio_integrity_prep(bio))
> +			return;
> +
> +		need_alloc = blk_mq_try_cached_rq(rq, plug, &bio, nr_segs);
>  		if (!bio)
>  			return;

If you take the blk_mq_attempt_bio_merge call out of the helper,
the calling convention becomes much saner.

> +			/* the cached request already holds a q_usage_counter reference */
> +			percpu_ref_get(&q->q_usage_counter);

I don't think we can just do the percpu_ref_get here.  While getting
the counter obviously can't fail, we still need to do the queue
pm_only check.
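For reference, that check lives in blk_try_enter_queue(), which
bio_queue_enter() ends up in; simplified (runtime PM details trimmed)
it is something like:

static bool blk_try_enter_queue(struct request_queue *q, bool pm)
{
	rcu_read_lock();
	if (!percpu_ref_tryget_live_rcu(&q->q_usage_counter))
		goto fail;

	/* reject new non-PM submitters while pm_only is set */
	if (blk_queue_pm_only(q) &&
	    (!pm || queue_rpm_status(q) == RPM_SUSPENDED))
		goto fail_put;

	rcu_read_unlock();
	return true;

fail_put:
	blk_queue_exit(q);
fail:
	rcu_read_unlock();
	return false;
}

A bare percpu_ref_get() never looks at pm_only at all.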

Below is the untested version of how I'd do this that I hacked while
reviewing it:

---
commit 1134ce39504c30a9804ed8147027e66bf7d3157e
Author: Ming Lei <ming.lei@redhat.com>
Date:   Wed Nov 8 16:05:04 2023 +0800

    blk-mq: make sure active queue usage is held for bio_integrity_prep()
    
    blk_integrity_unregister() can come in while the queue usage counter
    isn't held for a bio that already has its integrity data prepared, so
    the request may be completed by calling profile->complete_fn after the
    profile is gone, leading to a kernel panic.
    
    Another constraint is that bio_integrity_prep() needs to be called
    before bio merge.
    
    Fix the issue by:
    
    - call bio_integrity_prep() with one queue usage counter grabbed reliably
    
    - call bio_integrity_prep() before bio merge
    
    Fixes: 900e080752025f00 ("block: move queue enter logic into blk_mq_submit_bio()")
    Reported-by: Yi Zhang <yi.zhang@redhat.com>
    Signed-off-by: Ming Lei <ming.lei@redhat.com>

diff --git a/block/blk-mq.c b/block/blk-mq.c
index e2d11183f62e37..b758dc8ed10957 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2858,11 +2858,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	};
 	struct request *rq;
 
-	if (unlikely(bio_queue_enter(bio)))
-		return NULL;
-
 	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
-		goto queue_exit;
+		return NULL;
 
 	rq_qos_throttle(q, bio);
 
@@ -2878,35 +2875,24 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	rq_qos_cleanup(q, bio);
 	if (bio->bi_opf & REQ_NOWAIT)
 		bio_wouldblock_error(bio);
-queue_exit:
-	blk_queue_exit(q);
 	return NULL;
 }
 
-static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
-		struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
+/* return true if this @rq can be used for @bio */
+static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug,
+		struct bio *bio)
 {
-	struct request *rq;
-	enum hctx_type type, hctx_type;
+	struct request_queue *q = rq->q;
+	enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
+	enum hctx_type hctx_type = rq->mq_hctx->type;
 
-	if (!plug)
-		return NULL;
-	rq = rq_list_peek(&plug->cached_rq);
-	if (!rq || rq->q != q)
-		return NULL;
+	WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
 
-	if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
-		*bio = NULL;
-		return NULL;
-	}
-
-	type = blk_mq_get_hctx_type((*bio)->bi_opf);
-	hctx_type = rq->mq_hctx->type;
 	if (type != hctx_type &&
 	    !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
-		return NULL;
-	if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
-		return NULL;
+		return false;
+	if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
+		return false;
 
 	/*
 	 * If any qos ->throttle() end up blocking, we will have flushed the
@@ -2914,12 +2900,12 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
 	 * before we throttle.
 	 */
 	plug->cached_rq = rq_list_next(rq);
-	rq_qos_throttle(q, *bio);
+	rq_qos_throttle(q, bio);
 
 	blk_mq_rq_time_init(rq, 0);
-	rq->cmd_flags = (*bio)->bi_opf;
+	rq->cmd_flags = bio->bi_opf;
 	INIT_LIST_HEAD(&rq->queuelist);
-	return rq;
+	return true;
 }
 
 static void bio_set_ioprio(struct bio *bio)
@@ -2949,7 +2935,7 @@ void blk_mq_submit_bio(struct bio *bio)
 	struct blk_plug *plug = blk_mq_plug(bio);
 	const int is_sync = op_is_sync(bio->bi_opf);
 	struct blk_mq_hw_ctx *hctx;
-	struct request *rq;
+	struct request *rq = NULL;
 	unsigned int nr_segs = 1;
 	blk_status_t ret;
 
@@ -2960,20 +2946,39 @@ void blk_mq_submit_bio(struct bio *bio)
 			return;
 	}
 
-	if (!bio_integrity_prep(bio))
-		return;
-
 	bio_set_ioprio(bio);
 
-	rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
-	if (!rq) {
-		if (!bio)
+	if (plug) {
+		rq = rq_list_peek(&plug->cached_rq);
+		if (rq && rq->q != q)
+			rq = NULL;
+	}
+	if (rq) {
+		/* rq already holds a q_usage_counter reference */
+		if (!bio_integrity_prep(bio))
 			return;
-		rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
-		if (unlikely(!rq))
+		if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
+			return;
+
+		if (blk_mq_can_use_cached_rq(rq, plug, bio))
+			goto done;
+
+		percpu_ref_get(&q->q_usage_counter);
+	} else {
+		if (unlikely(bio_queue_enter(bio)))
+			return;
+
+		if (!bio_integrity_prep(bio))
 			return;
 	}
 
+	rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
+	if (unlikely(!rq)) {
+		blk_queue_exit(q);
+		return;
+	}
+
+done:
 	trace_block_getrq(bio);
 
 	rq_qos_track(q, rq, bio);
Ming Lei Nov. 9, 2023, 8:11 a.m. UTC | #2
On Wed, Nov 08, 2023 at 11:30:10PM -0800, Christoph Hellwig wrote:
> > +++ b/block/blk-mq.c
> > @@ -2858,11 +2858,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
> >  	};
> >  	struct request *rq;
> >  
> > -	if (unlikely(bio_queue_enter(bio)))
> > -		return NULL;
> > -
> >  	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
> > -		goto queue_exit;
> > +		return NULL;
> >  
> >  	rq_qos_throttle(q, bio);
> 
> For clarity splitting this out might be a bit more readable.

I'd rather not do too many things in a single fix that needs to be
backported, but I am fine with doing it in a follow-up cleanup.

> 
> > +static inline struct request *blk_mq_cached_req(const struct request_queue *q,
> > +		const struct blk_plug *plug)
> > +{
> > +	if (plug) {
> > +		struct request *rq = rq_list_peek(&plug->cached_rq);
> > +
> > +		if (rq && rq->q == q)
> > +			return rq;
> > +	}
> > +	return NULL;
> > +}
> 
> Not sure this helper adds much value over just open coding it.

I am fine with either way, but the function name does have a
documentation benefit.

> 
> > +/* return true if this bio needs to be handled by allocating a new request */
> > +static inline bool blk_mq_try_cached_rq(struct request *rq,
> >  		struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
> 
> The polarity here is a bit weird, I'd expect a true return if the
> request can be used and a naming implying that.

Fine, my original local version actually returned false if the cached
req could be used; it was changed just to avoid having to check
!blk_mq_try_cached_rq().

> 
> > +	rq = blk_mq_cached_req(q, plug);
> > +	if (rq) {
> > +		/* the cached request already holds a q_usage_counter reference */
> > +		if (!bio_integrity_prep(bio))
> > +			return;
> > +
> > +		need_alloc = blk_mq_try_cached_rq(rq, plug, &bio, nr_segs);
> >  		if (!bio)
> >  			return;
> 
> If you take the blk_mq_attempt_bio_merge call out of the helper,
> the calling convention becomes much saner.

IMO we shouldn't mix cleanup with the fix; keeping the fix minimal is
friendlier for the stable backport.

> 
> > +			/* the cached request already holds a q_usage_counter reference */
> > +			percpu_ref_get(&q->q_usage_counter);
> 
> I don't think we can just do the percpu_ref_get here.  While getting
> the counter obviously can't fail, we still need to do the queue
> pm_only check.

blk_pre_runtime_suspend() can't succeed if any active queue usage
counter is held, so it is fine to call percpu_ref_get() here.
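To spell that out, blk_pre_runtime_suspend() only reports success once
the usage counter has dropped to zero; roughly (setup and error
handling trimmed) it does:

int blk_pre_runtime_suspend(struct request_queue *q)
{
	int ret = -EBUSY;

	/* ... rpm_status bookkeeping ... */
	blk_set_pm_only(q);
	/* switch q_usage_counter to atomic mode so it can be read */
	blk_freeze_queue_start(q);
	percpu_ref_switch_to_atomic_sync(&q->q_usage_counter);
	if (percpu_ref_is_zero(&q->q_usage_counter))
		ret = 0;
	blk_mq_unfreeze_queue(q);
	/* ... undo pm_only and rpm_status again on failure ... */

	return ret;
}

So any reference obtained with percpu_ref_get() keeps the counter
non-zero and the suspend attempt fails with -EBUSY.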

> 
> Below is the untested version of how I'd do this that I hacked while
> reviewing it:
> 
> ---
> commit 1134ce39504c30a9804ed8147027e66bf7d3157e
> Author: Ming Lei <ming.lei@redhat.com>
> Date:   Wed Nov 8 16:05:04 2023 +0800
> 
>     blk-mq: make sure active queue usage is held for bio_integrity_prep()
>     
>     blk_integrity_unregister() can come in while the queue usage counter
>     isn't held for a bio that already has its integrity data prepared, so
>     the request may be completed by calling profile->complete_fn after the
>     profile is gone, leading to a kernel panic.
>     
>     Another constraint is that bio_integrity_prep() needs to be called
>     before bio merge.
>     
>     Fix the issue by:
>     
>     - call bio_integrity_prep() with one queue usage counter grabbed reliably
>     
>     - call bio_integrity_prep() before bio merge
>     
>     Fixes: 900e080752025f00 ("block: move queue enter logic into blk_mq_submit_bio()")
>     Reported-by: Yi Zhang <yi.zhang@redhat.com>
>     Signed-off-by: Ming Lei <ming.lei@redhat.com>
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index e2d11183f62e37..b758dc8ed10957 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -2858,11 +2858,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
>  	};
>  	struct request *rq;
>  
> -	if (unlikely(bio_queue_enter(bio)))
> -		return NULL;
> -
>  	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
> -		goto queue_exit;
> +		return NULL;
>  
>  	rq_qos_throttle(q, bio);
>  
> @@ -2878,35 +2875,24 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
>  	rq_qos_cleanup(q, bio);
>  	if (bio->bi_opf & REQ_NOWAIT)
>  		bio_wouldblock_error(bio);
> -queue_exit:
> -	blk_queue_exit(q);
>  	return NULL;
>  }
>  
> -static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
> -		struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
> +/* return true if this @rq can be used for @bio */
> +static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug,
> +		struct bio *bio)

Switching to "*bio" is on my cleanup todo list too, and I will make it
a standalone patch.

>  {
> -	struct request *rq;
> -	enum hctx_type type, hctx_type;
> +	struct request_queue *q = rq->q;
> +	enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
> +	enum hctx_type hctx_type = rq->mq_hctx->type;
>  
> -	if (!plug)
> -		return NULL;
> -	rq = rq_list_peek(&plug->cached_rq);
> -	if (!rq || rq->q != q)
> -		return NULL;
> +	WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
>  
> -	if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
> -		*bio = NULL;
> -		return NULL;
> -	}
> -
> -	type = blk_mq_get_hctx_type((*bio)->bi_opf);
> -	hctx_type = rq->mq_hctx->type;
>  	if (type != hctx_type &&
>  	    !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
> -		return NULL;
> -	if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
> -		return NULL;
> +		return false;
> +	if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
> +		return false;
>  
>  	/*
>  	 * If any qos ->throttle() end up blocking, we will have flushed the
> @@ -2914,12 +2900,12 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
>  	 * before we throttle.
>  	 */
>  	plug->cached_rq = rq_list_next(rq);
> -	rq_qos_throttle(q, *bio);
> +	rq_qos_throttle(q, bio);
>  
>  	blk_mq_rq_time_init(rq, 0);
> -	rq->cmd_flags = (*bio)->bi_opf;
> +	rq->cmd_flags = bio->bi_opf;
>  	INIT_LIST_HEAD(&rq->queuelist);
> -	return rq;
> +	return true;
>  }
>  
>  static void bio_set_ioprio(struct bio *bio)
> @@ -2949,7 +2935,7 @@ void blk_mq_submit_bio(struct bio *bio)
>  	struct blk_plug *plug = blk_mq_plug(bio);
>  	const int is_sync = op_is_sync(bio->bi_opf);
>  	struct blk_mq_hw_ctx *hctx;
> -	struct request *rq;
> +	struct request *rq = NULL;
>  	unsigned int nr_segs = 1;
>  	blk_status_t ret;
>  
> @@ -2960,20 +2946,39 @@ void blk_mq_submit_bio(struct bio *bio)
>  			return;
>  	}
>  
> -	if (!bio_integrity_prep(bio))
> -		return;
> -
>  	bio_set_ioprio(bio);
>  
> -	rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
> -	if (!rq) {
> -		if (!bio)
> +	if (plug) {
> +		rq = rq_list_peek(&plug->cached_rq);
> +		if (rq && rq->q != q)
> +			rq = NULL;
> +	}
> +	if (rq) {
> +		/* rq already holds a q_usage_counter reference */
> +		if (!bio_integrity_prep(bio))
>  			return;
> -		rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
> -		if (unlikely(!rq))
> +		if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
> +			return;
> +
> +		if (blk_mq_can_use_cached_rq(rq, plug, bio))
> +			goto done;
> +
> +		percpu_ref_get(&q->q_usage_counter);
> +	} else {
> +		if (unlikely(bio_queue_enter(bio)))
> +			return;
> +
> +		if (!bio_integrity_prep(bio))
>  			return;

blk_queue_exit() is needed if bio_integrity_prep() fails.
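i.e. that branch would need something along these lines:

		if (unlikely(bio_queue_enter(bio)))
			return;

		if (!bio_integrity_prep(bio)) {
			blk_queue_exit(q);	/* drop the reference just taken */
			return;
		}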

I will try to make one simple fix first, then follow up with the cleanup.

Thanks, 
Ming
Christoph Hellwig Nov. 9, 2023, 2:34 p.m. UTC | #3
On Thu, Nov 09, 2023 at 04:11:52PM +0800, Ming Lei wrote:
> > >  	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
> > > -		goto queue_exit;
> > > +		return NULL;
> > >  
> > >  	rq_qos_throttle(q, bio);
> > 
> > For clarity splitting this out might be a bit more readable.
> 
> I'd rather not do too many things in single fix, which need backport,
> but I am fine to do it in following cleanup.

The resulting diff is smaller..

Patch

diff --git a/block/blk-mq.c b/block/blk-mq.c
index e2d11183f62e..80f36096f16f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2858,11 +2858,8 @@  static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	};
 	struct request *rq;
 
-	if (unlikely(bio_queue_enter(bio)))
-		return NULL;
-
 	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
-		goto queue_exit;
+		return NULL;
 
 	rq_qos_throttle(q, bio);
 
@@ -2878,35 +2875,43 @@  static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	rq_qos_cleanup(q, bio);
 	if (bio->bi_opf & REQ_NOWAIT)
 		bio_wouldblock_error(bio);
-queue_exit:
-	blk_queue_exit(q);
 	return NULL;
 }
 
-static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
+/* a cached request already holds an active q_usage_counter reference */
+static inline struct request *blk_mq_cached_req(const struct request_queue *q,
+		const struct blk_plug *plug)
+{
+	if (plug) {
+		struct request *rq = rq_list_peek(&plug->cached_rq);
+
+		if (rq && rq->q == q)
+			return rq;
+	}
+	return NULL;
+}
+
+/* return true if this bio needs to be handled by allocating a new request */
+static inline bool blk_mq_try_cached_rq(struct request *rq,
 		struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
 {
-	struct request *rq;
+	struct request_queue *q = rq->q;
 	enum hctx_type type, hctx_type;
 
-	if (!plug)
-		return NULL;
-	rq = rq_list_peek(&plug->cached_rq);
-	if (!rq || rq->q != q)
-		return NULL;
+	WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
 
 	if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
 		*bio = NULL;
-		return NULL;
+		return false;
 	}
 
 	type = blk_mq_get_hctx_type((*bio)->bi_opf);
 	hctx_type = rq->mq_hctx->type;
 	if (type != hctx_type &&
 	    !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
-		return NULL;
+		return true;
 	if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
-		return NULL;
+		return true;
 
 	/*
 	 * If any qos ->throttle() end up blocking, we will have flushed the
@@ -2919,7 +2924,8 @@  static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
 	blk_mq_rq_time_init(rq, 0);
 	rq->cmd_flags = (*bio)->bi_opf;
 	INIT_LIST_HEAD(&rq->queuelist);
-	return rq;
+
+	return false;
 }
 
 static void bio_set_ioprio(struct bio *bio)
@@ -2951,6 +2957,7 @@  void blk_mq_submit_bio(struct bio *bio)
 	struct blk_mq_hw_ctx *hctx;
 	struct request *rq;
 	unsigned int nr_segs = 1;
+	bool need_alloc = true;
 	blk_status_t ret;
 
 	bio = blk_queue_bounce(bio, q);
@@ -2960,18 +2967,36 @@  void blk_mq_submit_bio(struct bio *bio)
 			return;
 	}
 
-	if (!bio_integrity_prep(bio))
-		return;
-
 	bio_set_ioprio(bio);
 
-	rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
-	if (!rq) {
+	rq = blk_mq_cached_req(q, plug);
+	if (rq) {
+		/* the cached request already holds a q_usage_counter reference */
+		if (!bio_integrity_prep(bio))
+			return;
+
+		need_alloc = blk_mq_try_cached_rq(rq, plug, &bio, nr_segs);
 		if (!bio)
 			return;
+	}
+
+	if (need_alloc) {
+		if (!rq) {
+			if (unlikely(bio_queue_enter(bio)))
+				return;
+
+			if (!bio_integrity_prep(bio))
+				return;
+		} else {
+			/* the cached request already holds a q_usage_counter reference */
+			percpu_ref_get(&q->q_usage_counter);
+		}
+
 		rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
-		if (unlikely(!rq))
+		if (unlikely(!rq)) {
+			blk_queue_exit(q);
 			return;
+		}
 	}
 
 	trace_block_getrq(bio);