Message ID: 20231108080504.2144952-1-ming.lei@redhat.com (mailing list archive)
State: New, archived
Series: blk-mq: make sure active queue usage is held for bio_integrity_prep()
> +++ b/block/blk-mq.c
> @@ -2858,11 +2858,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
>  	};
>  	struct request *rq;
>  
> -	if (unlikely(bio_queue_enter(bio)))
> -		return NULL;
> -
>  	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
> -		goto queue_exit;
> +		return NULL;
>  
>  	rq_qos_throttle(q, bio);

For clarity splitting this out might be a bit more readable.

> +static inline struct request *blk_mq_cached_req(const struct request_queue *q,
> +		const struct blk_plug *plug)
> +{
> +	if (plug) {
> +		struct request *rq = rq_list_peek(&plug->cached_rq);
> +
> +		if (rq && rq->q == q)
> +			return rq;
> +	}
> +	return NULL;
> +}

Not sure this helper adds much value over just open coding it.

> +/* return true if this bio needs to handle by allocating new request */
> +static inline bool blk_mq_try_cached_rq(struct request *rq,
>  		struct blk_plug *plug, struct bio **bio, unsigned int nsegs)

The polarity here is a bit weird, I'd expect a true return if the
request can be used and a naming implying that.

> +	rq = blk_mq_cached_req(q, plug);
> +	if (rq) {
> +		/* cached request held queue usage counter */
> +		if (!bio_integrity_prep(bio))
> +			return;
> +
> +		need_alloc = blk_mq_try_cached_rq(rq, plug, &bio, nr_segs);
>  		if (!bio)
>  			return;

If you take the blk_mq_attempt_bio_merge call out of the helper,
the calling convention becomes much saner.

> +	/* cached request held queue usage counter */
> +	percpu_ref_get(&q->q_usage_counter);

I don't think we can just do the percpu_ref_get here.  While getting
the counter obviously can't fail, we still need to do the queue
pm only check.

Below is the untested version of how I'd do this that I hacked while
reviewing it:

---
commit 1134ce39504c30a9804ed8147027e66bf7d3157e
Author: Ming Lei <ming.lei@redhat.com>
Date:   Wed Nov 8 16:05:04 2023 +0800

    blk-mq: make sure active queue usage is held for bio_integrity_prep()

    blk_integrity_unregister() can come if queue usage counter isn't held
    for one bio with integrity prepared, so this request may be completed with
    calling profile->complete_fn, then kernel panic.

    Another constraint is that bio_integrity_prep() needs to be called
    before bio merge.

    Fix the issue by:

    - call bio_integrity_prep() with one queue usage counter grabbed reliably

    - call bio_integrity_prep() before bio merge

    Fixes: 900e080752025f00 ("block: move queue enter logic into blk_mq_submit_bio()")
    Reported-by: Yi Zhang <yi.zhang@redhat.com>
    Signed-off-by: Ming Lei <ming.lei@redhat.com>

diff --git a/block/blk-mq.c b/block/blk-mq.c
index e2d11183f62e37..b758dc8ed10957 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2858,11 +2858,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	};
 	struct request *rq;
 
-	if (unlikely(bio_queue_enter(bio)))
-		return NULL;
-
 	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
-		goto queue_exit;
+		return NULL;
 
 	rq_qos_throttle(q, bio);
 
@@ -2878,35 +2875,24 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	rq_qos_cleanup(q, bio);
 	if (bio->bi_opf & REQ_NOWAIT)
 		bio_wouldblock_error(bio);
-queue_exit:
-	blk_queue_exit(q);
 	return NULL;
 }
 
-static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
-		struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
+/* return true if this @rq can be used for @bio */
+static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug,
+		struct bio *bio)
 {
-	struct request *rq;
-	enum hctx_type type, hctx_type;
+	struct request_queue *q = rq->q;
+	enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
+	enum hctx_type hctx_type = rq->mq_hctx->type;
 
-	if (!plug)
-		return NULL;
-	rq = rq_list_peek(&plug->cached_rq);
-	if (!rq || rq->q != q)
-		return NULL;
+	WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
 
-	if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
-		*bio = NULL;
-		return NULL;
-	}
-
-	type = blk_mq_get_hctx_type((*bio)->bi_opf);
-	hctx_type = rq->mq_hctx->type;
 	if (type != hctx_type &&
 	    !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
-		return NULL;
-	if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
-		return NULL;
+		return false;
+	if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
+		return false;
 
 	/*
 	 * If any qos ->throttle() end up blocking, we will have flushed the
@@ -2914,12 +2900,12 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
 	 * before we throttle.
 	 */
 	plug->cached_rq = rq_list_next(rq);
-	rq_qos_throttle(q, *bio);
+	rq_qos_throttle(q, bio);
 
 	blk_mq_rq_time_init(rq, 0);
-	rq->cmd_flags = (*bio)->bi_opf;
+	rq->cmd_flags = bio->bi_opf;
 	INIT_LIST_HEAD(&rq->queuelist);
-	return rq;
+	return true;
 }
 
 static void bio_set_ioprio(struct bio *bio)
@@ -2949,7 +2935,7 @@ void blk_mq_submit_bio(struct bio *bio)
 	struct blk_plug *plug = blk_mq_plug(bio);
 	const int is_sync = op_is_sync(bio->bi_opf);
 	struct blk_mq_hw_ctx *hctx;
-	struct request *rq;
+	struct request *rq = NULL;
 	unsigned int nr_segs = 1;
 	blk_status_t ret;
 
@@ -2960,20 +2946,39 @@ void blk_mq_submit_bio(struct bio *bio)
 		return;
 	}
 
-	if (!bio_integrity_prep(bio))
-		return;
-
 	bio_set_ioprio(bio);
 
-	rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
-	if (!rq) {
-		if (!bio)
+	if (plug) {
+		rq = rq_list_peek(&plug->cached_rq);
+		if (rq && rq->q != q)
+			rq = NULL;
+	}
+	if (rq) {
+		/* rq already holds a q_usage_counter reference */
+		if (!bio_integrity_prep(bio))
 			return;
-		rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
-		if (unlikely(!rq))
+		if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
+			return;
+
+		if (blk_mq_can_use_cached_rq(rq, plug, bio))
+			goto done;
+
+		percpu_ref_get(&q->q_usage_counter);
+	} else {
+		if (unlikely(bio_queue_enter(bio)))
+			return;
+
+		if (!bio_integrity_prep(bio))
 			return;
 	}
+	rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
+	if (unlikely(!rq)) {
+		blk_queue_exit(q);
+		return;
+	}
+
+done:
 	trace_block_getrq(bio);
 
 	rq_qos_track(q, rq, bio);
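For readers trying to follow the interleaved hunks, the submit-path logic that
Christoph's untested version ends up with looks roughly like this when only the
added lines are read in sequence (qos throttling, tracing and plugging context
omitted; every name below comes from the diff above, nothing new is introduced):

	if (plug) {
		rq = rq_list_peek(&plug->cached_rq);
		if (rq && rq->q != q)
			rq = NULL;
	}
	if (rq) {
		/* the cached plug request already pins q_usage_counter */
		if (!bio_integrity_prep(bio))
			return;
		if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
			return;
		if (blk_mq_can_use_cached_rq(rq, plug, bio))
			goto done;
		/* cached rq doesn't fit this bio: take an extra reference */
		percpu_ref_get(&q->q_usage_counter);
	} else {
		if (unlikely(bio_queue_enter(bio)))
			return;
		if (!bio_integrity_prep(bio))
			return;
	}

	rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
	if (unlikely(!rq)) {
		blk_queue_exit(q);
		return;
	}
done:
	/* ... request is set up and issued from here on ... */

Note that in this reading the non-cached branch still returns without
blk_queue_exit() when bio_integrity_prep() fails, which is exactly the point
Ming picks up on in his reply below.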
On Wed, Nov 08, 2023 at 11:30:10PM -0800, Christoph Hellwig wrote:
> > +++ b/block/blk-mq.c
> > @@ -2858,11 +2858,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
> >  	};
> >  	struct request *rq;
> >  
> > -	if (unlikely(bio_queue_enter(bio)))
> > -		return NULL;
> > -
> >  	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
> > -		goto queue_exit;
> > +		return NULL;
> >  
> >  	rq_qos_throttle(q, bio);
> 
> For clarity splitting this out might be a bit more readable.

I'd rather not do too many things in a single fix that needs backporting,
but I am fine with doing that in a follow-up cleanup.

> > +static inline struct request *blk_mq_cached_req(const struct request_queue *q,
> > +		const struct blk_plug *plug)
> > +{
> > +	if (plug) {
> > +		struct request *rq = rq_list_peek(&plug->cached_rq);
> > +
> > +		if (rq && rq->q == q)
> > +			return rq;
> > +	}
> > +	return NULL;
> > +}
> 
> Not sure this helper adds much value over just open coding it.

I am fine with either way, but the function name actually has
documentation benefit.

> > +/* return true if this bio needs to handle by allocating new request */
> > +static inline bool blk_mq_try_cached_rq(struct request *rq,
> >  		struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
> 
> The polarity here is a bit weird, I'd expect a true return if the
> request can be used and a naming implying that.

Fine. My original local version actually returned false if the cached
request can be used; it was changed only to avoid writing
!blk_mq_try_cached_rq() at the call site.

> > +	rq = blk_mq_cached_req(q, plug);
> > +	if (rq) {
> > +		/* cached request held queue usage counter */
> > +		if (!bio_integrity_prep(bio))
> > +			return;
> > +
> > +		need_alloc = blk_mq_try_cached_rq(rq, plug, &bio, nr_segs);
> >  		if (!bio)
> >  			return;
> 
> If you take the blk_mq_attempt_bio_merge call out of the helper,
> the calling convention becomes much saner.

IMO we shouldn't mix cleanup with the fix, and keeping them separate is
friendlier for the stable backport.

> > +	/* cached request held queue usage counter */
> > +	percpu_ref_get(&q->q_usage_counter);
> 
> I don't think we can just do the percpu_ref_get here.  While getting
> the counter obviously can't fail, we still need to do the queue
> pm only check.

blk_pre_runtime_suspend() can't succeed if any active queue usage
counter is held, so it is fine to call percpu_ref_get() here.

> 
> Below is the untested version of how I'd do this that I hacked while
> reviewing it:
> 
> ---
> commit 1134ce39504c30a9804ed8147027e66bf7d3157e
> Author: Ming Lei <ming.lei@redhat.com>
> Date:   Wed Nov 8 16:05:04 2023 +0800
> 
>     blk-mq: make sure active queue usage is held for bio_integrity_prep()
> 
>     blk_integrity_unregister() can come if queue usage counter isn't held
>     for one bio with integrity prepared, so this request may be completed with
>     calling profile->complete_fn, then kernel panic.
> 
>     Another constraint is that bio_integrity_prep() needs to be called
>     before bio merge.
> 
>     Fix the issue by:
> 
>     - call bio_integrity_prep() with one queue usage counter grabbed reliably
> 
>     - call bio_integrity_prep() before bio merge
> 
>     Fixes: 900e080752025f00 ("block: move queue enter logic into blk_mq_submit_bio()")
>     Reported-by: Yi Zhang <yi.zhang@redhat.com>
>     Signed-off-by: Ming Lei <ming.lei@redhat.com>
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index e2d11183f62e37..b758dc8ed10957 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -2858,11 +2858,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
>  	};
>  	struct request *rq;
>  
> -	if (unlikely(bio_queue_enter(bio)))
> -		return NULL;
> -
>  	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
> -		goto queue_exit;
> +		return NULL;
>  
>  	rq_qos_throttle(q, bio);
>  
> @@ -2878,35 +2875,24 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
>  	rq_qos_cleanup(q, bio);
>  	if (bio->bi_opf & REQ_NOWAIT)
>  		bio_wouldblock_error(bio);
> -queue_exit:
> -	blk_queue_exit(q);
>  	return NULL;
>  }
>  
> -static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
> -		struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
> +/* return true if this @rq can be used for @bio */
> +static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug,
> +		struct bio *bio)

Switching to "*bio" is on my todo cleanup list too, and I will make it a
standalone patch.

> {
> -	struct request *rq;
> -	enum hctx_type type, hctx_type;
> +	struct request_queue *q = rq->q;
> +	enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
> +	enum hctx_type hctx_type = rq->mq_hctx->type;
>  
> -	if (!plug)
> -		return NULL;
> -	rq = rq_list_peek(&plug->cached_rq);
> -	if (!rq || rq->q != q)
> -		return NULL;
> +	WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
>  
> -	if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
> -		*bio = NULL;
> -		return NULL;
> -	}
> -
> -	type = blk_mq_get_hctx_type((*bio)->bi_opf);
> -	hctx_type = rq->mq_hctx->type;
>  	if (type != hctx_type &&
>  	    !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
> -		return NULL;
> -	if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
> -		return NULL;
> +		return false;
> +	if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
> +		return false;
>  
>  	/*
>  	 * If any qos ->throttle() end up blocking, we will have flushed the
> @@ -2914,12 +2900,12 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
>  	 * before we throttle.
>  	 */
>  	plug->cached_rq = rq_list_next(rq);
> -	rq_qos_throttle(q, *bio);
> +	rq_qos_throttle(q, bio);
>  
>  	blk_mq_rq_time_init(rq, 0);
> -	rq->cmd_flags = (*bio)->bi_opf;
> +	rq->cmd_flags = bio->bi_opf;
>  	INIT_LIST_HEAD(&rq->queuelist);
> -	return rq;
> +	return true;
>  }
>  
>  static void bio_set_ioprio(struct bio *bio)
> @@ -2949,7 +2935,7 @@ void blk_mq_submit_bio(struct bio *bio)
>  	struct blk_plug *plug = blk_mq_plug(bio);
>  	const int is_sync = op_is_sync(bio->bi_opf);
>  	struct blk_mq_hw_ctx *hctx;
> -	struct request *rq;
> +	struct request *rq = NULL;
>  	unsigned int nr_segs = 1;
>  	blk_status_t ret;
>  
> @@ -2960,20 +2946,39 @@ void blk_mq_submit_bio(struct bio *bio)
>  		return;
>  	}
>  
> -	if (!bio_integrity_prep(bio))
> -		return;
> -
>  	bio_set_ioprio(bio);
>  
> -	rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
> -	if (!rq) {
> -		if (!bio)
> +	if (plug) {
> +		rq = rq_list_peek(&plug->cached_rq);
> +		if (rq && rq->q != q)
> +			rq = NULL;
> +	}
> +	if (rq) {
> +		/* rq already holds a q_usage_counter reference */
> +		if (!bio_integrity_prep(bio))
>  			return;
> -		rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
> -		if (unlikely(!rq))
> +		if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
> +			return;
> +
> +		if (blk_mq_can_use_cached_rq(rq, plug, bio))
> +			goto done;
> +
> +		percpu_ref_get(&q->q_usage_counter);
> +	} else {
> +		if (unlikely(bio_queue_enter(bio)))
> +			return;
> +
> +		if (!bio_integrity_prep(bio))
>  			return;

blk_queue_exit() is needed if bio_integrity_prep() fails.

I will try to make one easy fix first, then follow up with the cleanup.

Thanks,
Ming
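To make that last point concrete, one way the non-cached branch of the hunk
quoted above could avoid leaking the reference taken by bio_queue_enter() is
the following; this is an illustration only, not code posted in this thread:

	} else {
		if (unlikely(bio_queue_enter(bio)))
			return;

		if (!bio_integrity_prep(bio)) {
			/* drop the usage counter bio_queue_enter() just took */
			blk_queue_exit(q);
			return;
		}
	}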
On Thu, Nov 09, 2023 at 04:11:52PM +0800, Ming Lei wrote:
> > >  	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
> > > -		goto queue_exit;
> > > +		return NULL;
> > >  
> > >  	rq_qos_throttle(q, bio);
> > 
> > For clarity splitting this out might be a bit more readable.
> 
> I'd rather not do too many things in a single fix that needs backporting,
> but I am fine with doing that in a follow-up cleanup.

The resulting diff is smaller.
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e2d11183f62e..80f36096f16f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2858,11 +2858,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	};
 	struct request *rq;
 
-	if (unlikely(bio_queue_enter(bio)))
-		return NULL;
-
 	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
-		goto queue_exit;
+		return NULL;
 
 	rq_qos_throttle(q, bio);
 
@@ -2878,35 +2875,43 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	rq_qos_cleanup(q, bio);
 	if (bio->bi_opf & REQ_NOWAIT)
 		bio_wouldblock_error(bio);
-queue_exit:
-	blk_queue_exit(q);
 	return NULL;
 }
 
-static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
+/* cached request means we grab active queue usage counter */
+static inline struct request *blk_mq_cached_req(const struct request_queue *q,
+		const struct blk_plug *plug)
+{
+	if (plug) {
+		struct request *rq = rq_list_peek(&plug->cached_rq);
+
+		if (rq && rq->q == q)
+			return rq;
+	}
+	return NULL;
+}
+
+/* return true if this bio needs to handle by allocating new request */
+static inline bool blk_mq_try_cached_rq(struct request *rq,
 		struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
 {
-	struct request *rq;
+	struct request_queue *q = rq->q;
 	enum hctx_type type, hctx_type;
 
-	if (!plug)
-		return NULL;
-	rq = rq_list_peek(&plug->cached_rq);
-	if (!rq || rq->q != q)
-		return NULL;
+	WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
 
 	if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
 		*bio = NULL;
-		return NULL;
+		return false;
 	}
 
 	type = blk_mq_get_hctx_type((*bio)->bi_opf);
 	hctx_type = rq->mq_hctx->type;
 	if (type != hctx_type &&
 	    !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
-		return NULL;
+		return true;
 	if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
-		return NULL;
+		return true;
 
 	/*
 	 * If any qos ->throttle() end up blocking, we will have flushed the
@@ -2919,7 +2924,8 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
 	blk_mq_rq_time_init(rq, 0);
 	rq->cmd_flags = (*bio)->bi_opf;
 	INIT_LIST_HEAD(&rq->queuelist);
-	return rq;
+
+	return false;
 }
 
 static void bio_set_ioprio(struct bio *bio)
@@ -2951,6 +2957,7 @@ void blk_mq_submit_bio(struct bio *bio)
 	struct blk_mq_hw_ctx *hctx;
 	struct request *rq;
 	unsigned int nr_segs = 1;
+	bool need_alloc = true;
 	blk_status_t ret;
 
 	bio = blk_queue_bounce(bio, q);
@@ -2960,18 +2967,36 @@ void blk_mq_submit_bio(struct bio *bio)
 		return;
 	}
 
-	if (!bio_integrity_prep(bio))
-		return;
-
 	bio_set_ioprio(bio);
 
-	rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
-	if (!rq) {
+	rq = blk_mq_cached_req(q, plug);
+	if (rq) {
+		/* cached request held queue usage counter */
+		if (!bio_integrity_prep(bio))
+			return;
+
+		need_alloc = blk_mq_try_cached_rq(rq, plug, &bio, nr_segs);
 		if (!bio)
 			return;
+	}
+
+	if (need_alloc) {
+		if (!rq) {
+			if (unlikely(bio_queue_enter(bio)))
+				return;
+
+			if (!bio_integrity_prep(bio))
+				return;
+		} else {
+			/* cached request held queue usage counter */
+			percpu_ref_get(&q->q_usage_counter);
+		}
+
 		rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
-		if (unlikely(!rq))
+		if (unlikely(!rq)) {
+			blk_queue_exit(q);
 			return;
+		}
 	}
 
 	trace_block_getrq(bio);
blk_integrity_unregister() can come if queue usage counter isn't held
for one bio with integrity prepared, so this request may be completed with
calling profile->complete_fn, then kernel panic.

Another constraint is that bio_integrity_prep() needs to be called
before bio merge.

Fix the issue by:

- call bio_integrity_prep() with one queue usage counter grabbed reliably

- call bio_integrity_prep() before bio merge

Fixes: 900e080752025f00 ("block: move queue enter logic into blk_mq_submit_bio()")
Reported-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq.c | 71 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 23 deletions(-)
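A side note on the percpu_ref_get() question debated above: the argument for
skipping bio_queue_enter() in the cached-request case is that the cached
request already holds an active q_usage_counter reference, so the queue cannot
finish freezing or runtime suspend underneath the submitter, and an
unconditional get can never revive a drained counter. The toy userspace C
program below models that reasoning with a plain atomic counter and a shutdown
flag; all the toy_* names are invented for this analogy, and it does not
reproduce the block layer's actual percpu_ref or runtime-PM implementation.

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for q_usage_counter plus a flag loosely modelling queue
 * freeze / pm-only mode.  Single-threaded demo. */
struct toy_queue {
	atomic_long refs;
	atomic_bool shutting_down;
};

/* Analogue of bio_queue_enter(): refuses new users once shutdown started. */
static bool toy_enter(struct toy_queue *q)
{
	if (atomic_load(&q->shutting_down))
		return false;
	atomic_fetch_add(&q->refs, 1);
	return true;
}

/* Analogue of percpu_ref_get(): unconditional, only legal while the caller
 * already holds a reference (here: the cached request's reference). */
static void toy_get(struct toy_queue *q)
{
	long old = atomic_fetch_add(&q->refs, 1);

	assert(old > 0);	/* the held reference keeps the count non-zero */
}

/* Analogue of blk_queue_exit(). */
static void toy_exit(struct toy_queue *q)
{
	atomic_fetch_sub(&q->refs, 1);
}

/* Analogue of blk_pre_runtime_suspend(): only succeeds with no active users. */
static bool toy_try_suspend(struct toy_queue *q)
{
	atomic_store(&q->shutting_down, true);
	if (atomic_load(&q->refs) > 0) {
		atomic_store(&q->shutting_down, false);	/* busy, back off */
		return false;
	}
	return true;
}

int main(void)
{
	/* refs == 1 models the reference a cached plug request already holds */
	struct toy_queue q = { .refs = 1, .shutting_down = false };

	/* Suspend cannot succeed while the cached request pins the counter... */
	printf("suspend with cached rq held: %s\n",
	       toy_try_suspend(&q) ? "succeeded" : "refused");

	/* ...so the cheap unconditional get for a second request is safe. */
	toy_get(&q);
	toy_exit(&q);		/* second request completed */
	toy_exit(&q);		/* cached request finally released */

	printf("suspend after all users are gone: %s\n",
	       toy_try_suspend(&q) ? "succeeded" : "refused");

	/* New submitters must now take the slow path, which is allowed to fail. */
	printf("new enter after suspend: %s\n",
	       toy_enter(&q) ? "allowed" : "rejected");
	return 0;
}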