Message ID | c7598605401a48d5cfeadebb678abd10af22b83f.1620691329.git.osandov@fb.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | kyber: fix out of bounds access when preempted | expand |
On Mon, May 10, 2021 at 05:05:35PM -0700, Omar Sandoval wrote: > From: Omar Sandoval <osandov@fb.com> > > __blk_mq_sched_bio_merge() gets the ctx and hctx for the current CPU and > passes the hctx to ->bio_merge(). kyber_bio_merge() then gets the ctx > for the current CPU again and uses that to get the corresponding Kyber > context in the passed hctx. However, the thread may be preempted between > the two calls to blk_mq_get_ctx(), and the ctx returned the second time > may no longer correspond to the passed hctx. This "works" accidentally > most of the time, but it can cause us to read garbage if the second ctx > came from an hctx with more ctx's than the first one (i.e., if > ctx->index_hw[hctx->type] > hctx->nr_ctx). > > This manifested as this UBSAN array index out of bounds error reported > by Jakub: > > UBSAN: array-index-out-of-bounds in ../kernel/locking/qspinlock.c:130:9 > index 13106 is out of range for type 'long unsigned int [128]' > Call Trace: > dump_stack+0xa4/0xe5 > ubsan_epilogue+0x5/0x40 > __ubsan_handle_out_of_bounds.cold.13+0x2a/0x34 > queued_spin_lock_slowpath+0x476/0x480 > do_raw_spin_lock+0x1c2/0x1d0 > kyber_bio_merge+0x112/0x180 > blk_mq_submit_bio+0x1f5/0x1100 > submit_bio_noacct+0x7b0/0x870 > submit_bio+0xc2/0x3a0 > btrfs_map_bio+0x4f0/0x9d0 > btrfs_submit_data_bio+0x24e/0x310 > submit_one_bio+0x7f/0xb0 > submit_extent_page+0xc4/0x440 > __extent_writepage_io+0x2b8/0x5e0 > __extent_writepage+0x28d/0x6e0 > extent_write_cache_pages+0x4d7/0x7a0 > extent_writepages+0xa2/0x110 > do_writepages+0x8f/0x180 > __writeback_single_inode+0x99/0x7f0 > writeback_sb_inodes+0x34e/0x790 > __writeback_inodes_wb+0x9e/0x120 > wb_writeback+0x4d2/0x660 > wb_workfn+0x64d/0xa10 > process_one_work+0x53a/0xa80 > worker_thread+0x69/0x5b0 > kthread+0x20b/0x240 > ret_from_fork+0x1f/0x30 > > Only Kyber uses the hctx, so fix it by passing the request_queue to > ->bio_merge() instead. 
> BFQ and mq-deadline just use that, and Kyber can map the queues
> itself to avoid the mismatch.

I should elaborate that once we get the Kyber context, it doesn't matter if we get preempted after that, since we lock it for merging.
On 5/10/21 6:05 PM, Omar Sandoval wrote: > From: Omar Sandoval <osandov@fb.com> > > __blk_mq_sched_bio_merge() gets the ctx and hctx for the current CPU and > passes the hctx to ->bio_merge(). kyber_bio_merge() then gets the ctx > for the current CPU again and uses that to get the corresponding Kyber > context in the passed hctx. However, the thread may be preempted between > the two calls to blk_mq_get_ctx(), and the ctx returned the second time > may no longer correspond to the passed hctx. This "works" accidentally > most of the time, but it can cause us to read garbage if the second ctx > came from an hctx with more ctx's than the first one (i.e., if > ctx->index_hw[hctx->type] > hctx->nr_ctx). > > This manifested as this UBSAN array index out of bounds error reported > by Jakub: > > UBSAN: array-index-out-of-bounds in ../kernel/locking/qspinlock.c:130:9 > index 13106 is out of range for type 'long unsigned int [128]' > Call Trace: > dump_stack+0xa4/0xe5 > ubsan_epilogue+0x5/0x40 > __ubsan_handle_out_of_bounds.cold.13+0x2a/0x34 > queued_spin_lock_slowpath+0x476/0x480 > do_raw_spin_lock+0x1c2/0x1d0 > kyber_bio_merge+0x112/0x180 > blk_mq_submit_bio+0x1f5/0x1100 > submit_bio_noacct+0x7b0/0x870 > submit_bio+0xc2/0x3a0 > btrfs_map_bio+0x4f0/0x9d0 > btrfs_submit_data_bio+0x24e/0x310 > submit_one_bio+0x7f/0xb0 > submit_extent_page+0xc4/0x440 > __extent_writepage_io+0x2b8/0x5e0 > __extent_writepage+0x28d/0x6e0 > extent_write_cache_pages+0x4d7/0x7a0 > extent_writepages+0xa2/0x110 > do_writepages+0x8f/0x180 > __writeback_single_inode+0x99/0x7f0 > writeback_sb_inodes+0x34e/0x790 > __writeback_inodes_wb+0x9e/0x120 > wb_writeback+0x4d2/0x660 > wb_workfn+0x64d/0xa10 > process_one_work+0x53a/0xa80 > worker_thread+0x69/0x5b0 > kthread+0x20b/0x240 > ret_from_fork+0x1f/0x30 > > Only Kyber uses the hctx, so fix it by passing the request_queue to > ->bio_merge() instead. BFQ and mq-deadline just use that, and Kyber can > map the queues itself to avoid the mismatch. 
Applied, thanks Omar.
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0270cd7ca165..59b2499d3f8b 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2263,10 +2263,9 @@ static void bfq_remove_request(struct request_queue *q,

 }

-static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
+static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs)
 {
-	struct request_queue *q = hctx->queue;
 	struct bfq_data *bfqd = q->elevator->elevator_data;
 	struct request *free = NULL;
 	/*
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 42a365b1b9c0..996a4b2f73aa 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -358,14 +358,16 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs)
 {
 	struct elevator_queue *e = q->elevator;
-	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
+	struct blk_mq_ctx *ctx;
+	struct blk_mq_hw_ctx *hctx;
 	bool ret = false;
 	enum hctx_type type;

 	if (e && e->type->ops.bio_merge)
-		return e->type->ops.bio_merge(hctx, bio, nr_segs);
+		return e->type->ops.bio_merge(q, bio, nr_segs);

+	ctx = blk_mq_get_ctx(q);
+	hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
 	type = hctx->type;
 	if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
 	    list_empty_careful(&ctx->rq_lists[type]))
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 8969e122f081..81e3279ecd57 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -561,11 +561,12 @@ static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
 	}
 }

-static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
+static bool kyber_bio_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs)
 {
+	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
 	struct kyber_hctx_data *khd = hctx->sched_data;
-	struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue);
 	struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]];
 	unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
 	struct list_head *rq_list = &kcq->rq_list[sched_domain];
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 04aded71ead2..8eea2cbf2bf4 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -461,10 +461,9 @@ static int dd_request_merge(struct request_queue *q, struct request **rq,
 	return ELEVATOR_NO_MERGE;
 }

-static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
+static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs)
 {
-	struct request_queue *q = hctx->queue;
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct request *free = NULL;
 	bool ret;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 1fe8e105b83b..dcb2f9022c1d 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -34,7 +34,7 @@ struct elevator_mq_ops {
 	void (*depth_updated)(struct blk_mq_hw_ctx *);
 	bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
-	bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *, unsigned int);
+	bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int);
 	int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
 	void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
 	void (*requests_merged)(struct request_queue *, struct request *, struct request *);