Message ID | 20220209093751.2986716-1-yi.zhang@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | dm: make sure dm_table is binded before queue request | expand |
Friendly ping. On 2022/2/9 17:37, Zhang Yi wrote: > We found a NULL pointer dereference problem when using dm-mpath target. > The problem is if we submit IO between loading and binding the table, > we could neither get a valid dm_target nor a valid dm table when > submitting request in dm_mq_queue_rq(). BIO based dm target could > handle this case in dm_submit_bio(). This patch fix this by checking > the mapping table before submitting request. > > Signed-off-by: Zhang Yi <yi.zhang@huawei.com> > --- > drivers/md/dm-rq.c | 11 +++++++++-- > 1 file changed, 9 insertions(+), 2 deletions(-) > > diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c > index 579ab6183d4d..af2cf71519e9 100644 > --- a/drivers/md/dm-rq.c > +++ b/drivers/md/dm-rq.c > @@ -499,8 +499,15 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, > > if (unlikely(!ti)) { > int srcu_idx; > - struct dm_table *map = dm_get_live_table(md, &srcu_idx); > - > + struct dm_table *map; > + > + map = dm_get_live_table(md, &srcu_idx); > + if (!map) { > + DMERR_LIMIT("%s: mapping table unavailable, erroring io", > + dm_device_name(md)); > + dm_put_live_table(md, srcu_idx); > + return BLK_STS_IOERR; > + } > ti = dm_table_find_target(map, 0); > dm_put_live_table(md, srcu_idx); > } >
On Wed, Feb 09 2022 at 4:37P -0500, Zhang Yi <yi.zhang@huawei.com> wrote: > We found a NULL pointer dereference problem when using dm-mpath target. > The problem is if we submit IO between loading and binding the table, > we could neither get a valid dm_target nor a valid dm table when > submitting request in dm_mq_queue_rq(). BIO based dm target could > handle this case in dm_submit_bio(). This patch fix this by checking > the mapping table before submitting request. > > Signed-off-by: Zhang Yi <yi.zhang@huawei.com> > --- > drivers/md/dm-rq.c | 11 +++++++++-- > 1 file changed, 9 insertions(+), 2 deletions(-) > > diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c > index 579ab6183d4d..af2cf71519e9 100644 > --- a/drivers/md/dm-rq.c > +++ b/drivers/md/dm-rq.c > @@ -499,8 +499,15 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, > > if (unlikely(!ti)) { > int srcu_idx; > - struct dm_table *map = dm_get_live_table(md, &srcu_idx); > - > + struct dm_table *map; > + > + map = dm_get_live_table(md, &srcu_idx); > + if (!map) { > + DMERR_LIMIT("%s: mapping table unavailable, erroring io", > + dm_device_name(md)); > + dm_put_live_table(md, srcu_idx); > + return BLK_STS_IOERR; > + } > ti = dm_table_find_target(map, 0); > dm_put_live_table(md, srcu_idx); > } > -- > 2.31.1 > I think both dm_submit_bio() and now dm_mq_queue_rq() should _not_ error the IO. This is such a narrow race during device setup that it best to requeue the IO. I'll queue this for 5.18: diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 6948d5db9092..3dd040a56318 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -491,8 +491,13 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (unlikely(!ti)) { int srcu_idx; - struct dm_table *map = dm_get_live_table(md, &srcu_idx); + struct dm_table *map; + map = dm_get_live_table(md, &srcu_idx); + if (unlikely(!map)) { + dm_put_live_table(md, srcu_idx); + return BLK_STS_RESOURCE; + } ti = dm_table_find_target(map, 0); dm_put_live_table(md, srcu_idx); } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 082366d0ad49..c70be6e5ed55 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1533,15 +1533,10 @@ static void dm_submit_bio(struct bio *bio) struct dm_table *map; map = dm_get_live_table(md, &srcu_idx); - if (unlikely(!map)) { - DMERR_LIMIT("%s: mapping table unavailable, erroring io", - dm_device_name(md)); - bio_io_error(bio); - goto out; - } - /* If suspended, queue this IO for later */ - if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { + /* If suspended, or map not yet available, queue this IO for later */ + if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) || + unlikely(!map)) { if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); else if (bio->bi_opf & REQ_RAHEAD)
On 2022/2/23 2:27, Mike Snitzer wrote: > On Wed, Feb 09 2022 at 4:37P -0500, > Zhang Yi <yi.zhang@huawei.com> wrote: > >> We found a NULL pointer dereference problem when using dm-mpath target. >> The problem is if we submit IO between loading and binding the table, >> we could neither get a valid dm_target nor a valid dm table when >> submitting request in dm_mq_queue_rq(). BIO based dm target could >> handle this case in dm_submit_bio(). This patch fix this by checking >> the mapping table before submitting request. >> >> Signed-off-by: Zhang Yi <yi.zhang@huawei.com> >> --- >> drivers/md/dm-rq.c | 11 +++++++++-- >> 1 file changed, 9 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c >> index 579ab6183d4d..af2cf71519e9 100644 >> --- a/drivers/md/dm-rq.c >> +++ b/drivers/md/dm-rq.c >> @@ -499,8 +499,15 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, >> >> if (unlikely(!ti)) { >> int srcu_idx; >> - struct dm_table *map = dm_get_live_table(md, &srcu_idx); >> - >> + struct dm_table *map; >> + >> + map = dm_get_live_table(md, &srcu_idx); >> + if (!map) { >> + DMERR_LIMIT("%s: mapping table unavailable, erroring io", >> + dm_device_name(md)); >> + dm_put_live_table(md, srcu_idx); >> + return BLK_STS_IOERR; >> + } >> ti = dm_table_find_target(map, 0); >> dm_put_live_table(md, srcu_idx); >> } >> -- >> 2.31.1 >> > > I think both dm_submit_bio() and now dm_mq_queue_rq() should _not_ > error the IO. This is such a narrow race during device setup that it > best to requeue the IO. > Yes,make sense, thanks for the fix. Thanks, Yi. > I'll queue this for 5.18: > > diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c > index 6948d5db9092..3dd040a56318 100644 > --- a/drivers/md/dm-rq.c > +++ b/drivers/md/dm-rq.c > @@ -491,8 +491,13 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, > > if (unlikely(!ti)) { > int srcu_idx; > - struct dm_table *map = dm_get_live_table(md, &srcu_idx); > + struct dm_table *map; > > + map = dm_get_live_table(md, &srcu_idx); > + if (unlikely(!map)) { > + dm_put_live_table(md, srcu_idx); > + return BLK_STS_RESOURCE; > + } > ti = dm_table_find_target(map, 0); > dm_put_live_table(md, srcu_idx); > } > diff --git a/drivers/md/dm.c b/drivers/md/dm.c > index 082366d0ad49..c70be6e5ed55 100644 > --- a/drivers/md/dm.c > +++ b/drivers/md/dm.c > @@ -1533,15 +1533,10 @@ static void dm_submit_bio(struct bio *bio) > struct dm_table *map; > > map = dm_get_live_table(md, &srcu_idx); > - if (unlikely(!map)) { > - DMERR_LIMIT("%s: mapping table unavailable, erroring io", > - dm_device_name(md)); > - bio_io_error(bio); > - goto out; > - } > > - /* If suspended, queue this IO for later */ > - if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { > + /* If suspended, or map not yet available, queue this IO for later */ > + if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) || > + unlikely(!map)) { > if (bio->bi_opf & REQ_NOWAIT) > bio_wouldblock_error(bio); > else if (bio->bi_opf & REQ_RAHEAD) > > . >
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 579ab6183d4d..af2cf71519e9 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -499,8 +499,15 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (unlikely(!ti)) { int srcu_idx; - struct dm_table *map = dm_get_live_table(md, &srcu_idx); - + struct dm_table *map; + + map = dm_get_live_table(md, &srcu_idx); + if (!map) { + DMERR_LIMIT("%s: mapping table unavailable, erroring io", + dm_device_name(md)); + dm_put_live_table(md, srcu_idx); + return BLK_STS_IOERR; + } ti = dm_table_find_target(map, 0); dm_put_live_table(md, srcu_idx); }
We found a NULL pointer dereference problem when using dm-mpath target. The problem is if we submit IO between loading and binding the table, we could neither get a valid dm_target nor a valid dm table when submitting request in dm_mq_queue_rq(). BIO based dm target could handle this case in dm_submit_bio(). This patch fix this by checking the mapping table before submitting request. Signed-off-by: Zhang Yi <yi.zhang@huawei.com> --- drivers/md/dm-rq.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-)