diff mbox series

dm: make sure dm_table is binded before queue request

Message ID 20220209093751.2986716-1-yi.zhang@huawei.com (mailing list archive)
State New, archived
Headers show
Series dm: make sure dm_table is binded before queue request | expand

Commit Message

Zhang Yi Feb. 9, 2022, 9:37 a.m. UTC
We found a NULL pointer dereference problem when using dm-mpath target.
The problem is if we submit IO between loading and binding the table,
we could neither get a valid dm_target nor a valid dm table when
submitting request in dm_mq_queue_rq(). BIO based dm target could
handle this case in dm_submit_bio(). This patch fix this by checking
the mapping table before submitting request.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
 drivers/md/dm-rq.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

Comments

Zhang Yi Feb. 14, 2022, 8:29 a.m. UTC | #1
Friendly ping.

On 2022/2/9 17:37, Zhang Yi wrote:
> We found a NULL pointer dereference problem when using dm-mpath target.
> The problem is if we submit IO between loading and binding the table,
> we could neither get a valid dm_target nor a valid dm table when
> submitting request in dm_mq_queue_rq(). BIO based dm target could
> handle this case in dm_submit_bio(). This patch fix this by checking
> the mapping table before submitting request.
> 
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
> ---
>  drivers/md/dm-rq.c | 11 +++++++++--
>  1 file changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
> index 579ab6183d4d..af2cf71519e9 100644
> --- a/drivers/md/dm-rq.c
> +++ b/drivers/md/dm-rq.c
> @@ -499,8 +499,15 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
>  
>  	if (unlikely(!ti)) {
>  		int srcu_idx;
> -		struct dm_table *map = dm_get_live_table(md, &srcu_idx);
> -
> +		struct dm_table *map;
> +
> +		map = dm_get_live_table(md, &srcu_idx);
> +		if (!map) {
> +			DMERR_LIMIT("%s: mapping table unavailable, erroring io",
> +				    dm_device_name(md));
> +			dm_put_live_table(md, srcu_idx);
> +			return BLK_STS_IOERR;
> +		}
>  		ti = dm_table_find_target(map, 0);
>  		dm_put_live_table(md, srcu_idx);
>  	}
>
Mike Snitzer Feb. 22, 2022, 6:27 p.m. UTC | #2
On Wed, Feb 09 2022 at  4:37P -0500,
Zhang Yi <yi.zhang@huawei.com> wrote:

> We found a NULL pointer dereference problem when using dm-mpath target.
> The problem is if we submit IO between loading and binding the table,
> we could neither get a valid dm_target nor a valid dm table when
> submitting request in dm_mq_queue_rq(). BIO based dm target could
> handle this case in dm_submit_bio(). This patch fix this by checking
> the mapping table before submitting request.
> 
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
> ---
>  drivers/md/dm-rq.c | 11 +++++++++--
>  1 file changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
> index 579ab6183d4d..af2cf71519e9 100644
> --- a/drivers/md/dm-rq.c
> +++ b/drivers/md/dm-rq.c
> @@ -499,8 +499,15 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
>  
>  	if (unlikely(!ti)) {
>  		int srcu_idx;
> -		struct dm_table *map = dm_get_live_table(md, &srcu_idx);
> -
> +		struct dm_table *map;
> +
> +		map = dm_get_live_table(md, &srcu_idx);
> +		if (!map) {
> +			DMERR_LIMIT("%s: mapping table unavailable, erroring io",
> +				    dm_device_name(md));
> +			dm_put_live_table(md, srcu_idx);
> +			return BLK_STS_IOERR;
> +		}
>  		ti = dm_table_find_target(map, 0);
>  		dm_put_live_table(md, srcu_idx);
>  	}
> -- 
> 2.31.1
> 

I think both dm_submit_bio() and now dm_mq_queue_rq() should _not_
error the IO.  This is such a narrow race during device setup that it
best to requeue the IO.

I'll queue this for 5.18:

diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 6948d5db9092..3dd040a56318 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -491,8 +491,13 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	if (unlikely(!ti)) {
 		int srcu_idx;
-		struct dm_table *map = dm_get_live_table(md, &srcu_idx);
+		struct dm_table *map;
 
+		map = dm_get_live_table(md, &srcu_idx);
+		if (unlikely(!map)) {
+			dm_put_live_table(md, srcu_idx);
+			return BLK_STS_RESOURCE;
+		}
 		ti = dm_table_find_target(map, 0);
 		dm_put_live_table(md, srcu_idx);
 	}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 082366d0ad49..c70be6e5ed55 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1533,15 +1533,10 @@ static void dm_submit_bio(struct bio *bio)
 	struct dm_table *map;
 
 	map = dm_get_live_table(md, &srcu_idx);
-	if (unlikely(!map)) {
-		DMERR_LIMIT("%s: mapping table unavailable, erroring io",
-			    dm_device_name(md));
-		bio_io_error(bio);
-		goto out;
-	}
 
-	/* If suspended, queue this IO for later */
-	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
+	/* If suspended, or map not yet available, queue this IO for later */
+	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) ||
+	    unlikely(!map)) {
 		if (bio->bi_opf & REQ_NOWAIT)
 			bio_wouldblock_error(bio);
 		else if (bio->bi_opf & REQ_RAHEAD)
Zhang Yi Feb. 23, 2022, 12:34 p.m. UTC | #3
On 2022/2/23 2:27, Mike Snitzer wrote:
> On Wed, Feb 09 2022 at  4:37P -0500,
> Zhang Yi <yi.zhang@huawei.com> wrote:
> 
>> We found a NULL pointer dereference problem when using dm-mpath target.
>> The problem is if we submit IO between loading and binding the table,
>> we could neither get a valid dm_target nor a valid dm table when
>> submitting request in dm_mq_queue_rq(). BIO based dm target could
>> handle this case in dm_submit_bio(). This patch fix this by checking
>> the mapping table before submitting request.
>>
>> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
>> ---
>>  drivers/md/dm-rq.c | 11 +++++++++--
>>  1 file changed, 9 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
>> index 579ab6183d4d..af2cf71519e9 100644
>> --- a/drivers/md/dm-rq.c
>> +++ b/drivers/md/dm-rq.c
>> @@ -499,8 +499,15 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
>>  
>>  	if (unlikely(!ti)) {
>>  		int srcu_idx;
>> -		struct dm_table *map = dm_get_live_table(md, &srcu_idx);
>> -
>> +		struct dm_table *map;
>> +
>> +		map = dm_get_live_table(md, &srcu_idx);
>> +		if (!map) {
>> +			DMERR_LIMIT("%s: mapping table unavailable, erroring io",
>> +				    dm_device_name(md));
>> +			dm_put_live_table(md, srcu_idx);
>> +			return BLK_STS_IOERR;
>> +		}
>>  		ti = dm_table_find_target(map, 0);
>>  		dm_put_live_table(md, srcu_idx);
>>  	}
>> -- 
>> 2.31.1
>>
> 
> I think both dm_submit_bio() and now dm_mq_queue_rq() should _not_
> error the IO.  This is such a narrow race during device setup that it
> best to requeue the IO.
> 

Yes,make sense, thanks for the fix.

Thanks,
Yi.

> I'll queue this for 5.18:
> 
> diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
> index 6948d5db9092..3dd040a56318 100644
> --- a/drivers/md/dm-rq.c
> +++ b/drivers/md/dm-rq.c
> @@ -491,8 +491,13 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
>  
>  	if (unlikely(!ti)) {
>  		int srcu_idx;
> -		struct dm_table *map = dm_get_live_table(md, &srcu_idx);
> +		struct dm_table *map;
>  
> +		map = dm_get_live_table(md, &srcu_idx);
> +		if (unlikely(!map)) {
> +			dm_put_live_table(md, srcu_idx);
> +			return BLK_STS_RESOURCE;
> +		}
>  		ti = dm_table_find_target(map, 0);
>  		dm_put_live_table(md, srcu_idx);
>  	}
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index 082366d0ad49..c70be6e5ed55 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -1533,15 +1533,10 @@ static void dm_submit_bio(struct bio *bio)
>  	struct dm_table *map;
>  
>  	map = dm_get_live_table(md, &srcu_idx);
> -	if (unlikely(!map)) {
> -		DMERR_LIMIT("%s: mapping table unavailable, erroring io",
> -			    dm_device_name(md));
> -		bio_io_error(bio);
> -		goto out;
> -	}
>  
> -	/* If suspended, queue this IO for later */
> -	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
> +	/* If suspended, or map not yet available, queue this IO for later */
> +	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) ||
> +	    unlikely(!map)) {
>  		if (bio->bi_opf & REQ_NOWAIT)
>  			bio_wouldblock_error(bio);
>  		else if (bio->bi_opf & REQ_RAHEAD)
> 
> .
>
diff mbox series

Patch

diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 579ab6183d4d..af2cf71519e9 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -499,8 +499,15 @@  static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	if (unlikely(!ti)) {
 		int srcu_idx;
-		struct dm_table *map = dm_get_live_table(md, &srcu_idx);
-
+		struct dm_table *map;
+
+		map = dm_get_live_table(md, &srcu_idx);
+		if (!map) {
+			DMERR_LIMIT("%s: mapping table unavailable, erroring io",
+				    dm_device_name(md));
+			dm_put_live_table(md, srcu_idx);
+			return BLK_STS_IOERR;
+		}
 		ti = dm_table_find_target(map, 0);
 		dm_put_live_table(md, srcu_idx);
 	}