
dm: Fix UAF in run_timer_softirq()

Message ID 20221010143905.240306-1-luomeng@huaweicloud.com (mailing list archive)
State New, archived
Series dm: Fix UAF in run_timer_softirq()

Commit Message

Luo Meng Oct. 10, 2022, 2:39 p.m. UTC
From: Luo Meng <luomeng12@huawei.com>

When dm_resume() and dm_destroy() run concurrently, a use-after-free
(UAF) can be triggered.

One of the concurrency UAF can be shown as below:

        use                                  free
do_resume                           |
  __find_device_hash_cell           |
    dm_get                          |
      atomic_inc(&md->holders)      |
                                    | dm_destroy
				    |   __dm_destroy
				    |     if (!dm_suspended_md(md))
                                    |     atomic_read(&md->holders)
				    |     msleep(1)
  dm_resume                         |
    __dm_resume                     |
      dm_table_resume_targets       |
	pool_resume                 |
	  do_waker  #add delay work |
				    |     dm_table_destroy
				    |       pool_dtr
				    |         __pool_dec
                                    |           __pool_destroy
                                    |             destroy_workqueue
                                    |             kfree(pool) # free pool
	time out
__do_softirq
  run_timer_softirq # pool has already been freed

This can be easily reproduced using:
  1. create thin-pool
  2. dmsetup suspend pool
  3. dmsetup resume pool
  4. dmsetup remove_all # Concurrent with 3

The root cause of the UAF is that dm_resume() re-arms the timer after
dm_destroy() has skipped cancelling it because the device was suspended.
When the timer later expires, run_timer_softirq() dereferences the pool,
which has already been freed.

Therefore, move the timer cancellation to after md->holders has
dropped to zero.

Signed-off-by: Luo Meng <luomeng12@huawei.com>
---
 drivers/md/dm.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)
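
For context, the delayed work that ends up firing on freed memory is the
thin-pool's self-re-arming waker. A simplified sketch of that pattern,
assuming the usual dm-thin naming (do_waker, pool->waker, COMMIT_PERIOD)
and with unrelated details elided:

/* Sketch only: each run of the waker re-queues itself, so once resume has
 * kicked it, a timer for this delayed work stays pending until something
 * calls cancel_delayed_work_sync(&pool->waker).
 */
static void do_waker(struct work_struct *ws)
{
	struct pool *pool = container_of(to_delayed_work(ws), struct pool, waker);

	wake_worker(pool);
	queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
}

static void pool_resume(struct dm_target *ti)
{
	struct pool_c *pt = ti->private;
	struct pool *pool = pt->pool;

	/* ... */
	do_waker(&pool->waker.work);	/* arms the self-re-arming timer */
}

Because __dm_destroy() skips the presuspend/postsuspend path when the device
is already suspended, a concurrent resume can re-arm this timer and nothing
cancels it before kfree(pool).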

Comments

Mike Snitzer Oct. 17, 2022, 7:38 p.m. UTC | #1
On Mon, Oct 10 2022 at 10:39P -0400,
Luo Meng <luomeng@huaweicloud.com> wrote:

> From: Luo Meng <luomeng12@huawei.com>
> 
> When dm_resume() and dm_destroy() run concurrently, a use-after-free
> (UAF) can be triggered.
> 
> One of the concurrency UAF can be shown as below:
> 
>         use                                  free
> do_resume                           |
>   __find_device_hash_cell           |
>     dm_get                          |
>       atomic_inc(&md->holders)      |
>                                     | dm_destroy
> 				    |   __dm_destroy
> 				    |     if (!dm_suspended_md(md))
>                                     |     atomic_read(&md->holders)
> 				    |     msleep(1)
>   dm_resume                         |
>     __dm_resume                     |
>       dm_table_resume_targets       |
> 	pool_resume                 |
> 	  do_waker  #add delay work |
> 				    |     dm_table_destroy
> 				    |       pool_dtr
> 				    |         __pool_dec
>                                     |           __pool_destroy
>                                     |             destroy_workqueue
>                                     |             kfree(pool) # free pool
> 	time out
> __do_softirq
>   run_timer_softirq # pool has already been freed
> 
> This can be easily reproduced using:
>   1. create thin-pool
>   2. dmsetup suspend pool
>   3. dmsetup resume pool
>   4. dmsetup remove_all # Concurrent with 3
> 
> The root cause of the UAF is that dm_resume() re-arms the timer after
> dm_destroy() has skipped cancelling it because the device was suspended.
> When the timer later expires, run_timer_softirq() dereferences the pool,
> which has already been freed.
> 
> Therefore, move the timer cancellation to after md->holders has
> dropped to zero.
> 
> Signed-off-by: Luo Meng <luomeng12@huawei.com>
> ---
>  drivers/md/dm.c | 26 +++++++++++++-------------
>  1 file changed, 13 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index 60549b65c799..379525313628 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -2420,6 +2420,19 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
>  
>  	blk_mark_disk_dead(md->disk);
>  
> +	/*
> +	 * Rare, but there may be I/O requests still going to complete,
> +	 * for example.  Wait for all references to disappear.
> +	 * No one should increment the reference count of the mapped_device,
> +	 * after the mapped_device state becomes DMF_FREEING.
> +	 */
> +	if (wait)
> +		while (atomic_read(&md->holders))
> +			msleep(1);
> +	else if (atomic_read(&md->holders))
> +		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
> +		       dm_device_name(md), atomic_read(&md->holders));
> +
>  	/*
>  	 * Take suspend_lock so that presuspend and postsuspend methods
>  	 * do not race with internal suspend.
> @@ -2436,19 +2449,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
>  	dm_put_live_table(md, srcu_idx);
>  	mutex_unlock(&md->suspend_lock);
>  
> -	/*
> -	 * Rare, but there may be I/O requests still going to complete,
> -	 * for example.  Wait for all references to disappear.
> -	 * No one should increment the reference count of the mapped_device,
> -	 * after the mapped_device state becomes DMF_FREEING.
> -	 */
> -	if (wait)
> -		while (atomic_read(&md->holders))
> -			msleep(1);
> -	else if (atomic_read(&md->holders))
> -		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
> -		       dm_device_name(md), atomic_read(&md->holders));
> -
>  	dm_table_destroy(__unbind(md));
>  	free_dev(md);
>  }
> -- 
> 2.31.1
> 

Thanks for the report but your fix seems wrong.  A thin-pool specific
fix seems much more appropriate.  Does this fix the issue?

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index e76c96c760a9..dc271c107fb5 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2889,6 +2889,8 @@ static void __pool_destroy(struct pool *pool)
 	dm_bio_prison_destroy(pool->prison);
 	dm_kcopyd_client_destroy(pool->copier);
 
+	cancel_delayed_work_sync(&pool->waker);
+	cancel_delayed_work_sync(&pool->no_space_timeout);
 	if (pool->wq)
 		destroy_workqueue(pool->wq);
 
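
One detail worth spelling out about the ordering above: do_waker() re-queues
itself onto pool->wq, so the delayed works need to be cancelled with
cancel_delayed_work_sync(), which also waits for a running instance to
finish, before destroy_workqueue() runs. The resulting teardown tail would
look roughly like this:

	/* sketch of __pool_destroy() with the change above applied */
	cancel_delayed_work_sync(&pool->waker);		/* no further re-arming */
	cancel_delayed_work_sync(&pool->no_space_timeout);
	if (pool->wq)
		destroy_workqueue(pool->wq);		/* nothing can queue to it now */

After that point no pending timer remains for the pool, so the later
kfree(pool) can no longer race with run_timer_softirq().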

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Luo Meng Oct. 18, 2022, 8:17 a.m. UTC | #2
On 2022/10/18 3:38, Mike Snitzer wrote:
> On Mon, Oct 10 2022 at 10:39P -0400,
> Luo Meng <luomeng@huaweicloud.com> wrote:
> 
>> From: Luo Meng <luomeng12@huawei.com>
>>
>> When dm_resume() and dm_destroy() run concurrently, a use-after-free
>> (UAF) can be triggered.
>>
>> One of the concurrency UAF can be shown as below:
>>
>>          use                                  free
>> do_resume                           |
>>    __find_device_hash_cell           |
>>      dm_get                          |
>>        atomic_inc(&md->holders)      |
>>                                      | dm_destroy
>> 				    |   __dm_destroy
>> 				    |     if (!dm_suspended_md(md))
>>                                      |     atomic_read(&md->holders)
>> 				    |     msleep(1)
>>    dm_resume                         |
>>      __dm_resume                     |
>>        dm_table_resume_targets       |
>> 	pool_resume                 |
>> 	  do_waker  #add delay work |
>> 				    |     dm_table_destroy
>> 				    |       pool_dtr
>> 				    |         __pool_dec
>>                                      |           __pool_destroy
>>                                      |             destroy_workqueue
>>                                      |             kfree(pool) # free pool
>> 	time out
>> __do_softirq
>>    run_timer_softirq # pool has already been freed
>>
>> This can be easily reproduced using:
>>    1. create thin-pool
>>    2. dmsetup suspend pool
>>    3. dmsetup resume pool
>>    4. dmsetup remove_all # Concurrent with 3
>>
>> The root cause of the UAF is that dm_resume() re-arms the timer after
>> dm_destroy() has skipped cancelling it because the device was suspended.
>> When the timer later expires, run_timer_softirq() dereferences the pool,
>> which has already been freed.
>>
>> Therefore, move the timer cancellation to after md->holders has
>> dropped to zero.
>>
>> Signed-off-by: Luo Meng <luomeng12@huawei.com>
>> ---
>>   drivers/md/dm.c | 26 +++++++++++++-------------
>>   1 file changed, 13 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
>> index 60549b65c799..379525313628 100644
>> --- a/drivers/md/dm.c
>> +++ b/drivers/md/dm.c
>> @@ -2420,6 +2420,19 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
>>   
>>   	blk_mark_disk_dead(md->disk);
>>   
>> +	/*
>> +	 * Rare, but there may be I/O requests still going to complete,
>> +	 * for example.  Wait for all references to disappear.
>> +	 * No one should increment the reference count of the mapped_device,
>> +	 * after the mapped_device state becomes DMF_FREEING.
>> +	 */
>> +	if (wait)
>> +		while (atomic_read(&md->holders))
>> +			msleep(1);
>> +	else if (atomic_read(&md->holders))
>> +		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
>> +		       dm_device_name(md), atomic_read(&md->holders));
>> +
>>   	/*
>>   	 * Take suspend_lock so that presuspend and postsuspend methods
>>   	 * do not race with internal suspend.
>> @@ -2436,19 +2449,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
>>   	dm_put_live_table(md, srcu_idx);
>>   	mutex_unlock(&md->suspend_lock);
>>   
>> -	/*
>> -	 * Rare, but there may be I/O requests still going to complete,
>> -	 * for example.  Wait for all references to disappear.
>> -	 * No one should increment the reference count of the mapped_device,
>> -	 * after the mapped_device state becomes DMF_FREEING.
>> -	 */
>> -	if (wait)
>> -		while (atomic_read(&md->holders))
>> -			msleep(1);
>> -	else if (atomic_read(&md->holders))
>> -		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
>> -		       dm_device_name(md), atomic_read(&md->holders));
>> -
>>   	dm_table_destroy(__unbind(md));
>>   	free_dev(md);
>>   }
>> -- 
>> 2.31.1
>>
> 
> Thanks for the report but your fix seems wrong.  A thin-pool specific
> fix seems much more appropriate.  Does this fix the issue?
> 
> diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
> index e76c96c760a9..dc271c107fb5 100644
> --- a/drivers/md/dm-thin.c
> +++ b/drivers/md/dm-thin.c
> @@ -2889,6 +2889,8 @@ static void __pool_destroy(struct pool *pool)
>   	dm_bio_prison_destroy(pool->prison);
>   	dm_kcopyd_client_destroy(pool->copier);
>   
> +	cancel_delayed_work_sync(&pool->waker);
> +	cancel_delayed_work_sync(&pool->no_space_timeout);
>   	if (pool->wq)
>   		destroy_workqueue(pool->wq);
>   
> 
Thanks for your reply.

However, this issue exists not only in thin-pool; cache_target has the
same problem. A generic fix may be more appropriate.
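
The dm-cache side is the same self-re-arming waker pattern; a rough sketch,
assuming the usual dm-cache-target naming (struct cache, cache->waker,
COMMIT_PERIOD) and with the worker body trimmed:

static void do_waker(struct work_struct *ws)
{
	struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);

	/* tick the policy, kick the workers, ... (details elided) */
	queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);	/* re-arm */
}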

After adding cancel_delayed_work_sync() in __pool_destroy(), it becomes
possible for cancel_delayed_work_sync(&pool->waker) to be called twice
when dm is not suspended.

Luo Meng

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Mike Snitzer Oct. 19, 2022, 7:40 p.m. UTC | #3
On Tue, Oct 18 2022 at  4:17P -0400,
Luo Meng <luomeng@huaweicloud.com> wrote:

> 
> 
> > On 2022/10/18 3:38, Mike Snitzer wrote:
> > On Mon, Oct 10 2022 at 10:39P -0400,
> > Luo Meng <luomeng@huaweicloud.com> wrote:
> > 
> > > From: Luo Meng <luomeng12@huawei.com>
> > > 
> > > When dm_resume() and dm_destroy() run concurrently, a use-after-free
> > > (UAF) can be triggered.
> > > 
> > > One of the concurrency UAF can be shown as below:
> > > 
> > >          use                                  free
> > > do_resume                           |
> > >    __find_device_hash_cell           |
> > >      dm_get                          |
> > >        atomic_inc(&md->holders)      |
> > >                                      | dm_destroy
> > > 				    |   __dm_destroy
> > > 				    |     if (!dm_suspended_md(md))
> > >                                      |     atomic_read(&md->holders)
> > > 				    |     msleep(1)
> > >    dm_resume                         |
> > >      __dm_resume                     |
> > >        dm_table_resume_targets       |
> > > 	pool_resume                 |
> > > 	  do_waker  #add delay work |
> > > 				    |     dm_table_destroy
> > > 				    |       pool_dtr
> > > 				    |         __pool_dec
> > >                                      |           __pool_destroy
> > >                                      |             destroy_workqueue
> > >                                      |             kfree(pool) # free pool
> > > 	time out
> > > __do_softirq
> > >    run_timer_softirq # pool has already been freed
> > > 
> > > This can be easily reproduced using:
> > >    1. create thin-pool
> > >    2. dmsetup suspend pool
> > >    3. dmsetup resume pool
> > >    4. dmsetup remove_all # Concurrent with 3
> > > 
> > > The root cause of the UAF is that dm_resume() re-arms the timer after
> > > dm_destroy() has skipped cancelling it because the device was suspended.
> > > When the timer later expires, run_timer_softirq() dereferences the pool,
> > > which has already been freed.
> > > 
> > > Therefore, move the timer cancellation to after md->holders has
> > > dropped to zero.
> > > 
> > > Signed-off-by: Luo Meng <luomeng12@huawei.com>
> > > ---
> > >   drivers/md/dm.c | 26 +++++++++++++-------------
> > >   1 file changed, 13 insertions(+), 13 deletions(-)
> > > 
> > > diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> > > index 60549b65c799..379525313628 100644
> > > --- a/drivers/md/dm.c
> > > +++ b/drivers/md/dm.c
> > > @@ -2420,6 +2420,19 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
> > >   	blk_mark_disk_dead(md->disk);
> > > +	/*
> > > +	 * Rare, but there may be I/O requests still going to complete,
> > > +	 * for example.  Wait for all references to disappear.
> > > +	 * No one should increment the reference count of the mapped_device,
> > > +	 * after the mapped_device state becomes DMF_FREEING.
> > > +	 */
> > > +	if (wait)
> > > +		while (atomic_read(&md->holders))
> > > +			msleep(1);
> > > +	else if (atomic_read(&md->holders))
> > > +		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
> > > +		       dm_device_name(md), atomic_read(&md->holders));
> > > +
> > >   	/*
> > >   	 * Take suspend_lock so that presuspend and postsuspend methods
> > >   	 * do not race with internal suspend.
> > > @@ -2436,19 +2449,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
> > >   	dm_put_live_table(md, srcu_idx);
> > >   	mutex_unlock(&md->suspend_lock);
> > > -	/*
> > > -	 * Rare, but there may be I/O requests still going to complete,
> > > -	 * for example.  Wait for all references to disappear.
> > > -	 * No one should increment the reference count of the mapped_device,
> > > -	 * after the mapped_device state becomes DMF_FREEING.
> > > -	 */
> > > -	if (wait)
> > > -		while (atomic_read(&md->holders))
> > > -			msleep(1);
> > > -	else if (atomic_read(&md->holders))
> > > -		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
> > > -		       dm_device_name(md), atomic_read(&md->holders));
> > > -
> > >   	dm_table_destroy(__unbind(md));
> > >   	free_dev(md);
> > >   }
> > > -- 
> > > 2.31.1
> > > 
> > 
> > Thanks for the report but your fix seems wrong.  A thin-pool specific
> > fix seems much more appropriate.  Does this fix the issue?
> > 
> > diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
> > index e76c96c760a9..dc271c107fb5 100644
> > --- a/drivers/md/dm-thin.c
> > +++ b/drivers/md/dm-thin.c
> > @@ -2889,6 +2889,8 @@ static void __pool_destroy(struct pool *pool)
> >   	dm_bio_prison_destroy(pool->prison);
> >   	dm_kcopyd_client_destroy(pool->copier);
> > +	cancel_delayed_work_sync(&pool->waker);
> > +	cancel_delayed_work_sync(&pool->no_space_timeout);
> >   	if (pool->wq)
> >   		destroy_workqueue(pool->wq);
> > 
> Thanks for your reply.
> 
> However, this issue exists not only in thin-pool; cache_target has the
> same problem. A generic fix may be more appropriate.

No, you've pointed out a problem with dm-thinp not properly tearing
down its resources. Same for dm-cache. So a generic fix doesn't make
sense.  And your particular generic fix merely papers over the
resource leak, while also causing DM core to no longer properly wait
for outstanding IO to complete.

> After adding cancel_delayed_work_sync() in __pool_destroy(), this will make
> it possible to call cancel_delayed_work_sync(&pool->waker) twice when dm is
> not suspend.

That's not a problem, that's the cost of properly accounting for
resources where we must.
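
For reference, the double cancel is harmless because
cancel_delayed_work_sync() on a delayed work that is not pending simply
returns false and does nothing. With the change above applied, the two call
sites look roughly like this (simplified from drivers/md/dm-thin.c):

static void pool_postsuspend(struct dm_target *ti)
{
	struct pool_c *pt = ti->private;
	struct pool *pool = pt->pool;

	cancel_delayed_work_sync(&pool->waker);		/* cancel #1: suspend path */
	cancel_delayed_work_sync(&pool->no_space_timeout);
	flush_workqueue(pool->wq);
	/* ... */
}

static void __pool_destroy(struct pool *pool)
{
	/* ... */
	cancel_delayed_work_sync(&pool->waker);		/* cancel #2: destroy path */
	cancel_delayed_work_sync(&pool->no_space_timeout);
	if (pool->wq)
		destroy_workqueue(pool->wq);
}

If the device was suspended before removal, cancel #1 has already run and
cancel #2 is a no-op; if it was not suspended, cancel #2 is the one that
actually stops the works.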

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Luo Meng Oct. 25, 2022, 2 p.m. UTC | #4
On 2022/10/20 3:40, Mike Snitzer wrote:
> On Tue, Oct 18 2022 at  4:17P -0400,
> Luo Meng <luomeng@huaweicloud.com> wrote:
> 
>>
>>
>> On 2022/10/18 3:38, Mike Snitzer wrote:
>>> On Mon, Oct 10 2022 at 10:39P -0400,
>>> Luo Meng <luomeng@huaweicloud.com> wrote:
>>>
>>>> From: Luo Meng <luomeng12@huawei.com>
>>>>
>>>> When dm_resume() and dm_destroy() run concurrently, a use-after-free
>>>> (UAF) can be triggered.
>>>>
>>>> One of the concurrency UAF can be shown as below:
>>>>
>>>>           use                                  free
>>>> do_resume                           |
>>>>     __find_device_hash_cell           |
>>>>       dm_get                          |
>>>>         atomic_inc(&md->holders)      |
>>>>                                       | dm_destroy
>>>> 				    |   __dm_destroy
>>>> 				    |     if (!dm_suspended_md(md))
>>>>                                       |     atomic_read(&md->holders)
>>>> 				    |     msleep(1)
>>>>     dm_resume                         |
>>>>       __dm_resume                     |
>>>>         dm_table_resume_targets       |
>>>> 	pool_resume                 |
>>>> 	  do_waker  #add delay work |
>>>> 				    |     dm_table_destroy
>>>> 				    |       pool_dtr
>>>> 				    |         __pool_dec
>>>>                                       |           __pool_destroy
>>>>                                       |             destroy_workqueue
>>>>                                       |             kfree(pool) # free pool
>>>> 	time out
>>>> __do_softirq
>>>>     run_timer_softirq # pool has already been freed
>>>>
>>>> This can be easily reproduced using:
>>>>     1. create thin-pool
>>>>     2. dmsetup suspend pool
>>>>     3. dmsetup resume pool
>>>>     4. dmsetup remove_all # Concurrent with 3
>>>>
>>>> The root cause of the UAF is that dm_resume() re-arms the timer after
>>>> dm_destroy() has skipped cancelling it because the device was suspended.
>>>> When the timer later expires, run_timer_softirq() dereferences the pool,
>>>> which has already been freed.
>>>>
>>>> Therefore, move the timer cancellation to after md->holders has
>>>> dropped to zero.
>>>>
>>>> Signed-off-by: Luo Meng <luomeng12@huawei.com>
>>>> ---
>>>>    drivers/md/dm.c | 26 +++++++++++++-------------
>>>>    1 file changed, 13 insertions(+), 13 deletions(-)
>>>>
>>>> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
>>>> index 60549b65c799..379525313628 100644
>>>> --- a/drivers/md/dm.c
>>>> +++ b/drivers/md/dm.c
>>>> @@ -2420,6 +2420,19 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
>>>>    	blk_mark_disk_dead(md->disk);
>>>> +	/*
>>>> +	 * Rare, but there may be I/O requests still going to complete,
>>>> +	 * for example.  Wait for all references to disappear.
>>>> +	 * No one should increment the reference count of the mapped_device,
>>>> +	 * after the mapped_device state becomes DMF_FREEING.
>>>> +	 */
>>>> +	if (wait)
>>>> +		while (atomic_read(&md->holders))
>>>> +			msleep(1);
>>>> +	else if (atomic_read(&md->holders))
>>>> +		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
>>>> +		       dm_device_name(md), atomic_read(&md->holders));
>>>> +
>>>>    	/*
>>>>    	 * Take suspend_lock so that presuspend and postsuspend methods
>>>>    	 * do not race with internal suspend.
>>>> @@ -2436,19 +2449,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
>>>>    	dm_put_live_table(md, srcu_idx);
>>>>    	mutex_unlock(&md->suspend_lock);
>>>> -	/*
>>>> -	 * Rare, but there may be I/O requests still going to complete,
>>>> -	 * for example.  Wait for all references to disappear.
>>>> -	 * No one should increment the reference count of the mapped_device,
>>>> -	 * after the mapped_device state becomes DMF_FREEING.
>>>> -	 */
>>>> -	if (wait)
>>>> -		while (atomic_read(&md->holders))
>>>> -			msleep(1);
>>>> -	else if (atomic_read(&md->holders))
>>>> -		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
>>>> -		       dm_device_name(md), atomic_read(&md->holders));
>>>> -
>>>>    	dm_table_destroy(__unbind(md));
>>>>    	free_dev(md);
>>>>    }
>>>> -- 
>>>> 2.31.1
>>>>
>>>
>>> Thanks for the report but your fix seems wrong.  A thin-pool specific
>>> fix seems much more appropriate.  Does this fix the issue?
>>>
>>> diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
>>> index e76c96c760a9..dc271c107fb5 100644
>>> --- a/drivers/md/dm-thin.c
>>> +++ b/drivers/md/dm-thin.c
>>> @@ -2889,6 +2889,8 @@ static void __pool_destroy(struct pool *pool)
>>>    	dm_bio_prison_destroy(pool->prison);
>>>    	dm_kcopyd_client_destroy(pool->copier);
>>> +	cancel_delayed_work_sync(&pool->waker);
>>> +	cancel_delayed_work_sync(&pool->no_space_timeout);
>>>    	if (pool->wq)
>>>    		destroy_workqueue(pool->wq);
>>>
>> Thanks for your reply.
>>
>> However, this issue exists not only in thin-pool; cache_target has the
>> same problem. A generic fix may be more appropriate.
> 
> No, you've pointed out a problem with dm-thinp not properly tearing
> down its resources. Same for dm-cache. So a generic fix doesn't make
> sense.  And your particular generic fix merely papers over the
> resource leak, while also causing DM core to no longer properly wait
> for outstanding IO to complete.
> 
>> After adding cancel_delayed_work_sync() in __pool_destroy(), it becomes
>> possible for cancel_delayed_work_sync(&pool->waker) to be called twice
>> when dm is not suspended.
> 
> That's not a problem, that's the cost of properly accounting for
> resources where we must.
> 
Thanks.
I will send v2 soon.

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel

Patch

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 60549b65c799..379525313628 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2420,6 +2420,19 @@  static void __dm_destroy(struct mapped_device *md, bool wait)
 
 	blk_mark_disk_dead(md->disk);
 
+	/*
+	 * Rare, but there may be I/O requests still going to complete,
+	 * for example.  Wait for all references to disappear.
+	 * No one should increment the reference count of the mapped_device,
+	 * after the mapped_device state becomes DMF_FREEING.
+	 */
+	if (wait)
+		while (atomic_read(&md->holders))
+			msleep(1);
+	else if (atomic_read(&md->holders))
+		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
+		       dm_device_name(md), atomic_read(&md->holders));
+
 	/*
 	 * Take suspend_lock so that presuspend and postsuspend methods
 	 * do not race with internal suspend.
@@ -2436,19 +2449,6 @@  static void __dm_destroy(struct mapped_device *md, bool wait)
 	dm_put_live_table(md, srcu_idx);
 	mutex_unlock(&md->suspend_lock);
 
-	/*
-	 * Rare, but there may be I/O requests still going to complete,
-	 * for example.  Wait for all references to disappear.
-	 * No one should increment the reference count of the mapped_device,
-	 * after the mapped_device state becomes DMF_FREEING.
-	 */
-	if (wait)
-		while (atomic_read(&md->holders))
-			msleep(1);
-	else if (atomic_read(&md->holders))
-		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
-		       dm_device_name(md), atomic_read(&md->holders));
-
 	dm_table_destroy(__unbind(md));
 	free_dev(md);
 }