diff mbox series

block: loop: fix deadlock between open and remove

Message ID 20210605140950.5800-1-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series block: loop: fix deadlock between open and remove | expand

Commit Message

Christoph Hellwig June 5, 2021, 2:09 p.m. UTC
Commit c76f48eb5c08 ("block: take bd_mutex around delete_partitions in
del_gendisk") adds disk->part0->bd_mutex in del_gendisk(), this way
causes the following AB/BA deadlock between removing loop and opening
loop:

 1) loop_control_ioctl(LOOP_CTL_REMOVE)
     -> mutex_lock(&loop_ctl_mutex)
     -> del_gendisk
         -> mutex_lock(&disk->part0->bd_mutex)

 2) blkdev_get_by_dev
     -> mutex_lock(&disk->part0->bd_mutex)
     -> lo_open
         -> mutex_lock(&loop_ctl_mutex)

Add a new Lo_deleting state to remove the need for clearing
->private_data and thus holding loop_ctl_mutex in the ioctl
LOOP_CTL_REMOVE path.

Based on an analysis and earlier patch from
Ming Lei <ming.lei@redhat.com>.

Reported-by: Colin Ian King <colin.king@canonical.com>
Fixes: c76f48eb5c08 ("block: take bd_mutex around delete_partitions in del_gendisk")
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/block/loop.c | 25 +++++++------------------
 drivers/block/loop.h |  1 +
 2 files changed, 8 insertions(+), 18 deletions(-)

Comments

Colin King June 5, 2021, 3 p.m. UTC | #1
On 05/06/2021 15:09, Christoph Hellwig wrote:
> Commit c76f48eb5c08 ("block: take bd_mutex around delete_partitions in
> del_gendisk") adds disk->part0->bd_mutex in del_gendisk(), this way
> causes the following AB/BA deadlock between removing loop and opening
> loop:
> 
>  1) loop_control_ioctl(LOOP_CTL_REMOVE)
>      -> mutex_lock(&loop_ctl_mutex)
>      -> del_gendisk
>          -> mutex_lock(&disk->part0->bd_mutex)
> 
>  2) blkdev_get_by_dev
>      -> mutex_lock(&disk->part0->bd_mutex)
>      -> lo_open
>          -> mutex_lock(&loop_ctl_mutex)
> 
> Add a new Lo_deleting state to remove the need for clearing
> ->private_data and thus holding loop_ctl_mutex in the ioctl
> LOOP_CTL_REMOVE path.
> 
> Based on an analysis and earlier patch from
> Ming Lei <ming.lei@redhat.com>.
> 
> Reported-by: Colin Ian King <colin.king@canonical.com>
> Fixes: c76f48eb5c08 ("block: take bd_mutex around delete_partitions in del_gendisk")
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/block/loop.c | 25 +++++++------------------
>  drivers/block/loop.h |  1 +
>  2 files changed, 8 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index d58d68f3c7cd..76e12f3482a9 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -1879,29 +1879,18 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
>  
>  static int lo_open(struct block_device *bdev, fmode_t mode)
>  {
> -	struct loop_device *lo;
> +	struct loop_device *lo = bdev->bd_disk->private_data;
>  	int err;
>  
> -	/*
> -	 * take loop_ctl_mutex to protect lo pointer from race with
> -	 * loop_control_ioctl(LOOP_CTL_REMOVE), however, to reduce contention
> -	 * release it prior to updating lo->lo_refcnt.
> -	 */
> -	err = mutex_lock_killable(&loop_ctl_mutex);
> -	if (err)
> -		return err;
> -	lo = bdev->bd_disk->private_data;
> -	if (!lo) {
> -		mutex_unlock(&loop_ctl_mutex);
> -		return -ENXIO;
> -	}
>  	err = mutex_lock_killable(&lo->lo_mutex);
> -	mutex_unlock(&loop_ctl_mutex);
>  	if (err)
>  		return err;
> -	atomic_inc(&lo->lo_refcnt);
> +	if (lo->lo_state == Lo_deleting)
> +		err = -ENXIO;
> +	else
> +		atomic_inc(&lo->lo_refcnt);
>  	mutex_unlock(&lo->lo_mutex);
> -	return 0;
> +	return err;
>  }
>  
>  static void lo_release(struct gendisk *disk, fmode_t mode)
> @@ -2285,7 +2274,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
>  			mutex_unlock(&lo->lo_mutex);
>  			break;
>  		}
> -		lo->lo_disk->private_data = NULL;
> +		lo->lo_state = Lo_deleting;
>  		mutex_unlock(&lo->lo_mutex);
>  		idr_remove(&loop_index_idr, lo->lo_number);
>  		loop_remove(lo);
> diff --git a/drivers/block/loop.h b/drivers/block/loop.h
> index a3c04f310672..5beb959b94d3 100644
> --- a/drivers/block/loop.h
> +++ b/drivers/block/loop.h
> @@ -22,6 +22,7 @@ enum {
>  	Lo_unbound,
>  	Lo_bound,
>  	Lo_rundown,
> +	Lo_deleting,
>  };
>  
>  struct loop_func_table;
> 

Tested and no longer hangs with the stress-ng loop test. Thank you for
the speedy turnaround on the fix.

Tested-by: Colin Ian King <colin.king@canonical.com>

Colin
Ming Lei June 7, 2021, 12:11 a.m. UTC | #2
On Sat, Jun 05, 2021 at 05:09:50PM +0300, Christoph Hellwig wrote:
> Commit c76f48eb5c08 ("block: take bd_mutex around delete_partitions in
> del_gendisk") adds disk->part0->bd_mutex in del_gendisk(), this way
> causes the following AB/BA deadlock between removing loop and opening
> loop:
> 
>  1) loop_control_ioctl(LOOP_CTL_REMOVE)
>      -> mutex_lock(&loop_ctl_mutex)
>      -> del_gendisk
>          -> mutex_lock(&disk->part0->bd_mutex)
> 
>  2) blkdev_get_by_dev
>      -> mutex_lock(&disk->part0->bd_mutex)
>      -> lo_open
>          -> mutex_lock(&loop_ctl_mutex)
> 
> Add a new Lo_deleting state to remove the need for clearing
> ->private_data and thus holding loop_ctl_mutex in the ioctl
> LOOP_CTL_REMOVE path.
> 
> Based on an analysis and earlier patch from
> Ming Lei <ming.lei@redhat.com>.
> 
> Reported-by: Colin Ian King <colin.king@canonical.com>
> Fixes: c76f48eb5c08 ("block: take bd_mutex around delete_partitions in del_gendisk")
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Jens Axboe June 11, 2021, 5:51 p.m. UTC | #3
On 6/5/21 8:09 AM, Christoph Hellwig wrote:
> Commit c76f48eb5c08 ("block: take bd_mutex around delete_partitions in
> del_gendisk") adds disk->part0->bd_mutex in del_gendisk(), this way
> causes the following AB/BA deadlock between removing loop and opening
> loop:
> 
>  1) loop_control_ioctl(LOOP_CTL_REMOVE)
>      -> mutex_lock(&loop_ctl_mutex)
>      -> del_gendisk
>          -> mutex_lock(&disk->part0->bd_mutex)
> 
>  2) blkdev_get_by_dev
>      -> mutex_lock(&disk->part0->bd_mutex)
>      -> lo_open
>          -> mutex_lock(&loop_ctl_mutex)
> 
> Add a new Lo_deleting state to remove the need for clearing
> ->private_data and thus holding loop_ctl_mutex in the ioctl
> LOOP_CTL_REMOVE path.
> 
> Based on an analysis and earlier patch from
> Ming Lei <ming.lei@redhat.com>.

Applied, thanks.
diff mbox series

Patch

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d58d68f3c7cd..76e12f3482a9 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1879,29 +1879,18 @@  static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
 
 static int lo_open(struct block_device *bdev, fmode_t mode)
 {
-	struct loop_device *lo;
+	struct loop_device *lo = bdev->bd_disk->private_data;
 	int err;
 
-	/*
-	 * take loop_ctl_mutex to protect lo pointer from race with
-	 * loop_control_ioctl(LOOP_CTL_REMOVE), however, to reduce contention
-	 * release it prior to updating lo->lo_refcnt.
-	 */
-	err = mutex_lock_killable(&loop_ctl_mutex);
-	if (err)
-		return err;
-	lo = bdev->bd_disk->private_data;
-	if (!lo) {
-		mutex_unlock(&loop_ctl_mutex);
-		return -ENXIO;
-	}
 	err = mutex_lock_killable(&lo->lo_mutex);
-	mutex_unlock(&loop_ctl_mutex);
 	if (err)
 		return err;
-	atomic_inc(&lo->lo_refcnt);
+	if (lo->lo_state == Lo_deleting)
+		err = -ENXIO;
+	else
+		atomic_inc(&lo->lo_refcnt);
 	mutex_unlock(&lo->lo_mutex);
-	return 0;
+	return err;
 }
 
 static void lo_release(struct gendisk *disk, fmode_t mode)
@@ -2285,7 +2274,7 @@  static long loop_control_ioctl(struct file *file, unsigned int cmd,
 			mutex_unlock(&lo->lo_mutex);
 			break;
 		}
-		lo->lo_disk->private_data = NULL;
+		lo->lo_state = Lo_deleting;
 		mutex_unlock(&lo->lo_mutex);
 		idr_remove(&loop_index_idr, lo->lo_number);
 		loop_remove(lo);
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index a3c04f310672..5beb959b94d3 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -22,6 +22,7 @@  enum {
 	Lo_unbound,
 	Lo_bound,
 	Lo_rundown,
+	Lo_deleting,
 };
 
 struct loop_func_table;