diff mbox series

[v2] btrfs: support device name lookup in forget

Message ID 40d13cb5a18a2fcb5e667ee0bc61f2b7a12c93e4.1709233065.git.boris@bur.io (mailing list archive)
State New, archived
Headers show
Series [v2] btrfs: support device name lookup in forget | expand

Commit Message

Boris Burkov Feb. 29, 2024, 6:59 p.m. UTC
btrfs forget assumes the device still exists in the block layer and
that we can lookup its dev_t. For handling some tricky cases with
changing devt across device recreation, we need udev rules that run on
device removal. However, at that point, there is no node to lookup, so
we need to rely on the cached name. Refactor the forget code to handle
this case, while still preferring to use dev_t when possible.

Reproducing the underlying issue is a bit of a boondoggle, but can be
done with a script like the following. It assumes three devices we can
run parted on safely, for which I have been making loop devices. I sent
a parallel patch in fstests with this script fully fleshed out.

==========================
DEV0=<dev0> # primary device we will be corruptin
DEV1=<dev1> # second partition device to trigger devt swap
DEV2=<dev2> # second device to mount, to trick temp_fsid code
D0P1=$DEV0"p1"
D1P1=$DEV1"p1"
MNT=$TEST_DIR/mnt
BIND=$TEST_DIR/bind

parted $DEV0 'mktable gpt' --script
parted $DEV1 'mktable gpt' --script
parted $DEV0 'mkpart mypart 1M 100%' --script # devt A
parted $DEV1 'mkpart mypart 1M 100%' --script # devt B

$MKFS_BTRFS_PROG -f -msingle -dsingle $D0P1 $DEV2 &>/dev/null

mkdir -p $MNT
mount $D0P1 $MNT
umount $MNT # multi-device, no cache freeing

do_rmpart $DEV0
do_rmpart $DEV1
do_mkpart $DEV1 # devt A
do_mkpart $DEV0 # devt B

mount $D0P1 $MNT
$BTRFS_UTIL_PROG device remove $DEV2 $MNT # open us up to temp_fsid

mkdir -p $BIND
mount $D0P1 $BIND # crazy dangerous duplicate mount on same dev
doStuffThatCorruptsTheDisk $BIND
==========================

Signed-off-by: Boris Burkov <boris@bur.io>
---
Changelog:
v2:
- updated commit message to include repro script

 fs/btrfs/super.c   | 11 ++++-------
 fs/btrfs/volumes.c | 46 +++++++++++++++++++++++++++++++++++++---------
 fs/btrfs/volumes.h |  1 +
 3 files changed, 42 insertions(+), 16 deletions(-)

Comments

Anand Jain March 1, 2024, 2:21 a.m. UTC | #1
On 3/1/24 00:29, Boris Burkov wrote:
> btrfs forget assumes the device still exists in the block layer and
> that we can lookup its dev_t. For handling some tricky cases with
> changing devt across device recreation, we need udev rules that run on
> device removal. However, at that point, there is no node to lookup, so
> we need to rely on the cached name. Refactor the forget code to handle
> this case, while still preferring to use dev_t when possible.

After the kernel patch ([PATCH] btrfs: validate device maj:min during
open), I don't think we need this patch to help fix the issue.

Or do you have any other scenario where we need this to help udev rule
to release the state / disappearing device from the btrfs kernel cache?

Otherwise, this patch will be a good to have patch but not specific to
fixing any issue.

But, we should make this patch to search for both path and devt
because these two should represent the same device at any given time.

Thanks, Anand


> Reproducing the underlying issue is a bit of a boondoggle, but can be
> done with a script like the following. It assumes three devices we can
> run parted on safely, for which I have been making loop devices. I sent
> a parallel patch in fstests with this script fully fleshed out.
> 
> ==========================
> DEV0=<dev0> # primary device we will be corruptin
> DEV1=<dev1> # second partition device to trigger devt swap
> DEV2=<dev2> # second device to mount, to trick temp_fsid code
> D0P1=$DEV0"p1"
> D1P1=$DEV1"p1"
> MNT=$TEST_DIR/mnt
> BIND=$TEST_DIR/bind
> 
> parted $DEV0 'mktable gpt' --script
> parted $DEV1 'mktable gpt' --script
> parted $DEV0 'mkpart mypart 1M 100%' --script # devt A
> parted $DEV1 'mkpart mypart 1M 100%' --script # devt B
> 
> $MKFS_BTRFS_PROG -f -msingle -dsingle $D0P1 $DEV2 &>/dev/null
> 
> mkdir -p $MNT
> mount $D0P1 $MNT
> umount $MNT # multi-device, no cache freeing
> 
> do_rmpart $DEV0
> do_rmpart $DEV1
> do_mkpart $DEV1 # devt A
> do_mkpart $DEV0 # devt B
> 
> mount $D0P1 $MNT
> $BTRFS_UTIL_PROG device remove $DEV2 $MNT # open us up to temp_fsid
> 
> mkdir -p $BIND
> mount $D0P1 $BIND # crazy dangerous duplicate mount on same dev
> doStuffThatCorruptsTheDisk $BIND
> ==========================
> 
> Signed-off-by: Boris Burkov <boris@bur.io>
> ---
> Changelog:
> v2:
> - updated commit message to include repro script
> 
>   fs/btrfs/super.c   | 11 ++++-------
>   fs/btrfs/volumes.c | 46 +++++++++++++++++++++++++++++++++++++---------
>   fs/btrfs/volumes.h |  1 +
>   3 files changed, 42 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 7e44ccaf348f..3609b9a773f7 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -2192,7 +2192,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
>   {
>   	struct btrfs_ioctl_vol_args *vol;
>   	struct btrfs_device *device = NULL;
> -	dev_t devt = 0;
> +	char *name = NULL;
>   	int ret = -ENOTTY;
>   
>   	if (!capable(CAP_SYS_ADMIN))
> @@ -2217,12 +2217,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
>   		mutex_unlock(&uuid_mutex);
>   		break;
>   	case BTRFS_IOC_FORGET_DEV:
> -		if (vol->name[0] != 0) {
> -			ret = lookup_bdev(vol->name, &devt);
> -			if (ret)
> -				break;
> -		}
> -		ret = btrfs_forget_devices(devt);
> +		if (vol->name[0] != 0)
> +			name = vol->name;
> +		ret = btrfs_forget_devices_by_name(name);
>   		break;
>   	case BTRFS_IOC_DEVICES_READY:
>   		mutex_lock(&uuid_mutex);
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 3cc947a42116..68fb0b64ab3f 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -503,11 +503,13 @@ btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder,
>   }
>   
>   /*
> - *  Search and remove all stale devices (which are not mounted).  When both
> + *  Search and remove all stale devices (which are not mounted).  When all
>    *  inputs are NULL, it will search and release all stale devices.
>    *
>    *  @devt:         Optional. When provided will it release all unmounted devices
> - *                 matching this devt only.
> + *                 matching this devt only. Don't set together with name.
> + *  @name:         Optional. When provided will it release all unmounted devices
> + *                 matching this name only. Don't set together with devt.
>    *  @skip_device:  Optional. Will skip this device when searching for the stale
>    *                 devices.
>    *
> @@ -515,14 +517,16 @@ btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder,
>    *		-EBUSY if @devt is a mounted device.
>    *		-ENOENT if @devt does not match any device in the list.
>    */
> -static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device)
> +static int btrfs_free_stale_devices(dev_t devt, char *name, struct btrfs_device *skip_device)
>   {
>   	struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
>   	struct btrfs_device *device, *tmp_device;
>   	int ret;
>   	bool freed = false;
> +	bool searching = devt || name;
>   
>   	lockdep_assert_held(&uuid_mutex);
> +	ASSERT(!(devt && name));
>   
>   	/* Return good status if there is no instance of devt. */
>   	ret = 0;
> @@ -533,14 +537,18 @@ static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device
>   					 &fs_devices->devices, dev_list) {
>   			if (skip_device && skip_device == device)
>   				continue;
> +			if (!searching)
> +				goto found;
>   			if (devt && devt != device->devt)
>   				continue;
> +			if (name && device->name && strcmp(device->name->str, name))
> +				continue;
> +found:
>   			if (fs_devices->opened) {
> -				if (devt)
> +				if (searching)
>   					ret = -EBUSY;
>   				break;
>   			}
> -
>   			/* delete the stale device */
>   			fs_devices->num_devices--;
>   			list_del(&device->dev_list);
> @@ -561,7 +569,7 @@ static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device
>   	if (freed)
>   		return 0;
>   
> -	return ret;
> +	return ret ? ret : -ENODEV;
>   }
>   
>   static struct btrfs_fs_devices *find_fsid_by_device(
> @@ -1288,12 +1296,32 @@ static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev
>   	return disk_super;
>   }
>   
> +int btrfs_forget_devices_by_name(char *name)
> +{
> +	int ret;
> +	dev_t devt = 0;
> +
> +	/*
> +	 * Ideally, use devt, but if not, use name.
> +	 * Note: Assumes lookup_bdev handles NULL name gracefully.
> +	 */
> +	ret = lookup_bdev(name, &devt);
> +	if (!ret)
> +		name = NULL;
> +
> +	mutex_lock(&uuid_mutex);
> +	ret = btrfs_free_stale_devices(devt, name, NULL);
> +	mutex_unlock(&uuid_mutex);
> +
> +	return ret;
> +}
> +
>   int btrfs_forget_devices(dev_t devt)
>   {
>   	int ret;
>   
>   	mutex_lock(&uuid_mutex);
> -	ret = btrfs_free_stale_devices(devt, NULL);
> +	ret = btrfs_free_stale_devices(devt, NULL, NULL);
>   	mutex_unlock(&uuid_mutex);
>   
>   	return ret;
> @@ -1364,7 +1392,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
>   			btrfs_warn(NULL, "lookup bdev failed for path %s: %d",
>   				   path, ret);
>   		else
> -			btrfs_free_stale_devices(devt, NULL);
> +			btrfs_free_stale_devices(devt, NULL, NULL);
>   
>   		pr_debug("BTRFS: skip registering single non-seed device %s\n", path);
>   		device = NULL;
> @@ -1373,7 +1401,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
>   
>   	device = device_list_add(path, disk_super, &new_device_added);
>   	if (!IS_ERR(device) && new_device_added)
> -		btrfs_free_stale_devices(device->devt, device);
> +		btrfs_free_stale_devices(device->devt, NULL, device);
>   
>   free_disk_super:
>   	btrfs_release_disk_super(disk_super);
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index feba8d53526c..a5388a6b2969 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -681,6 +681,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
>   		       blk_mode_t flags, void *holder);
>   struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
>   					   bool mount_arg_dev);
> +int btrfs_forget_devices_by_name(char *name);
>   int btrfs_forget_devices(dev_t devt);
>   void btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
>   void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices);
diff mbox series

Patch

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 7e44ccaf348f..3609b9a773f7 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2192,7 +2192,7 @@  static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
 {
 	struct btrfs_ioctl_vol_args *vol;
 	struct btrfs_device *device = NULL;
-	dev_t devt = 0;
+	char *name = NULL;
 	int ret = -ENOTTY;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -2217,12 +2217,9 @@  static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
 		mutex_unlock(&uuid_mutex);
 		break;
 	case BTRFS_IOC_FORGET_DEV:
-		if (vol->name[0] != 0) {
-			ret = lookup_bdev(vol->name, &devt);
-			if (ret)
-				break;
-		}
-		ret = btrfs_forget_devices(devt);
+		if (vol->name[0] != 0)
+			name = vol->name;
+		ret = btrfs_forget_devices_by_name(name);
 		break;
 	case BTRFS_IOC_DEVICES_READY:
 		mutex_lock(&uuid_mutex);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3cc947a42116..68fb0b64ab3f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -503,11 +503,13 @@  btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder,
 }
 
 /*
- *  Search and remove all stale devices (which are not mounted).  When both
+ *  Search and remove all stale devices (which are not mounted).  When all
  *  inputs are NULL, it will search and release all stale devices.
  *
  *  @devt:         Optional. When provided will it release all unmounted devices
- *                 matching this devt only.
+ *                 matching this devt only. Don't set together with name.
+ *  @name:         Optional. When provided will it release all unmounted devices
+ *                 matching this name only. Don't set together with devt.
  *  @skip_device:  Optional. Will skip this device when searching for the stale
  *                 devices.
  *
@@ -515,14 +517,16 @@  btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder,
  *		-EBUSY if @devt is a mounted device.
  *		-ENOENT if @devt does not match any device in the list.
  */
-static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device)
+static int btrfs_free_stale_devices(dev_t devt, char *name, struct btrfs_device *skip_device)
 {
 	struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
 	struct btrfs_device *device, *tmp_device;
 	int ret;
 	bool freed = false;
+	bool searching = devt || name;
 
 	lockdep_assert_held(&uuid_mutex);
+	ASSERT(!(devt && name));
 
 	/* Return good status if there is no instance of devt. */
 	ret = 0;
@@ -533,14 +537,18 @@  static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device
 					 &fs_devices->devices, dev_list) {
 			if (skip_device && skip_device == device)
 				continue;
+			if (!searching)
+				goto found;
 			if (devt && devt != device->devt)
 				continue;
+			if (name && device->name && strcmp(device->name->str, name))
+				continue;
+found:
 			if (fs_devices->opened) {
-				if (devt)
+				if (searching)
 					ret = -EBUSY;
 				break;
 			}
-
 			/* delete the stale device */
 			fs_devices->num_devices--;
 			list_del(&device->dev_list);
@@ -561,7 +569,7 @@  static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device
 	if (freed)
 		return 0;
 
-	return ret;
+	return ret ? ret : -ENODEV;
 }
 
 static struct btrfs_fs_devices *find_fsid_by_device(
@@ -1288,12 +1296,32 @@  static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev
 	return disk_super;
 }
 
+int btrfs_forget_devices_by_name(char *name)
+{
+	int ret;
+	dev_t devt = 0;
+
+	/*
+	 * Ideally, use devt, but if not, use name.
+	 * Note: Assumes lookup_bdev handles NULL name gracefully.
+	 */
+	ret = lookup_bdev(name, &devt);
+	if (!ret)
+		name = NULL;
+
+	mutex_lock(&uuid_mutex);
+	ret = btrfs_free_stale_devices(devt, name, NULL);
+	mutex_unlock(&uuid_mutex);
+
+	return ret;
+}
+
 int btrfs_forget_devices(dev_t devt)
 {
 	int ret;
 
 	mutex_lock(&uuid_mutex);
-	ret = btrfs_free_stale_devices(devt, NULL);
+	ret = btrfs_free_stale_devices(devt, NULL, NULL);
 	mutex_unlock(&uuid_mutex);
 
 	return ret;
@@ -1364,7 +1392,7 @@  struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
 			btrfs_warn(NULL, "lookup bdev failed for path %s: %d",
 				   path, ret);
 		else
-			btrfs_free_stale_devices(devt, NULL);
+			btrfs_free_stale_devices(devt, NULL, NULL);
 
 		pr_debug("BTRFS: skip registering single non-seed device %s\n", path);
 		device = NULL;
@@ -1373,7 +1401,7 @@  struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
 
 	device = device_list_add(path, disk_super, &new_device_added);
 	if (!IS_ERR(device) && new_device_added)
-		btrfs_free_stale_devices(device->devt, device);
+		btrfs_free_stale_devices(device->devt, NULL, device);
 
 free_disk_super:
 	btrfs_release_disk_super(disk_super);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index feba8d53526c..a5388a6b2969 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -681,6 +681,7 @@  int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		       blk_mode_t flags, void *holder);
 struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
 					   bool mount_arg_dev);
+int btrfs_forget_devices_by_name(char *name);
 int btrfs_forget_devices(dev_t devt);
 void btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
 void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices);