diff mbox series

[v2,2/4] btrfs: delete identified alien device in open_fs_devices

Message ID 20191006024711.4666-1-anand.jain@oracle.com (mailing list archive)
State New, archived
Headers show
Series None | expand

Commit Message

Anand Jain Oct. 6, 2019, 2:47 a.m. UTC
In open_fs_devices() we identify an alien device, but we don't reset its
device::name. So the progs device list does not show the device as missing,
as shown in the script below.

mkfs.btrfs -fq /dev/sdd && mount /dev/sdd /btrfs
mkfs.btrfs -fq -draid1 -mraid1 /dev/sdc /dev/sdb
sleep 3 # avoid racing with udev's useless scans if needed
btrfs dev add -f /dev/sdb /btrfs
mount -o degraded /dev/sdc /btrfs1

No missing device:
btrfs fi show -m /btrfs1
Label: none  uuid: 3eb7cd50-4594-458f-9d68-c243cc49954d
	Total devices 2 FS bytes used 128.00KiB
	devid    1 size 12.00GiB used 1.26GiB path /dev/sdc
	devid    2 size 12.00GiB used 1.26GiB path /dev/sdb

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
v2: Move free alien part to its parent function btrfs_open_one_device.
    Thanks Nikolay.

PS: Fundamentally, it's the wrong approach for btrfs-progs to deduce the
device missing state in userland instead of obtaining it from the kernel.
I objected to the patch, but those patches still got merged; this bug is
one of its side effects. Ironically, I wrote patches to read device_state
from the kernel using ioctl, procfs and sysfs, but they never got enough
attention to be merged.

 fs/btrfs/volumes.c | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

Comments

Nikolay Borisov Oct. 6, 2019, 11:44 a.m. UTC | #1
On 6.10.19 г. 5:47 ч., Anand Jain wrote:
> In open_fs_devices() we identify alien device but we don't reset its
> the device::name. So progs device list does not show the device missing
> as shown in the script below.
> 
> mkfs.btrfs -fq /dev/sdd && mount /dev/sdd /btrfs
> mkfs.btrfs -fq -draid1 -mraid1 /dev/sdc /dev/sdb
> sleep 3 # avoid racing with udev's useless scans if needed
> btrfs dev add -f /dev/sdb /btrfs
> mount -o degraded /dev/sdc /btrfs1
> 
> No missing device:
> btrfs fi show -m /btrfs1
> Label: none  uuid: 3eb7cd50-4594-458f-9d68-c243cc49954d
> 	Total devices 2 FS bytes used 128.00KiB
> 	devid    1 size 12.00GiB used 1.26GiB path /dev/sdc
> 	devid    2 size 12.00GiB used 1.26GiB path /dev/sdb
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---
> v2: Move free alien part to its parent function btrfs_open_one_device.
>     Thanks Nikolay.
> 
> PS: Fundamentally its wrong approach that btrfs-progs deduces the device
> missing state in the userland instead of obtaining it from the kernel.
> I objected on the patch, but still those patches got merged, this bug is
> one of its side effects. Ironically I wrote patches to read device_state
> from the kernel using ioctl, procfs and sysfs but didn't get the due
> attention till a merger.
> 
>  fs/btrfs/volumes.c | 32 +++++++++++++++++++++++---------
>  1 file changed, 23 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index c223a8147bfd..21aaf64c59b2 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -591,13 +591,18 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
>  	if (ret)
>  		return ret;
>  
> +	ret = -EINVAL;
>  	disk_super = (struct btrfs_super_block *)bh->b_data;
>  	devid = btrfs_stack_device_id(&disk_super->dev_item);
> -	if (devid != device->devid)
> +	if (devid != device->devid) {
> +		ret = -EUCLEAN;
>  		goto error_brelse;
> +	}
>  
> -	if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE))
> +	if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE)) {
> +		ret = -EUCLEAN;
>  		goto error_brelse;
> +	}
>  
>  	device->generation = btrfs_super_generation(disk_super);
>  
> @@ -640,7 +645,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
>  	brelse(bh);
>  	blkdev_put(bdev, flags);
>  
> -	return -EINVAL;
> +	return ret;
>  }
>  
>  /*
> @@ -1121,19 +1126,28 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
>  static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
>  				fmode_t flags, void *holder)
>  {
> +	int ret;
>  	struct btrfs_device *device;
> +	struct btrfs_device *tmp_device;
>  	struct btrfs_device *latest_dev = NULL;
>  
>  	flags |= FMODE_EXCL;
>  
> -	list_for_each_entry(device, &fs_devices->devices, dev_list) {
> +	list_for_each_entry_safe(device, tmp_device, &fs_devices->devices,
> +				 dev_list) {
>  		/* Just open everything we can; ignore failures here */
> -		if (btrfs_open_one_device(fs_devices, device, flags, holder))
> -			continue;
> -
> -		if (!latest_dev ||
> -		    device->generation > latest_dev->generation)
> +		ret = btrfs_open_one_device(fs_devices, device, flags, holder);
> +		if (ret == 0 && (!latest_dev ||
> +		    device->generation > latest_dev->generation)) {
>  			latest_dev = device;
> +			continue;
> +		}

nit: Had you used if () {} else if () {}, you could have done away with
the continue.

> +		if (ret == -EUCLEAN) {
> +			/* An alien device. Clean it up */
> +			fs_devices->num_devices--;
> +			list_del(&device->dev_list);
> +			btrfs_free_device(device);
> +		}
>  	}
>  	if (fs_devices->open_devices == 0)
>  		return -EINVAL;
>
diff mbox series

Patch

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c223a8147bfd..21aaf64c59b2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -591,13 +591,18 @@  static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 	if (ret)
 		return ret;
 
+	ret = -EINVAL;
 	disk_super = (struct btrfs_super_block *)bh->b_data;
 	devid = btrfs_stack_device_id(&disk_super->dev_item);
-	if (devid != device->devid)
+	if (devid != device->devid) {
+		ret = -EUCLEAN;
 		goto error_brelse;
+	}
 
-	if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE))
+	if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE)) {
+		ret = -EUCLEAN;
 		goto error_brelse;
+	}
 
 	device->generation = btrfs_super_generation(disk_super);
 
@@ -640,7 +645,7 @@  static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 	brelse(bh);
 	blkdev_put(bdev, flags);
 
-	return -EINVAL;
+	return ret;
 }
 
 /*
@@ -1121,19 +1126,28 @@  int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
 				fmode_t flags, void *holder)
 {
+	int ret;
 	struct btrfs_device *device;
+	struct btrfs_device *tmp_device;
 	struct btrfs_device *latest_dev = NULL;
 
 	flags |= FMODE_EXCL;
 
-	list_for_each_entry(device, &fs_devices->devices, dev_list) {
+	list_for_each_entry_safe(device, tmp_device, &fs_devices->devices,
+				 dev_list) {
 		/* Just open everything we can; ignore failures here */
-		if (btrfs_open_one_device(fs_devices, device, flags, holder))
-			continue;
-
-		if (!latest_dev ||
-		    device->generation > latest_dev->generation)
+		ret = btrfs_open_one_device(fs_devices, device, flags, holder);
+		if (ret == 0 && (!latest_dev ||
+		    device->generation > latest_dev->generation)) {
 			latest_dev = device;
+			continue;
+		}
+		if (ret == -EUCLEAN) {
+			/* An alien device. Clean it up */
+			fs_devices->num_devices--;
+			list_del(&device->dev_list);
+			btrfs_free_device(device);
+		}
 	}
 	if (fs_devices->open_devices == 0)
 		return -EINVAL;