[3/6] btrfs: split-brain case for scanned changing device with INCOMPAT_METADATA_UUID
diff mbox series

Message ID 20191212110132.11063-4-Damenly_Su@gmx.com
State New
Headers show
Series
  • btrfs: metadata uuid fixes and enhancements
Related show

Commit Message

Damenly Su Dec. 12, 2019, 11:01 a.m. UTC
From: Su Yue <Damenly_Su@gmx.com>

This patch adds the case for scanned changing device with
INCOMPAT_METADATA_UUID.
For this situation, the origin code only handles the case
the devices already pulled into disk with INCOMPAT_METADATA_UUID set.
There is an another case that the successful changed devices synced
without INCOMPAT_METADATA_UUID.
So add the check of Heather fsid of scanned device equals
metadata_uuid of fs_devices which is with INCOMPAT_METADATA_UUID
feature.

Signed-off-by: Su Yue <Damenly_Su@gmx.com>
---
 fs/btrfs/volumes.c | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

Comments

Su Yue Dec. 12, 2019, 1:24 p.m. UTC | #1
On 2019/12/12 7:01 PM, damenly.su@gmail.com wrote:
> From: Su Yue <Damenly_Su@gmx.com>
>
> This patch adds the case for scanned changing device with
> INCOMPAT_METADATA_UUID.
> For this situation, the origin code only handles the case
> the devices already pulled into disk with INCOMPAT_METADATA_UUID set.
> There is an another case that the successful changed devices synced
> without INCOMPAT_METADATA_UUID.
> So add the check of Heather fsid of scanned device equals

s/Heather/whether/g, blame my spellchecker.

> metadata_uuid of fs_devices which is with INCOMPAT_METADATA_UUID
> feature.
>
> Signed-off-by: Su Yue <Damenly_Su@gmx.com>
> ---
>   fs/btrfs/volumes.c | 29 ++++++++++++++++++++++++++---
>   1 file changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index b08b06a89a77..61b4a107bb58 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -654,7 +654,6 @@ static struct btrfs_fs_devices *find_fsid_inprogress(
>   	return NULL;
>   }
>
> -
>   static struct btrfs_fs_devices *find_fsid_changed(
>   					struct btrfs_super_block *disk_super)
>   {
> @@ -663,9 +662,14 @@ static struct btrfs_fs_devices *find_fsid_changed(
>   	/*
>   	 * Handles the case where scanned device is part of an fs that had
>   	 * multiple successful changes of FSID but curently device didn't
> -	 * observe it. Meaning our fsid will be different than theirs.
> +	 * observe it.
> +	 *
> +	 * Case 1: the devices already changed still owns the feature, their
> +	 * fsid must differ from the disk_super->fsid.
>   	 */
>   	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
> +		if (fs_devices->fsid_change)
> +			continue;
>   		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
>   			   BTRFS_FSID_SIZE) != 0 &&
>   		    memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
> @@ -676,7 +680,26 @@ static struct btrfs_fs_devices *find_fsid_changed(
>   		}
>   	}
>
> -	return NULL;
> +	/*
> +	 * Case 2: the synced devices doesn't have the metadata_uuid feature.
> +	 * NOTE: the fs_devices has same metadata_uuid and fsid in memory, but
> +	 * they differs in disk, because fs_id is copied to
> +	 * fs_devices->metadata_id while alloc_fs_devices if no metadata
> +	 * feature.
> +	 */
> +	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
> +		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
> +			   BTRFS_FSID_SIZE) == 0 &&
> +		    memcmp(fs_devices->fsid, disk_super->metadata_uuid,
> +			   BTRFS_FSID_SIZE) == 0 && !fs_devices->fsid_change)
> +			return fs_devices;
> +	}
> +
> +	/*
> +	 * Okay, can't found any fs_devices already synced, back to
> +	 * search devices unchanged or changing like the device.
> +	 */
> +	return find_fsid(disk_super->fsid, disk_super->metadata_uuid);
>   }
>
>   static struct btrfs_fs_devices *find_fsid_changing_metada_uuid(
>
Nikolay Borisov Dec. 12, 2019, 1:34 p.m. UTC | #2
On 12.12.19 г. 13:01 ч., damenly.su@gmail.com wrote:
> From: Su Yue <Damenly_Su@gmx.com>
> 
> This patch adds the case for scanned changing device with
> INCOMPAT_METADATA_UUID.
> For this situation, the origin code only handles the case
> the devices already pulled into disk with INCOMPAT_METADATA_UUID set.
> There is an another case that the successful changed devices synced
> without INCOMPAT_METADATA_UUID.
> So add the check of Heather fsid of scanned device equals
> metadata_uuid of fs_devices which is with INCOMPAT_METADATA_UUID
> feature.
> 

This is hard for me to parse and correctly understand what you mean.

> Signed-off-by: Su Yue <Damenly_Su@gmx.com>
> ---
>  fs/btrfs/volumes.c | 29 ++++++++++++++++++++++++++---
>  1 file changed, 26 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index b08b06a89a77..61b4a107bb58 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -654,7 +654,6 @@ static struct btrfs_fs_devices *find_fsid_inprogress(
>  	return NULL;
>  }
>  
> -
>  static struct btrfs_fs_devices *find_fsid_changed(
>  					struct btrfs_super_block *disk_super)


find_fsid_changed handles the case where a device belongs to a
filesystem which had multiple successful fsid changed but it failed on
the last one.

>  {
> @@ -663,9 +662,14 @@ static struct btrfs_fs_devices *find_fsid_changed(
>  	/*
>  	 * Handles the case where scanned device is part of an fs that had
>  	 * multiple successful changes of FSID but curently device didn't
> -	 * observe it. Meaning our fsid will be different than theirs.
> +	 * observe it.
> +	 *
> +	 * Case 1: the devices already changed still owns the feature, their
> +	 * fsid must differ from the disk_super->fsid.

What do you mean by device to still owns the feature? Has the bit set or
something else?

>  	 */
>  	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
> +		if (fs_devices->fsid_change)
> +			continue;

Why do you do this?

>  		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
>  			   BTRFS_FSID_SIZE) != 0 &&
>  		    memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
> @@ -676,7 +680,26 @@ static struct btrfs_fs_devices *find_fsid_changed(
>  		}
>  	}
>  
> -	return NULL;
> +	/*
> +	 * Case 2: the synced devices doesn't have the metadata_uuid feature.
> +	 * NOTE: the fs_devices has same metadata_uuid and fsid in memory, but
> +	 * they differs in disk, because fs_id is copied to
> +	 * fs_devices->metadata_id while alloc_fs_devices if no metadata

It's not possible for the device to have metadata_uuid feature because
this function is called from device_list_add iff the device has
METADATA_UUID flag:

if (fsid_change_in_progress) {

if (!has_metadata_uuid) {
} else {
 find_fsid_changed <-- here we are sure our device has METADATA_UUID set.
}
}

> +	 * feature.
> +	 */
> +	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
> +		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
> +			   BTRFS_FSID_SIZE) == 0 &&
> +		    memcmp(fs_devices->fsid, disk_super->metadata_uuid,
> +			   BTRFS_FSID_SIZE) == 0 && !fs_devices->fsid_change)
> +			return fs_devices;
> +	}
> +
> +	/*
> +	 * Okay, can't found any fs_devices already synced, back to
> +	 * search devices unchanged or changing like the device.
> +	 */
> +	return find_fsid(disk_super->fsid, disk_super->metadata_uuid);
>  }
>  
>  static struct btrfs_fs_devices *find_fsid_changing_metada_uuid(
>
Su Yue Dec. 12, 2019, 2:19 p.m. UTC | #3
On 2019/12/12 9:34 PM, Nikolay Borisov wrote:
>
>
> On 12.12.19 г. 13:01 ч., damenly.su@gmail.com wrote:
>> From: Su Yue <Damenly_Su@gmx.com>
>>
>> This patch adds the case for scanned changing device with
>> INCOMPAT_METADATA_UUID.
>> For this situation, the origin code only handles the case
>> the devices already pulled into disk with INCOMPAT_METADATA_UUID set.
>> There is an another case that the successful changed devices synced
>> without INCOMPAT_METADATA_UUID.
>> So add the check of Heather fsid of scanned device equals
>> metadata_uuid of fs_devices which is with INCOMPAT_METADATA_UUID
>> feature.
>>
>
> This is hard for me to parse and correctly understand what you mean.
>

Sorry..

>> Signed-off-by: Su Yue <Damenly_Su@gmx.com>
>> ---
>>   fs/btrfs/volumes.c | 29 ++++++++++++++++++++++++++---
>>   1 file changed, 26 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>> index b08b06a89a77..61b4a107bb58 100644
>> --- a/fs/btrfs/volumes.c
>> +++ b/fs/btrfs/volumes.c
>> @@ -654,7 +654,6 @@ static struct btrfs_fs_devices *find_fsid_inprogress(
>>   	return NULL;
>>   }
>>
>> -
>>   static struct btrfs_fs_devices *find_fsid_changed(
>>   					struct btrfs_super_block *disk_super)
>
>
> find_fsid_changed handles the case where a device belongs to a
> filesystem which had multiple successful fsid changed but it failed on
> the last one.
>
I go it while reading code. And "the last one" is the one where the
@disk_super read from. It has the metadata_uuid and FSID_CHANGING_V2.
Right?
What I want to express in changelog  is that those successful changed
devices *may*
not have the metadata_uuid feature. The code in btrfstune.c: line 141

         if (new_uuid && uuid_changed && memcmp(disk_super->metadata_uuid,
                                                new_fsid,
BTRFS_FSID_SIZE) == 0) {
                 /*
                  * Changing fsid to be the same as metadata uuid, so just
                  * disable the flag
                  */
                 memcpy(disk_super->fsid, &new_fsid, BTRFS_FSID_SIZE);
                 incompat_flags &= ~BTRFS_FEATURE_INCOMPAT_METADATA_UUID;
                 btrfs_set_super_incompat_flags(disk_super, incompat_flags);
                 memset(disk_super->metadata_uuid, 0, BTRFS_FSID_SIZE);

The INCOMPAT_METADATA_UUID can be cleared if changing fsid is same with
the metadata_uuid in deivce.

>>   {
>> @@ -663,9 +662,14 @@ static struct btrfs_fs_devices *find_fsid_changed(
>>   	/*
>>   	 * Handles the case where scanned device is part of an fs that had
>>   	 * multiple successful changes of FSID but curently device didn't
>> -	 * observe it. Meaning our fsid will be different than theirs.
>> +	 * observe it.
>> +	 *
>> +	 * Case 1: the devices already changed still owns the feature, their
>> +	 * fsid must differ from the disk_super->fsid.
>
> What do you mean by device to still owns the feature? Has the bit set or
> something else?
>
The metadata_uuid feature.
>>   	 */
>>   	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
>> +		if (fs_devices->fsid_change)
>> +			continue;
>
> Why do you do this?

Just make cases separated.

>
>>   		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
>>   			   BTRFS_FSID_SIZE) != 0 &&
>>   		    memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
>> @@ -676,7 +680,26 @@ static struct btrfs_fs_devices *find_fsid_changed(
>>   		}
>>   	}
>>
>> -	return NULL;
>> +	/*
>> +	 * Case 2: the synced devices doesn't have the metadata_uuid feature.
>> +	 * NOTE: the fs_devices has same metadata_uuid and fsid in memory, but
>> +	 * they differs in disk, because fs_id is copied to
>> +	 * fs_devices->metadata_id while alloc_fs_devices if no metadata
>
> It's not possible for the device to have metadata_uuid feature because
> this function is called from device_list_add iff the device has
> METADATA_UUID flag:
>

Get and agree what you mean. As the reply above, the fs_devices already
allocated there(not the device scanning) may not have METADATA_UUID flag.

> if (fsid_change_in_progress) {
>
> if (!has_metadata_uuid) {
> } else {
>   find_fsid_changed <-- here we are sure our device has METADATA_UUID set.
> }
> }
>
>> +	 * feature.
>> +	 */
>> +	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
>> +		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
>> +			   BTRFS_FSID_SIZE) == 0 &&
>> +		    memcmp(fs_devices->fsid, disk_super->metadata_uuid,
>> +			   BTRFS_FSID_SIZE) == 0 && !fs_devices->fsid_change)
>> +			return fs_devices;
>> +	}
>> +
>> +	/*
>> +	 * Okay, can't found any fs_devices already synced, back to
>> +	 * search devices unchanged or changing like the device.
>> +	 */
>> +	return find_fsid(disk_super->fsid, disk_super->metadata_uuid);
>>   }
>>
>>   static struct btrfs_fs_devices *find_fsid_changing_metada_uuid(
>>

Patch
diff mbox series

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b08b06a89a77..61b4a107bb58 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -654,7 +654,6 @@  static struct btrfs_fs_devices *find_fsid_inprogress(
 	return NULL;
 }
 
-
 static struct btrfs_fs_devices *find_fsid_changed(
 					struct btrfs_super_block *disk_super)
 {
@@ -663,9 +662,14 @@  static struct btrfs_fs_devices *find_fsid_changed(
 	/*
 	 * Handles the case where scanned device is part of an fs that had
 	 * multiple successful changes of FSID but curently device didn't
-	 * observe it. Meaning our fsid will be different than theirs.
+	 * observe it.
+	 *
+	 * Case 1: the devices already changed still owns the feature, their
+	 * fsid must differ from the disk_super->fsid.
 	 */
 	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
+		if (fs_devices->fsid_change)
+			continue;
 		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
 			   BTRFS_FSID_SIZE) != 0 &&
 		    memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
@@ -676,7 +680,26 @@  static struct btrfs_fs_devices *find_fsid_changed(
 		}
 	}
 
-	return NULL;
+	/*
+	 * Case 2: the synced devices doesn't have the metadata_uuid feature.
+	 * NOTE: the fs_devices has same metadata_uuid and fsid in memory, but
+	 * they differs in disk, because fs_id is copied to
+	 * fs_devices->metadata_id while alloc_fs_devices if no metadata
+	 * feature.
+	 */
+	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
+		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
+			   BTRFS_FSID_SIZE) == 0 &&
+		    memcmp(fs_devices->fsid, disk_super->metadata_uuid,
+			   BTRFS_FSID_SIZE) == 0 && !fs_devices->fsid_change)
+			return fs_devices;
+	}
+
+	/*
+	 * Okay, can't found any fs_devices already synced, back to
+	 * search devices unchanged or changing like the device.
+	 */
+	return find_fsid(disk_super->fsid, disk_super->metadata_uuid);
 }
 
 static struct btrfs_fs_devices *find_fsid_changing_metada_uuid(