diff mbox series

[v2,1/2] btrfs: avoid unnecessary device path update for the same device

Message ID 3b02eabf87e477dd25e21a4c2cf7720e530d7531.1727222308.git.wqu@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs: enhance btrfs device path rename | expand

Commit Message

Qu Wenruo Sept. 25, 2024, 12:05 a.m. UTC
[PROBLEM]
It is very common for udev to trigger device scan, and every time a
mounted btrfs device got re-scan from different soft links, we will get
some of unnecessary device path updates, this is especially common
for LVM based storage:

 # lvs
  scratch1 test -wi-ao---- 10.00g
  scratch2 test -wi-a----- 10.00g
  scratch3 test -wi-a----- 10.00g
  scratch4 test -wi-a----- 10.00g
  scratch5 test -wi-a----- 10.00g
  test     test -wi-a----- 10.00g

 # mkfs.btrfs -f /dev/test/scratch1
 # mount /dev/test/scratch1 /mnt/btrfs
 # dmesg -c
 [  205.705234] BTRFS: device fsid 7be2602f-9e35-4ecf-a6ff-9e91d2c182c9 devid 1 transid 6 /dev/mapper/test-scratch1 (253:4) scanned by mount (1154)
 [  205.710864] BTRFS info (device dm-4): first mount of filesystem 7be2602f-9e35-4ecf-a6ff-9e91d2c182c9
 [  205.711923] BTRFS info (device dm-4): using crc32c (crc32c-intel) checksum algorithm
 [  205.713856] BTRFS info (device dm-4): using free-space-tree
 [  205.722324] BTRFS info (device dm-4): checking UUID tree

So far so good, but even if we just touched any soft link of
"dm-4", we will get quite some unnecessary device path updates.

 # touch /dev/mapper/test-scratch1
 # dmesg -c
 [  469.295796] BTRFS info: devid 1 device path /dev/mapper/test-scratch1 changed to /dev/dm-4 scanned by (udev-worker) (1221)
 [  469.300494] BTRFS info: devid 1 device path /dev/dm-4 changed to /dev/mapper/test-scratch1 scanned by (udev-worker) (1221)

Such device path rename is unnecessary and can lead to random path
change due to the udev race.

[CAUSE]
Inside device_list_add(), we are using a very primitive way checking if
the device has changed, strcmp().

Which can never handle links well, no matter if it's hard or soft links.

So every different link of the same device will be treated as a different
device, causing the unnecessary device path update.

[FIX]
Introduce a helper, is_same_device(), and use path_equal() to properly
detect the same block device.
So that the different soft links won't trigger the rename race.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Link: https://bugzilla.suse.com/show_bug.cgi?id=1230641
Reported-by: Fabian Vogt <fvogt@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/volumes.c | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

Comments

Filipe Manana Oct. 2, 2024, 2:14 p.m. UTC | #1
On Wed, Sep 25, 2024 at 1:14 AM Qu Wenruo <wqu@suse.com> wrote:
>
> [PROBLEM]
> It is very common for udev to trigger device scan, and every time a
> mounted btrfs device got re-scan from different soft links, we will get
> some of unnecessary device path updates, this is especially common
> for LVM based storage:
>
>  # lvs
>   scratch1 test -wi-ao---- 10.00g
>   scratch2 test -wi-a----- 10.00g
>   scratch3 test -wi-a----- 10.00g
>   scratch4 test -wi-a----- 10.00g
>   scratch5 test -wi-a----- 10.00g
>   test     test -wi-a----- 10.00g
>
>  # mkfs.btrfs -f /dev/test/scratch1
>  # mount /dev/test/scratch1 /mnt/btrfs
>  # dmesg -c
>  [  205.705234] BTRFS: device fsid 7be2602f-9e35-4ecf-a6ff-9e91d2c182c9 devid 1 transid 6 /dev/mapper/test-scratch1 (253:4) scanned by mount (1154)
>  [  205.710864] BTRFS info (device dm-4): first mount of filesystem 7be2602f-9e35-4ecf-a6ff-9e91d2c182c9
>  [  205.711923] BTRFS info (device dm-4): using crc32c (crc32c-intel) checksum algorithm
>  [  205.713856] BTRFS info (device dm-4): using free-space-tree
>  [  205.722324] BTRFS info (device dm-4): checking UUID tree
>
> So far so good, but even if we just touched any soft link of
> "dm-4", we will get quite some unnecessary device path updates.
>
>  # touch /dev/mapper/test-scratch1
>  # dmesg -c
>  [  469.295796] BTRFS info: devid 1 device path /dev/mapper/test-scratch1 changed to /dev/dm-4 scanned by (udev-worker) (1221)
>  [  469.300494] BTRFS info: devid 1 device path /dev/dm-4 changed to /dev/mapper/test-scratch1 scanned by (udev-worker) (1221)
>
> Such device path rename is unnecessary and can lead to random path
> change due to the udev race.
>
> [CAUSE]
> Inside device_list_add(), we are using a very primitive way checking if
> the device has changed, strcmp().
>
> Which can never handle links well, no matter if it's hard or soft links.
>
> So every different link of the same device will be treated as a different
> device, causing the unnecessary device path update.
>
> [FIX]
> Introduce a helper, is_same_device(), and use path_equal() to properly
> detect the same block device.
> So that the different soft links won't trigger the rename race.
>
> Reviewed-by: Filipe Manana <fdmanana@suse.com>
> Link: https://bugzilla.suse.com/show_bug.cgi?id=1230641
> Reported-by: Fabian Vogt <fvogt@suse.com>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>  fs/btrfs/volumes.c | 28 +++++++++++++++++++++++++++-
>  1 file changed, 27 insertions(+), 1 deletion(-)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 995b0647f538..b713e4ebb362 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -732,6 +732,32 @@ const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb)
>         return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
>  }
>
> +static bool is_same_device(struct btrfs_device *device, const char *new_path)
> +{
> +       struct path old = { .mnt = NULL, .dentry = NULL };
> +       struct path new = { .mnt = NULL, .dentry = NULL };
> +       char *old_path;
> +       bool is_same = false;
> +       int ret;
> +
> +       if (!device->name)
> +               goto out;
> +
> +       old_path = rcu_str_deref(device->name);

There's a missing rcu_read_lock / unlock btw. It's triggering warnings
in the test vms.

Thanks.

> +       ret = kern_path(old_path, LOOKUP_FOLLOW, &old);
> +       if (ret)
> +               goto out;
> +       ret = kern_path(new_path, LOOKUP_FOLLOW, &new);
> +       if (ret)
> +               goto out;
> +       if (path_equal(&old, &new))
> +               is_same = true;
> +out:
> +       path_put(&old);
> +       path_put(&new);
> +       return is_same;
> +}
> +
>  /*
>   * Add new device to list of registered devices
>   *
> @@ -852,7 +878,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
>                                 MAJOR(path_devt), MINOR(path_devt),
>                                 current->comm, task_pid_nr(current));
>
> -       } else if (!device->name || strcmp(device->name->str, path)) {
> +       } else if (!device->name || !is_same_device(device, path)) {
>                 /*
>                  * When FS is already mounted.
>                  * 1. If you are here and if the device->name is NULL that
> --
> 2.46.1
>
>
Qu Wenruo Oct. 2, 2024, 9:09 p.m. UTC | #2
在 2024/10/2 23:44, Filipe Manana 写道:
> On Wed, Sep 25, 2024 at 1:14 AM Qu Wenruo <wqu@suse.com> wrote:
>>
>> [PROBLEM]
>> It is very common for udev to trigger device scan, and every time a
>> mounted btrfs device got re-scan from different soft links, we will get
>> some of unnecessary device path updates, this is especially common
>> for LVM based storage:
>>
>>   # lvs
>>    scratch1 test -wi-ao---- 10.00g
>>    scratch2 test -wi-a----- 10.00g
>>    scratch3 test -wi-a----- 10.00g
>>    scratch4 test -wi-a----- 10.00g
>>    scratch5 test -wi-a----- 10.00g
>>    test     test -wi-a----- 10.00g
>>
>>   # mkfs.btrfs -f /dev/test/scratch1
>>   # mount /dev/test/scratch1 /mnt/btrfs
>>   # dmesg -c
>>   [  205.705234] BTRFS: device fsid 7be2602f-9e35-4ecf-a6ff-9e91d2c182c9 devid 1 transid 6 /dev/mapper/test-scratch1 (253:4) scanned by mount (1154)
>>   [  205.710864] BTRFS info (device dm-4): first mount of filesystem 7be2602f-9e35-4ecf-a6ff-9e91d2c182c9
>>   [  205.711923] BTRFS info (device dm-4): using crc32c (crc32c-intel) checksum algorithm
>>   [  205.713856] BTRFS info (device dm-4): using free-space-tree
>>   [  205.722324] BTRFS info (device dm-4): checking UUID tree
>>
>> So far so good, but even if we just touched any soft link of
>> "dm-4", we will get quite some unnecessary device path updates.
>>
>>   # touch /dev/mapper/test-scratch1
>>   # dmesg -c
>>   [  469.295796] BTRFS info: devid 1 device path /dev/mapper/test-scratch1 changed to /dev/dm-4 scanned by (udev-worker) (1221)
>>   [  469.300494] BTRFS info: devid 1 device path /dev/dm-4 changed to /dev/mapper/test-scratch1 scanned by (udev-worker) (1221)
>>
>> Such device path rename is unnecessary and can lead to random path
>> change due to the udev race.
>>
>> [CAUSE]
>> Inside device_list_add(), we are using a very primitive way checking if
>> the device has changed, strcmp().
>>
>> Which can never handle links well, no matter if it's hard or soft links.
>>
>> So every different link of the same device will be treated as a different
>> device, causing the unnecessary device path update.
>>
>> [FIX]
>> Introduce a helper, is_same_device(), and use path_equal() to properly
>> detect the same block device.
>> So that the different soft links won't trigger the rename race.
>>
>> Reviewed-by: Filipe Manana <fdmanana@suse.com>
>> Link: https://bugzilla.suse.com/show_bug.cgi?id=1230641
>> Reported-by: Fabian Vogt <fvogt@suse.com>
>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>> ---
>>   fs/btrfs/volumes.c | 28 +++++++++++++++++++++++++++-
>>   1 file changed, 27 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>> index 995b0647f538..b713e4ebb362 100644
>> --- a/fs/btrfs/volumes.c
>> +++ b/fs/btrfs/volumes.c
>> @@ -732,6 +732,32 @@ const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb)
>>          return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
>>   }
>>
>> +static bool is_same_device(struct btrfs_device *device, const char *new_path)
>> +{
>> +       struct path old = { .mnt = NULL, .dentry = NULL };
>> +       struct path new = { .mnt = NULL, .dentry = NULL };
>> +       char *old_path;
>> +       bool is_same = false;
>> +       int ret;
>> +
>> +       if (!device->name)
>> +               goto out;
>> +
>> +       old_path = rcu_str_deref(device->name);
>
> There's a missing rcu_read_lock / unlock btw. It's triggering warnings
> in the test vms.

Thanks a lot, I was also wondering if it's the case, but the zoned code
gave me a bad example of not calling rcu_read_lock().

Shouldn't all btrfs_dev_name() and the usages inside zoned code also be
fixed?

Thanks,
Qu
>
> Thanks.
>
>> +       ret = kern_path(old_path, LOOKUP_FOLLOW, &old);
>> +       if (ret)
>> +               goto out;
>> +       ret = kern_path(new_path, LOOKUP_FOLLOW, &new);
>> +       if (ret)
>> +               goto out;
>> +       if (path_equal(&old, &new))
>> +               is_same = true;
>> +out:
>> +       path_put(&old);
>> +       path_put(&new);
>> +       return is_same;
>> +}
>> +
>>   /*
>>    * Add new device to list of registered devices
>>    *
>> @@ -852,7 +878,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
>>                                  MAJOR(path_devt), MINOR(path_devt),
>>                                  current->comm, task_pid_nr(current));
>>
>> -       } else if (!device->name || strcmp(device->name->str, path)) {
>> +       } else if (!device->name || !is_same_device(device, path)) {
>>                  /*
>>                   * When FS is already mounted.
>>                   * 1. If you are here and if the device->name is NULL that
>> --
>> 2.46.1
>>
>>
>
Filipe Manana Oct. 2, 2024, 9:20 p.m. UTC | #3
On Wed, Oct 2, 2024 at 10:09 PM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote:
>
>
>
> 在 2024/10/2 23:44, Filipe Manana 写道:
> > On Wed, Sep 25, 2024 at 1:14 AM Qu Wenruo <wqu@suse.com> wrote:
> >>
> >> [PROBLEM]
> >> It is very common for udev to trigger device scan, and every time a
> >> mounted btrfs device got re-scan from different soft links, we will get
> >> some of unnecessary device path updates, this is especially common
> >> for LVM based storage:
> >>
> >>   # lvs
> >>    scratch1 test -wi-ao---- 10.00g
> >>    scratch2 test -wi-a----- 10.00g
> >>    scratch3 test -wi-a----- 10.00g
> >>    scratch4 test -wi-a----- 10.00g
> >>    scratch5 test -wi-a----- 10.00g
> >>    test     test -wi-a----- 10.00g
> >>
> >>   # mkfs.btrfs -f /dev/test/scratch1
> >>   # mount /dev/test/scratch1 /mnt/btrfs
> >>   # dmesg -c
> >>   [  205.705234] BTRFS: device fsid 7be2602f-9e35-4ecf-a6ff-9e91d2c182c9 devid 1 transid 6 /dev/mapper/test-scratch1 (253:4) scanned by mount (1154)
> >>   [  205.710864] BTRFS info (device dm-4): first mount of filesystem 7be2602f-9e35-4ecf-a6ff-9e91d2c182c9
> >>   [  205.711923] BTRFS info (device dm-4): using crc32c (crc32c-intel) checksum algorithm
> >>   [  205.713856] BTRFS info (device dm-4): using free-space-tree
> >>   [  205.722324] BTRFS info (device dm-4): checking UUID tree
> >>
> >> So far so good, but even if we just touched any soft link of
> >> "dm-4", we will get quite some unnecessary device path updates.
> >>
> >>   # touch /dev/mapper/test-scratch1
> >>   # dmesg -c
> >>   [  469.295796] BTRFS info: devid 1 device path /dev/mapper/test-scratch1 changed to /dev/dm-4 scanned by (udev-worker) (1221)
> >>   [  469.300494] BTRFS info: devid 1 device path /dev/dm-4 changed to /dev/mapper/test-scratch1 scanned by (udev-worker) (1221)
> >>
> >> Such device path rename is unnecessary and can lead to random path
> >> change due to the udev race.
> >>
> >> [CAUSE]
> >> Inside device_list_add(), we are using a very primitive way checking if
> >> the device has changed, strcmp().
> >>
> >> Which can never handle links well, no matter if it's hard or soft links.
> >>
> >> So every different link of the same device will be treated as a different
> >> device, causing the unnecessary device path update.
> >>
> >> [FIX]
> >> Introduce a helper, is_same_device(), and use path_equal() to properly
> >> detect the same block device.
> >> So that the different soft links won't trigger the rename race.
> >>
> >> Reviewed-by: Filipe Manana <fdmanana@suse.com>
> >> Link: https://bugzilla.suse.com/show_bug.cgi?id=1230641
> >> Reported-by: Fabian Vogt <fvogt@suse.com>
> >> Signed-off-by: Qu Wenruo <wqu@suse.com>
> >> ---
> >>   fs/btrfs/volumes.c | 28 +++++++++++++++++++++++++++-
> >>   1 file changed, 27 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> >> index 995b0647f538..b713e4ebb362 100644
> >> --- a/fs/btrfs/volumes.c
> >> +++ b/fs/btrfs/volumes.c
> >> @@ -732,6 +732,32 @@ const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb)
> >>          return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
> >>   }
> >>
> >> +static bool is_same_device(struct btrfs_device *device, const char *new_path)
> >> +{
> >> +       struct path old = { .mnt = NULL, .dentry = NULL };
> >> +       struct path new = { .mnt = NULL, .dentry = NULL };
> >> +       char *old_path;
> >> +       bool is_same = false;
> >> +       int ret;
> >> +
> >> +       if (!device->name)
> >> +               goto out;
> >> +
> >> +       old_path = rcu_str_deref(device->name);
> >
> > There's a missing rcu_read_lock / unlock btw. It's triggering warnings
> > in the test vms.
>
> Thanks a lot, I was also wondering if it's the case, but the zoned code
> gave me a bad example of not calling rcu_read_lock().

It doesn't call rcu_read_lock() explicitly because it uses the
btrfs_*_in_rcu() log functions, which do the locking.
Except for one place for which I've sent a patch earlier today.

>
> Shouldn't all btrfs_dev_name() and the usages inside zoned code also be
> fixed?

Except for that single case, there's no other case anywhere else,
zoned code or non-zoned code.

>
> Thanks,
> Qu
> >
> > Thanks.
> >
> >> +       ret = kern_path(old_path, LOOKUP_FOLLOW, &old);
> >> +       if (ret)
> >> +               goto out;
> >> +       ret = kern_path(new_path, LOOKUP_FOLLOW, &new);
> >> +       if (ret)
> >> +               goto out;
> >> +       if (path_equal(&old, &new))
> >> +               is_same = true;
> >> +out:
> >> +       path_put(&old);
> >> +       path_put(&new);
> >> +       return is_same;
> >> +}
> >> +
> >>   /*
> >>    * Add new device to list of registered devices
> >>    *
> >> @@ -852,7 +878,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
> >>                                  MAJOR(path_devt), MINOR(path_devt),
> >>                                  current->comm, task_pid_nr(current));
> >>
> >> -       } else if (!device->name || strcmp(device->name->str, path)) {
> >> +       } else if (!device->name || !is_same_device(device, path)) {
> >>                  /*
> >>                   * When FS is already mounted.
> >>                   * 1. If you are here and if the device->name is NULL that
> >> --
> >> 2.46.1
> >>
> >>
> >
>
diff mbox series

Patch

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 995b0647f538..b713e4ebb362 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -732,6 +732,32 @@  const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb)
 	return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
 }
 
+static bool is_same_device(struct btrfs_device *device, const char *new_path)
+{
+	struct path old = { .mnt = NULL, .dentry = NULL };
+	struct path new = { .mnt = NULL, .dentry = NULL };
+	char *old_path;
+	bool is_same = false;
+	int ret;
+
+	if (!device->name)
+		goto out;
+
+	old_path = rcu_str_deref(device->name);
+	ret = kern_path(old_path, LOOKUP_FOLLOW, &old);
+	if (ret)
+		goto out;
+	ret = kern_path(new_path, LOOKUP_FOLLOW, &new);
+	if (ret)
+		goto out;
+	if (path_equal(&old, &new))
+		is_same = true;
+out:
+	path_put(&old);
+	path_put(&new);
+	return is_same;
+}
+
 /*
  * Add new device to list of registered devices
  *
@@ -852,7 +878,7 @@  static noinline struct btrfs_device *device_list_add(const char *path,
 				MAJOR(path_devt), MINOR(path_devt),
 				current->comm, task_pid_nr(current));
 
-	} else if (!device->name || strcmp(device->name->str, path)) {
+	} else if (!device->name || !is_same_device(device, path)) {
 		/*
 		 * When FS is already mounted.
 		 * 1. If you are here and if the device->name is NULL that