diff mbox series

[v2,2/2] btrfs: canonicalize the device path before adding it

Message ID 3b0eb4cd4b55ae567abfc849935b4a72cea88efb.1727222308.git.wqu@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs: enhance btrfs device path rename | expand

Commit Message

Qu Wenruo Sept. 25, 2024, 12:06 a.m. UTC
[PROBLEM]
Currently btrfs accepts any file path for its device, resulting some
weird situation:

 # ./mount_by_fd /dev/test/scratch1  /mnt/btrfs/

The program has the following source code:

 #include <fcntl.h>
 #include <stdio.h>
 #include <sys/mount.h>

 int main(int argc, char *argv[]) {
	int fd = open(argv[1], O_RDWR);
	char path[256];
	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
	return mount(path, argv[2], "btrfs", 0, NULL);
 }

Then we can have the following weird device path:

 BTRFS: device fsid 2378be81-fe12-46d2-a9e8-68cf08dd98d5 devid 1 transid 7 /proc/self/fd/3 (253:2) scanned by mount_by_fd (18440)

Normally it's not a big deal, and later udev can trigger a device path
rename. But if udev didn't trigger, the device path "/proc/self/fd/3"
will show up in mtab.

[CAUSE]
For filename "/proc/self/fd/3", it means the opened file descriptor 3.
In above case, it's exactly the device we want to open, aka points to
"/dev/test/scratch1" which is another softlink pointing to "/dev/dm-2".

Inside kernel we solve the mount source using LOOKUP_FOLLOW, which
follows the symbolic link and grab the proper block device.

But inside btrfs we also save the filename into btrfs_device::name, and
utilize that member to report our mount source, which leads to the above
situation.

[FIX]
Instead of unconditionally trust the path, check if the original file
(not following the symbolic link) is inside "/dev/", if not, then
manually lookup the path to its final destination, and use that as our
device path.

This allows us to still use symbolic links, like
"/dev/mapper/test-scratch" from LVM2, which is required for fstests runs
with LVM2 setup.

And for really weird names, like the above case, we solve it to
"/dev/dm-2" instead.

Link: https://bugzilla.suse.com/show_bug.cgi?id=1230641
Reported-by: Fabian Vogt <fvogt@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/volumes.c | 79 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

Comments

Filipe Manana Sept. 25, 2024, 11:04 a.m. UTC | #1
On Wed, Sep 25, 2024 at 1:06 AM Qu Wenruo <wqu@suse.com> wrote:
>
> [PROBLEM]
> Currently btrfs accepts any file path for its device, resulting some
> weird situation:
>
>  # ./mount_by_fd /dev/test/scratch1  /mnt/btrfs/
>
> The program has the following source code:
>
>  #include <fcntl.h>
>  #include <stdio.h>
>  #include <sys/mount.h>
>
>  int main(int argc, char *argv[]) {
>         int fd = open(argv[1], O_RDWR);
>         char path[256];
>         snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
>         return mount(path, argv[2], "btrfs", 0, NULL);
>  }
>
> Then we can have the following weird device path:
>
>  BTRFS: device fsid 2378be81-fe12-46d2-a9e8-68cf08dd98d5 devid 1 transid 7 /proc/self/fd/3 (253:2) scanned by mount_by_fd (18440)
>
> Normally it's not a big deal, and later udev can trigger a device path
> rename. But if udev didn't trigger, the device path "/proc/self/fd/3"
> will show up in mtab.
>
> [CAUSE]
> For filename "/proc/self/fd/3", it means the opened file descriptor 3.
> In above case, it's exactly the device we want to open, aka points to
> "/dev/test/scratch1" which is another softlink pointing to "/dev/dm-2".
>
> Inside kernel we solve the mount source using LOOKUP_FOLLOW, which
> follows the symbolic link and grab the proper block device.
>
> But inside btrfs we also save the filename into btrfs_device::name, and
> utilize that member to report our mount source, which leads to the above
> situation.
>
> [FIX]
> Instead of unconditionally trust the path, check if the original file
> (not following the symbolic link) is inside "/dev/", if not, then
> manually lookup the path to its final destination, and use that as our
> device path.
>
> This allows us to still use symbolic links, like
> "/dev/mapper/test-scratch" from LVM2, which is required for fstests runs
> with LVM2 setup.
>
> And for really weird names, like the above case, we solve it to
> "/dev/dm-2" instead.
>
> Link: https://bugzilla.suse.com/show_bug.cgi?id=1230641
> Reported-by: Fabian Vogt <fvogt@suse.com>
> Signed-off-by: Qu Wenruo <wqu@suse.com>

Reviewed-by: Filipe Manana <fdmanana@suse.com>

Looks good, thanks.

> ---
>  fs/btrfs/volumes.c | 79 +++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 78 insertions(+), 1 deletion(-)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index b713e4ebb362..668138451f7c 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -732,6 +732,70 @@ const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb)
>         return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
>  }
>
> +/*
> + * We can have very weird soft links passed in.
> + * One example is "/proc/self/fd/<fd>", which can be a soft link to
> + * a block device.
> + *
> + * But it's never a good idea to use those weird names.
> + * Here we check if the path (not following symlinks) is a good one inside
> + * "/dev/".
> + */
> +static bool is_good_dev_path(const char *dev_path)
> +{
> +       struct path path = { .mnt = NULL, .dentry = NULL };
> +       char *path_buf = NULL;
> +       char *resolved_path;
> +       bool is_good = false;
> +       int ret;
> +
> +       path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
> +       if (!path_buf)
> +               goto out;
> +
> +       /*
> +        * Do not follow soft link, just check if the original path is inside
> +        * "/dev/".
> +        */
> +       ret = kern_path(dev_path, 0, &path);
> +       if (ret)
> +               goto out;
> +       resolved_path = d_path(&path, path_buf, PATH_MAX);
> +       if (IS_ERR(resolved_path))
> +               goto out;
> +       if (strncmp(resolved_path, "/dev/", strlen("/dev/")))
> +               goto out;
> +       is_good = true;
> +out:
> +       kfree(path_buf);
> +       path_put(&path);
> +       return is_good;
> +}
> +
> +static int get_canonical_dev_path(const char *dev_path, char *canonical)
> +{
> +       struct path path = { .mnt = NULL, .dentry = NULL };
> +       char *path_buf = NULL;
> +       char *resolved_path;
> +       int ret;
> +
> +       path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
> +       if (!path_buf) {
> +               ret = -ENOMEM;
> +               goto out;
> +       }
> +
> +       ret = kern_path(dev_path, LOOKUP_FOLLOW, &path);
> +       if (ret)
> +               goto out;
> +       resolved_path = d_path(&path, path_buf, PATH_MAX);
> +       ret = strscpy(canonical, resolved_path, PATH_MAX);
> +out:
> +       kfree(path_buf);
> +       path_put(&path);
> +       return ret;
> +}
> +
>  static bool is_same_device(struct btrfs_device *device, const char *new_path)
>  {
>         struct path old = { .mnt = NULL, .dentry = NULL };
> @@ -1408,12 +1472,23 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
>         bool new_device_added = false;
>         struct btrfs_device *device = NULL;
>         struct file *bdev_file;
> +       char *canonical_path = NULL;
>         u64 bytenr;
>         dev_t devt;
>         int ret;
>
>         lockdep_assert_held(&uuid_mutex);
>
> +       if (!is_good_dev_path(path)) {
> +               canonical_path = kmalloc(PATH_MAX, GFP_KERNEL);
> +               if (canonical_path) {
> +                       ret = get_canonical_dev_path(path, canonical_path);
> +                       if (ret < 0) {
> +                               kfree(canonical_path);
> +                               canonical_path = NULL;
> +                       }
> +               }
> +       }
>         /*
>          * Avoid an exclusive open here, as the systemd-udev may initiate the
>          * device scan which may race with the user's mount or mkfs command,
> @@ -1458,7 +1533,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
>                 goto free_disk_super;
>         }
>
> -       device = device_list_add(path, disk_super, &new_device_added);
> +       device = device_list_add(canonical_path ? : path, disk_super,
> +                                &new_device_added);
>         if (!IS_ERR(device) && new_device_added)
>                 btrfs_free_stale_devices(device->devt, device);
>
> @@ -1467,6 +1543,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
>
>  error_bdev_put:
>         fput(bdev_file);
> +       kfree(canonical_path);
>
>         return device;
>  }
> --
> 2.46.1
>
>
Anand Jain Sept. 30, 2024, 5:26 a.m. UTC | #2
On 25/9/24 5:36 am, Qu Wenruo wrote:
> [PROBLEM]
> Currently btrfs accepts any file path for its device, resulting some
> weird situation:
> 


>   # ./mount_by_fd /dev/test/scratch1  /mnt/btrfs/
> 
> The program has the following source code:
> 
>   #include <fcntl.h>
>   #include <stdio.h>
>   #include <sys/mount.h>
> 
>   int main(int argc, char *argv[]) {
> 	int fd = open(argv[1], O_RDWR);
> 	char path[256];
> 	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
> 	return mount(path, argv[2], "btrfs", 0, NULL);
>   }
> 

Sorry for the late comments. Patches look good.

Quick question: both XFS and ext4 fail this test case—any idea why?
Can a generic/btrfs specific testcase be added to fstests?

Thanks, Anand

> Then we can have the following weird device path:
> 
>   BTRFS: device fsid 2378be81-fe12-46d2-a9e8-68cf08dd98d5 devid 1 transid 7 /proc/self/fd/3 (253:2) scanned by mount_by_fd (18440)
> 
> Normally it's not a big deal, and later udev can trigger a device path
> rename. But if udev didn't trigger, the device path "/proc/self/fd/3"
> will show up in mtab.
> 
> [CAUSE]
> For filename "/proc/self/fd/3", it means the opened file descriptor 3.
> In above case, it's exactly the device we want to open, aka points to
> "/dev/test/scratch1" which is another softlink pointing to "/dev/dm-2".
> 
> Inside kernel we solve the mount source using LOOKUP_FOLLOW, which
> follows the symbolic link and grab the proper block device.
> 
> But inside btrfs we also save the filename into btrfs_device::name, and
> utilize that member to report our mount source, which leads to the above
> situation.
> 
> [FIX]
> Instead of unconditionally trust the path, check if the original file
> (not following the symbolic link) is inside "/dev/", if not, then
> manually lookup the path to its final destination, and use that as our
> device path.
> 
> This allows us to still use symbolic links, like
> "/dev/mapper/test-scratch" from LVM2, which is required for fstests runs
> with LVM2 setup.
> 
> And for really weird names, like the above case, we solve it to
> "/dev/dm-2" instead.
> 
> Link: https://bugzilla.suse.com/show_bug.cgi?id=1230641
> Reported-by: Fabian Vogt <fvogt@suse.com>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>   fs/btrfs/volumes.c | 79 +++++++++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 78 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index b713e4ebb362..668138451f7c 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -732,6 +732,70 @@ const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb)
>   	return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
>   }
>   
> +/*
> + * We can have very weird soft links passed in.
> + * One example is "/proc/self/fd/<fd>", which can be a soft link to
> + * a block device.
> + *
> + * But it's never a good idea to use those weird names.
> + * Here we check if the path (not following symlinks) is a good one inside
> + * "/dev/".
> + */
> +static bool is_good_dev_path(const char *dev_path)
> +{
> +	struct path path = { .mnt = NULL, .dentry = NULL };
> +	char *path_buf = NULL;
> +	char *resolved_path;
> +	bool is_good = false;
> +	int ret;
> +
> +	path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
> +	if (!path_buf)
> +		goto out;
> +
> +	/*
> +	 * Do not follow soft link, just check if the original path is inside
> +	 * "/dev/".
> +	 */
> +	ret = kern_path(dev_path, 0, &path);
> +	if (ret)
> +		goto out;
> +	resolved_path = d_path(&path, path_buf, PATH_MAX);
> +	if (IS_ERR(resolved_path))
> +		goto out;
> +	if (strncmp(resolved_path, "/dev/", strlen("/dev/")))
> +		goto out;
> +	is_good = true;
> +out:
> +	kfree(path_buf);
> +	path_put(&path);
> +	return is_good;
> +}
> +
> +static int get_canonical_dev_path(const char *dev_path, char *canonical)
> +{
> +	struct path path = { .mnt = NULL, .dentry = NULL };
> +	char *path_buf = NULL;
> +	char *resolved_path;
> +	int ret;
> +
> +	path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
> +	if (!path_buf) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +
> +	ret = kern_path(dev_path, LOOKUP_FOLLOW, &path);
> +	if (ret)
> +		goto out;
> +	resolved_path = d_path(&path, path_buf, PATH_MAX);
> +	ret = strscpy(canonical, resolved_path, PATH_MAX);
> +out:
> +	kfree(path_buf);
> +	path_put(&path);
> +	return ret;
> +}
> +
>   static bool is_same_device(struct btrfs_device *device, const char *new_path)
>   {
>   	struct path old = { .mnt = NULL, .dentry = NULL };
> @@ -1408,12 +1472,23 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
>   	bool new_device_added = false;
>   	struct btrfs_device *device = NULL;
>   	struct file *bdev_file;
> +	char *canonical_path = NULL;
>   	u64 bytenr;
>   	dev_t devt;
>   	int ret;
>   
>   	lockdep_assert_held(&uuid_mutex);
>   
> +	if (!is_good_dev_path(path)) {
> +		canonical_path = kmalloc(PATH_MAX, GFP_KERNEL);
> +		if (canonical_path) {
> +			ret = get_canonical_dev_path(path, canonical_path);
> +			if (ret < 0) {
> +				kfree(canonical_path);
> +				canonical_path = NULL;
> +			}
> +		}
> +	}
>   	/*
>   	 * Avoid an exclusive open here, as the systemd-udev may initiate the
>   	 * device scan which may race with the user's mount or mkfs command,
> @@ -1458,7 +1533,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
>   		goto free_disk_super;
>   	}
>   
> -	device = device_list_add(path, disk_super, &new_device_added);
> +	device = device_list_add(canonical_path ? : path, disk_super,
> +				 &new_device_added);
>   	if (!IS_ERR(device) && new_device_added)
>   		btrfs_free_stale_devices(device->devt, device);
>   
> @@ -1467,6 +1543,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
>   
>   error_bdev_put:
>   	fput(bdev_file);
> +	kfree(canonical_path);
>   
>   	return device;
>   }
Qu Wenruo Sept. 30, 2024, 5:31 a.m. UTC | #3
在 2024/9/30 14:56, Anand Jain 写道:
> On 25/9/24 5:36 am, Qu Wenruo wrote:
>> [PROBLEM]
>> Currently btrfs accepts any file path for its device, resulting some
>> weird situation:
>>
> 
> 
>>   # ./mount_by_fd /dev/test/scratch1  /mnt/btrfs/
>>
>> The program has the following source code:
>>
>>   #include <fcntl.h>
>>   #include <stdio.h>
>>   #include <sys/mount.h>
>>
>>   int main(int argc, char *argv[]) {
>>     int fd = open(argv[1], O_RDWR);
>>     char path[256];
>>     snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
>>     return mount(path, argv[2], "btrfs", 0, NULL);
>>   }
>>
> 
> Sorry for the late comments. Patches look good.
> 
> Quick question: both XFS and ext4 fail this test case—any idea why?

Because they are single device filesystems (except the log device), thus 
they do not need to nor are able to update their device path halfway.

> Can a generic/btrfs specific testcase be added to fstests?

I can add it as a btrfs specific one, but I guess not a generic one?

Thanks,
Qu

> 
> Thanks, Anand
> 
>> Then we can have the following weird device path:
>>
>>   BTRFS: device fsid 2378be81-fe12-46d2-a9e8-68cf08dd98d5 devid 1 
>> transid 7 /proc/self/fd/3 (253:2) scanned by mount_by_fd (18440)
>>
>> Normally it's not a big deal, and later udev can trigger a device path
>> rename. But if udev didn't trigger, the device path "/proc/self/fd/3"
>> will show up in mtab.
>>
>> [CAUSE]
>> For filename "/proc/self/fd/3", it means the opened file descriptor 3.
>> In above case, it's exactly the device we want to open, aka points to
>> "/dev/test/scratch1" which is another softlink pointing to "/dev/dm-2".
>>
>> Inside kernel we solve the mount source using LOOKUP_FOLLOW, which
>> follows the symbolic link and grab the proper block device.
>>
>> But inside btrfs we also save the filename into btrfs_device::name, and
>> utilize that member to report our mount source, which leads to the above
>> situation.
>>
>> [FIX]
>> Instead of unconditionally trust the path, check if the original file
>> (not following the symbolic link) is inside "/dev/", if not, then
>> manually lookup the path to its final destination, and use that as our
>> device path.
>>
>> This allows us to still use symbolic links, like
>> "/dev/mapper/test-scratch" from LVM2, which is required for fstests runs
>> with LVM2 setup.
>>
>> And for really weird names, like the above case, we solve it to
>> "/dev/dm-2" instead.
>>
>> Link: https://bugzilla.suse.com/show_bug.cgi?id=1230641
>> Reported-by: Fabian Vogt <fvogt@suse.com>
>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>> ---
>>   fs/btrfs/volumes.c | 79 +++++++++++++++++++++++++++++++++++++++++++++-
>>   1 file changed, 78 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>> index b713e4ebb362..668138451f7c 100644
>> --- a/fs/btrfs/volumes.c
>> +++ b/fs/btrfs/volumes.c
>> @@ -732,6 +732,70 @@ const u8 *btrfs_sb_fsid_ptr(const struct 
>> btrfs_super_block *sb)
>>       return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
>>   }
>> +/*
>> + * We can have very weird soft links passed in.
>> + * One example is "/proc/self/fd/<fd>", which can be a soft link to
>> + * a block device.
>> + *
>> + * But it's never a good idea to use those weird names.
>> + * Here we check if the path (not following symlinks) is a good one 
>> inside
>> + * "/dev/".
>> + */
>> +static bool is_good_dev_path(const char *dev_path)
>> +{
>> +    struct path path = { .mnt = NULL, .dentry = NULL };
>> +    char *path_buf = NULL;
>> +    char *resolved_path;
>> +    bool is_good = false;
>> +    int ret;
>> +
>> +    path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
>> +    if (!path_buf)
>> +        goto out;
>> +
>> +    /*
>> +     * Do not follow soft link, just check if the original path is 
>> inside
>> +     * "/dev/".
>> +     */
>> +    ret = kern_path(dev_path, 0, &path);
>> +    if (ret)
>> +        goto out;
>> +    resolved_path = d_path(&path, path_buf, PATH_MAX);
>> +    if (IS_ERR(resolved_path))
>> +        goto out;
>> +    if (strncmp(resolved_path, "/dev/", strlen("/dev/")))
>> +        goto out;
>> +    is_good = true;
>> +out:
>> +    kfree(path_buf);
>> +    path_put(&path);
>> +    return is_good;
>> +}
>> +
>> +static int get_canonical_dev_path(const char *dev_path, char *canonical)
>> +{
>> +    struct path path = { .mnt = NULL, .dentry = NULL };
>> +    char *path_buf = NULL;
>> +    char *resolved_path;
>> +    int ret;
>> +
>> +    path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
>> +    if (!path_buf) {
>> +        ret = -ENOMEM;
>> +        goto out;
>> +    }
>> +
>> +    ret = kern_path(dev_path, LOOKUP_FOLLOW, &path);
>> +    if (ret)
>> +        goto out;
>> +    resolved_path = d_path(&path, path_buf, PATH_MAX);
>> +    ret = strscpy(canonical, resolved_path, PATH_MAX);
>> +out:
>> +    kfree(path_buf);
>> +    path_put(&path);
>> +    return ret;
>> +}
>> +
>>   static bool is_same_device(struct btrfs_device *device, const char 
>> *new_path)
>>   {
>>       struct path old = { .mnt = NULL, .dentry = NULL };
>> @@ -1408,12 +1472,23 @@ struct btrfs_device 
>> *btrfs_scan_one_device(const char *path, blk_mode_t flags,
>>       bool new_device_added = false;
>>       struct btrfs_device *device = NULL;
>>       struct file *bdev_file;
>> +    char *canonical_path = NULL;
>>       u64 bytenr;
>>       dev_t devt;
>>       int ret;
>>       lockdep_assert_held(&uuid_mutex);
>> +    if (!is_good_dev_path(path)) {
>> +        canonical_path = kmalloc(PATH_MAX, GFP_KERNEL);
>> +        if (canonical_path) {
>> +            ret = get_canonical_dev_path(path, canonical_path);
>> +            if (ret < 0) {
>> +                kfree(canonical_path);
>> +                canonical_path = NULL;
>> +            }
>> +        }
>> +    }
>>       /*
>>        * Avoid an exclusive open here, as the systemd-udev may 
>> initiate the
>>        * device scan which may race with the user's mount or mkfs 
>> command,
>> @@ -1458,7 +1533,8 @@ struct btrfs_device *btrfs_scan_one_device(const 
>> char *path, blk_mode_t flags,
>>           goto free_disk_super;
>>       }
>> -    device = device_list_add(path, disk_super, &new_device_added);
>> +    device = device_list_add(canonical_path ? : path, disk_super,
>> +                 &new_device_added);
>>       if (!IS_ERR(device) && new_device_added)
>>           btrfs_free_stale_devices(device->devt, device);
>> @@ -1467,6 +1543,7 @@ struct btrfs_device *btrfs_scan_one_device(const 
>> char *path, blk_mode_t flags,
>>   error_bdev_put:
>>       fput(bdev_file);
>> +    kfree(canonical_path);
>>       return device;
>>   }
>
Anand Jain Sept. 30, 2024, 12:40 p.m. UTC | #4
On 30/9/24 11:01 am, Qu Wenruo wrote:
> 
> 
> 在 2024/9/30 14:56, Anand Jain 写道:
>> On 25/9/24 5:36 am, Qu Wenruo wrote:
>>> [PROBLEM]
>>> Currently btrfs accepts any file path for its device, resulting some
>>> weird situation:
>>>
>>
>>
>>>   # ./mount_by_fd /dev/test/scratch1  /mnt/btrfs/
>>>
>>> The program has the following source code:
>>>
>>>   #include <fcntl.h>
>>>   #include <stdio.h>
>>>   #include <sys/mount.h>
>>>
>>>   int main(int argc, char *argv[]) {
>>>     int fd = open(argv[1], O_RDWR);
>>>     char path[256];
>>>     snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
>>>     return mount(path, argv[2], "btrfs", 0, NULL);
>>>   }
>>>
>>
>> Sorry for the late comments. Patches look good.
>>

>> Quick question: both XFS and ext4 fail this test case—any idea why?
> 
> Because they are single device filesystems (except the log device), thus 
> they do not need to nor are able to update their device path halfway.
> 

How is a single or multi-device filesystem relevant here?

>> Can a generic/btrfs specific testcase be added to fstests?
> 
> I can add it as a btrfs specific one, but I guess not a generic one?
> 

Yes, you can make it a Btrfs-specific test case, as XFS and EXT4
currently fail on this test.

Thanks, Anand

> Thanks,
> Qu
> 
>>
>> Thanks, Anand
>>
>>> Then we can have the following weird device path:
>>>
>>>   BTRFS: device fsid 2378be81-fe12-46d2-a9e8-68cf08dd98d5 devid 1 
>>> transid 7 /proc/self/fd/3 (253:2) scanned by mount_by_fd (18440)
>>>
>>> Normally it's not a big deal, and later udev can trigger a device path
>>> rename. But if udev didn't trigger, the device path "/proc/self/fd/3"
>>> will show up in mtab.
>>>
>>> [CAUSE]
>>> For filename "/proc/self/fd/3", it means the opened file descriptor 3.
>>> In above case, it's exactly the device we want to open, aka points to
>>> "/dev/test/scratch1" which is another softlink pointing to "/dev/dm-2".
>>>
>>> Inside kernel we solve the mount source using LOOKUP_FOLLOW, which
>>> follows the symbolic link and grab the proper block device.
>>>
>>> But inside btrfs we also save the filename into btrfs_device::name, and
>>> utilize that member to report our mount source, which leads to the above
>>> situation.
>>>
>>> [FIX]
>>> Instead of unconditionally trust the path, check if the original file
>>> (not following the symbolic link) is inside "/dev/", if not, then
>>> manually lookup the path to its final destination, and use that as our
>>> device path.
>>>
>>> This allows us to still use symbolic links, like
>>> "/dev/mapper/test-scratch" from LVM2, which is required for fstests runs
>>> with LVM2 setup.
>>>
>>> And for really weird names, like the above case, we solve it to
>>> "/dev/dm-2" instead.
>>>
>>> Link: https://bugzilla.suse.com/show_bug.cgi?id=1230641
>>> Reported-by: Fabian Vogt <fvogt@suse.com>
>>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>>> ---
>>>   fs/btrfs/volumes.c | 79 +++++++++++++++++++++++++++++++++++++++++++++-
>>>   1 file changed, 78 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>>> index b713e4ebb362..668138451f7c 100644
>>> --- a/fs/btrfs/volumes.c
>>> +++ b/fs/btrfs/volumes.c
>>> @@ -732,6 +732,70 @@ const u8 *btrfs_sb_fsid_ptr(const struct 
>>> btrfs_super_block *sb)
>>>       return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
>>>   }
>>> +/*
>>> + * We can have very weird soft links passed in.
>>> + * One example is "/proc/self/fd/<fd>", which can be a soft link to
>>> + * a block device.
>>> + *
>>> + * But it's never a good idea to use those weird names.
>>> + * Here we check if the path (not following symlinks) is a good one 
>>> inside
>>> + * "/dev/".
>>> + */
>>> +static bool is_good_dev_path(const char *dev_path)
>>> +{
>>> +    struct path path = { .mnt = NULL, .dentry = NULL };
>>> +    char *path_buf = NULL;
>>> +    char *resolved_path;
>>> +    bool is_good = false;
>>> +    int ret;
>>> +
>>> +    path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
>>> +    if (!path_buf)
>>> +        goto out;
>>> +
>>> +    /*
>>> +     * Do not follow soft link, just check if the original path is 
>>> inside
>>> +     * "/dev/".
>>> +     */
>>> +    ret = kern_path(dev_path, 0, &path);
>>> +    if (ret)
>>> +        goto out;
>>> +    resolved_path = d_path(&path, path_buf, PATH_MAX);
>>> +    if (IS_ERR(resolved_path))
>>> +        goto out;
>>> +    if (strncmp(resolved_path, "/dev/", strlen("/dev/")))
>>> +        goto out;
>>> +    is_good = true;
>>> +out:
>>> +    kfree(path_buf);
>>> +    path_put(&path);
>>> +    return is_good;
>>> +}
>>> +
>>> +static int get_canonical_dev_path(const char *dev_path, char 
>>> *canonical)
>>> +{
>>> +    struct path path = { .mnt = NULL, .dentry = NULL };
>>> +    char *path_buf = NULL;
>>> +    char *resolved_path;
>>> +    int ret;
>>> +
>>> +    path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
>>> +    if (!path_buf) {
>>> +        ret = -ENOMEM;
>>> +        goto out;
>>> +    }
>>> +
>>> +    ret = kern_path(dev_path, LOOKUP_FOLLOW, &path);
>>> +    if (ret)
>>> +        goto out;
>>> +    resolved_path = d_path(&path, path_buf, PATH_MAX);
>>> +    ret = strscpy(canonical, resolved_path, PATH_MAX);
>>> +out:
>>> +    kfree(path_buf);
>>> +    path_put(&path);
>>> +    return ret;
>>> +}
>>> +
>>>   static bool is_same_device(struct btrfs_device *device, const char 
>>> *new_path)
>>>   {
>>>       struct path old = { .mnt = NULL, .dentry = NULL };
>>> @@ -1408,12 +1472,23 @@ struct btrfs_device 
>>> *btrfs_scan_one_device(const char *path, blk_mode_t flags,
>>>       bool new_device_added = false;
>>>       struct btrfs_device *device = NULL;
>>>       struct file *bdev_file;
>>> +    char *canonical_path = NULL;
>>>       u64 bytenr;
>>>       dev_t devt;
>>>       int ret;
>>>       lockdep_assert_held(&uuid_mutex);
>>> +    if (!is_good_dev_path(path)) {
>>> +        canonical_path = kmalloc(PATH_MAX, GFP_KERNEL);
>>> +        if (canonical_path) {
>>> +            ret = get_canonical_dev_path(path, canonical_path);
>>> +            if (ret < 0) {
>>> +                kfree(canonical_path);
>>> +                canonical_path = NULL;
>>> +            }
>>> +        }
>>> +    }
>>>       /*
>>>        * Avoid an exclusive open here, as the systemd-udev may 
>>> initiate the
>>>        * device scan which may race with the user's mount or mkfs 
>>> command,
>>> @@ -1458,7 +1533,8 @@ struct btrfs_device 
>>> *btrfs_scan_one_device(const char *path, blk_mode_t flags,
>>>           goto free_disk_super;
>>>       }
>>> -    device = device_list_add(path, disk_super, &new_device_added);
>>> +    device = device_list_add(canonical_path ? : path, disk_super,
>>> +                 &new_device_added);
>>>       if (!IS_ERR(device) && new_device_added)
>>>           btrfs_free_stale_devices(device->devt, device);
>>> @@ -1467,6 +1543,7 @@ struct btrfs_device 
>>> *btrfs_scan_one_device(const char *path, blk_mode_t flags,
>>>   error_bdev_put:
>>>       fput(bdev_file);
>>> +    kfree(canonical_path);
>>>       return device;
>>>   }
>>
>
Qu Wenruo Sept. 30, 2024, 9:42 p.m. UTC | #5
在 2024/9/30 22:10, Anand Jain 写道:
[...]
>>> Quick question: both XFS and ext4 fail this test case—any idea why?
>>
>> Because they are single device filesystems (except the log device),
>> thus they do not need to nor are able to update their device path
>> halfway.
>>
>
> How is a single or multi-device filesystem relevant here?

Since it's a single device fs, they won't bother about device scanning
nor updating their device path to handle re-occuring device.

Thus they just accept whatever path passed in.

Thanks,
Qu
>
>>> Can a generic/btrfs specific testcase be added to fstests?
>>
>> I can add it as a btrfs specific one, but I guess not a generic one?
>>
>
> Yes, you can make it a Btrfs-specific test case, as XFS and EXT4
> currently fail on this test.
>
> Thanks, Anand
>
>> Thanks,
>> Qu
>>
>>>
>>> Thanks, Anand
>>>
>>>> Then we can have the following weird device path:
>>>>
>>>>   BTRFS: device fsid 2378be81-fe12-46d2-a9e8-68cf08dd98d5 devid 1
>>>> transid 7 /proc/self/fd/3 (253:2) scanned by mount_by_fd (18440)
>>>>
>>>> Normally it's not a big deal, and later udev can trigger a device path
>>>> rename. But if udev didn't trigger, the device path "/proc/self/fd/3"
>>>> will show up in mtab.
>>>>
>>>> [CAUSE]
>>>> For filename "/proc/self/fd/3", it means the opened file descriptor 3.
>>>> In above case, it's exactly the device we want to open, aka points to
>>>> "/dev/test/scratch1" which is another softlink pointing to "/dev/dm-2".
>>>>
>>>> Inside kernel we solve the mount source using LOOKUP_FOLLOW, which
>>>> follows the symbolic link and grab the proper block device.
>>>>
>>>> But inside btrfs we also save the filename into btrfs_device::name, and
>>>> utilize that member to report our mount source, which leads to the
>>>> above
>>>> situation.
>>>>
>>>> [FIX]
>>>> Instead of unconditionally trust the path, check if the original file
>>>> (not following the symbolic link) is inside "/dev/", if not, then
>>>> manually lookup the path to its final destination, and use that as our
>>>> device path.
>>>>
>>>> This allows us to still use symbolic links, like
>>>> "/dev/mapper/test-scratch" from LVM2, which is required for fstests
>>>> runs
>>>> with LVM2 setup.
>>>>
>>>> And for really weird names, like the above case, we solve it to
>>>> "/dev/dm-2" instead.
>>>>
>>>> Link: https://bugzilla.suse.com/show_bug.cgi?id=1230641
>>>> Reported-by: Fabian Vogt <fvogt@suse.com>
>>>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>>>> ---
>>>>   fs/btrfs/volumes.c | 79 ++++++++++++++++++++++++++++++++++++++++++
>>>> +++-
>>>>   1 file changed, 78 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>>>> index b713e4ebb362..668138451f7c 100644
>>>> --- a/fs/btrfs/volumes.c
>>>> +++ b/fs/btrfs/volumes.c
>>>> @@ -732,6 +732,70 @@ const u8 *btrfs_sb_fsid_ptr(const struct
>>>> btrfs_super_block *sb)
>>>>       return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
>>>>   }
>>>> +/*
>>>> + * We can have very weird soft links passed in.
>>>> + * One example is "/proc/self/fd/<fd>", which can be a soft link to
>>>> + * a block device.
>>>> + *
>>>> + * But it's never a good idea to use those weird names.
>>>> + * Here we check if the path (not following symlinks) is a good one
>>>> inside
>>>> + * "/dev/".
>>>> + */
>>>> +static bool is_good_dev_path(const char *dev_path)
>>>> +{
>>>> +    struct path path = { .mnt = NULL, .dentry = NULL };
>>>> +    char *path_buf = NULL;
>>>> +    char *resolved_path;
>>>> +    bool is_good = false;
>>>> +    int ret;
>>>> +
>>>> +    path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
>>>> +    if (!path_buf)
>>>> +        goto out;
>>>> +
>>>> +    /*
>>>> +     * Do not follow soft link, just check if the original path is
>>>> inside
>>>> +     * "/dev/".
>>>> +     */
>>>> +    ret = kern_path(dev_path, 0, &path);
>>>> +    if (ret)
>>>> +        goto out;
>>>> +    resolved_path = d_path(&path, path_buf, PATH_MAX);
>>>> +    if (IS_ERR(resolved_path))
>>>> +        goto out;
>>>> +    if (strncmp(resolved_path, "/dev/", strlen("/dev/")))
>>>> +        goto out;
>>>> +    is_good = true;
>>>> +out:
>>>> +    kfree(path_buf);
>>>> +    path_put(&path);
>>>> +    return is_good;
>>>> +}
>>>> +
>>>> +static int get_canonical_dev_path(const char *dev_path, char
>>>> *canonical)
>>>> +{
>>>> +    struct path path = { .mnt = NULL, .dentry = NULL };
>>>> +    char *path_buf = NULL;
>>>> +    char *resolved_path;
>>>> +    int ret;
>>>> +
>>>> +    path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
>>>> +    if (!path_buf) {
>>>> +        ret = -ENOMEM;
>>>> +        goto out;
>>>> +    }
>>>> +
>>>> +    ret = kern_path(dev_path, LOOKUP_FOLLOW, &path);
>>>> +    if (ret)
>>>> +        goto out;
>>>> +    resolved_path = d_path(&path, path_buf, PATH_MAX);
>>>> +    ret = strscpy(canonical, resolved_path, PATH_MAX);
>>>> +out:
>>>> +    kfree(path_buf);
>>>> +    path_put(&path);
>>>> +    return ret;
>>>> +}
>>>> +
>>>>   static bool is_same_device(struct btrfs_device *device, const char
>>>> *new_path)
>>>>   {
>>>>       struct path old = { .mnt = NULL, .dentry = NULL };
>>>> @@ -1408,12 +1472,23 @@ struct btrfs_device
>>>> *btrfs_scan_one_device(const char *path, blk_mode_t flags,
>>>>       bool new_device_added = false;
>>>>       struct btrfs_device *device = NULL;
>>>>       struct file *bdev_file;
>>>> +    char *canonical_path = NULL;
>>>>       u64 bytenr;
>>>>       dev_t devt;
>>>>       int ret;
>>>>       lockdep_assert_held(&uuid_mutex);
>>>> +    if (!is_good_dev_path(path)) {
>>>> +        canonical_path = kmalloc(PATH_MAX, GFP_KERNEL);
>>>> +        if (canonical_path) {
>>>> +            ret = get_canonical_dev_path(path, canonical_path);
>>>> +            if (ret < 0) {
>>>> +                kfree(canonical_path);
>>>> +                canonical_path = NULL;
>>>> +            }
>>>> +        }
>>>> +    }
>>>>       /*
>>>>        * Avoid an exclusive open here, as the systemd-udev may
>>>> initiate the
>>>>        * device scan which may race with the user's mount or mkfs
>>>> command,
>>>> @@ -1458,7 +1533,8 @@ struct btrfs_device
>>>> *btrfs_scan_one_device(const char *path, blk_mode_t flags,
>>>>           goto free_disk_super;
>>>>       }
>>>> -    device = device_list_add(path, disk_super, &new_device_added);
>>>> +    device = device_list_add(canonical_path ? : path, disk_super,
>>>> +                 &new_device_added);
>>>>       if (!IS_ERR(device) && new_device_added)
>>>>           btrfs_free_stale_devices(device->devt, device);
>>>> @@ -1467,6 +1543,7 @@ struct btrfs_device
>>>> *btrfs_scan_one_device(const char *path, blk_mode_t flags,
>>>>   error_bdev_put:
>>>>       fput(bdev_file);
>>>> +    kfree(canonical_path);
>>>>       return device;
>>>>   }
>>>
>>
>
>
diff mbox series

Patch

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b713e4ebb362..668138451f7c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -732,6 +732,70 @@  const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb)
 	return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
 }
 
+/*
+ * We can have very weird soft links passed in.
+ * One example is "/proc/self/fd/<fd>", which can be a soft link to
+ * a block device.
+ *
+ * But it's never a good idea to use those weird names.
+ * Here we check if the path (not following symlinks) is a good one inside
+ * "/dev/".
+ */
+static bool is_good_dev_path(const char *dev_path)
+{
+	struct path path = { .mnt = NULL, .dentry = NULL };
+	char *path_buf = NULL;
+	char *resolved_path;
+	bool is_good = false;
+	int ret;
+
+	path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
+	if (!path_buf)
+		goto out;
+
+	/*
+	 * Do not follow soft link, just check if the original path is inside
+	 * "/dev/".
+	 */
+	ret = kern_path(dev_path, 0, &path);
+	if (ret)
+		goto out;
+	resolved_path = d_path(&path, path_buf, PATH_MAX);
+	if (IS_ERR(resolved_path))
+		goto out;
+	if (strncmp(resolved_path, "/dev/", strlen("/dev/")))
+		goto out;
+	is_good = true;
+out:
+	kfree(path_buf);
+	path_put(&path);
+	return is_good;
+}
+
+static int get_canonical_dev_path(const char *dev_path, char *canonical)
+{
+	struct path path = { .mnt = NULL, .dentry = NULL };
+	char *path_buf = NULL;
+	char *resolved_path;
+	int ret;
+
+	path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
+	if (!path_buf) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = kern_path(dev_path, LOOKUP_FOLLOW, &path);
+	if (ret)
+		goto out;
+	resolved_path = d_path(&path, path_buf, PATH_MAX);
+	ret = strscpy(canonical, resolved_path, PATH_MAX);
+out:
+	kfree(path_buf);
+	path_put(&path);
+	return ret;
+}
+
 static bool is_same_device(struct btrfs_device *device, const char *new_path)
 {
 	struct path old = { .mnt = NULL, .dentry = NULL };
@@ -1408,12 +1472,23 @@  struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
 	bool new_device_added = false;
 	struct btrfs_device *device = NULL;
 	struct file *bdev_file;
+	char *canonical_path = NULL;
 	u64 bytenr;
 	dev_t devt;
 	int ret;
 
 	lockdep_assert_held(&uuid_mutex);
 
+	if (!is_good_dev_path(path)) {
+		canonical_path = kmalloc(PATH_MAX, GFP_KERNEL);
+		if (canonical_path) {
+			ret = get_canonical_dev_path(path, canonical_path);
+			if (ret < 0) {
+				kfree(canonical_path);
+				canonical_path = NULL;
+			}
+		}
+	}
 	/*
 	 * Avoid an exclusive open here, as the systemd-udev may initiate the
 	 * device scan which may race with the user's mount or mkfs command,
@@ -1458,7 +1533,8 @@  struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
 		goto free_disk_super;
 	}
 
-	device = device_list_add(path, disk_super, &new_device_added);
+	device = device_list_add(canonical_path ? : path, disk_super,
+				 &new_device_added);
 	if (!IS_ERR(device) && new_device_added)
 		btrfs_free_stale_devices(device->devt, device);
 
@@ -1467,6 +1543,7 @@  struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
 
 error_bdev_put:
 	fput(bdev_file);
+	kfree(canonical_path);
 
 	return device;
 }