diff mbox series

[v5] btrfs: pass checksum type via BTRFS_IOC_FS_INFO ioctl

Message ID 20200706150924.40218-1-johannes.thumshirn@wdc.com (mailing list archive)
State New, archived
Headers show
Series [v5] btrfs: pass checksum type via BTRFS_IOC_FS_INFO ioctl | expand

Commit Message

Johannes Thumshirn July 6, 2020, 3:09 p.m. UTC
With the recent addition of filesystem checksum types other than CRC32c,
it is not anymore hard-coded which checksum type a btrfs filesystem uses.

Up to now there is no good way to read the filesystem checksum, apart from
reading the filesystem UUID and then query sysfs for the checksum type.

Add a new csum_type and csum_size fields to the BTRFS_IOC_FS_INFO ioctl
command which usually is used to query filesystem features. Also add a
flags member indicating that the kernel responded with a set csum_type and
csum_size field.

For compatibility reasons, only return the csum_type and csum_size if the
BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE flag was passed to the kernel. Also
clear any unknown flags so we don't pass false positives to user-space
newer than the kernel.

To simplify further additions to the ioctl, also switch the padding to a
u8 array. Pahole was used to verify the result of this switch:

pahole -C btrfs_ioctl_fs_info_args fs/btrfs/btrfs.ko
struct btrfs_ioctl_fs_info_args {
        __u64                      max_id;               /*     0     8 */
        __u64                      num_devices;          /*     8     8 */
        __u8                       fsid[16];             /*    16    16 */
        __u32                      nodesize;             /*    32     4 */
        __u32                      sectorsize;           /*    36     4 */
        __u32                      clone_alignment;      /*    40     4 */
        __u32                      flags;                /*    44     4 */
        __u16                      csum_type;            /*    48     2 */
        __u16                      csum_size;            /*    50     2 */
        __u8                       reserved[972];        /*    52   972 */

        /* size: 1024, cachelines: 16, members: 10 */
};

Fixes: 3951e7f050ac ("btrfs: add xxhash64 to checksumming algorithms")
Fixes: 3831bf0094ab ("btrfs: add sha256 to checksumming algorithm")
CC: stable@vger.kernel.org # 5.5+
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
Changes to v4:
* zero all data passed in from user-space
  (I've chosen this variant as I think it is the most complete)

Changes to v3:
* make flags in/out (David)
* make csum return opt-in (Hans)

Changes to v2:
* add additional csum_size (David)
* rename flag value to BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE to reflect
  additional size

Changes to v1:
* add 'out' comment to be consistent (Hans)
* remove le16_to_cpu() (kbuild robot)
* switch padding to be all u8 (David)
---
 fs/btrfs/ioctl.c           | 16 +++++++++++++---
 include/uapi/linux/btrfs.h | 14 ++++++++++++--
 2 files changed, 25 insertions(+), 5 deletions(-)

Comments

Anand Jain July 7, 2020, 7:20 a.m. UTC | #1
> pahole -C btrfs_ioctl_fs_info_args fs/btrfs/btrfs.ko
> struct btrfs_ioctl_fs_info_args {
>          __u64                      max_id;               /*     0     8 */
>          __u64                      num_devices;          /*     8     8 */
>          __u8                       fsid[16];             /*    16    16 */
>          __u32                      nodesize;             /*    32     4 */
>          __u32                      sectorsize;           /*    36     4 */
>          __u32                      clone_alignment;      /*    40     4 */

>          __u32                      flags;                /*    44     4 */
>          __u16                      csum_type;            /*    48     2 */
>          __u16                      csum_size;            /*    50     2 */

>          __u8                       reserved[972];        /*    52   972 */
> 
>          /* size: 1024, cachelines: 16, members: 10 */
> };


Newer progs shall read the value of flags/csum_type/csum_size from the 
updated kernel and zeros from the older kernel. Nice fix.

Reviewed-by: Anand Jain <anand.jain@oracle.com>

Thanks.
David Sterba July 8, 2020, 4:28 p.m. UTC | #2
On Tue, Jul 07, 2020 at 12:09:24AM +0900, Johannes Thumshirn wrote:
> With the recent addition of filesystem checksum types other than CRC32c,
> it is not anymore hard-coded which checksum type a btrfs filesystem uses.
> 
> Up to now there is no good way to read the filesystem checksum, apart from
> reading the filesystem UUID and then query sysfs for the checksum type.
> 
> Add a new csum_type and csum_size fields to the BTRFS_IOC_FS_INFO ioctl
> command which usually is used to query filesystem features. Also add a
> flags member indicating that the kernel responded with a set csum_type and
> csum_size field.
> 
> For compatibility reasons, only return the csum_type and csum_size if the
> BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE flag was passed to the kernel. Also
> clear any unknown flags so we don't pass false positives to user-space
> newer than the kernel.
> 
> To simplify further additions to the ioctl, also switch the padding to a
> u8 array. Pahole was used to verify the result of this switch:
> 
> pahole -C btrfs_ioctl_fs_info_args fs/btrfs/btrfs.ko
> struct btrfs_ioctl_fs_info_args {
>         __u64                      max_id;               /*     0     8 */
>         __u64                      num_devices;          /*     8     8 */
>         __u8                       fsid[16];             /*    16    16 */
>         __u32                      nodesize;             /*    32     4 */
>         __u32                      sectorsize;           /*    36     4 */
>         __u32                      clone_alignment;      /*    40     4 */
>         __u32                      flags;                /*    44     4 */
>         __u16                      csum_type;            /*    48     2 */
>         __u16                      csum_size;            /*    50     2 */
>         __u8                       reserved[972];        /*    52   972 */
> 
>         /* size: 1024, cachelines: 16, members: 10 */
> };
> 
> Fixes: 3951e7f050ac ("btrfs: add xxhash64 to checksumming algorithms")
> Fixes: 3831bf0094ab ("btrfs: add sha256 to checksumming algorithm")
> CC: stable@vger.kernel.org # 5.5+
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
> Changes to v4:
> * zero all data passed in from user-space
>   (I've chosen this variant as I think it is the most complete)

I'll have to refresh the whole evolution since v1 but the memset sounds
reasonable to me.
David Sterba July 9, 2020, 2:52 p.m. UTC | #3
On Tue, Jul 07, 2020 at 12:09:24AM +0900, Johannes Thumshirn wrote:
> With the recent addition of filesystem checksum types other than CRC32c,
> it is not anymore hard-coded which checksum type a btrfs filesystem uses.
> 
> Up to now there is no good way to read the filesystem checksum, apart from
> reading the filesystem UUID and then query sysfs for the checksum type.
> 
> Add a new csum_type and csum_size fields to the BTRFS_IOC_FS_INFO ioctl
> command which usually is used to query filesystem features. Also add a
> flags member indicating that the kernel responded with a set csum_type and
> csum_size field.
> 
> For compatibility reasons, only return the csum_type and csum_size if the
> BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE flag was passed to the kernel. Also
> clear any unknown flags so we don't pass false positives to user-space
> newer than the kernel.
> 
> To simplify further additions to the ioctl, also switch the padding to a
> u8 array. Pahole was used to verify the result of this switch:
> 
> pahole -C btrfs_ioctl_fs_info_args fs/btrfs/btrfs.ko
> struct btrfs_ioctl_fs_info_args {
>         __u64                      max_id;               /*     0     8 */
>         __u64                      num_devices;          /*     8     8 */
>         __u8                       fsid[16];             /*    16    16 */
>         __u32                      nodesize;             /*    32     4 */
>         __u32                      sectorsize;           /*    36     4 */
>         __u32                      clone_alignment;      /*    40     4 */
>         __u32                      flags;                /*    44     4 */
>         __u16                      csum_type;            /*    48     2 */
>         __u16                      csum_size;            /*    50     2 */
>         __u8                       reserved[972];        /*    52   972 */
> 
>         /* size: 1024, cachelines: 16, members: 10 */
> };
> 
> Fixes: 3951e7f050ac ("btrfs: add xxhash64 to checksumming algorithms")
> Fixes: 3831bf0094ab ("btrfs: add sha256 to checksumming algorithm")
> CC: stable@vger.kernel.org # 5.5+
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
> Changes to v4:
> * zero all data passed in from user-space
>   (I've chosen this variant as I think it is the most complete)
> 
> Changes to v3:
> * make flags in/out (David)
> * make csum return opt-in (Hans)
> 
> Changes to v2:
> * add additional csum_size (David)
> * rename flag value to BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE to reflect
>   additional size
> 
> Changes to v1:
> * add 'out' comment to be consistent (Hans)
> * remove le16_to_cpu() (kbuild robot)
> * switch padding to be all u8 (David)
> ---
>  fs/btrfs/ioctl.c           | 16 +++++++++++++---
>  include/uapi/linux/btrfs.h | 14 ++++++++++++--
>  2 files changed, 25 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index ab34179d7cbc..df8a6ba91055 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -3217,11 +3217,15 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
>  	struct btrfs_ioctl_fs_info_args *fi_args;
>  	struct btrfs_device *device;
>  	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
> +	u32 inflags;
>  	int ret = 0;
>  
> -	fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
> -	if (!fi_args)
> -		return -ENOMEM;
> +	fi_args = memdup_user(arg, sizeof(*fi_args));
> +	if (IS_ERR(fi_args))
> +		return PTR_ERR(fi_args);
> +
> +	inflags = fi_args->flags;
> +	memset(fi_args, 0, sizeof(*fi_args));
>  
>  	rcu_read_lock();
>  	fi_args->num_devices = fs_devices->num_devices;
> @@ -3237,6 +3241,12 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
>  	fi_args->sectorsize = fs_info->sectorsize;
>  	fi_args->clone_alignment = fs_info->sectorsize;
>  
> +	if (inflags & BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE) {
> +		fi_args->csum_type = btrfs_super_csum_type(fs_info->super_copy);
> +		fi_args->csum_size = btrfs_super_csum_size(fs_info->super_copy);
> +		fi_args->flags |= BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE;
> +	}

That's safe, but we'll have to add a new flag for all new members and we
have already 2 ready (generation, metadata_uuid) and ioctl user has to
set the bits. Minor inconvenience, in exchange for future extensions, so
let it be.

> +
>  	if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
>  		ret = -EFAULT;
>  
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index e6b6cb0f8bc6..c130eaea416e 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -250,10 +250,20 @@ struct btrfs_ioctl_fs_info_args {
>  	__u32 nodesize;				/* out */
>  	__u32 sectorsize;			/* out */
>  	__u32 clone_alignment;			/* out */
> -	__u32 reserved32;
> -	__u64 reserved[122];			/* pad to 1k */
> +	__u32 flags;				/* in/out */
> +	__u16 csum_type;			/* out */
> +	__u16 csum_size;			/* out */
> +	__u8 reserved[972];			/* pad to 1k */
>  };
>  
> +/*
> + * fs_info ioctl flags
> + *
> + * Used by:
> + * struct btrfs_ioctl_fs_info_args
> + */
> +#define BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE		(1 << 0)

Flags should be defined before the structure, but that I can fix up.
Hans van Kranenburg July 9, 2020, 4:20 p.m. UTC | #4
On 7/9/20 4:52 PM, David Sterba wrote:
> On Tue, Jul 07, 2020 at 12:09:24AM +0900, Johannes Thumshirn wrote:
>> With the recent addition of filesystem checksum types other than CRC32c,
>> it is not anymore hard-coded which checksum type a btrfs filesystem uses.
>>
>> Up to now there is no good way to read the filesystem checksum, apart from
>> reading the filesystem UUID and then query sysfs for the checksum type.
>>
>> Add a new csum_type and csum_size fields to the BTRFS_IOC_FS_INFO ioctl
>> command which usually is used to query filesystem features. Also add a
>> flags member indicating that the kernel responded with a set csum_type and
>> csum_size field.
>>
>> For compatibility reasons, only return the csum_type and csum_size if the
>> BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE flag was passed to the kernel. Also
>> clear any unknown flags so we don't pass false positives to user-space
>> newer than the kernel.
>>
>> To simplify further additions to the ioctl, also switch the padding to a
>> u8 array. Pahole was used to verify the result of this switch:
>>
>> pahole -C btrfs_ioctl_fs_info_args fs/btrfs/btrfs.ko
>> struct btrfs_ioctl_fs_info_args {
>>         __u64                      max_id;               /*     0     8 */
>>         __u64                      num_devices;          /*     8     8 */
>>         __u8                       fsid[16];             /*    16    16 */
>>         __u32                      nodesize;             /*    32     4 */
>>         __u32                      sectorsize;           /*    36     4 */
>>         __u32                      clone_alignment;      /*    40     4 */
>>         __u32                      flags;                /*    44     4 */
>>         __u16                      csum_type;            /*    48     2 */
>>         __u16                      csum_size;            /*    50     2 */
>>         __u8                       reserved[972];        /*    52   972 */
>>
>>         /* size: 1024, cachelines: 16, members: 10 */
>> };
>>
>> Fixes: 3951e7f050ac ("btrfs: add xxhash64 to checksumming algorithms")
>> Fixes: 3831bf0094ab ("btrfs: add sha256 to checksumming algorithm")
>> CC: stable@vger.kernel.org # 5.5+
>> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
>> ---
>> Changes to v4:
>> * zero all data passed in from user-space
>>   (I've chosen this variant as I think it is the most complete)

Yes, I think that's a good choice.

We still have to remember that the specifics of what's being done here
is only applicable for this scenario:

* ioctl did never check incoming data to be zeroed
* we take advantage of that missing check (pfew), so that we can add a
new flags-in field
* setting requested flags will not lead to an error on an older running
kernel, and the returned values will by accident cleanly signal that
none of the new features are supported (less developer annoyance at the
calling side)
* kernel code resets the requested flags to what is understood and
returned, so it does not matter if a buffer full of garbage was provided
* the flags are *only* used to request additional buffer contents, they
are *NOT* used to change the behavior of the functionality. this is a
key thing that allows the whole thing to work out.

What I tried to say in earlier messages is, that, it turns out that best
practices might not be as simple as "for a new ioctl always check
incoming buffer to be zeroed and refuse otherwise!"... because being
able to do the above is actually pretty convenient. :)

So each time a new one or an extension happens, it's good to really
think through about what's 'at stake'.

>> Changes to v3:
>> * make flags in/out (David)
>> * make csum return opt-in (Hans)
>>
>> Changes to v2:
>> * add additional csum_size (David)
>> * rename flag value to BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE to reflect
>>   additional size
>>
>> Changes to v1:
>> * add 'out' comment to be consistent (Hans)
>> * remove le16_to_cpu() (kbuild robot)
>> * switch padding to be all u8 (David)
>> ---
>>  fs/btrfs/ioctl.c           | 16 +++++++++++++---
>>  include/uapi/linux/btrfs.h | 14 ++++++++++++--
>>  2 files changed, 25 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
>> index ab34179d7cbc..df8a6ba91055 100644
>> --- a/fs/btrfs/ioctl.c
>> +++ b/fs/btrfs/ioctl.c
>> @@ -3217,11 +3217,15 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
>>  	struct btrfs_ioctl_fs_info_args *fi_args;
>>  	struct btrfs_device *device;
>>  	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
>> +	u32 inflags;
>>  	int ret = 0;
>>  
>> -	fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
>> -	if (!fi_args)
>> -		return -ENOMEM;
>> +	fi_args = memdup_user(arg, sizeof(*fi_args));
>> +	if (IS_ERR(fi_args))
>> +		return PTR_ERR(fi_args);
>> +
>> +	inflags = fi_args->flags;
>> +	memset(fi_args, 0, sizeof(*fi_args));
>>  
>>  	rcu_read_lock();
>>  	fi_args->num_devices = fs_devices->num_devices;
>> @@ -3237,6 +3241,12 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
>>  	fi_args->sectorsize = fs_info->sectorsize;
>>  	fi_args->clone_alignment = fs_info->sectorsize;
>>  
>> +	if (inflags & BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE) {
>> +		fi_args->csum_type = btrfs_super_csum_type(fs_info->super_copy);
>> +		fi_args->csum_size = btrfs_super_csum_size(fs_info->super_copy);
>> +		fi_args->flags |= BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE;
>> +	}
> 
> That's safe, but we'll have to add a new flag for all new members and we
> have already 2 ready (generation, metadata_uuid) and ioctl user has to
> set the bits. Minor inconvenience, in exchange for future extensions, so
> let it be.

I think I'm just gonna send a buffer set to all ones for FS_INFO from
now on. :) It will automatically get me all new future stuff.

>> +
>>  	if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
>>  		ret = -EFAULT;
>>  
>> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
>> index e6b6cb0f8bc6..c130eaea416e 100644
>> --- a/include/uapi/linux/btrfs.h
>> +++ b/include/uapi/linux/btrfs.h
>> @@ -250,10 +250,20 @@ struct btrfs_ioctl_fs_info_args {
>>  	__u32 nodesize;				/* out */
>>  	__u32 sectorsize;			/* out */
>>  	__u32 clone_alignment;			/* out */
>> -	__u32 reserved32;
>> -	__u64 reserved[122];			/* pad to 1k */
>> +	__u32 flags;				/* in/out */
>> +	__u16 csum_type;			/* out */
>> +	__u16 csum_size;			/* out */
>> +	__u8 reserved[972];			/* pad to 1k */
>>  };
>>  
>> +/*
>> + * fs_info ioctl flags
>> + *
>> + * Used by:
>> + * struct btrfs_ioctl_fs_info_args
>> + */
>> +#define BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE		(1 << 0)
> 
> Flags should be defined before the structure, but that I can fix up.
> 

Next task is to write changes for strace:

https://btrfs.wiki.kernel.org/index.php/Development_notes#Adding_a_new_ioctl.2C_extending_an_existing_one


K
Johannes Thumshirn July 10, 2020, 7:16 a.m. UTC | #5
On 09/07/2020 18:20, Hans van Kranenburg wrote:
[...]

> Next task is to write changes for strace:
> 
> https://btrfs.wiki.kernel.org/index.php/Development_notes#Adding_a_new_ioctl.2C_extending_an_existing_one

Yes I do have code for v1 of this for strace, need to adopt it.
diff mbox series

Patch

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ab34179d7cbc..df8a6ba91055 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3217,11 +3217,15 @@  static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
 	struct btrfs_ioctl_fs_info_args *fi_args;
 	struct btrfs_device *device;
 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+	u32 inflags;
 	int ret = 0;
 
-	fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
-	if (!fi_args)
-		return -ENOMEM;
+	fi_args = memdup_user(arg, sizeof(*fi_args));
+	if (IS_ERR(fi_args))
+		return PTR_ERR(fi_args);
+
+	inflags = fi_args->flags;
+	memset(fi_args, 0, sizeof(*fi_args));
 
 	rcu_read_lock();
 	fi_args->num_devices = fs_devices->num_devices;
@@ -3237,6 +3241,12 @@  static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
 	fi_args->sectorsize = fs_info->sectorsize;
 	fi_args->clone_alignment = fs_info->sectorsize;
 
+	if (inflags & BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE) {
+		fi_args->csum_type = btrfs_super_csum_type(fs_info->super_copy);
+		fi_args->csum_size = btrfs_super_csum_size(fs_info->super_copy);
+		fi_args->flags |= BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE;
+	}
+
 	if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
 		ret = -EFAULT;
 
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index e6b6cb0f8bc6..c130eaea416e 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -250,10 +250,20 @@  struct btrfs_ioctl_fs_info_args {
 	__u32 nodesize;				/* out */
 	__u32 sectorsize;			/* out */
 	__u32 clone_alignment;			/* out */
-	__u32 reserved32;
-	__u64 reserved[122];			/* pad to 1k */
+	__u32 flags;				/* in/out */
+	__u16 csum_type;			/* out */
+	__u16 csum_size;			/* out */
+	__u8 reserved[972];			/* pad to 1k */
 };
 
+/*
+ * fs_info ioctl flags
+ *
+ * Used by:
+ * struct btrfs_ioctl_fs_info_args
+ */
+#define BTRFS_FS_INFO_FLAG_CSUM_TYPE_SIZE		(1 << 0)
+
 /*
  * feature flags
  *