diff mbox series

[1/3] btrfs: zoned: clone zoned device info when cloning a device

Message ID af4caecf1d6fac27654cfd47b72eea865cdcbf61.1667571005.git.johannes.thumshirn@wdc.com (mailing list archive)
State New, archived
Headers show
Series btrfs: zoned: misc fixes for problems uncovered by fstests | expand

Commit Message

Johannes Thumshirn Nov. 4, 2022, 2:12 p.m. UTC
When cloning a btrfs_device, we're not cloning the associated
btrfs_zoned_device_info structure of the device in case of a zoned
filesystem.

This then later leads to a nullptr dereference when accessing the device's
zone_info for instance when setting a zone as active.

This was uncovered by fstests' testcase btrfs/161.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/volumes.c | 12 ++++++++++++
 fs/btrfs/zoned.c   | 42 ++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/zoned.h   | 10 ++++++++++
 3 files changed, 64 insertions(+)

Comments

Naohiro Aota Nov. 7, 2022, 3:17 p.m. UTC | #1
On Fri, Nov 04, 2022 at 07:12:33AM -0700, Johannes Thumshirn wrote:
> When cloning a btrfs_device, we're not cloning the associated
> btrfs_zoned_device_info structure of the device in case of a zoned
> filesystem.
> 
> This then later leads to a nullptr dereference when accessing the device's
> zone_info for instance when setting a zone as active.
> 
> This was uncovered by fstests' testcase btrfs/161.
> 
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  fs/btrfs/volumes.c | 12 ++++++++++++
>  fs/btrfs/zoned.c   | 42 ++++++++++++++++++++++++++++++++++++++++++
>  fs/btrfs/zoned.h   | 10 ++++++++++
>  3 files changed, 64 insertions(+)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index f81aa7f9ef19..f72ce52c67ce 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -1021,6 +1021,18 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
>  			rcu_assign_pointer(device->name, name);
>  		}
>  
> +		if (orig_dev->zone_info) {
> +			struct btrfs_zoned_device_info *zone_info;
> +
> +			zone_info = btrfs_clone_dev_zone_info(orig_dev);
> +			if (!zone_info) {
> +				btrfs_free_device(device);
> +				ret = -ENOMEM;
> +				goto error;
> +			}
> +			device->zone_info = zone_info;
> +		}
> +
>  		list_add(&device->dev_list, &fs_devices->devices);
>  		device->fs_devices = fs_devices;
>  		fs_devices->num_devices++;

This part looks good.

> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> index 9d12a23e1a59..f830f70fc214 100644
> --- a/fs/btrfs/zoned.c
> +++ b/fs/btrfs/zoned.c
> @@ -641,6 +641,48 @@ void btrfs_destroy_dev_zone_info(struct btrfs_device *device)
>  	device->zone_info = NULL;
>  }
>  
> +struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev)

I have a concern about this function. Since this function duplicates the
zone_info, it might cause a split-brain state of zone_info. Actually, that
won't happen because the current callers are the seeding device
functions. And, another solution, reference counting the zone_info, is not
an option as it is only used for the seeding case.

So, this function is safe only when a caller ensures read-only access to
the copied zone_info. I'd like to have a comment here so future developer
notices the proper usage.

> +{
> +	struct btrfs_zoned_device_info *zone_info;
> +
> +	zone_info = kmemdup(orig_dev->zone_info,
> +			    sizeof(*zone_info), GFP_KERNEL);
> +	if (!zone_info)
> +		return NULL;
> +
> +
> +	zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
> +	if (!zone_info->seq_zones)
> +		goto out;
> +
> +	bitmap_copy(zone_info->seq_zones, orig_dev->zone_info->seq_zones,
> +		    zone_info->nr_zones);
> +
> +	zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
> +	if (!zone_info->empty_zones)
> +		goto out;
> +
> +	bitmap_copy(zone_info->empty_zones, orig_dev->zone_info->empty_zones,
> +		    zone_info->nr_zones);
> +
> +	zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
> +	if (!zone_info->active_zones)
> +		goto out;
> +
> +	bitmap_copy(zone_info->active_zones, orig_dev->zone_info->active_zones,
> +		    zone_info->nr_zones);
> +	zone_info->zone_cache = NULL;
> +
> +	return zone_info;
> +
> +out:
> +	bitmap_free(zone_info->seq_zones);
> +	bitmap_free(zone_info->empty_zones);
> +	bitmap_free(zone_info->active_zones);
> +	kfree(zone_info);
> +	return NULL;
> +}
> +
>  int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
>  		       struct blk_zone *zone)
>  {
> diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
> index 8d7e6be853c6..69fb1af89a53 100644
> --- a/fs/btrfs/zoned.h
> +++ b/fs/btrfs/zoned.h
> @@ -37,6 +37,7 @@ int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
>  int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info);
>  int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache);
>  void btrfs_destroy_dev_zone_info(struct btrfs_device *device);
> +struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev);
>  int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info);
>  int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info);
>  int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
> @@ -104,6 +105,15 @@ static inline int btrfs_get_dev_zone_info(struct btrfs_device *device,
>  
>  static inline void btrfs_destroy_dev_zone_info(struct btrfs_device *device) { }
>  
> +/* In case the kernel is compiled without CONFIG_BLK_DEV_ZONED we'll never
> + * call into btrfs_clone_dev_zone_info() so it's save to return NULL here.
> + */
> +static inline struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(
> +						 struct btrfs_device *orig_dev)
> +{
> +	return NULL;
> +}
> +
>  static inline int btrfs_check_zoned_mode(const struct btrfs_fs_info *fs_info)
>  {
>  	if (!btrfs_is_zoned(fs_info))
> -- 
> 2.37.3
>
Johannes Thumshirn Nov. 7, 2022, 3:23 p.m. UTC | #2
On 07.11.22 16:17, Naohiro Aota wrote:
> 
> I have a concern about this function. Since this function duplicates the
> zone_info, it might cause a split-brain state of zone_info. Actually, that
> won't happen because the current callers are the seeding device
> functions. And, another solution, reference counting the zone_info, is not
> an option as it is only used for the seeding case.
> 
> So, this function is safe only when a caller ensures read-only access to
> the copied zone_info. I'd like to have a comment here so future developer
> notices the proper usage.

But isn't this the same for all other fields of btrfs_device? Given it's 
only called by clone_fs_devices() which in turn is only called by either
open_seed_devices() or btrfs_init_sprout() it's a save context.
diff mbox series

Patch

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f81aa7f9ef19..f72ce52c67ce 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1021,6 +1021,18 @@  static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
 			rcu_assign_pointer(device->name, name);
 		}
 
+		if (orig_dev->zone_info) {
+			struct btrfs_zoned_device_info *zone_info;
+
+			zone_info = btrfs_clone_dev_zone_info(orig_dev);
+			if (!zone_info) {
+				btrfs_free_device(device);
+				ret = -ENOMEM;
+				goto error;
+			}
+			device->zone_info = zone_info;
+		}
+
 		list_add(&device->dev_list, &fs_devices->devices);
 		device->fs_devices = fs_devices;
 		fs_devices->num_devices++;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 9d12a23e1a59..f830f70fc214 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -641,6 +641,48 @@  void btrfs_destroy_dev_zone_info(struct btrfs_device *device)
 	device->zone_info = NULL;
 }
 
+struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev)
+{
+	struct btrfs_zoned_device_info *zone_info;
+
+	zone_info = kmemdup(orig_dev->zone_info,
+			    sizeof(*zone_info), GFP_KERNEL);
+	if (!zone_info)
+		return NULL;
+
+
+	zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
+	if (!zone_info->seq_zones)
+		goto out;
+
+	bitmap_copy(zone_info->seq_zones, orig_dev->zone_info->seq_zones,
+		    zone_info->nr_zones);
+
+	zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
+	if (!zone_info->empty_zones)
+		goto out;
+
+	bitmap_copy(zone_info->empty_zones, orig_dev->zone_info->empty_zones,
+		    zone_info->nr_zones);
+
+	zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
+	if (!zone_info->active_zones)
+		goto out;
+
+	bitmap_copy(zone_info->active_zones, orig_dev->zone_info->active_zones,
+		    zone_info->nr_zones);
+	zone_info->zone_cache = NULL;
+
+	return zone_info;
+
+out:
+	bitmap_free(zone_info->seq_zones);
+	bitmap_free(zone_info->empty_zones);
+	bitmap_free(zone_info->active_zones);
+	kfree(zone_info);
+	return NULL;
+}
+
 int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 		       struct blk_zone *zone)
 {
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 8d7e6be853c6..69fb1af89a53 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -37,6 +37,7 @@  int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info);
 int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache);
 void btrfs_destroy_dev_zone_info(struct btrfs_device *device);
+struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev);
 int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info);
 int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info);
 int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
@@ -104,6 +105,15 @@  static inline int btrfs_get_dev_zone_info(struct btrfs_device *device,
 
 static inline void btrfs_destroy_dev_zone_info(struct btrfs_device *device) { }
 
+/* In case the kernel is compiled without CONFIG_BLK_DEV_ZONED we'll never
+ * call into btrfs_clone_dev_zone_info() so it's save to return NULL here.
+ */
+static inline struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(
+						 struct btrfs_device *orig_dev)
+{
+	return NULL;
+}
+
 static inline int btrfs_check_zoned_mode(const struct btrfs_fs_info *fs_info)
 {
 	if (!btrfs_is_zoned(fs_info))