Message ID | af4caecf1d6fac27654cfd47b72eea865cdcbf61.1667571005.git.johannes.thumshirn@wdc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: zoned: misc fixes for problems uncovered by fstests | expand |
On Fri, Nov 04, 2022 at 07:12:33AM -0700, Johannes Thumshirn wrote: > When cloning a btrfs_device, we're not cloning the associated > btrfs_zoned_device_info structure of the device in case of a zoned > filesystem. > > This then later leads to a nullptr dereference when accessing the device's > zone_info for instance when setting a zone as active. > > This was uncovered by fstests' testcase btrfs/161. > > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> > --- > fs/btrfs/volumes.c | 12 ++++++++++++ > fs/btrfs/zoned.c | 42 ++++++++++++++++++++++++++++++++++++++++++ > fs/btrfs/zoned.h | 10 ++++++++++ > 3 files changed, 64 insertions(+) > > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > index f81aa7f9ef19..f72ce52c67ce 100644 > --- a/fs/btrfs/volumes.c > +++ b/fs/btrfs/volumes.c > @@ -1021,6 +1021,18 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) > rcu_assign_pointer(device->name, name); > } > > + if (orig_dev->zone_info) { > + struct btrfs_zoned_device_info *zone_info; > + > + zone_info = btrfs_clone_dev_zone_info(orig_dev); > + if (!zone_info) { > + btrfs_free_device(device); > + ret = -ENOMEM; > + goto error; > + } > + device->zone_info = zone_info; > + } > + > list_add(&device->dev_list, &fs_devices->devices); > device->fs_devices = fs_devices; > fs_devices->num_devices++; This part looks good. > diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c > index 9d12a23e1a59..f830f70fc214 100644 > --- a/fs/btrfs/zoned.c > +++ b/fs/btrfs/zoned.c > @@ -641,6 +641,48 @@ void btrfs_destroy_dev_zone_info(struct btrfs_device *device) > device->zone_info = NULL; > } > > +struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev) I have a concern about this function. Since this function duplicates the zone_info, it might cause a split-brain state of zone_info. Actually, that won't happen because the current callers are the seeding device functions. And, another solution, reference counting the zone_info, is not an option as it is only used for the seeding case. So, this function is safe only when a caller ensures read-only access to the copied zone_info. I'd like to have a comment here so future developer notices the proper usage. > +{ > + struct btrfs_zoned_device_info *zone_info; > + > + zone_info = kmemdup(orig_dev->zone_info, > + sizeof(*zone_info), GFP_KERNEL); > + if (!zone_info) > + return NULL; > + > + > + zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); > + if (!zone_info->seq_zones) > + goto out; > + > + bitmap_copy(zone_info->seq_zones, orig_dev->zone_info->seq_zones, > + zone_info->nr_zones); > + > + zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); > + if (!zone_info->empty_zones) > + goto out; > + > + bitmap_copy(zone_info->empty_zones, orig_dev->zone_info->empty_zones, > + zone_info->nr_zones); > + > + zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); > + if (!zone_info->active_zones) > + goto out; > + > + bitmap_copy(zone_info->active_zones, orig_dev->zone_info->active_zones, > + zone_info->nr_zones); > + zone_info->zone_cache = NULL; > + > + return zone_info; > + > +out: > + bitmap_free(zone_info->seq_zones); > + bitmap_free(zone_info->empty_zones); > + bitmap_free(zone_info->active_zones); > + kfree(zone_info); > + return NULL; > +} > + > int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, > struct blk_zone *zone) > { > diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h > index 8d7e6be853c6..69fb1af89a53 100644 > --- a/fs/btrfs/zoned.h > +++ b/fs/btrfs/zoned.h > @@ -37,6 +37,7 @@ int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, > int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info); > int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache); > void btrfs_destroy_dev_zone_info(struct btrfs_device *device); > +struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev); > int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info); > int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info); > int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw, > @@ -104,6 +105,15 @@ static inline int btrfs_get_dev_zone_info(struct btrfs_device *device, > > static inline void btrfs_destroy_dev_zone_info(struct btrfs_device *device) { } > > +/* In case the kernel is compiled without CONFIG_BLK_DEV_ZONED we'll never > + * call into btrfs_clone_dev_zone_info() so it's save to return NULL here. > + */ > +static inline struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info( > + struct btrfs_device *orig_dev) > +{ > + return NULL; > +} > + > static inline int btrfs_check_zoned_mode(const struct btrfs_fs_info *fs_info) > { > if (!btrfs_is_zoned(fs_info)) > -- > 2.37.3 >
On 07.11.22 16:17, Naohiro Aota wrote: > > I have a concern about this function. Since this function duplicates the > zone_info, it might cause a split-brain state of zone_info. Actually, that > won't happen because the current callers are the seeding device > functions. And, another solution, reference counting the zone_info, is not > an option as it is only used for the seeding case. > > So, this function is safe only when a caller ensures read-only access to > the copied zone_info. I'd like to have a comment here so future developer > notices the proper usage. But isn't this the same for all other fields of btrfs_device? Given it's only called by clone_fs_devices() which in turn is only called by either open_seed_devices() or btrfs_init_sprout() it's a save context.
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f81aa7f9ef19..f72ce52c67ce 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1021,6 +1021,18 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) rcu_assign_pointer(device->name, name); } + if (orig_dev->zone_info) { + struct btrfs_zoned_device_info *zone_info; + + zone_info = btrfs_clone_dev_zone_info(orig_dev); + if (!zone_info) { + btrfs_free_device(device); + ret = -ENOMEM; + goto error; + } + device->zone_info = zone_info; + } + list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; fs_devices->num_devices++; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 9d12a23e1a59..f830f70fc214 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -641,6 +641,48 @@ void btrfs_destroy_dev_zone_info(struct btrfs_device *device) device->zone_info = NULL; } +struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev) +{ + struct btrfs_zoned_device_info *zone_info; + + zone_info = kmemdup(orig_dev->zone_info, + sizeof(*zone_info), GFP_KERNEL); + if (!zone_info) + return NULL; + + + zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); + if (!zone_info->seq_zones) + goto out; + + bitmap_copy(zone_info->seq_zones, orig_dev->zone_info->seq_zones, + zone_info->nr_zones); + + zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); + if (!zone_info->empty_zones) + goto out; + + bitmap_copy(zone_info->empty_zones, orig_dev->zone_info->empty_zones, + zone_info->nr_zones); + + zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); + if (!zone_info->active_zones) + goto out; + + bitmap_copy(zone_info->active_zones, orig_dev->zone_info->active_zones, + zone_info->nr_zones); + zone_info->zone_cache = NULL; + + return zone_info; + +out: + bitmap_free(zone_info->seq_zones); + bitmap_free(zone_info->empty_zones); + bitmap_free(zone_info->active_zones); + kfree(zone_info); + return NULL; +} + int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, struct blk_zone *zone) { diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index 8d7e6be853c6..69fb1af89a53 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -37,6 +37,7 @@ int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info); int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache); void btrfs_destroy_dev_zone_info(struct btrfs_device *device); +struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev); int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info); int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info); int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw, @@ -104,6 +105,15 @@ static inline int btrfs_get_dev_zone_info(struct btrfs_device *device, static inline void btrfs_destroy_dev_zone_info(struct btrfs_device *device) { } +/* In case the kernel is compiled without CONFIG_BLK_DEV_ZONED we'll never + * call into btrfs_clone_dev_zone_info() so it's save to return NULL here. + */ +static inline struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info( + struct btrfs_device *orig_dev) +{ + return NULL; +} + static inline int btrfs_check_zoned_mode(const struct btrfs_fs_info *fs_info) { if (!btrfs_is_zoned(fs_info))
When cloning a btrfs_device, we're not cloning the associated btrfs_zoned_device_info structure of the device in case of a zoned filesystem. This then later leads to a nullptr dereference when accessing the device's zone_info for instance when setting a zone as active. This was uncovered by fstests' testcase btrfs/161. Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> --- fs/btrfs/volumes.c | 12 ++++++++++++ fs/btrfs/zoned.c | 42 ++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/zoned.h | 10 ++++++++++ 3 files changed, 64 insertions(+)