Message ID | c69adfe62944e32a0d2e37b25c34cd49edc15f43.1648543951.git.johannes.thumshirn@wdc.com
---|---
State | New, archived
Series | btrfs: rework background block group relocation
LGTM. Tested it in QEMU with zcap == zsize and zcap != zsize.

Tested-by: Pankaj Raghav <p.raghav@samsung.com>

On 2022-03-29 10:56, Johannes Thumshirn wrote:
> The current auto-reclaim algorithm starts reclaiming all block-group's
> with a zone_unusable value above a configured threshold. This is causing a
> lot of reclaim IO even if there would be enough free zones on the device.
>
> Instead of only accounting a block-group's zone_unusable value, also take
> the ratio of free and not usable (written as well as zone_unusable) bytes
> a device has into account.
>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
>
> [...]
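A note on the zcap/zsize terminology in the test above: on zoned block devices a zone's capacity (zcap, the writable bytes) can be smaller than its size (zsize, the LBA range the zone occupies), so only part of each zone accepts writes. The toy model below illustrates the distinction; it is a sketch with made-up names, not btrfs's actual space accounting.

```c
#include <stdint.h>

/*
 * Toy model only; struct zone and writable_bytes() are hypothetical and
 * do not correspond to btrfs or kernel structures.
 */
struct zone {
	uint64_t size;		/* LBA range the zone occupies (zsize) */
	uint64_t capacity;	/* bytes that can actually be written (zcap) */
};

/* With zcap < zsize, the writable total is less than nr_zones * zsize. */
static uint64_t writable_bytes(const struct zone *zones, int nr_zones)
{
	uint64_t total = 0;

	for (int i = 0; i < nr_zones; i++)
		total += zones[i].capacity;
	return total;
}
```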
On Tue, Mar 29, 2022 at 01:56:09AM -0700, Johannes Thumshirn wrote:
> The current auto-reclaim algorithm starts reclaiming all block-group's

Please write it as 'block group' in the text; also note that 's does not form a plural.

> with a zone_unusable value above a configured threshold. This is causing a
> lot of reclaim IO even if there would be enough free zones on the device.
>
> Instead of only accounting a block-group's zone_unusable value, also take
> the ratio of free and not usable (written as well as zone_unusable) bytes
> a device has into account.
>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>

[...]

> +	ASSERT(btrfs_is_zoned(fs_info));
> +
> +	if (!fs_info->bg_reclaim_threshold)

For integer values it's IMHO better to use '== 0', as '!' is for bool variables.

> +		return false;

[...]

> +	factor = div64_u64(used * 100, total);

Seems we can't avoid the 64-bit division here; at least it's not performance critical.

> +	return factor >= fs_info->bg_reclaim_threshold;
> +}
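To make the two review points concrete, here is a minimal userspace sketch of the same check (the name should_reclaim() is hypothetical): the kernel needs div64_u64() because 32-bit targets lack a native 64-bit divide instruction, whereas plain '/' suffices in userspace C.

```c
#include <stdbool.h>
#include <stdint.h>

/*
 * Hypothetical userspace mirror of btrfs_zoned_should_reclaim()'s core
 * logic; plain '/' stands in for the kernel's div64_u64() helper.
 */
static bool should_reclaim(uint64_t used, uint64_t total, uint64_t threshold)
{
	if (threshold == 0 || total == 0)	/* '== 0' for integers, per the review */
		return false;
	/* used * 100 fits in a u64 for any device below roughly 184 PB */
	return used * 100 / total >= threshold;
}
```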
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 628741ecb97b..12454304bb85 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1512,6 +1512,13 @@ static int reclaim_bgs_cmp(void *unused, const struct list_head *a,
 	return bg1->used > bg2->used;
 }
 
+static inline bool btrfs_should_reclaim(struct btrfs_fs_info *fs_info)
+{
+	if (btrfs_is_zoned(fs_info))
+		return btrfs_zoned_should_reclaim(fs_info);
+	return true;
+}
+
 void btrfs_reclaim_bgs_work(struct work_struct *work)
 {
 	struct btrfs_fs_info *fs_info =
@@ -1522,6 +1529,9 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
 		return;
 
+	if (!btrfs_should_reclaim(fs_info))
+		return;
+
 	sb_start_write(fs_info->sb);
 
 	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 1b1b310c3c51..c0c460749b74 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2079,3 +2079,31 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info)
 	}
 	mutex_unlock(&fs_devices->device_list_mutex);
 }
+
+bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+	struct btrfs_device *device;
+	u64 used = 0;
+	u64 total = 0;
+	u64 factor;
+
+	ASSERT(btrfs_is_zoned(fs_info));
+
+	if (!fs_info->bg_reclaim_threshold)
+		return false;
+
+	mutex_lock(&fs_devices->device_list_mutex);
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
+		if (!device->bdev)
+			continue;
+
+		total += device->disk_total_bytes;
+		used += device->bytes_used;
+
+	}
+	mutex_unlock(&fs_devices->device_list_mutex);
+
+	factor = div64_u64(used * 100, total);
+	return factor >= fs_info->bg_reclaim_threshold;
+}
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index c489c08d7fd5..f2d16395087f 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -74,6 +74,7 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
 			     u64 length);
 void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
 void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
+bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 				     struct blk_zone *zone)
@@ -232,6 +233,11 @@ static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
 static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
 
 static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }
+
+static inline bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info)
+{
+	return false;
+}
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
The current auto-reclaim algorithm starts reclaiming all block-group's
with a zone_unusable value above a configured threshold. This is causing a
lot of reclaim IO even if there would be enough free zones on the device.

Instead of only accounting a block-group's zone_unusable value, also take
the ratio of free and not usable (written as well as zone_unusable) bytes
a device has into account.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/block-group.c | 10 ++++++++++
 fs/btrfs/zoned.c       | 28 ++++++++++++++++++++++++++++
 fs/btrfs/zoned.h       |  6 ++++++
 3 files changed, 44 insertions(+)
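As a worked example of the device-wide gate (hypothetical numbers; the threshold of 75 assumed below is, I believe, the zoned-mode default): two 100 GiB devices with 60 GiB and 80 GiB used give factor = 140 * 100 / 200 = 70, which is below 75, so the reclaim worker exits early even if individual block groups exceed the zone_unusable threshold.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for struct btrfs_device's accounting fields. */
struct dev {
	uint64_t bytes_used;
	uint64_t disk_total_bytes;
};

/* Device-wide gate sketched after btrfs_zoned_should_reclaim(). */
static bool device_wide_gate(const struct dev *devs, int n, uint64_t thresh)
{
	uint64_t used = 0, total = 0;

	for (int i = 0; i < n; i++) {
		used += devs[i].bytes_used;
		total += devs[i].disk_total_bytes;
	}
	return total && used * 100 / total >= thresh;
}

int main(void)
{
	const uint64_t GiB = 1024ULL * 1024 * 1024;
	struct dev devs[] = {
		{ 60 * GiB, 100 * GiB },
		{ 80 * GiB, 100 * GiB },
	};

	/* factor = 140 * 100 / 200 = 70 -> below a threshold of 75, prints 0 */
	printf("reclaim: %d\n", device_wide_gate(devs, 2, 75));
	return 0;
}
```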