diff mbox series

[v3] btrfs: zoned: make auto-reclaim less aggressive

Message ID 74cbd8cdefe76136b3f9fb9b96bddfcbcd5b5861.1647342146.git.johannes.thumshirn@wdc.com (mailing list archive)
State New, archived
Headers show
Series [v3] btrfs: zoned: make auto-reclaim less aggressive | expand

Commit Message

Johannes Thumshirn March 15, 2022, 11:02 a.m. UTC
The current auto-reclaim algorithm starts reclaiming all block-group's
with a zone_unusable value above a configured threshold. This is causing a
lot of reclaim IO even if there would be enough free zones on the device.

Instead of only accounting a block-group's zone_unusable value, also take
the number of empty zones into account.

Cc: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>

---
Changes since v4:
* Use div_u64()

Changes since RFC:
* Fix logic error
* Skip unavailable devices
* Use different metric working for non-zoned devices as well
---
 fs/btrfs/block-group.c |  3 +++
 fs/btrfs/zoned.c       | 30 ++++++++++++++++++++++++++++++
 fs/btrfs/zoned.h       |  6 ++++++
 3 files changed, 39 insertions(+)

Comments

Filipe Manana March 15, 2022, 11:46 a.m. UTC | #1
On Tue, Mar 15, 2022 at 04:02:57AM -0700, Johannes Thumshirn wrote:
> The current auto-reclaim algorithm starts reclaiming all block-group's
> with a zone_unusable value above a configured threshold. This is causing a
> lot of reclaim IO even if there would be enough free zones on the device.
> 
> Instead of only accounting a block-group's zone_unusable value, also take
> the number of empty zones into account.
> 
> Cc: Josef Bacik <josef@toxicpanda.com>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> 
> ---
> Changes since v4:
> * Use div_u64()
> 
> Changes since RFC:
> * Fix logic error
> * Skip unavailable devices
> * Use different metric working for non-zoned devices as well
> ---
>  fs/btrfs/block-group.c |  3 +++
>  fs/btrfs/zoned.c       | 30 ++++++++++++++++++++++++++++++
>  fs/btrfs/zoned.h       |  6 ++++++
>  3 files changed, 39 insertions(+)
> 
> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
> index c22d287e020b..2e77b38c538b 100644
> --- a/fs/btrfs/block-group.c
> +++ b/fs/btrfs/block-group.c
> @@ -1522,6 +1522,9 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
>  	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
>  		return;
>  
> +	if (!btrfs_zoned_should_reclaim(fs_info))
> +		return;
> +
>  	sb_start_write(fs_info->sb);
>  
>  	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> index 49446bb5a5d1..dc62a14594de 100644
> --- a/fs/btrfs/zoned.c
> +++ b/fs/btrfs/zoned.c
> @@ -15,6 +15,7 @@
>  #include "transaction.h"
>  #include "dev-replace.h"
>  #include "space-info.h"
> +#include "misc.h"

Why is this included added?
Did you intended to use div_factor()?

Thanks.

>  
>  /* Maximum number of zones to report per blkdev_report_zones() call */
>  #define BTRFS_REPORT_NR_ZONES   4096
> @@ -2078,3 +2079,32 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info)
>  	}
>  	mutex_unlock(&fs_devices->device_list_mutex);
>  }
> +
> +bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info)
> +{
> +	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
> +	struct btrfs_device *device;
> +	u64 bytes_used = 0;
> +	u64 total_bytes = 0;
> +	u64 factor;
> +
> +	if (!btrfs_is_zoned(fs_info))
> +		return false;
> +
> +	if (!fs_info->bg_reclaim_threshold)
> +		return false;
> +
> +	mutex_lock(&fs_devices->device_list_mutex);
> +	list_for_each_entry(device, &fs_devices->devices, dev_list) {
> +		if (!device->bdev)
> +			continue;
> +
> +		total_bytes += device->disk_total_bytes;
> +		bytes_used += device->bytes_used;
> +
> +	}
> +	mutex_unlock(&fs_devices->device_list_mutex);
> +
> +	factor = div_u64(bytes_used * 100, total_bytes);
> +	return factor >= fs_info->bg_reclaim_threshold;
> +}
> diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
> index cbf016a7bb5d..d0d0e5c02606 100644
> --- a/fs/btrfs/zoned.h
> +++ b/fs/btrfs/zoned.h
> @@ -78,6 +78,7 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
>  			     u64 length);
>  void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
>  void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
> +bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info);
>  #else /* CONFIG_BLK_DEV_ZONED */
>  static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
>  				     struct blk_zone *zone)
> @@ -236,6 +237,11 @@ static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
>  static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
>  
>  static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }
> +
> +static inline bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info)
> +{
> +	return false;
> +}
>  #endif
>  
>  static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
> -- 
> 2.35.1
>
Johannes Thumshirn March 15, 2022, 1:51 p.m. UTC | #2
On 15/03/2022 12:47, Filipe Manana wrote:
> On Tue, Mar 15, 2022 at 04:02:57AM -0700, Johannes Thumshirn wrote:
>> The current auto-reclaim algorithm starts reclaiming all block-group's
>> with a zone_unusable value above a configured threshold. This is causing a
>> lot of reclaim IO even if there would be enough free zones on the device.
>>
>> Instead of only accounting a block-group's zone_unusable value, also take
>> the number of empty zones into account.
>>
>> Cc: Josef Bacik <josef@toxicpanda.com>
>> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
>>
>> ---
>> Changes since v4:
>> * Use div_u64()
>>
>> Changes since RFC:
>> * Fix logic error
>> * Skip unavailable devices
>> * Use different metric working for non-zoned devices as well
>> ---
>>  fs/btrfs/block-group.c |  3 +++
>>  fs/btrfs/zoned.c       | 30 ++++++++++++++++++++++++++++++
>>  fs/btrfs/zoned.h       |  6 ++++++
>>  3 files changed, 39 insertions(+)
>>
>> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
>> index c22d287e020b..2e77b38c538b 100644
>> --- a/fs/btrfs/block-group.c
>> +++ b/fs/btrfs/block-group.c
>> @@ -1522,6 +1522,9 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
>>  	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
>>  		return;
>>  
>> +	if (!btrfs_zoned_should_reclaim(fs_info))
>> +		return;
>> +
>>  	sb_start_write(fs_info->sb);
>>  
>>  	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
>> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
>> index 49446bb5a5d1..dc62a14594de 100644
>> --- a/fs/btrfs/zoned.c
>> +++ b/fs/btrfs/zoned.c
>> @@ -15,6 +15,7 @@
>>  #include "transaction.h"
>>  #include "dev-replace.h"
>>  #include "space-info.h"
>> +#include "misc.h"
> 
> Why is this included added?
> Did you intended to use div_factor()?
> 

Indeed a earlier version of this patch (using a different metric) used 
div_factor_fine(). This is a left over, I'll remove it in the next version,
as it's not needed anymore.

Good catch, thanks :)
Josef Bacik March 15, 2022, 2:20 p.m. UTC | #3
On Tue, Mar 15, 2022 at 04:02:57AM -0700, Johannes Thumshirn wrote:
> The current auto-reclaim algorithm starts reclaiming all block-group's
> with a zone_unusable value above a configured threshold. This is causing a
> lot of reclaim IO even if there would be enough free zones on the device.
> 
> Instead of only accounting a block-group's zone_unusable value, also take
> the number of empty zones into account.
> 
> Cc: Josef Bacik <josef@toxicpanda.com>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> 
> ---
> Changes since v4:
> * Use div_u64()
> 
> Changes since RFC:
> * Fix logic error
> * Skip unavailable devices
> * Use different metric working for non-zoned devices as well
> ---
>  fs/btrfs/block-group.c |  3 +++
>  fs/btrfs/zoned.c       | 30 ++++++++++++++++++++++++++++++
>  fs/btrfs/zoned.h       |  6 ++++++
>  3 files changed, 39 insertions(+)
> 
> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
> index c22d287e020b..2e77b38c538b 100644
> --- a/fs/btrfs/block-group.c
> +++ b/fs/btrfs/block-group.c
> @@ -1522,6 +1522,9 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
>  	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
>  		return;
>  
> +	if (!btrfs_zoned_should_reclaim(fs_info))
> +		return;
> +

Noooo I just purposefully turned it on for everybody.

>  	sb_start_write(fs_info->sb);
>  
>  	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> index 49446bb5a5d1..dc62a14594de 100644
> --- a/fs/btrfs/zoned.c
> +++ b/fs/btrfs/zoned.c
> @@ -15,6 +15,7 @@
>  #include "transaction.h"
>  #include "dev-replace.h"
>  #include "space-info.h"
> +#include "misc.h"
>  
>  /* Maximum number of zones to report per blkdev_report_zones() call */
>  #define BTRFS_REPORT_NR_ZONES   4096
> @@ -2078,3 +2079,32 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info)
>  	}
>  	mutex_unlock(&fs_devices->device_list_mutex);
>  }
> +
> +bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info)
> +{
> +	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
> +	struct btrfs_device *device;
> +	u64 bytes_used = 0;
> +	u64 total_bytes = 0;
> +	u64 factor;
> +
> +	if (!btrfs_is_zoned(fs_info))
> +		return false;
> +
> +	if (!fs_info->bg_reclaim_threshold)
> +		return false;
> +
> +	mutex_lock(&fs_devices->device_list_mutex);
> +	list_for_each_entry(device, &fs_devices->devices, dev_list) {
> +		if (!device->bdev)
> +			continue;
> +
> +		total_bytes += device->disk_total_bytes;
> +		bytes_used += device->bytes_used;
> +
> +	}
> +	mutex_unlock(&fs_devices->device_list_mutex);

We can probably export calc_available_free_space() and use that here instead,
I'd rather not be grabbing the device_list_mutex here if we can avoid it.

> +
> +	factor = div_u64(bytes_used * 100, total_bytes);
> +	return factor >= fs_info->bg_reclaim_threshold;

My patches removed the fs_info->bg_reclaim_threshold btw.  Thanks,

Josef
Johannes Thumshirn March 15, 2022, 2:36 p.m. UTC | #4
On 15/03/2022 15:20, Josef Bacik wrote:
> On Tue, Mar 15, 2022 at 04:02:57AM -0700, Johannes Thumshirn wrote:
>> The current auto-reclaim algorithm starts reclaiming all block-group's
>> with a zone_unusable value above a configured threshold. This is causing a
>> lot of reclaim IO even if there would be enough free zones on the device.
>>
>> Instead of only accounting a block-group's zone_unusable value, also take
>> the number of empty zones into account.
>>
>> Cc: Josef Bacik <josef@toxicpanda.com>
>> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
>>
>> ---
>> Changes since v4:
>> * Use div_u64()
>>
>> Changes since RFC:
>> * Fix logic error
>> * Skip unavailable devices
>> * Use different metric working for non-zoned devices as well
>> ---
>>  fs/btrfs/block-group.c |  3 +++
>>  fs/btrfs/zoned.c       | 30 ++++++++++++++++++++++++++++++
>>  fs/btrfs/zoned.h       |  6 ++++++
>>  3 files changed, 39 insertions(+)
>>
>> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
>> index c22d287e020b..2e77b38c538b 100644
>> --- a/fs/btrfs/block-group.c
>> +++ b/fs/btrfs/block-group.c
>> @@ -1522,6 +1522,9 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
>>  	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
>>  		return;
>>  
>> +	if (!btrfs_zoned_should_reclaim(fs_info))
>> +		return;
>> +
> 
> Noooo I just purposefully turned it on for everybody.

Right, I think we need something like

	if (!btrfs_should_reclaim(fs_info))
		return;

and

static inline bool btrfs_should_reclaim(struct btrfs_fs_info *fs_info)
{
	if (btrfs_is_zoned(fs_info))
		return btrfs_zoned_should_reclaim(fs_info);
	return true; /* or whatever regular btrfs needs here */
}


>> +	mutex_lock(&fs_devices->device_list_mutex);
>> +	list_for_each_entry(device, &fs_devices->devices, dev_list) {
>> +		if (!device->bdev)
>> +			continue;
>> +
>> +		total_bytes += device->disk_total_bytes;
>> +		bytes_used += device->bytes_used;
>> +
>> +	}
>> +	mutex_unlock(&fs_devices->device_list_mutex);
> 
> We can probably export calc_available_free_space() and use that here instead,
> I'd rather not be grabbing the device_list_mutex here if we can avoid it.

Sure, i'll look into it.

>> +
>> +	factor = div_u64(bytes_used * 100, total_bytes);
>> +	return factor >= fs_info->bg_reclaim_threshold;
> 
> My patches removed the fs_info->bg_reclaim_threshold btw.  

Yeah, which I replied to you is a bad idea 'cause I need it :D
diff mbox series

Patch

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index c22d287e020b..2e77b38c538b 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1522,6 +1522,9 @@  void btrfs_reclaim_bgs_work(struct work_struct *work)
 	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
 		return;
 
+	if (!btrfs_zoned_should_reclaim(fs_info))
+		return;
+
 	sb_start_write(fs_info->sb);
 
 	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 49446bb5a5d1..dc62a14594de 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -15,6 +15,7 @@ 
 #include "transaction.h"
 #include "dev-replace.h"
 #include "space-info.h"
+#include "misc.h"
 
 /* Maximum number of zones to report per blkdev_report_zones() call */
 #define BTRFS_REPORT_NR_ZONES   4096
@@ -2078,3 +2079,32 @@  void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info)
 	}
 	mutex_unlock(&fs_devices->device_list_mutex);
 }
+
+bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+	struct btrfs_device *device;
+	u64 bytes_used = 0;
+	u64 total_bytes = 0;
+	u64 factor;
+
+	if (!btrfs_is_zoned(fs_info))
+		return false;
+
+	if (!fs_info->bg_reclaim_threshold)
+		return false;
+
+	mutex_lock(&fs_devices->device_list_mutex);
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
+		if (!device->bdev)
+			continue;
+
+		total_bytes += device->disk_total_bytes;
+		bytes_used += device->bytes_used;
+
+	}
+	mutex_unlock(&fs_devices->device_list_mutex);
+
+	factor = div_u64(bytes_used * 100, total_bytes);
+	return factor >= fs_info->bg_reclaim_threshold;
+}
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index cbf016a7bb5d..d0d0e5c02606 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -78,6 +78,7 @@  void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
 			     u64 length);
 void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
 void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
+bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 				     struct blk_zone *zone)
@@ -236,6 +237,11 @@  static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
 static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
 
 static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }
+
+static inline bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info)
+{
+	return false;
+}
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)