diff mbox series

[v2] btrfs: zoned: use greedy gc for auto reclaim

Message ID 75b42490e41e7c7bf49c07c76fb93764a726c621.1634035992.git.johannes.thumshirn@wdc.com (mailing list archive)
State New, archived
Headers show
Series [v2] btrfs: zoned: use greedy gc for auto reclaim | expand

Commit Message

Johannes Thumshirn Oct. 12, 2021, 12:37 p.m. UTC
Currently auto reclaim of unusable zones reclaims the block-groups in the
order they have been added to the reclaim list.

Change this to a greedy algorithm by sorting the list so we have the
block-groups with the least amount of valid bytes reclaimed first.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>

---
Changes since v1:
-  Changed list_sort() comparator to 'boolean' style

Changes since RFC:
- Updated the patch description
- Don't sort the list under the spin_lock (David)
---
 fs/btrfs/block-group.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

Comments

Nikolay Borisov Oct. 12, 2021, 1:56 p.m. UTC | #1
On 12.10.21 г. 15:37, Johannes Thumshirn wrote:
> Currently auto reclaim of unusable zones reclaims the block-groups in the
> order they have been added to the reclaim list.
> 
> Change this to a greedy algorithm by sorting the list so we have the
> block-groups with the least amount of valid bytes reclaimed first.
> 
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> 
> ---
> Changes since v1:
> -  Changed list_sort() comparator to 'boolean' style
> 
> Changes since RFC:
> - Updated the patch description
> - Don't sort the list under the spin_lock (David)

<snip>


> @@ -1510,17 +1528,20 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
>  	}
>  
>  	spin_lock(&fs_info->unused_bgs_lock);
> -	while (!list_empty(&fs_info->reclaim_bgs)) {
> +	list_splice_init(&fs_info->reclaim_bgs, &reclaim_list);
> +	spin_unlock(&fs_info->unused_bgs_lock);
> +
> +	list_sort(NULL, &reclaim_list, reclaim_bgs_cmp);
> +	while (!list_empty(&reclaim_list)) {

Nit: Now that you've switched to a local reclaim_list you can convert
the while to a list_for_each_entry_safe, since it's guaranteed that new
entries can't be added while you are iterating the list, which is
generally the reason why a while() is preferred to one of the iteration
helpers.

>  		u64 zone_unusable;
>  		int ret = 0;
>  
> -		bg = list_first_entry(&fs_info->reclaim_bgs,
> +		bg = list_first_entry(&reclaim_list,
>  				      struct btrfs_block_group,
>  				      bg_list);
>  		list_del_init(&bg->bg_list);
>  
>  		space_info = bg->space_info;
> -		spin_unlock(&fs_info->unused_bgs_lock);
>  
>  		/* Don't race with allocators so take the groups_sem */
>  		down_write(&space_info->groups_sem);
> @@ -1568,12 +1589,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
>  				  bg->start);
>  
>  next:
> -		spin_lock(&fs_info->unused_bgs_lock);
>  		if (ret == -EAGAIN && list_empty(&bg->bg_list))
>  			list_add_tail(&bg->bg_list, &again_list);
>  		else
>  			btrfs_put_block_group(bg);
>  	}
> +	spin_lock(&fs_info->unused_bgs_lock);
>  	list_splice_tail(&again_list, &fs_info->reclaim_bgs);
>  	spin_unlock(&fs_info->unused_bgs_lock);
>  	mutex_unlock(&fs_info->reclaim_bgs_lock);
>
David Sterba Oct. 13, 2021, 1:52 p.m. UTC | #2
On Tue, Oct 12, 2021 at 04:56:01PM +0300, Nikolay Borisov wrote:
> On 12.10.21 г. 15:37, Johannes Thumshirn wrote:
> > Currently auto reclaim of unusable zones reclaims the block-groups in the
> > order they have been added to the reclaim list.
> > 
> > Change this to a greedy algorithm by sorting the list so we have the
> > block-groups with the least amount of valid bytes reclaimed first.
> > 
> > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> > 
> > ---
> > Changes since v1:
> > -  Changed list_sort() comparator to 'boolean' style
> > 
> > Changes since RFC:
> > - Updated the patch description
> > - Don't sort the list under the spin_lock (David)
> 
> <snip>
> 
> 
> > @@ -1510,17 +1528,20 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
> >  	}
> >  
> >  	spin_lock(&fs_info->unused_bgs_lock);
> > -	while (!list_empty(&fs_info->reclaim_bgs)) {
> > +	list_splice_init(&fs_info->reclaim_bgs, &reclaim_list);
> > +	spin_unlock(&fs_info->unused_bgs_lock);
> > +
> > +	list_sort(NULL, &reclaim_list, reclaim_bgs_cmp);
> > +	while (!list_empty(&reclaim_list)) {
> 
> Nit: Now that you've switched to a local reclaim_list you can convert
> the while to a list_for_each_entry_safe, since it's guaranteed that new
> entries can't be added while you are iterating the list, which is
> generally the reason why a while() is preferred to one of the iteration
> helpers.

Like the following? I'll fold it in if yes:

--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1507,7 +1507,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 {
        struct btrfs_fs_info *fs_info =
                container_of(work, struct btrfs_fs_info, reclaim_bgs_work);
-       struct btrfs_block_group *bg;
+       struct btrfs_block_group *bg, *tmp;
        struct btrfs_space_info *space_info;
        LIST_HEAD(again_list);
        LIST_HEAD(reclaim_list);
@@ -1532,13 +1532,10 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
        spin_unlock(&fs_info->unused_bgs_lock);
 
        list_sort(NULL, &reclaim_list, reclaim_bgs_cmp);
-       while (!list_empty(&reclaim_list)) {
+       list_for_each_entry_safe(bg, tmp, &reclaim_list, bg_list) {
                u64 zone_unusable;
                int ret = 0;
 
-               bg = list_first_entry(&reclaim_list,
-                                     struct btrfs_block_group,
-                                     bg_list);
                list_del_init(&bg->bg_list);
 
                space_info = bg->space_info;
---
Nikolay Borisov Oct. 13, 2021, 2:14 p.m. UTC | #3
On 13.10.21 г. 16:52, David Sterba wrote:
> On Tue, Oct 12, 2021 at 04:56:01PM +0300, Nikolay Borisov wrote:
>> On 12.10.21 г. 15:37, Johannes Thumshirn wrote:
>>> Currently auto reclaim of unusable zones reclaims the block-groups in the
>>> order they have been added to the reclaim list.
>>>
>>> Change this to a greedy algorithm by sorting the list so we have the
>>> block-groups with the least amount of valid bytes reclaimed first.
>>>
>>> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
>>>
>>> ---
>>> Changes since v1:
>>> -  Changed list_sort() comparator to 'boolean' style
>>>
>>> Changes since RFC:
>>> - Updated the patch description
>>> - Don't sort the list under the spin_lock (David)
>>
>> <snip>
>>
>>
>>> @@ -1510,17 +1528,20 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
>>>  	}
>>>  
>>>  	spin_lock(&fs_info->unused_bgs_lock);
>>> -	while (!list_empty(&fs_info->reclaim_bgs)) {
>>> +	list_splice_init(&fs_info->reclaim_bgs, &reclaim_list);
>>> +	spin_unlock(&fs_info->unused_bgs_lock);
>>> +
>>> +	list_sort(NULL, &reclaim_list, reclaim_bgs_cmp);
>>> +	while (!list_empty(&reclaim_list)) {
>>
>> Nit: Now that you've switched to a local reclaim_list you can convert
>> the while to a list_for_each_entry_safe, since it's guaranteed that new
>> entries can't be added while you are iterating the list, which is
>> generally the reason why a while() is preferred to one of the iteration
>> helpers.
> 
> Like the following? I'll fold it in if yes:

Yes
Nikolay Borisov Oct. 13, 2021, 2:18 p.m. UTC | #4
On 12.10.21 г. 15:37, Johannes Thumshirn wrote:
> Currently auto reclaim of unusable zones reclaims the block-groups in the
> order they have been added to the reclaim list.
> 
> Change this to a greedy algorithm by sorting the list so we have the
> block-groups with the least amount of valid bytes reclaimed first.
> 
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> 

<snip>

> @@ -1510,17 +1528,20 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
>  	}
>  
>  	spin_lock(&fs_info->unused_bgs_lock);
> -	while (!list_empty(&fs_info->reclaim_bgs)) {
> +	list_splice_init(&fs_info->reclaim_bgs, &reclaim_list);
> +	spin_unlock(&fs_info->unused_bgs_lock);
> +
> +	list_sort(NULL, &reclaim_list, reclaim_bgs_cmp);
> +	while (!list_empty(&reclaim_list)) {
>  		u64 zone_unusable;
>  		int ret = 0;
>  
> -		bg = list_first_entry(&fs_info->reclaim_bgs,
> +		bg = list_first_entry(&reclaim_list,
>  				      struct btrfs_block_group,
>  				      bg_list);
>  		list_del_init(&bg->bg_list);
>  
>  		space_info = bg->space_info;
> -		spin_unlock(&fs_info->unused_bgs_lock);
>  
>  		/* Don't race with allocators so take the groups_sem */
>  		down_write(&space_info->groups_sem);
> @@ -1568,12 +1589,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
>  				  bg->start);
>  
>  next:
> -		spin_lock(&fs_info->unused_bgs_lock);
>  		if (ret == -EAGAIN && list_empty(&bg->bg_list))
>  			list_add_tail(&bg->bg_list, &again_list);

After this bg is unlinked from the local list, can its bg_list be added
for reclaim again. Is it safe to access bg_list without unused_bgs_lock
being held? btrfs_mark_bg_to_reclaim for example will add a block group
for reclaim under this lock?

>  		else
>  			btrfs_put_block_group(bg);
>  	}
> +	spin_lock(&fs_info->unused_bgs_lock);
>  	list_splice_tail(&again_list, &fs_info->reclaim_bgs);
>  	spin_unlock(&fs_info->unused_bgs_lock);
>  	mutex_unlock(&fs_info->reclaim_bgs_lock);
>
Johannes Thumshirn Oct. 14, 2021, 6:59 a.m. UTC | #5
On 13/10/2021 16:18, Nikolay Borisov wrote:
>>  next:
>> -		spin_lock(&fs_info->unused_bgs_lock);
>>  		if (ret == -EAGAIN && list_empty(&bg->bg_list))
>>  			list_add_tail(&bg->bg_list, &again_list);
> 
> After this bg is unlinked from the local list, can its bg_list be added
> for reclaim again. Is it safe to access bg_list without unused_bgs_lock
> being held? btrfs_mark_bg_to_reclaim for example will add a block group
> for reclaim under this lock?
> 

Correct me if I'm wrong, but I think this is OK, because i.e. 
btrfs_mark_bg_to_reclaim() won't run on the bg again as it is switched to
ro. So there should be no concurrent accesses to bg->bg_list possible.

Or do you mean, that I can remove the again_list and re-add bg->bg_list to
fs_info->reclaim_list directly? This would probably be possible and could
improve readability a bit, because we're not having to deal with three
different lists but only two.


Byte,
	Johannes
diff mbox series

Patch

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 46fdef7bbe20..e9092eba71fe 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1,5 +1,7 @@ 
 // SPDX-License-Identifier: GPL-2.0
 
+#include <linux/list_sort.h>
+
 #include "misc.h"
 #include "ctree.h"
 #include "block-group.h"
@@ -1486,6 +1488,21 @@  void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
 	spin_unlock(&fs_info->unused_bgs_lock);
 }
 
+/*
+ * We want block groups with a low number of used bytes to be in the beginning
+ * of the list, so they will get reclaimed first.
+ */
+static int reclaim_bgs_cmp(void *unused, const struct list_head *a,
+			   const struct list_head *b)
+{
+	const struct btrfs_block_group *bg1, *bg2;
+
+	bg1 = list_entry(a, struct btrfs_block_group, bg_list);
+	bg2 = list_entry(b, struct btrfs_block_group, bg_list);
+
+	return bg1->used > bg2->used;
+}
+
 void btrfs_reclaim_bgs_work(struct work_struct *work)
 {
 	struct btrfs_fs_info *fs_info =
@@ -1493,6 +1510,7 @@  void btrfs_reclaim_bgs_work(struct work_struct *work)
 	struct btrfs_block_group *bg;
 	struct btrfs_space_info *space_info;
 	LIST_HEAD(again_list);
+	LIST_HEAD(reclaim_list);
 
 	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
 		return;
@@ -1510,17 +1528,20 @@  void btrfs_reclaim_bgs_work(struct work_struct *work)
 	}
 
 	spin_lock(&fs_info->unused_bgs_lock);
-	while (!list_empty(&fs_info->reclaim_bgs)) {
+	list_splice_init(&fs_info->reclaim_bgs, &reclaim_list);
+	spin_unlock(&fs_info->unused_bgs_lock);
+
+	list_sort(NULL, &reclaim_list, reclaim_bgs_cmp);
+	while (!list_empty(&reclaim_list)) {
 		u64 zone_unusable;
 		int ret = 0;
 
-		bg = list_first_entry(&fs_info->reclaim_bgs,
+		bg = list_first_entry(&reclaim_list,
 				      struct btrfs_block_group,
 				      bg_list);
 		list_del_init(&bg->bg_list);
 
 		space_info = bg->space_info;
-		spin_unlock(&fs_info->unused_bgs_lock);
 
 		/* Don't race with allocators so take the groups_sem */
 		down_write(&space_info->groups_sem);
@@ -1568,12 +1589,12 @@  void btrfs_reclaim_bgs_work(struct work_struct *work)
 				  bg->start);
 
 next:
-		spin_lock(&fs_info->unused_bgs_lock);
 		if (ret == -EAGAIN && list_empty(&bg->bg_list))
 			list_add_tail(&bg->bg_list, &again_list);
 		else
 			btrfs_put_block_group(bg);
 	}
+	spin_lock(&fs_info->unused_bgs_lock);
 	list_splice_tail(&again_list, &fs_info->reclaim_bgs);
 	spin_unlock(&fs_info->unused_bgs_lock);
 	mutex_unlock(&fs_info->reclaim_bgs_lock);