diff mbox series

[RESEND] btrfs: kill update_block_group_flags

Message ID 20200117140826.42616-1-josef@toxicpanda.com (mailing list archive)
State New, archived
Headers show
Series [RESEND] btrfs: kill update_block_group_flags | expand

Commit Message

Josef Bacik Jan. 17, 2020, 2:08 p.m. UTC
btrfs/061 has been failing consistently for me recently with a
transaction abort.  We run out of space in the system chunk array, which
means we've allocated way more system chunks than we need.

Chris added this a long time ago for balance as a poor man's restriping.
If you had a single disk and then added another disk and then did a
balance, update_block_group_flags would then figure out which RAID level
you needed.

Fast forward to today and we have restriping behavior, so we can
explicitly tell the fs that we're trying to change the raid level.  This
is accomplished through the normal get_alloc_profile path.

Furthermore this code actually causes btrfs/061 to fail, because we do
things like mkfs -m dup -d single with multiple devices.  This trips
this check

alloc_flags = update_block_group_flags(fs_info, cache->flags);
if (alloc_flags != cache->flags) {
	ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);

in btrfs_inc_block_group_ro.  Because we're balancing and scrubbing, but
not actually restriping, we keep forcing chunk allocation of RAID1
chunks.  This eventually causes us to run out of system space and the
file system aborts and flips read only.

We don't need this poor man's restriping any more; simply use the normal
get_alloc_profile helper, which will get the correct alloc_flags and
thus make the right decision for chunk allocation.  This keeps us from
allocating a billion system chunks and falling over.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
- Just rebased onto misc-next.

 fs/btrfs/block-group.c | 52 ++----------------------------------------
 1 file changed, 2 insertions(+), 50 deletions(-)

Comments

Holger Hoffstätte March 1, 2020, 5:58 p.m. UTC | #1
On 1/17/20 3:08 PM, Josef Bacik wrote:
> btrfs/061 has been failing consistently for me recently with a
> transaction abort.  We run out of space in the system chunk array, which
> means we've allocated way too many system chunks than we need.
> 
> Chris added this a long time ago for balance as a poor mans restriping.
> If you had a single disk and then added another disk and then did a
> balance, update_block_group_flags would then figure out which RAID level
> you needed.
> 
> Fast forward to today and we have restriping behavior, so we can
> explicitly tell the fs that we're trying to change the raid level.  This
> is accomplished through the normal get_alloc_profile path.
> 
> Furthermore this code actually causes btrfs/061 to fail, because we do
> things like mkfs -m dup -d single with multiple devices.  This trips
> this check
> 
> alloc_flags = update_block_group_flags(fs_info, cache->flags);
> if (alloc_flags != cache->flags) {
> 	ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
> 
> in btrfs_inc_block_group_ro.  Because we're balancing and scrubbing, but
> not actually restriping, we keep forcing chunk allocation of RAID1
> chunks.  This eventually causes us to run out of system space and the
> file system aborts and flips read only.
> 
> We don't need this poor mans restriping any more, simply use the normal
> get_alloc_profile helper, which will get the correct alloc_flags and
> thus make the right decision for chunk allocation.  This keeps us from
> allocating a billion system chunks and falling over.
> 
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> ---
> - Just rebased onto misc-next.
> 
>   fs/btrfs/block-group.c | 52 ++----------------------------------------
>   1 file changed, 2 insertions(+), 50 deletions(-)
> 
> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
> index 7e71ec9682d0..77ec0597bd17 100644
> --- a/fs/btrfs/block-group.c
> +++ b/fs/btrfs/block-group.c
> @@ -2132,54 +2132,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
>   	return 0;
>   }
>   
> -static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
> -{
> -	u64 num_devices;
> -	u64 stripped;
> -
> -	/*
> -	 * if restripe for this chunk_type is on pick target profile and
> -	 * return, otherwise do the usual balance
> -	 */
> -	stripped = get_restripe_target(fs_info, flags);
> -	if (stripped)
> -		return extended_to_chunk(stripped);
> -
> -	num_devices = fs_info->fs_devices->rw_devices;
> -
> -	stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID56_MASK |
> -		BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10;
> -
> -	if (num_devices == 1) {
> -		stripped |= BTRFS_BLOCK_GROUP_DUP;
> -		stripped = flags & ~stripped;
> -
> -		/* turn raid0 into single device chunks */
> -		if (flags & BTRFS_BLOCK_GROUP_RAID0)
> -			return stripped;
> -
> -		/* turn mirroring into duplication */
> -		if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK |
> -			     BTRFS_BLOCK_GROUP_RAID10))
> -			return stripped | BTRFS_BLOCK_GROUP_DUP;
> -	} else {
> -		/* they already had raid on here, just return */
> -		if (flags & stripped)
> -			return flags;
> -
> -		stripped |= BTRFS_BLOCK_GROUP_DUP;
> -		stripped = flags & ~stripped;
> -
> -		/* switch duplicated blocks with raid1 */
> -		if (flags & BTRFS_BLOCK_GROUP_DUP)
> -			return stripped | BTRFS_BLOCK_GROUP_RAID1;
> -
> -		/* this is drive concat, leave it alone */
> -	}
> -
> -	return flags;
> -}
> -
>   /*
>    * Mark one block group RO, can be called several times for the same block
>    * group.
> @@ -2225,7 +2177,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
>   		 * If we are changing raid levels, try to allocate a
>   		 * corresponding block group with the new raid level.
>   		 */
> -		alloc_flags = update_block_group_flags(fs_info, cache->flags);
> +		alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
>   		if (alloc_flags != cache->flags) {
>   			ret = btrfs_chunk_alloc(trans, alloc_flags,
>   						CHUNK_ALLOC_FORCE);
> @@ -2252,7 +2204,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
>   	ret = inc_block_group_ro(cache, 0);
>   out:
>   	if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
> -		alloc_flags = update_block_group_flags(fs_info, cache->flags);
> +		alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
>   		mutex_lock(&fs_info->chunk_mutex);
>   		check_system_chunk(trans, alloc_flags);
>   		mutex_unlock(&fs_info->chunk_mutex);
> 

It seems that this patch breaks forced metadata rebalance from dup to single;
all chunks remain dup (or are rewritten as dup again). I bisected the broken
balance behaviour to this commit which for some reason was in my tree ;-) and
reverting it immediately fixed things.

I don't (yet) see this applied anywhere, but couldn't find any discussion or
retraction either. Maybe the logic of update_block_group_flags() and
btrfs_get_alloc_profile() is not completely interchangeable?

thanks,
Holger
Josef Bacik March 2, 2020, 2:10 p.m. UTC | #2
On Sun, Mar 01, 2020 at 06:58:02PM +0100, Holger Hoffstätte wrote:
> On 1/17/20 3:08 PM, Josef Bacik wrote:
> > btrfs/061 has been failing consistently for me recently with a
> > transaction abort.  We run out of space in the system chunk array, which
> > means we've allocated way too many system chunks than we need.
> > 
> > Chris added this a long time ago for balance as a poor mans restriping.
> > If you had a single disk and then added another disk and then did a
> > balance, update_block_group_flags would then figure out which RAID level
> > you needed.
> > 
> > Fast forward to today and we have restriping behavior, so we can
> > explicitly tell the fs that we're trying to change the raid level.  This
> > is accomplished through the normal get_alloc_profile path.
> > 
> > Furthermore this code actually causes btrfs/061 to fail, because we do
> > things like mkfs -m dup -d single with multiple devices.  This trips
> > this check
> > 
> > alloc_flags = update_block_group_flags(fs_info, cache->flags);
> > if (alloc_flags != cache->flags) {
> > 	ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
> > 
> > in btrfs_inc_block_group_ro.  Because we're balancing and scrubbing, but
> > not actually restriping, we keep forcing chunk allocation of RAID1
> > chunks.  This eventually causes us to run out of system space and the
> > file system aborts and flips read only.
> > 
> > We don't need this poor mans restriping any more, simply use the normal
> > get_alloc_profile helper, which will get the correct alloc_flags and
> > thus make the right decision for chunk allocation.  This keeps us from
> > allocating a billion system chunks and falling over.
> > 
> > Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> > ---
> > - Just rebased onto misc-next.
> > 
> >   fs/btrfs/block-group.c | 52 ++----------------------------------------
> >   1 file changed, 2 insertions(+), 50 deletions(-)
> > 
> > diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
> > index 7e71ec9682d0..77ec0597bd17 100644
> > --- a/fs/btrfs/block-group.c
> > +++ b/fs/btrfs/block-group.c
> > @@ -2132,54 +2132,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
> >   	return 0;
> >   }
> > -static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
> > -{
> > -	u64 num_devices;
> > -	u64 stripped;
> > -
> > -	/*
> > -	 * if restripe for this chunk_type is on pick target profile and
> > -	 * return, otherwise do the usual balance
> > -	 */
> > -	stripped = get_restripe_target(fs_info, flags);
> > -	if (stripped)
> > -		return extended_to_chunk(stripped);
> > -
> > -	num_devices = fs_info->fs_devices->rw_devices;
> > -
> > -	stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID56_MASK |
> > -		BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10;
> > -
> > -	if (num_devices == 1) {
> > -		stripped |= BTRFS_BLOCK_GROUP_DUP;
> > -		stripped = flags & ~stripped;
> > -
> > -		/* turn raid0 into single device chunks */
> > -		if (flags & BTRFS_BLOCK_GROUP_RAID0)
> > -			return stripped;
> > -
> > -		/* turn mirroring into duplication */
> > -		if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK |
> > -			     BTRFS_BLOCK_GROUP_RAID10))
> > -			return stripped | BTRFS_BLOCK_GROUP_DUP;
> > -	} else {
> > -		/* they already had raid on here, just return */
> > -		if (flags & stripped)
> > -			return flags;
> > -
> > -		stripped |= BTRFS_BLOCK_GROUP_DUP;
> > -		stripped = flags & ~stripped;
> > -
> > -		/* switch duplicated blocks with raid1 */
> > -		if (flags & BTRFS_BLOCK_GROUP_DUP)
> > -			return stripped | BTRFS_BLOCK_GROUP_RAID1;
> > -
> > -		/* this is drive concat, leave it alone */
> > -	}
> > -
> > -	return flags;
> > -}
> > -
> >   /*
> >    * Mark one block group RO, can be called several times for the same block
> >    * group.
> > @@ -2225,7 +2177,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
> >   		 * If we are changing raid levels, try to allocate a
> >   		 * corresponding block group with the new raid level.
> >   		 */
> > -		alloc_flags = update_block_group_flags(fs_info, cache->flags);
> > +		alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
> >   		if (alloc_flags != cache->flags) {
> >   			ret = btrfs_chunk_alloc(trans, alloc_flags,
> >   						CHUNK_ALLOC_FORCE);
> > @@ -2252,7 +2204,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
> >   	ret = inc_block_group_ro(cache, 0);
> >   out:
> >   	if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
> > -		alloc_flags = update_block_group_flags(fs_info, cache->flags);
> > +		alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
> >   		mutex_lock(&fs_info->chunk_mutex);
> >   		check_system_chunk(trans, alloc_flags);
> >   		mutex_unlock(&fs_info->chunk_mutex);
> > 
> 
> It seems that this patch breaks forced metadata rebalance from dup to single;
> all chunks remain dup (or are rewritten as dup again). I bisected the broken
> balance behaviour to this commit which for some reason was in my tree ;-) and
> reverting it immediately fixed things.
> 
> I don't (yet) see this applied anywhere, but couldn't find any discussion or
> revocation either. Maybe the logic between update_block_group_flags() and
> btrfs_get_alloc_profile() is not completely exchangeable?
> 

Well cool, it looks like we just ignore the restriping stuff if it's not what we
already have available, which is silly considering we probably don't have any
block groups of the stripe that we had before.  The previous helpers just
unconditionally used the restripe target, so can you apply this patch ontop of
this one and see if it starts working?  I'll wire up a xfstest for this so we
don't miss it again.  Thanks,

Josef

From 01ec038b8fa64c2bbec6d117860e119a49c01f60 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Mon, 2 Mar 2020 09:08:33 -0500
Subject: [PATCH] we're restriping, use the target

---
 fs/btrfs/block-group.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 60e9bb136f34..becad9c7486b 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -66,11 +66,8 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
 	spin_lock(&fs_info->balance_lock);
 	target = get_restripe_target(fs_info, flags);
 	if (target) {
-		/* Pick target profile only if it's already available */
-		if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
-			spin_unlock(&fs_info->balance_lock);
-			return extended_to_chunk(target);
-		}
+		spin_unlock(&fs_info->balance_lock);
+		return extended_to_chunk(target);
 	}
 	spin_unlock(&fs_info->balance_lock);
Holger Hoffstätte March 2, 2020, 3:02 p.m. UTC | #3
On 3/2/20 3:10 PM, Josef Bacik wrote:
> On Sun, Mar 01, 2020 at 06:58:02PM +0100, Holger Hoffstätte wrote:
>> On 1/17/20 3:08 PM, Josef Bacik wrote:
>>> btrfs/061 has been failing consistently for me recently with a
>>> transaction abort.  We run out of space in the system chunk array, which
>>> means we've allocated way too many system chunks than we need.
[snip]
>> It seems that this patch breaks forced metadata rebalance from dup to single;
>> all chunks remain dup (or are rewritten as dup again). I bisected the broken
>> balance behaviour to this commit which for some reason was in my tree ;-) and
>> reverting it immediately fixed things.
>>
>> I don't (yet) see this applied anywhere, but couldn't find any discussion or
>> revocation either. Maybe the logic between update_block_group_flags() and
>> btrfs_get_alloc_profile() is not completely exchangeable?
>>
> 
> Well cool, it looks like we just ignore the restriping stuff if it's not what we
> already have available, which is silly considering we probably don't have any
> block groups of the stripe that we had before.  The previous helpers just
> unconditionally used the restripe target, so can you apply this patch ontop of
> this one and see if it starts working?  I'll wire up a xfstest for this so we
> don't miss it again.  Thanks,
> 
> Josef
> 
>  From 01ec038b8fa64c2bbec6d117860e119a49c01f60 Mon Sep 17 00:00:00 2001
> From: Josef Bacik <josef@toxicpanda.com>
> Date: Mon, 2 Mar 2020 09:08:33 -0500
> Subject: [PATCH] we're restriping, use the target
> 
> ---
>   fs/btrfs/block-group.c | 7 ++-----
>   1 file changed, 2 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
> index 60e9bb136f34..becad9c7486b 100644
> --- a/fs/btrfs/block-group.c
> +++ b/fs/btrfs/block-group.c
> @@ -66,11 +66,8 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
>   	spin_lock(&fs_info->balance_lock);
>   	target = get_restripe_target(fs_info, flags);
>   	if (target) {
> -		/* Pick target profile only if it's already available */
> -		if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
> -			spin_unlock(&fs_info->balance_lock);
> -			return extended_to_chunk(target);
> -		}
> +		spin_unlock(&fs_info->balance_lock);
> +		return extended_to_chunk(target);
>   	}
>   	spin_unlock(&fs_info->balance_lock);
>   
> 

Applied it on top & it makes dup -> single balancing work again. \o/
Feel free to add my Tested-by.

Thanks!
Holger
David Sterba March 2, 2020, 8:18 p.m. UTC | #4
On Sun, Mar 01, 2020 at 06:58:02PM +0100, Holger Hoffstätte wrote:
> On 1/17/20 3:08 PM, Josef Bacik wrote:
> > +		alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
> >   		if (alloc_flags != cache->flags) {
> >   			ret = btrfs_chunk_alloc(trans, alloc_flags,
> >   						CHUNK_ALLOC_FORCE);
> > @@ -2252,7 +2204,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
> >   	ret = inc_block_group_ro(cache, 0);
> >   out:
> >   	if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
> > -		alloc_flags = update_block_group_flags(fs_info, cache->flags);
> > +		alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
> >   		mutex_lock(&fs_info->chunk_mutex);
> >   		check_system_chunk(trans, alloc_flags);
> >   		mutex_unlock(&fs_info->chunk_mutex);
> > 
> 
> It seems that this patch breaks forced metadata rebalance from dup to single;
> all chunks remain dup (or are rewritten as dup again). I bisected the broken
> balance behaviour to this commit which for some reason was in my tree ;-) and
> reverting it immediately fixed things.
> 
> I don't (yet) see this applied anywhere, but couldn't find any discussion or
> revocation either. Maybe the logic between update_block_group_flags() and
> btrfs_get_alloc_profile() is not completely exchangeable?

The patch was not applied because I was not sure about it and had some
suspicion, https://lore.kernel.org/linux-btrfs/20200108170340.GK3929@twin.jikos.cz/
I don't want to apply the patch until I try the mentioned test with
raid1c34 but it's possible that it gets fixed by the updated patch.
Holger Hoffstätte June 30, 2020, 9:22 a.m. UTC | #5
On 2020-03-02 21:18, David Sterba wrote:
> On Sun, Mar 01, 2020 at 06:58:02PM +0100, Holger Hoffstätte wrote:
>> On 1/17/20 3:08 PM, Josef Bacik wrote:
>>> +		alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
>>>    		if (alloc_flags != cache->flags) {
>>>    			ret = btrfs_chunk_alloc(trans, alloc_flags,
>>>    						CHUNK_ALLOC_FORCE);
>>> @@ -2252,7 +2204,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
>>>    	ret = inc_block_group_ro(cache, 0);
>>>    out:
>>>    	if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
>>> -		alloc_flags = update_block_group_flags(fs_info, cache->flags);
>>> +		alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
>>>    		mutex_lock(&fs_info->chunk_mutex);
>>>    		check_system_chunk(trans, alloc_flags);
>>>    		mutex_unlock(&fs_info->chunk_mutex);
>>>
>>
>> It seems that this patch breaks forced metadata rebalance from dup to single;
>> all chunks remain dup (or are rewritten as dup again). I bisected the broken
>> balance behaviour to this commit which for some reason was in my tree ;-) and
>> reverting it immediately fixed things.
>>
>> I don't (yet) see this applied anywhere, but couldn't find any discussion or
>> revocation either. Maybe the logic between update_block_group_flags() and
>> btrfs_get_alloc_profile() is not completely exchangeable?
> 
> The patch was not applied because I was not sure about it and had some
> suspicion, https://lore.kernel.org/linux-btrfs/20200108170340.GK3929@twin.jikos.cz/
> I don't want to apply the patch until I try the mentioned test with
> raid1c34 but it's possible that it gets fixed by the updated patch.

I don't see this in misc-next or anywhere else, so a gentle reminder..

Original thread:
https://lore.kernel.org/linux-btrfs/20200117140826.42616-1-josef@toxicpanda.com/

As I wrote in the replies, the update to the patch fixed the balancing for me
(used with various profiles since then, no observed issues).

Josef, what about that xfstest?
David, can you try again with raid1c34?

thanks
Holger
Josef Bacik June 30, 2020, 1:35 p.m. UTC | #6
On 6/30/20 5:22 AM, Holger Hoffstätte wrote:
> On 2020-03-02 21:18, David Sterba wrote:
>> On Sun, Mar 01, 2020 at 06:58:02PM +0100, Holger Hoffstätte wrote:
>>> On 1/17/20 3:08 PM, Josef Bacik wrote:
>>>> +        alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
>>>>            if (alloc_flags != cache->flags) {
>>>>                ret = btrfs_chunk_alloc(trans, alloc_flags,
>>>>                            CHUNK_ALLOC_FORCE);
>>>> @@ -2252,7 +2204,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group 
>>>> *cache,
>>>>        ret = inc_block_group_ro(cache, 0);
>>>>    out:
>>>>        if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
>>>> -        alloc_flags = update_block_group_flags(fs_info, cache->flags);
>>>> +        alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
>>>>            mutex_lock(&fs_info->chunk_mutex);
>>>>            check_system_chunk(trans, alloc_flags);
>>>>            mutex_unlock(&fs_info->chunk_mutex);
>>>>
>>>
>>> It seems that this patch breaks forced metadata rebalance from dup to single;
>>> all chunks remain dup (or are rewritten as dup again). I bisected the broken
>>> balance behaviour to this commit which for some reason was in my tree ;-) and
>>> reverting it immediately fixed things.
>>>
>>> I don't (yet) see this applied anywhere, but couldn't find any discussion or
>>> revocation either. Maybe the logic between update_block_group_flags() and
>>> btrfs_get_alloc_profile() is not completely exchangeable?
>>
>> The patch was not applied because I was not sure about it and had some
>> suspicion, 
>> https://lore.kernel.org/linux-btrfs/20200108170340.GK3929@twin.jikos.cz/
>> I don't want to apply the patch until I try the mentioned test with
>> raid1c34 but it's possible that it gets fixed by the updated patch.
> 
> I don't see this in misc-next or anywhere else, so a gentle reminder..
> 
> Original thread:
> https://lore.kernel.org/linux-btrfs/20200117140826.42616-1-josef@toxicpanda.com/
> 
> As I wrote in the replies, the update to the patch fixed the balancing for me
> (used with various profiles since then, no observed issues).
> 
> Josef, what about that xfstest?
> David, can you try again with raid1c34?
> 

Ha, I was just looking at this patch yesterday trying to remember why it wasn't 
merged.  I'm about to re-submit my other work; I'll apply these two patches, 
re-test, and send them as well.  Sorry about that,

Josef
diff mbox series

Patch

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 7e71ec9682d0..77ec0597bd17 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2132,54 +2132,6 @@  int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
 	return 0;
 }
 
-static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
-{
-	u64 num_devices;
-	u64 stripped;
-
-	/*
-	 * if restripe for this chunk_type is on pick target profile and
-	 * return, otherwise do the usual balance
-	 */
-	stripped = get_restripe_target(fs_info, flags);
-	if (stripped)
-		return extended_to_chunk(stripped);
-
-	num_devices = fs_info->fs_devices->rw_devices;
-
-	stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID56_MASK |
-		BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10;
-
-	if (num_devices == 1) {
-		stripped |= BTRFS_BLOCK_GROUP_DUP;
-		stripped = flags & ~stripped;
-
-		/* turn raid0 into single device chunks */
-		if (flags & BTRFS_BLOCK_GROUP_RAID0)
-			return stripped;
-
-		/* turn mirroring into duplication */
-		if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK |
-			     BTRFS_BLOCK_GROUP_RAID10))
-			return stripped | BTRFS_BLOCK_GROUP_DUP;
-	} else {
-		/* they already had raid on here, just return */
-		if (flags & stripped)
-			return flags;
-
-		stripped |= BTRFS_BLOCK_GROUP_DUP;
-		stripped = flags & ~stripped;
-
-		/* switch duplicated blocks with raid1 */
-		if (flags & BTRFS_BLOCK_GROUP_DUP)
-			return stripped | BTRFS_BLOCK_GROUP_RAID1;
-
-		/* this is drive concat, leave it alone */
-	}
-
-	return flags;
-}
-
 /*
  * Mark one block group RO, can be called several times for the same block
  * group.
@@ -2225,7 +2177,7 @@  int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
 		 * If we are changing raid levels, try to allocate a
 		 * corresponding block group with the new raid level.
 		 */
-		alloc_flags = update_block_group_flags(fs_info, cache->flags);
+		alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
 		if (alloc_flags != cache->flags) {
 			ret = btrfs_chunk_alloc(trans, alloc_flags,
 						CHUNK_ALLOC_FORCE);
@@ -2252,7 +2204,7 @@  int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
 	ret = inc_block_group_ro(cache, 0);
 out:
 	if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
-		alloc_flags = update_block_group_flags(fs_info, cache->flags);
+		alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
 		mutex_lock(&fs_info->chunk_mutex);
 		check_system_chunk(trans, alloc_flags);
 		mutex_unlock(&fs_info->chunk_mutex);