btrfs: zoned: don't skip block group profile checks on conv zones

Message ID 534c381d897ad3f29948594014910310fe504bbc.1707475586.git.johannes.thumshirn@wdc.com (mailing list archive)
State New, archived
Series btrfs: zoned: don't skip block group profile checks on conv zones

Commit Message

Johannes Thumshirn Feb. 9, 2024, 10:46 a.m. UTC
On a zoned filesystem with conventional zones, we're skipping the block
group profile checks for the conventional zones.

This allows converting a zoned filesystem's data block groups to RAID when
all of the zones backing the chunk are conventional. But this will lead to
problems once we try to allocate chunks backed by sequential zones.

So also perform the block group profile checks when loading block groups
backed by conventional zones.

Reported-by: 韩于惟 <hrx@bupt.moe>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/zoned.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

Comments

Naohiro Aota Feb. 9, 2024, 2:29 p.m. UTC | #1
On Fri, Feb 09, 2024 at 02:46:26AM -0800, Johannes Thumshirn wrote:
> On a zoned filesystem with conventional zones, we're skipping the block
> group profile checks for the conventional zones.
> 
> This allows converting a zoned filesystem's data block groups to RAID when
> all of the zones backing the chunk are conventional. But this will lead to
> problems once we try to allocate chunks backed by sequential zones.
> 
> So also perform the block group profile checks when loading block groups
> backed by conventional zones.
> 
> Reported-by: 韩于惟 <hrx@bupt.moe>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  fs/btrfs/zoned.c | 30 +++++++++++++++++++++++++++---
>  1 file changed, 27 insertions(+), 3 deletions(-)

This seems more complex than necessary. It duplicates the
calculate_alloc_pointer() code into the per-profile loading functions. How
about adding an equivalent check, like the one below, after the out label?

	if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) {
		btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree",
			  btrfs_bg_type_to_raid_name(map->type));
		return -EINVAL;
	}
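
For reference, a minimal sketch of the suggested placement inside
btrfs_load_block_group_zone_info(), after the out label, where the
per-profile loaders and calculate_alloc_pointer() have already run. The
surrounding code is elided; this is an illustration of the idea, not a
posted v2:

	/* In btrfs_load_block_group_zone_info(), after the out label: */
	out:
		/*
		 * The per-profile loaders and calculate_alloc_pointer() have
		 * finished by this point, so the check also applies when every
		 * zone backing the chunk is conventional.
		 */
		if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) {
			btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree",
				  btrfs_bg_type_to_raid_name(map->type));
			return -EINVAL;
		}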

Damien Le Moal Feb. 12, 2024, 1:59 a.m. UTC | #2
On 2/9/24 19:46, Johannes Thumshirn wrote:
> On a zoned filesystem with conventional zones, we're skipping the block
> group profile checks for the conventional zones.
> 
> This allows converting a zoned filesystem's data block groups to RAID when
> all of the zones backing the chunk are conventional. But this will lead to
> problems once we try to allocate chunks backed by sequential zones.
> 
> So also perform the block group profile checks when loading block groups
> backed by conventional zones.
> 
> Reported-by: 韩于惟 <hrx@bupt.moe>

Let's keep using the Roman alphabet for names, please...

Yuwei,

Not all kernel developers can read Chinese, so please sign your emails with
your name written in the Roman alphabet.

HAN Yuwei Feb. 12, 2024, 4 a.m. UTC | #3
On 2024/2/12 9:59, Damien Le Moal wrote:
> On 2/9/24 19:46, Johannes Thumshirn wrote:
>> On a zoned filesystem with conventional zones, we're skipping the block
>> group profile checks for the conventional zones.
>>
>> This allows converting a zoned filesystem's data block groups to RAID when
>> all of the zones backing the chunk are conventional. But this will lead to
>> problems once we try to allocate chunks backed by sequential zones.
>>
>> So also perform the block group profile checks when loading block groups
>> backed by conventional zones.
>>
>> Reported-by: 韩于惟 <hrx@bupt.moe>
> Let's keep using the Roman alphabet for names, please...

Updated. I can change it to "HAN Yuwei".

Johannes Thumshirn Feb. 12, 2024, 9:48 a.m. UTC | #4
On 09.02.24 15:29, Naohiro Aota wrote:
> On Fri, Feb 09, 2024 at 02:46:26AM -0800, Johannes Thumshirn wrote:
>> On a zoned filesystem with conventional zones, we're skipping the block
>> group profile checks for the conventional zones.
>>
>> This allows converting a zoned filesystem's data block groups to RAID when
>> all of the zones backing the chunk are conventional. But this will lead to
>> problems once we try to allocate chunks backed by sequential zones.
>>
>> So also perform the block group profile checks when loading block groups
>> backed by conventional zones.
>>
>> Reported-by: 韩于惟 <hrx@bupt.moe>
>> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
>> ---
>>   fs/btrfs/zoned.c | 30 +++++++++++++++++++++++++++---
>>   1 file changed, 27 insertions(+), 3 deletions(-)
> 
> This seems more complex than necessary. It duplicates the
> calculate_alloc_pointer() code into the per-profile loading functions. How
> about adding an equivalent check, like the one below, after the out label?
> 
> 	if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) {
> 		btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree",
> 			  btrfs_bg_type_to_raid_name(map->type));
> 		return -EINVAL;
> 	}

OK, will do.
David Sterba Feb. 12, 2024, 11:26 a.m. UTC | #5
On Fri, Feb 09, 2024 at 02:46:26AM -0800, Johannes Thumshirn wrote:
> On a zoned filesystem with conventional zones, we're skipping the block
> group profile checks for the conventional zones.
> 
> This allows converting a zoned filesystem's data block groups to RAID when
> all of the zones backing the chunk are conventional. But this will lead to
> problems once we try to allocate chunks backed by sequential zones.

I don't remember if or to what extent the seq+zoned devices are supported.
There are many places with btrfs_is_zoned() that do not distinguish
whether a sequential area is present. Some of the changes could work on
sequential zones, but this could lead to strange problems, like in this
case.

We could support as much as is practical, but right now I don't see which
use cases would need that.
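
For context on that distinction: btrfs_is_zoned() only says whether the
filesystem is zoned at all, while conventional vs. sequential is a
per-device, per-offset property. A rough sketch of the kind of test that
would tell the two apart; btrfs_dev_is_sequential() is the existing helper
in fs/btrfs/zoned.h, but the wrapper name here is made up for
illustration:

	/* Hypothetical wrapper, for illustration only. */
	static bool range_on_sequential_zone(struct btrfs_device *device,
					     u64 physical)
	{
		/* Non-zoned devices carry no zone info, hence no sequential zones. */
		if (!device->zone_info)
			return false;
		/*
		 * Consults the device's seq_zones bitmap; a conventional zone
		 * reports false here, which is how these chunks bypass the
		 * profile checks today.
		 */
		return btrfs_dev_is_sequential(device, physical);
	}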

Patch

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index d9716456bce0..5beb6b936e61 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1369,8 +1369,10 @@  static int btrfs_load_block_group_single(struct btrfs_block_group *bg,
 		return -EIO;
 	}
 
-	bg->alloc_offset = info->alloc_offset;
-	bg->zone_capacity = info->capacity;
+	if (info->alloc_offset != WP_CONVENTIONAL) {
+		bg->alloc_offset = info->alloc_offset;
+		bg->zone_capacity = info->capacity;
+	}
 	if (test_bit(0, active))
 		set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
 	return 0;
@@ -1406,6 +1408,16 @@  static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
 		return -EIO;
 	}
 
+	if (zone_info[0].alloc_offset == WP_CONVENTIONAL) {
+		zone_info[0].alloc_offset = bg->alloc_offset;
+		zone_info[0].capacity = bg->length;
+	}
+
+	if (zone_info[1].alloc_offset == WP_CONVENTIONAL) {
+		zone_info[1].alloc_offset = bg->alloc_offset;
+		zone_info[1].capacity = bg->length;
+	}
+
 	if (test_bit(0, active) != test_bit(1, active)) {
 		if (!btrfs_zone_activate(bg))
 			return -EIO;
@@ -1458,6 +1470,9 @@  static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
 						 zone_info[1].capacity);
 	}
 
+	if (zone_info[0].alloc_offset == WP_CONVENTIONAL)
+		zone_info[0].alloc_offset = bg->alloc_offset;
+
 	if (zone_info[0].alloc_offset != WP_MISSING_DEV)
 		bg->alloc_offset = zone_info[0].alloc_offset;
 	else
@@ -1479,6 +1494,11 @@  static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
 		return -EINVAL;
 	}
 
+	for (int i = 0; i < map->num_stripes; i++) {
+		if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
+			zone_info[i].alloc_offset = bg->alloc_offset;
+	}
+
 	for (int i = 0; i < map->num_stripes; i++) {
 		if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
 		    zone_info[i].alloc_offset == WP_CONVENTIONAL)
@@ -1511,6 +1531,11 @@  static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
 		return -EINVAL;
 	}
 
+	for (int i = 0; i < map->num_stripes; i++) {
+		if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
+			zone_info[i].alloc_offset = bg->alloc_offset;
+	}
+
 	for (int i = 0; i < map->num_stripes; i++) {
 		if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
 		    zone_info[i].alloc_offset == WP_CONVENTIONAL)
@@ -1605,7 +1630,6 @@  int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		} else if (map->num_stripes == num_conventional) {
 			cache->alloc_offset = last_alloc;
 			set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags);
-			goto out;
 		}
 	}