diff mbox series

[v2] btrfs: zoned: fix mounting with conventional zones

Message ID 4c82ad00b9241a4c31ac153a3ca9c2c78fbcd551.1662381459.git.johannes.thumshirn@wdc.com (mailing list archive)
State New, archived
Headers show
Series [v2] btrfs: zoned: fix mounting with conventional zones | expand

Commit Message

Johannes Thumshirn Sept. 5, 2022, 12:38 p.m. UTC
Since commit 6a921de58992 ("btrfs: zoned: introduce
space_info->active_total_bytes"), we're only counting the bytes of a
block-group on an active zone as usable for metadata writes. But on a SMR
drive, we don't have active zones and short circuit some of the logic.

This leads to an error on mount, because we cannot reserve space for
metadata writes.

Fix this by also setting the BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE bit in the
block-group's runtime flag if the zone is a conventional zone.

Fixes: 6a921de58992 ("btrfs: zoned: introduce space_info->active_total_bytes")
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>

---
Changes to v1:
* de-duplicate list handling code (Naohiro)
---
 fs/btrfs/zoned.c | 83 ++++++++++++++++++++++++------------------------
 1 file changed, 42 insertions(+), 41 deletions(-)

Comments

David Sterba Sept. 5, 2022, 1:29 p.m. UTC | #1
On Mon, Sep 05, 2022 at 05:38:24AM -0700, Johannes Thumshirn wrote:
> Since commit 6a921de58992 ("btrfs: zoned: introduce
> space_info->active_total_bytes"), we're only counting the bytes of a
> block-group on an active zone as usable for metadata writes. But on a SMR
> drive, we don't have active zones and short circuit some of the logic.
> 
> This leads to an error on mount, because we cannot reserve space for
> metadata writes.
> 
> Fix this by also setting the BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE bit in the
> block-group's runtime flag if the zone is a conventional zone.
> 
> Fixes: 6a921de58992 ("btrfs: zoned: introduce space_info->active_total_bytes")
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> 
> ---
> Changes to v1:
> * de-duplicate list handling code (Naohiro)

Replaced in misc-next, thanks.
David Sterba Sept. 5, 2022, 1:42 p.m. UTC | #2
On Mon, Sep 05, 2022 at 05:38:24AM -0700, Johannes Thumshirn wrote:
> Since commit 6a921de58992 ("btrfs: zoned: introduce
> space_info->active_total_bytes"), we're only counting the bytes of a
> block-group on an active zone as usable for metadata writes. But on a SMR
> drive, we don't have active zones and short circuit some of the logic.
> 
> This leads to an error on mount, because we cannot reserve space for
> metadata writes.
> 
> Fix this by also setting the BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE bit in the
> block-group's runtime flag if the zone is a conventional zone.
> 
> Fixes: 6a921de58992 ("btrfs: zoned: introduce space_info->active_total_bytes")
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>

I'm going to send this with other zoned fixes and there's a minor change
compared to misc-next

> -		ret = calculate_alloc_pointer(cache, &last_alloc);
> -		if (ret || map->num_stripes == num_conventional) {
> -			if (!ret)
> -				cache->alloc_offset = last_alloc;
> -			else
> -				btrfs_err(fs_info,
> -			"zoned: failed to determine allocation offset of bg %llu",
> -					  cache->start);
> +		} else if (map->num_stripes == num_conventional) {
> +			cache->alloc_offset = last_alloc;
> +			set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
> +				&cache->runtime_flags);

Changed to

			cache->zone_is_active = 1;

>  			goto out;
>  		}
>  	}
> @@ -1538,10 +1532,17 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
>  		ret = -EIO;
>  	}
>  
> -	if (!ret)
> +	if (!ret) {
>  		cache->meta_write_pointer = cache->alloc_offset + cache->start;
> -
> -	if (ret) {
> +		if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,

Similar here, if (cache->zone_is_active).

> +			     &cache->runtime_flags)) {
> +			btrfs_get_block_group(cache);
> +			spin_lock(&fs_info->zone_active_bgs_lock);
> +			list_add_tail(&cache->active_bg_list,
> +				      &fs_info->zone_active_bgs);
> +			spin_unlock(&fs_info->zone_active_bgs_lock);
> +		}
> +	} else {
diff mbox series

Patch

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index e12c0ca509fb..cd1d40fcc81f 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1187,7 +1187,7 @@  int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
  * offset.
  */
 static int calculate_alloc_pointer(struct btrfs_block_group *cache,
-				   u64 *offset_ret)
+				   u64 *offset_ret, bool new)
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
 	struct btrfs_root *root;
@@ -1197,6 +1197,21 @@  static int calculate_alloc_pointer(struct btrfs_block_group *cache,
 	int ret;
 	u64 length;
 
+	/*
+	 * Avoid  tree lookups for a new BG. It has no use for a new BG. It
+	 * must always be 0.
+	 *
+	 * Also, we have a lock chain of extent buffer lock -> chunk mutex.
+	 * For new a BG, this function is called from btrfs_make_block_group()
+	 * which is already taking the chunk mutex. Thus, we cannot call
+	 * calculate_alloc_pointer() which takes extent buffer locks to avoid
+	 * deadlock.
+	 */
+	if (new) {
+		*offset_ret = 0;
+		return 0;
+	}
+
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -1332,6 +1347,13 @@  int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		else
 			num_conventional++;
 
+		/*
+		 * Consider a zone as active if we can allow any number of
+		 * active zones.
+		 */
+		if (!device->zone_info->max_active_zones)
+			__set_bit(i, active);
+
 		if (!is_sequential) {
 			alloc_offsets[i] = WP_CONVENTIONAL;
 			continue;
@@ -1398,45 +1420,24 @@  int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 			__set_bit(i, active);
 			break;
 		}
-
-		/*
-		 * Consider a zone as active if we can allow any number of
-		 * active zones.
-		 */
-		if (!device->zone_info->max_active_zones)
-			__set_bit(i, active);
 	}
 
 	if (num_sequential > 0)
 		cache->seq_zone = true;
 
 	if (num_conventional > 0) {
-		/*
-		 * Avoid calling calculate_alloc_pointer() for new BG. It
-		 * is no use for new BG. It must be always 0.
-		 *
-		 * Also, we have a lock chain of extent buffer lock ->
-		 * chunk mutex.  For new BG, this function is called from
-		 * btrfs_make_block_group() which is already taking the
-		 * chunk mutex. Thus, we cannot call
-		 * calculate_alloc_pointer() which takes extent buffer
-		 * locks to avoid deadlock.
-		 */
-
 		/* Zone capacity is always zone size in emulation */
 		cache->zone_capacity = cache->length;
-		if (new) {
-			cache->alloc_offset = 0;
+		ret = calculate_alloc_pointer(cache, &last_alloc, new);
+		if (ret) {
+			btrfs_err(fs_info,
+			  "zoned: failed to determine allocation offset of bg %llu",
+				  cache->start);
 			goto out;
-		}
-		ret = calculate_alloc_pointer(cache, &last_alloc);
-		if (ret || map->num_stripes == num_conventional) {
-			if (!ret)
-				cache->alloc_offset = last_alloc;
-			else
-				btrfs_err(fs_info,
-			"zoned: failed to determine allocation offset of bg %llu",
-					  cache->start);
+		} else if (map->num_stripes == num_conventional) {
+			cache->alloc_offset = last_alloc;
+			set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
+				&cache->runtime_flags);
 			goto out;
 		}
 	}
@@ -1507,13 +1508,6 @@  int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		goto out;
 	}
 
-	if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags)) {
-		btrfs_get_block_group(cache);
-		spin_lock(&fs_info->zone_active_bgs_lock);
-		list_add_tail(&cache->active_bg_list, &fs_info->zone_active_bgs);
-		spin_unlock(&fs_info->zone_active_bgs_lock);
-	}
-
 out:
 	if (cache->alloc_offset > fs_info->zone_size) {
 		btrfs_err(fs_info,
@@ -1538,10 +1532,17 @@  int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		ret = -EIO;
 	}
 
-	if (!ret)
+	if (!ret) {
 		cache->meta_write_pointer = cache->alloc_offset + cache->start;
-
-	if (ret) {
+		if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
+			     &cache->runtime_flags)) {
+			btrfs_get_block_group(cache);
+			spin_lock(&fs_info->zone_active_bgs_lock);
+			list_add_tail(&cache->active_bg_list,
+				      &fs_info->zone_active_bgs);
+			spin_unlock(&fs_info->zone_active_bgs_lock);
+		}
+	} else {
 		kfree(cache->physical_map);
 		cache->physical_map = NULL;
 	}