diff mbox series

btrfs: zoned: fix zone_unusable accounting on making BG RW again

Message ID 1626ef0d42713eaa6e050a6a64be8a811446ad5a.1720624977.git.naohiro.aota@wdc.com (mailing list archive)
State New, archived
Headers show
Series btrfs: zoned: fix zone_unusable accounting on making BG RW again | expand

Commit Message

Naohiro Aota July 10, 2024, 3:23 p.m. UTC
When btrfs makes a block group read-only, it adds all free regions in the
BG to space_info->bytes_readonly. That free space excludes reserved and
pinned regions. OTOH, when btrfs makes the BG read-write again, it moves
all the unused regions into the block group's zone_unusable. That unused
region includes reserved and pinned regions. As a result, it counts too
many zone_unusable bytes.

Fortunately (or unfortunately), having an erroneous zone_unusable does not
affect the calculation of space_info->bytes_readonly, because the free
space calculation (num_bytes in btrfs_dec_block_group_ro()) is itself based
on the erroneous zone_unusable, which reduces num_bytes by just enough to
cancel the error.

This behavior can be easily discovered by adding a WARN_ON to check, e.g.,
"bg->pinned > 0" in btrfs_dec_block_group_ro(), and running an fstests test
case such as btrfs/282.

Fix it by properly considering pinned and reserved in
btrfs_dec_block_group_ro(). Also, add a WARN_ON and introduce
btrfs_space_info_update_bytes_zone_unusable() to catch a similar mistake.

Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones")
CC: stable@vger.kernel.org # 5.15+
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 fs/btrfs/block-group.c       | 13 ++++++++-----
 fs/btrfs/extent-tree.c       |  2 +-
 fs/btrfs/free-space-cache.c  |  4 +++-
 fs/btrfs/space-info.c        |  2 +-
 fs/btrfs/space-info.h        |  1 +
 include/trace/events/btrfs.h |  8 ++++++++
 6 files changed, 22 insertions(+), 8 deletions(-)

Comments

Josef Bacik July 10, 2024, 7:27 p.m. UTC | #1
On Thu, Jul 11, 2024 at 12:23:54AM +0900, Naohiro Aota wrote:
> When btrfs makes a block group read-only, it adds all free regions in the
> BG to space_info->bytes_readonly. That free space excludes reserved and
> pinned regions. OTOH, when btrfs makes the BG read-write again, it moves
> all the unused regions into the block group's zone_unusable. That unused
> region includes reserved and pinned regions. As a result, it counts too
> much zone_unusable bytes.
> 
> Fortunately (or unfortunately), having erroneous zone_unusable does not
> affect the calculation of space_info->bytes_readonly, because free
> space (num_bytes in btrfs_dec_block_group_ro) calculation is done based on
> the erroneous zone_unusable and it reduces the num_bytes just to cancel the
> error.
> 
> This behavior can be easily discovered by adding a WARN_ON to check e.g,
> "bg->pinned > 0" in btrfs_dec_block_group_ro(), and running fstests test
> case like btrfs/282.
> 
> Fix it by properly considering pinned and reserved in
> btrfs_dec_block_group_ro(). Also, add a WARN_ON and introduce
> btrfs_space_info_update_bytes_zone_unusable() to catch a similar mistake.
> 
> Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones")
> CC: stable@vger.kernel.org # 5.15+
> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>

Reviewed-by: Josef Bacik <josef@toxicpanda.com>

Thanks,

Josef
Johannes Thumshirn July 11, 2024, 6:14 a.m. UTC | #2
Looks good,
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
diff mbox series

Patch

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 498442d0c216..2e49d978f504 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1223,8 +1223,8 @@  int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	block_group->space_info->total_bytes -= block_group->length;
 	block_group->space_info->bytes_readonly -=
 		(block_group->length - block_group->zone_unusable);
-	block_group->space_info->bytes_zone_unusable -=
-		block_group->zone_unusable;
+	btrfs_space_info_update_bytes_zone_unusable(fs_info, block_group->space_info,
+						    -block_group->zone_unusable);
 	block_group->space_info->disk_total -= block_group->length * factor;
 
 	spin_unlock(&block_group->space_info->lock);
@@ -1396,7 +1396,8 @@  static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
 		if (btrfs_is_zoned(cache->fs_info)) {
 			/* Migrate zone_unusable bytes to readonly */
 			sinfo->bytes_readonly += cache->zone_unusable;
-			sinfo->bytes_zone_unusable -= cache->zone_unusable;
+			btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo,
+								    -cache->zone_unusable);
 			cache->zone_unusable = 0;
 		}
 		cache->ro++;
@@ -3056,9 +3057,11 @@  void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
 		if (btrfs_is_zoned(cache->fs_info)) {
 			/* Migrate zone_unusable bytes back */
 			cache->zone_unusable =
-				(cache->alloc_offset - cache->used) +
+				(cache->alloc_offset - cache->used - cache->pinned -
+				 cache->reserved) +
 				(cache->length - cache->zone_capacity);
-			sinfo->bytes_zone_unusable += cache->zone_unusable;
+			btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo,
+								    cache->zone_unusable);
 			sinfo->bytes_readonly -= cache->zone_unusable;
 		}
 		num_bytes = cache->length - cache->reserved -
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d77498e7671c..f2df49b46f35 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2793,7 +2793,7 @@  static int unpin_extent_range(struct btrfs_fs_info *fs_info,
 			readonly = true;
 		} else if (btrfs_is_zoned(fs_info)) {
 			/* Need reset before reusing in a zoned block group */
-			space_info->bytes_zone_unusable += len;
+			btrfs_space_info_update_bytes_zone_unusable(fs_info, space_info, len);
 			readonly = true;
 		}
 		spin_unlock(&cache->lock);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 3f9b7507543a..f5996a43db24 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2723,8 +2723,10 @@  static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
 	 * If the block group is read-only, we should account freed space into
 	 * bytes_readonly.
 	 */
-	if (!block_group->ro)
+	if (!block_group->ro) {
 		block_group->zone_unusable += to_unusable;
+		WARN_ON(block_group->zone_unusable > block_group->length);
+	}
 	spin_unlock(&ctl->tree_lock);
 	if (!used) {
 		spin_lock(&block_group->lock);
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 9ac94d3119e8..5227eadcaaa8 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -316,7 +316,7 @@  void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
 	found->bytes_used += block_group->used;
 	found->disk_used += block_group->used * factor;
 	found->bytes_readonly += block_group->bytes_super;
-	found->bytes_zone_unusable += block_group->zone_unusable;
+	btrfs_space_info_update_bytes_zone_unusable(info, found, block_group->zone_unusable);
 	if (block_group->length > 0)
 		found->full = 0;
 	btrfs_try_granting_tickets(info, found);
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index 4db8a0267c16..88b44221ce97 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -249,6 +249,7 @@  btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info,		\
 
 DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info");
 DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned");
+DECLARE_SPACE_INFO_UPDATE(bytes_zone_unusable, "zone_unusable");
 
 int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
 void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index eeb56975bee7..de55a555d95b 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -2383,6 +2383,14 @@  DEFINE_EVENT(btrfs__space_info_update, update_bytes_pinned,
 	TP_ARGS(fs_info, sinfo, old, diff)
 );
 
+DEFINE_EVENT(btrfs__space_info_update, update_bytes_zone_unusable,
+
+	TP_PROTO(const struct btrfs_fs_info *fs_info,
+		 const struct btrfs_space_info *sinfo, u64 old, s64 diff),
+
+	TP_ARGS(fs_info, sinfo, old, diff)
+);
+
 DECLARE_EVENT_CLASS(btrfs_raid56_bio,
 
 	TP_PROTO(const struct btrfs_raid_bio *rbio,