diff mbox series

[12/15] btrfs: clean up our handling of refs == 0 in snapshot delete

Message ID ef416b593a77b2b4c4b8faed51390bb3cc36ae1c.1713550368.git.josef@toxicpanda.com (mailing list archive)
State New, archived
Headers show
Series btrfs: snapshot delete cleanups | expand

Commit Message

Josef Bacik April 19, 2024, 6:17 p.m. UTC
In reada we BUG_ON(refs == 0), which could be unkind since we aren't
holding a lock on the extent leaf and thus could get a transient
incorrect answer.  In walk_down_proc we also BUG_ON(refs == 0), which
could happen if we have extent tree corruption.  Change that to return
-EUCLEAN.  In do_walk_down() we catch this case and handle it correctly,
however we return -EIO, which -EUCLEAN is a more appropriate error code.
Finally in walk_up_proc we have the same BUG_ON(refs == 0), so convert
that to proper error handling.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/extent-tree.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

Comments

David Sterba April 24, 2024, 12:23 p.m. UTC | #1
On Fri, Apr 19, 2024 at 02:17:07PM -0400, Josef Bacik wrote:
> In reada we BUG_ON(refs == 0), which could be unkind since we aren't
> holding a lock on the extent leaf and thus could get a transient
> incorrect answer.  In walk_down_proc we also BUG_ON(refs == 0), which
> could happen if we have extent tree corruption.  Change that to return
> -EUCLEAN.  In do_walk_down() we catch this case and handle it correctly,
> however we return -EIO, which -EUCLEAN is a more appropriate error code.
> Finally in walk_up_proc we have the same BUG_ON(refs == 0), so convert
> that to proper error handling.
> 
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> ---
>  fs/btrfs/extent-tree.c | 22 ++++++++++++++++++----
>  1 file changed, 18 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 43fe12b073c3..5eb39f405fd5 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -5352,7 +5352,15 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
>  		/* We don't care about errors in readahead. */
>  		if (ret < 0)
>  			continue;
> -		BUG_ON(refs == 0);
> +
> +		/*
> +		 * This could be racey, it's conceivable that we raced and end
> +		 * up with a bogus refs count, if that's the case just skip, if
> +		 * we are actually corrupt we will notice when we look up
> +		 * everything again with our locks.
> +		 */
> +		if (refs == 0)
> +			continue;
>  
>  		/* If we don't need to visit this node don't reada. */
>  		if (!visit_node_for_delete(root, wc, eb, refs, flags, slot))
> @@ -5401,7 +5409,10 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
>  					       NULL);
>  		if (ret)
>  			return ret;
> -		BUG_ON(wc->refs[level] == 0);
> +		if (unlikely(wc->refs[level] == 0)) {
> +			btrfs_err(fs_info, "Missing references.");

Has this message been copied from somewhere? It's quite useless and
could be either dropped completely or at least be more specific about
the error.

> +			return -EUCLEAN;
> +		}
>  	}
>  
>  	if (wc->stage == DROP_REFERENCE) {
> @@ -5665,7 +5676,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
>  
>  	if (unlikely(wc->refs[level - 1] == 0)) {
>  		btrfs_err(fs_info, "Missing references.");

Ah right, so this looks like the source.

> -		ret = -EIO;
> +		ret = -EUCLEAN;
>  		goto out_unlock;
>  	}
>  	wc->lookup_info = 0;
> @@ -5776,7 +5787,10 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
>  				path->locks[level] = 0;
>  				return ret;
>  			}
> -			BUG_ON(wc->refs[level] == 0);
> +			if (unlikely(wc->refs[level] == 0)) {
> +				btrfs_err(fs_info, "Missing refs.");
> +				return -EUCLEAN;
> +			}
>  			if (wc->refs[level] == 1) {
>  				btrfs_tree_unlock_rw(eb, path->locks[level]);
>  				path->locks[level] = 0;
> -- 
> 2.43.0
>
diff mbox series

Patch

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 43fe12b073c3..5eb39f405fd5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5352,7 +5352,15 @@  static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 		/* We don't care about errors in readahead. */
 		if (ret < 0)
 			continue;
-		BUG_ON(refs == 0);
+
+		/*
+		 * This could be racey, it's conceivable that we raced and end
+		 * up with a bogus refs count, if that's the case just skip, if
+		 * we are actually corrupt we will notice when we look up
+		 * everything again with our locks.
+		 */
+		if (refs == 0)
+			continue;
 
 		/* If we don't need to visit this node don't reada. */
 		if (!visit_node_for_delete(root, wc, eb, refs, flags, slot))
@@ -5401,7 +5409,10 @@  static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 					       NULL);
 		if (ret)
 			return ret;
-		BUG_ON(wc->refs[level] == 0);
+		if (unlikely(wc->refs[level] == 0)) {
+			btrfs_err(fs_info, "Missing references.");
+			return -EUCLEAN;
+		}
 	}
 
 	if (wc->stage == DROP_REFERENCE) {
@@ -5665,7 +5676,7 @@  static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 
 	if (unlikely(wc->refs[level - 1] == 0)) {
 		btrfs_err(fs_info, "Missing references.");
-		ret = -EIO;
+		ret = -EUCLEAN;
 		goto out_unlock;
 	}
 	wc->lookup_info = 0;
@@ -5776,7 +5787,10 @@  static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
 				path->locks[level] = 0;
 				return ret;
 			}
-			BUG_ON(wc->refs[level] == 0);
+			if (unlikely(wc->refs[level] == 0)) {
+				btrfs_err(fs_info, "Missing refs.");
+				return -EUCLEAN;
+			}
 			if (wc->refs[level] == 1) {
 				btrfs_tree_unlock_rw(eb, path->locks[level]);
 				path->locks[level] = 0;