diff mbox series

btrfs: get correct owning_root when dropping snapshot

Message ID 2bd997ea59e43e8f7db0f8fd8c8f3d85d0ff0c06.1697224683.git.josef@toxicpanda.com (mailing list archive)
State New, archived
Headers show
Series btrfs: get correct owning_root when dropping snapshot | expand

Commit Message

Josef Bacik Oct. 13, 2023, 7:18 p.m. UTC
Dave reported a bug where we were aborting the transaction while trying
to cleanup the squota reservation for an extent.

This turned out to be because we're doing btrfs_header_owner(next) in
do_walk_down when we decide to free the block.  However in this code
block we haven't explicitly read next, so it could be stale.  We would
then get whatever garbage happened to be in the pages at this point.

Fix this by saving the owner_root when we do the
btrfs_lookup_extent_info().  We always do this in do_walk_down, it is
how we make the decision of whether or not to delete the block.  This is
cheap because we've already done the extent item lookup at this point,
so it's straightforward to just grab the owner root as well.

Then we can use this when deleting the metadata block without needing to
force a read of the extent buffer to find the owner.

This fixes the problem that Dave reported.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/ctree.c       |  2 +-
 fs/btrfs/extent-tree.c | 25 +++++++++++++++++--------
 fs/btrfs/extent-tree.h |  3 ++-
 3 files changed, 20 insertions(+), 10 deletions(-)

Comments

Filipe Manana Oct. 19, 2023, 4:20 p.m. UTC | #1
On Fri, Oct 13, 2023 at 8:18 PM Josef Bacik <josef@toxicpanda.com> wrote:
>
> Dave reported a bug where we were aborting the transaction while trying
> to cleanup the squota reservation for an extent.
>
> This turned out to be because we're doing btrfs_header_owner(next) in
> do_walk_down when we decide to free the block.  However in this code
> block we haven't explicitly read next, so it could be stale.  We would
> then get whatever garbage happened to be in the pages at this point.
>
> Fix this by saving the owner_root when we do the
> btrfs_lookup_extent_info().  We always do this in do_walk_down, it is
> how we make the decision of whether or not to delete the block.  This is
> cheap because we've already done the extent item lookup at this point,
> so it's straightforward to just grab the owner root as well.
>
> Then we can use this when deleting the metadata block without needing to
> force a read of the extent buffer to find the owner.
>
> This fixes the problem that Dave reported.
>
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>

So this fixes "btrfs: track owning root in btrfs_ref" (currently
457cb1ddf5e8d895e9c551cad6b84bafae41f32c in misc-next).
Shouldn't that be mentioned somewhere?

Otherwise it looks good to me.

Reviewed-by: Filipe Manana <fdmanana@suse.com>


> ---
>  fs/btrfs/ctree.c       |  2 +-
>  fs/btrfs/extent-tree.c | 25 +++++++++++++++++--------
>  fs/btrfs/extent-tree.h |  3 ++-
>  3 files changed, 20 insertions(+), 10 deletions(-)
>
> diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
> index c0c5f2239820..14cefeaf9622 100644
> --- a/fs/btrfs/ctree.c
> +++ b/fs/btrfs/ctree.c
> @@ -421,7 +421,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
>         if (btrfs_block_can_be_shared(root, buf)) {
>                 ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
>                                                btrfs_header_level(buf), 1,
> -                                              &refs, &flags);
> +                                              &refs, &flags, NULL);
>                 if (ret)
>                         return ret;
>                 if (unlikely(refs == 0)) {
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index c8e5b4715b49..0455935ff558 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -102,7 +102,8 @@ int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
>   */
>  int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
>                              struct btrfs_fs_info *fs_info, u64 bytenr,
> -                            u64 offset, int metadata, u64 *refs, u64 *flags)
> +                            u64 offset, int metadata, u64 *refs, u64 *flags,
> +                            u64 *owning_root)
>  {
>         struct btrfs_root *extent_root;
>         struct btrfs_delayed_ref_head *head;
> @@ -114,6 +115,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
>         u32 item_size;
>         u64 num_refs;
>         u64 extent_flags;
> +       u64 owner = 0;
>         int ret;
>
>         /*
> @@ -167,6 +169,8 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
>                                             struct btrfs_extent_item);
>                         num_refs = btrfs_extent_refs(leaf, ei);
>                         extent_flags = btrfs_extent_flags(leaf, ei);
> +                       owner = btrfs_get_extent_owner_root(fs_info, leaf,
> +                                                           path->slots[0]);
>                 } else {
>                         ret = -EUCLEAN;
>                         btrfs_err(fs_info,
> @@ -226,6 +230,8 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
>                 *refs = num_refs;
>         if (flags)
>                 *flags = extent_flags;
> +       if (owning_root)
> +               *owning_root = owner;
>  out_free:
>         btrfs_free_path(path);
>         return ret;
> @@ -5234,7 +5240,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
>                 /* We don't lock the tree block, it's OK to be racy here */
>                 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
>                                                wc->level - 1, 1, &refs,
> -                                              &flags);
> +                                              &flags, NULL);
>                 /* We don't care about errors in readahead. */
>                 if (ret < 0)
>                         continue;
> @@ -5301,7 +5307,8 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
>                 ret = btrfs_lookup_extent_info(trans, fs_info,
>                                                eb->start, level, 1,
>                                                &wc->refs[level],
> -                                              &wc->flags[level]);
> +                                              &wc->flags[level],
> +                                              NULL);
>                 BUG_ON(ret == -ENOMEM);
>                 if (ret)
>                         return ret;
> @@ -5391,6 +5398,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
>         u64 bytenr;
>         u64 generation;
>         u64 parent;
> +       u64 owner_root = 0;
>         struct btrfs_tree_parent_check check = { 0 };
>         struct btrfs_key key;
>         struct btrfs_ref ref = { 0 };
> @@ -5434,7 +5442,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
>
>         ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
>                                        &wc->refs[level - 1],
> -                                      &wc->flags[level - 1]);
> +                                      &wc->flags[level - 1],
> +                                      &owner_root);
>         if (ret < 0)
>                 goto out_unlock;
>
> @@ -5567,8 +5576,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
>                 find_next_key(path, level, &wc->drop_progress);
>
>                 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
> -                                      fs_info->nodesize, parent,
> -                                      btrfs_header_owner(next));
> +                                      fs_info->nodesize, parent, owner_root);
>                 btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
>                                     0, false);
>                 ret = btrfs_free_extent(trans, &ref);
> @@ -5635,7 +5643,8 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
>                         ret = btrfs_lookup_extent_info(trans, fs_info,
>                                                        eb->start, level, 1,
>                                                        &wc->refs[level],
> -                                                      &wc->flags[level]);
> +                                                      &wc->flags[level],
> +                                                      NULL);
>                         if (ret < 0) {
>                                 btrfs_tree_unlock_rw(eb, path->locks[level]);
>                                 path->locks[level] = 0;
> @@ -5880,7 +5889,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
>                         ret = btrfs_lookup_extent_info(trans, fs_info,
>                                                 path->nodes[level]->start,
>                                                 level, 1, &wc->refs[level],
> -                                               &wc->flags[level]);
> +                                               &wc->flags[level], NULL);
>                         if (ret < 0) {
>                                 err = ret;
>                                 goto out_end_trans;
> diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
> index 0716f65d9753..2e066035ccee 100644
> --- a/fs/btrfs/extent-tree.h
> +++ b/fs/btrfs/extent-tree.h
> @@ -99,7 +99,8 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
>  int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
>  int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
>                              struct btrfs_fs_info *fs_info, u64 bytenr,
> -                            u64 offset, int metadata, u64 *refs, u64 *flags);
> +                            u64 offset, int metadata, u64 *refs, u64 *flags,
> +                            u64 *owner_root);
>  int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num,
>                      int reserved);
>  int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
> --
> 2.41.0
>
David Sterba Oct. 23, 2023, 2:52 p.m. UTC | #2
On Fri, Oct 13, 2023 at 03:18:17PM -0400, Josef Bacik wrote:
> Dave reported a bug where we were aborting the transaction while trying
> to cleanup the squota reservation for an extent.
> 
> This turned out to be because we're doing btrfs_header_owner(next) in
> do_walk_down when we decide to free the block.  However in this code
> block we haven't explicitly read next, so it could be stale.  We would
> then get whatever garbage happened to be in the pages at this point.
> 
> Fix this by saving the owner_root when we do the
> btrfs_lookup_extent_info().  We always do this in do_walk_down, it is
> how we make the decision of whether or not to delete the block.  This is
> cheap because we've already done the extent item lookup at this point,
> so it's straightforward to just grab the owner root as well.
> 
> Then we can use this when deleting the metadata block without needing to
> force a read of the extent buffer to find the owner.
> 
> This fixes the problem that Dave reported.
> 
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>

Added to misc-next with the note about the commit that introduced it.
diff mbox series

Patch

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c0c5f2239820..14cefeaf9622 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -421,7 +421,7 @@  static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 	if (btrfs_block_can_be_shared(root, buf)) {
 		ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
 					       btrfs_header_level(buf), 1,
-					       &refs, &flags);
+					       &refs, &flags, NULL);
 		if (ret)
 			return ret;
 		if (unlikely(refs == 0)) {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c8e5b4715b49..0455935ff558 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -102,7 +102,8 @@  int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
  */
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     struct btrfs_fs_info *fs_info, u64 bytenr,
-			     u64 offset, int metadata, u64 *refs, u64 *flags)
+			     u64 offset, int metadata, u64 *refs, u64 *flags,
+			     u64 *owning_root)
 {
 	struct btrfs_root *extent_root;
 	struct btrfs_delayed_ref_head *head;
@@ -114,6 +115,7 @@  int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 	u32 item_size;
 	u64 num_refs;
 	u64 extent_flags;
+	u64 owner = 0;
 	int ret;
 
 	/*
@@ -167,6 +169,8 @@  int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 					    struct btrfs_extent_item);
 			num_refs = btrfs_extent_refs(leaf, ei);
 			extent_flags = btrfs_extent_flags(leaf, ei);
+			owner = btrfs_get_extent_owner_root(fs_info, leaf,
+							    path->slots[0]);
 		} else {
 			ret = -EUCLEAN;
 			btrfs_err(fs_info,
@@ -226,6 +230,8 @@  int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 		*refs = num_refs;
 	if (flags)
 		*flags = extent_flags;
+	if (owning_root)
+		*owning_root = owner;
 out_free:
 	btrfs_free_path(path);
 	return ret;
@@ -5234,7 +5240,7 @@  static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 		/* We don't lock the tree block, it's OK to be racy here */
 		ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
 					       wc->level - 1, 1, &refs,
-					       &flags);
+					       &flags, NULL);
 		/* We don't care about errors in readahead. */
 		if (ret < 0)
 			continue;
@@ -5301,7 +5307,8 @@  static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 		ret = btrfs_lookup_extent_info(trans, fs_info,
 					       eb->start, level, 1,
 					       &wc->refs[level],
-					       &wc->flags[level]);
+					       &wc->flags[level],
+					       NULL);
 		BUG_ON(ret == -ENOMEM);
 		if (ret)
 			return ret;
@@ -5391,6 +5398,7 @@  static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	u64 bytenr;
 	u64 generation;
 	u64 parent;
+	u64 owner_root = 0;
 	struct btrfs_tree_parent_check check = { 0 };
 	struct btrfs_key key;
 	struct btrfs_ref ref = { 0 };
@@ -5434,7 +5442,8 @@  static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 
 	ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
 				       &wc->refs[level - 1],
-				       &wc->flags[level - 1]);
+				       &wc->flags[level - 1],
+				       &owner_root);
 	if (ret < 0)
 		goto out_unlock;
 
@@ -5567,8 +5576,7 @@  static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 		find_next_key(path, level, &wc->drop_progress);
 
 		btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
-				       fs_info->nodesize, parent,
-				       btrfs_header_owner(next));
+				       fs_info->nodesize, parent, owner_root);
 		btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
 				    0, false);
 		ret = btrfs_free_extent(trans, &ref);
@@ -5635,7 +5643,8 @@  static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
 			ret = btrfs_lookup_extent_info(trans, fs_info,
 						       eb->start, level, 1,
 						       &wc->refs[level],
-						       &wc->flags[level]);
+						       &wc->flags[level],
+						       NULL);
 			if (ret < 0) {
 				btrfs_tree_unlock_rw(eb, path->locks[level]);
 				path->locks[level] = 0;
@@ -5880,7 +5889,7 @@  int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
 			ret = btrfs_lookup_extent_info(trans, fs_info,
 						path->nodes[level]->start,
 						level, 1, &wc->refs[level],
-						&wc->flags[level]);
+						&wc->flags[level], NULL);
 			if (ret < 0) {
 				err = ret;
 				goto out_end_trans;
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
index 0716f65d9753..2e066035ccee 100644
--- a/fs/btrfs/extent-tree.h
+++ b/fs/btrfs/extent-tree.h
@@ -99,7 +99,8 @@  u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
 int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     struct btrfs_fs_info *fs_info, u64 bytenr,
-			     u64 offset, int metadata, u64 *refs, u64 *flags);
+			     u64 offset, int metadata, u64 *refs, u64 *flags,
+			     u64 *owner_root);
 int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num,
 		     int reserved);
 int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,