diff mbox series

[6/6] btrfs: clone relocation checksums in btrfs_alloc_ordered_extent

Message ID 20230724142243.5742-7-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [1/6] btrfs: fix error handling when in a COW window in run_delalloc_nocow | expand

Commit Message

Christoph Hellwig July 24, 2023, 2:22 p.m. UTC
Error handling for btrfs_reloc_clone_csums is a royal pain.  That is
not because btrfs_reloc_clone_csums does something particularly nasty:
the only failure can come from looking up the csums in the csum tree -
instead it is because btrfs_reloc_clone_csums is called after
btrfs_alloc_ordered_extent because it adds those founds csums to the
list in the ordred extent, and cleaning up after an ordered extent
has been added to the lookup data structures is very cumbersome.

Fix this by calling btrfs_reloc_clone_csums in
btrfs_alloc_ordered_extent after allocting the ordered extents, but
before making it reachable by the wider word and thus bypassing the
complex ordered extent removal path entirely.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/inode.c        | 44 -----------------------------------------
 fs/btrfs/ordered-data.c | 24 ++++++++++++++++++++--
 2 files changed, 22 insertions(+), 46 deletions(-)

Comments

David Sterba Aug. 10, 2023, 5:07 p.m. UTC | #1
On Mon, Jul 24, 2023 at 07:22:43AM -0700, Christoph Hellwig wrote:
> Error handling for btrfs_reloc_clone_csums is a royal pain.  That is
> not because btrfs_reloc_clone_csums does something particularly nasty:
> the only failure can come from looking up the csums in the csum tree -
> instead it is because btrfs_reloc_clone_csums is called after
> btrfs_alloc_ordered_extent because it adds those founds csums to the
> list in the ordred extent, and cleaning up after an ordered extent
> has been added to the lookup data structures is very cumbersome.
> 
> Fix this by calling btrfs_reloc_clone_csums in
> btrfs_alloc_ordered_extent after allocting the ordered extents, but
> before making it reachable by the wider word and thus bypassing the
> complex ordered extent removal path entirely.

Please rephrase the changelog and fix the typos. Relocation code is
difficult and nobody wants to break it so there are things to fix but
instead of venting I'd rather see a more concise description that helps
to understand the problem and solution. We don't have much code
documentation and the efforts put into writing good changelogs for such
areas of code is worthwhile. Thanks.
diff mbox series

Patch

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index caaf2c002d795d..7864cae3ee2094 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1435,26 +1435,6 @@  static noinline int cow_file_range(struct btrfs_inode *inode,
 			ret = PTR_ERR(ordered);
 			goto out_drop_extent_cache;
 		}
-
-		if (btrfs_is_data_reloc_root(root)) {
-			ret = btrfs_reloc_clone_csums(ordered);
-
-			/*
-			 * Only drop cache here, and process as normal.
-			 *
-			 * We must not allow extent_clear_unlock_delalloc()
-			 * at out_unlock label to free meta of this ordered
-			 * extent, as its meta should be freed by
-			 * btrfs_finish_ordered_io().
-			 *
-			 * So we must continue until @start is increased to
-			 * skip current ordered extent.
-			 */
-			if (ret)
-				btrfs_drop_extent_map_range(inode, start,
-							    start + ram_size - 1,
-							    false);
-		}
 		btrfs_put_ordered_extent(ordered);
 
 		btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@@ -1481,14 +1461,6 @@  static noinline int cow_file_range(struct btrfs_inode *inode,
 		alloc_hint = ins.objectid + ins.offset;
 		start += cur_alloc_size;
 		extent_reserved = false;
-
-		/*
-		 * btrfs_reloc_clone_csums() error, since start is increased
-		 * extent_clear_unlock_delalloc() at out_unlock label won't
-		 * free metadata of current ordered extent, we're OK to exit.
-		 */
-		if (ret)
-			goto out_unlock;
 	}
 done:
 	if (done_offset)
@@ -2171,14 +2143,6 @@  static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
 			ret = PTR_ERR(ordered);
 			goto error;
 		}
-
-		if (btrfs_is_data_reloc_root(root))
-			/*
-			 * Error handled later, as we must prevent
-			 * extent_clear_unlock_delalloc() in error handler
-			 * from freeing metadata of created ordered extent.
-			 */
-			ret = btrfs_reloc_clone_csums(ordered);
 		btrfs_put_ordered_extent(ordered);
 
 		extent_clear_unlock_delalloc(inode, cur_offset, nocow_end,
@@ -2188,14 +2152,6 @@  static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
 					     PAGE_UNLOCK | PAGE_SET_ORDERED);
 
 		cur_offset = nocow_end + 1;
-
-		/*
-		 * btrfs_reloc_clone_csums() error, now we're OK to call error
-		 * handler, as metadata for created ordered extent will only
-		 * be freed by btrfs_finish_ordered_io().
-		 */
-		if (ret)
-			goto error;
 		if (cur_offset > end)
 			break;
 	}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 109e80ed25b669..caa5dbf48db572 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -18,6 +18,7 @@ 
 #include "delalloc-space.h"
 #include "qgroup.h"
 #include "subpage.h"
+#include "relocation.h"
 #include "file.h"
 #include "super.h"
 
@@ -274,8 +275,27 @@  struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
 	entry = alloc_ordered_extent(inode, file_offset, num_bytes, ram_bytes,
 				     disk_bytenr, disk_num_bytes, offset, flags,
 				     compress_type);
-	if (!IS_ERR(entry))
-		insert_ordered_extent(entry);
+	if (IS_ERR(entry))
+		return entry;
+
+	/*
+	 * Writes to relocation roots are special, and clones the existing csums
+	 * from the csum tree instead of calculating them.
+	 *
+	 * Clone the csums here so that the ordered extent never gets inserted
+	 * into the per-inode ordered extent tree and per-root list on failure.
+	 */
+	if (btrfs_is_data_reloc_root(inode->root)) {
+		int ret;
+
+		ret = btrfs_reloc_clone_csums(entry);
+		if (ret) {
+			kmem_cache_free(btrfs_ordered_extent_cache, entry);
+			return ERR_PTR(ret);
+		}
+	}
+
+	insert_ordered_extent(entry);
 	return entry;
 }