diff mbox series

[3/3] btrfs-progs: properly handle write error when writing back tree blocks

Message ID 3b4a860cd4d9091def2121e0c67d312a99ae0dca.1663934243.git.wqu@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs-progs: enhance error handling for metadata writeback | expand

Commit Message

Qu Wenruo Sept. 23, 2022, 11:59 a.m. UTC
[BUG]
If we emulate a write error during commit transaction, by setting the
block device read-only, then we can easily have the following crash
using "btrfs check --clear-space-cache v2":

  Opening filesystem to check...
  Checking filesystem on /dev/test/scratch1
  UUID: 5945915b-37f1-4bfa-9f64-684b318b8f73
  Clear free space cache v2
  Error writing to device 1
  kernel-shared/transaction.c:156: __commit_transaction: BUG_ON `ret` triggered, value 1
  ./btrfs(+0x570c9)[0x562ec894f0c9]
  ./btrfs(+0x57167)[0x562ec894f167]
  ./btrfs(__commit_transaction+0x13b)[0x562ec894f7f2]
  ./btrfs(btrfs_commit_transaction+0x214)[0x562ec894fa64]
  ./btrfs(btrfs_clear_free_space_tree+0x177)[0x562ec8941ae6]
  ./btrfs(+0xc8958)[0x562ec89c0958]
  ./btrfs(+0xc9d53)[0x562ec89c1d53]
  ./btrfs(+0x17ec7)[0x562ec890fec7]
  ./btrfs(main+0x12f)[0x562ec8910908]
  /usr/lib/libc.so.6(+0x232d0)[0x7ff917ee82d0]
  /usr/lib/libc.so.6(__libc_start_main+0x8a)[0x7ff917ee838a]
  ./btrfs(_start+0x25)[0x562ec890fdc5]
  Aborted (core dumped)

[CAUSE]
The call trace has shown it's a BUG_ON(), and it's from
__commit_transaction(), which is writing tree blocks back.

[FIX]
The fix is pretty simple, just return error.

In fact we even have an error value check in btrfs_commit_transaction()
just after __commit_transaction() call (although not catching the return
value from it).

And since we're here, also call btrfs_abort_transaction() to prevent
newer transactions from being started.

Now we won't have a full crash:

  Opening filesystem to check...
  Checking filesystem on /dev/test/scratch1
  UUID: 5945915b-37f1-4bfa-9f64-684b318b8f73
  Clear free space cache v2
  Error writing to device 1
  ERROR: failed to write bytenr 30425088 length 16384: Operation not permitted
  ERROR: failed to write tree block 30425088: Operation not permitted
  ERROR: failed to clear free space cache v2: -1
  extent buffer leak: start 30720000 len 16384

Reported-by: Christoph Anton Mitterer <calestyo@scientia.org>
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 kernel-shared/extent_io.c   |  2 +-
 kernel-shared/transaction.c | 33 +++++++++++++++++++++++++++++++--
 2 files changed, 32 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/kernel-shared/extent_io.c b/kernel-shared/extent_io.c
index f10acc3595c3..3875b8f61242 100644
--- a/kernel-shared/extent_io.c
+++ b/kernel-shared/extent_io.c
@@ -1011,7 +1011,7 @@  int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
 				if (ret < 0) {
 					fprintf(stderr, "Error writing to "
 						"device %d\n", errno);
-					ret = errno;
+					ret = -errno;
 					kfree(multi);
 					return ret;
 				} else {
diff --git a/kernel-shared/transaction.c b/kernel-shared/transaction.c
index 8d74016e21d2..28b1684828ee 100644
--- a/kernel-shared/transaction.c
+++ b/kernel-shared/transaction.c
@@ -153,13 +153,41 @@  again:
 			eb = find_first_extent_buffer(tree, start);
 			BUG_ON(!eb || eb->start != start);
 			ret = write_tree_block(trans, fs_info, eb);
-			BUG_ON(ret);
+			if (ret < 0) {
+				free_extent_buffer(eb);
+				errno = -ret;
+				error("failed to write tree block %llu: %m",
+				      eb->start);
+				goto cleanup;
+			}
 			start += eb->len;
 			clear_extent_buffer_dirty(eb);
 			free_extent_buffer(eb);
 		}
 	}
 	return 0;
+cleanup:
+	/*
+	 * Mark all remaining dirty ebs clean, as they have no chance to be written
+	 * back anymore.
+	 */
+	while (1) {
+		int find_ret;
+
+		find_ret = find_first_extent_bit(tree, 0, &start, &end, EXTENT_DIRTY);
+
+		if (find_ret)
+			break;
+
+		while (start <= end) {
+			eb = find_first_extent_buffer(tree, start);
+			BUG_ON(!eb || eb->start != start);
+			start += eb->len;
+			clear_extent_buffer_dirty(eb);
+			free_extent_buffer(eb);
+		}
+	}
+	return ret;
 }
 
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -219,7 +247,7 @@  commit_tree:
 		if (ret < 0)
 			goto error;
 	}
-	__commit_transaction(trans, root);
+	ret = __commit_transaction(trans, root);
 	if (ret < 0)
 		goto error;
 
@@ -244,6 +272,7 @@  commit_tree:
 	}
 	return ret;
 error:
+	btrfs_abort_transaction(trans, ret);
 	btrfs_destroy_delayed_refs(trans);
 	free(trans);
 	return ret;