@@ -1011,7 +1011,7 @@ int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
if (ret < 0) {
fprintf(stderr, "Error writing to "
"device %d\n", errno);
- ret = errno;
+ ret = -errno;
kfree(multi);
return ret;
} else {
@@ -153,13 +153,41 @@ again:
eb = find_first_extent_buffer(tree, start);
BUG_ON(!eb || eb->start != start);
ret = write_tree_block(trans, fs_info, eb);
- BUG_ON(ret);
+ if (ret < 0) {
+ free_extent_buffer(eb);
+ errno = -ret;
+ error("failed to write tree block %llu: %m",
+ eb->start);
+ goto cleanup;
+ }
start += eb->len;
clear_extent_buffer_dirty(eb);
free_extent_buffer(eb);
}
}
return 0;
+cleanup:
+ /*
+ * Mark all remaining dirty ebs clean, as they have no chance to be written
+ * back anymore.
+ */
+ while (1) {
+ int find_ret;
+
+ find_ret = find_first_extent_bit(tree, 0, &start, &end, EXTENT_DIRTY);
+
+ if (find_ret)
+ break;
+
+ while (start <= end) {
+ eb = find_first_extent_buffer(tree, start);
+ BUG_ON(!eb || eb->start != start);
+ start += eb->len;
+ clear_extent_buffer_dirty(eb);
+ free_extent_buffer(eb);
+ }
+ }
+ return ret;
}
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -219,7 +247,7 @@ commit_tree:
if (ret < 0)
goto error;
}
- __commit_transaction(trans, root);
+ ret = __commit_transaction(trans, root);
if (ret < 0)
goto error;
@@ -244,6 +272,7 @@ commit_tree:
}
return ret;
error:
+ btrfs_abort_transaction(trans, ret);
btrfs_destroy_delayed_refs(trans);
free(trans);
return ret;
[BUG] If we emulate a write error during commit transaction, by setting the block device read-only, then we can easily have the following crash using "btrfs check --clear-space-cache v2": Opening filesystem to check... Checking filesystem on /dev/test/scratch1 UUID: 5945915b-37f1-4bfa-9f64-684b318b8f73 Clear free space cache v2 Error writing to device 1 kernel-shared/transaction.c:156: __commit_transaction: BUG_ON `ret` triggered, value 1 ./btrfs(+0x570c9)[0x562ec894f0c9] ./btrfs(+0x57167)[0x562ec894f167] ./btrfs(__commit_transaction+0x13b)[0x562ec894f7f2] ./btrfs(btrfs_commit_transaction+0x214)[0x562ec894fa64] ./btrfs(btrfs_clear_free_space_tree+0x177)[0x562ec8941ae6] ./btrfs(+0xc8958)[0x562ec89c0958] ./btrfs(+0xc9d53)[0x562ec89c1d53] ./btrfs(+0x17ec7)[0x562ec890fec7] ./btrfs(main+0x12f)[0x562ec8910908] /usr/lib/libc.so.6(+0x232d0)[0x7ff917ee82d0] /usr/lib/libc.so.6(__libc_start_main+0x8a)[0x7ff917ee838a] ./btrfs(_start+0x25)[0x562ec890fdc5] Aborted (core dumped) [CAUSE] The call trace has shown it's a BUG_ON(), and it's from __commit_transaction(), which is writing tree blocks back. [FIX] The fix is pretty simple, just return error. In fact we even have an error value check in btrfs_commit_transaction() just after __commit_transaction() call (although not catching the return value from it). And since we're here, also call btrfs_abort_transaction() to prevent newer transactions from being started. Now we won't have a full crash: Opening filesystem to check... Checking filesystem on /dev/test/scratch1 UUID: 5945915b-37f1-4bfa-9f64-684b318b8f73 Clear free space cache v2 Error writing to device 1 ERROR: failed to write bytenr 30425088 length 16384: Operation not permitted ERROR: failed to write tree block 30425088: Operation not permitted ERROR: failed to clear free space cache v2: -1 extent buffer leak: start 30720000 len 16384 Reported-by: Christoph Anton Mitterer <calestyo@scientia.org> Signed-off-by: Qu Wenruo <wqu@suse.com> --- kernel-shared/extent_io.c | 2 +- kernel-shared/transaction.c | 33 +++++++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 3 deletions(-)