@@ -6848,6 +6848,53 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
return 0;
}
+/*
+ * Real work happens here to drop one or more refs of @node.
+ *
+ * The work is mostly done in two parts:
+ * 1. Locate the extent refs.
+ * It's either inlined in EXTENT/METADATA_ITEM or in keyed SHARED_* item.
+ * Locate it then reduces the refs number or remove the ref line completely.
+ *
+ * 2. Update the refs count in EXTENT/METADATA_ITEM
+ *
+ * Due to the above two operations and possible optimizations, the function
+ * is a little hard to read, but with the following examples, the result
+ * of this function should be pretty easy to get.
+ *
+ * Example:
+ * *** Inlined backref case ***
+ * In extent tree we have:
+ * item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82
+ * refs 2 gen 6 flags DATA
+ * extent data backref root FS_TREE objectid 258 offset 0 count 1
+ * extent data backref root FS_TREE objectid 257 offset 0 count 1
+ *
+ * This function get called with
+ * node->bytenr = 13631488, node->num_bytes = 1048576
+ * root_objectid = FS_TREE, owner_objectid = 257, owner_offset = 0,
+ * refs_to_drop = 1
+ * Then we should get some like:
+ * item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82
+ * refs 1 gen 6 flags DATA
+ * extent data backref root FS_TREE objectid 258 offset 0 count 1
+ *
+ * *** Keyed backref case ***
+ * In extent tree we have:
+ * item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24
+ * refs 754 gen 6 flags DATA
+ * [...]
+ * item 2 key (13631488 EXTENT_DATA_REF <HASH>) itemoff 3915 itemsize 28
+ * extent data backref root FS_TREE objectid 866 offset 0 count 1
+ * This function get called with
+ * node->bytenr = 13631488, node->num_bytes = 1048576
+ * root_objectid = FS_TREE, owner_objectid = 866, owner_offset = 0,
+ * refs_to_drop = 1
+ * Then we should get some like:
+ * item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24
+ * refs 753 gen 6 flags DATA
+ * And that (13631488 EXTENT_DATA_REF <HASH>) get removed.
+ */
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node, u64 parent,
u64 root_objectid, u64 owner_objectid,
@@ -6881,7 +6928,15 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
path->leave_spinning = 1;
is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
- BUG_ON(!is_data && refs_to_drop != 1);
+
+ if (unlikely(!is_data && refs_to_drop != 1)) {
+ btrfs_crit(info,
+"invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u",
+ node->bytenr, refs_to_drop);
+ ret = -EINVAL;
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
if (is_data)
skinny_metadata = false;
@@ -6890,6 +6945,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
parent, root_objectid, owner_objectid,
owner_offset);
if (ret == 0) {
+ /*
+ * Either the inline backref or the SHARED_DATA_REF/
+ * SHARED_BLOCK_REF is found
+ *
+ * Here is a quick path to locate EXTENT/METADATA_ITEM.
+ * It's possible the EXTENT/METADATA_ITEM is near current slot.
+ */
extent_slot = path->slots[0];
while (extent_slot >= 0) {
btrfs_item_key_to_cpu(path->nodes[0], &key,
@@ -6906,13 +6968,20 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
found_extent = 1;
break;
}
+
+ /* Quick path didn't find the EXTEMT/METADATA_ITEM */
if (path->slots[0] - extent_slot > 5)
break;
extent_slot--;
}
if (!found_extent) {
- BUG_ON(iref);
+ if (unlikely(iref)) {
+ btrfs_crit(info,
+"invalid iref, no EXTENT/METADATA_ITEM found but has inline extent ref");
+ goto err_dump_abort;
+ }
+ /* Must be SHARED_* item, remove the backref first */
ret = remove_extent_backref(trans, path, NULL,
refs_to_drop,
is_data, &last_ref);
@@ -6923,6 +6992,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
path->leave_spinning = 1;
+
+ /* Slow path to locate EXTENT/METADATA_ITEM */
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
@@ -6997,19 +7068,24 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
key.type == BTRFS_EXTENT_ITEM_KEY) {
struct btrfs_tree_block_info *bi;
- BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
+ if (unlikely(item_size < sizeof(*ei) + sizeof(*bi))) {
+ btrfs_crit(info,
+"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %lu",
+ key.objectid, key.type, key.offset,
+ owner_objectid, item_size,
+ sizeof(*ei) + sizeof(*bi));
+ goto err_dump_abort;
+ }
bi = (struct btrfs_tree_block_info *)(ei + 1);
WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
}
refs = btrfs_extent_refs(leaf, ei);
if (refs < refs_to_drop) {
- btrfs_err(info,
- "trying to drop %d refs but we only have %Lu for bytenr %Lu",
+ btrfs_crit(info,
+ "trying to drop %d refs but we only have %Lu for bytenr %Lu",
refs_to_drop, refs, bytenr);
- ret = -EINVAL;
- btrfs_abort_transaction(trans, ret);
- goto out;
+ goto err_dump_abort;
}
refs -= refs_to_drop;
@@ -7021,7 +7097,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
* be updated by remove_extent_backref
*/
if (iref) {
- BUG_ON(!found_extent);
+ if (unlikely(!found_extent)) {
+ btrfs_crit(info,
+"invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found");
+ goto err_dump_abort;
+ }
} else {
btrfs_set_extent_refs(leaf, ei, refs);
btrfs_mark_buffer_dirty(leaf);
@@ -7036,13 +7116,36 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
}
} else {
+ /* In this branch refs == 1 */
if (found_extent) {
- BUG_ON(is_data && refs_to_drop !=
- extent_data_ref_count(path, iref));
+ if (is_data && refs_to_drop !=
+ extent_data_ref_count(path, iref)) {
+ btrfs_crit(info,
+ "invalid refs_to_drop, current refs %u refs_to_drop %u",
+ extent_data_ref_count(path, iref),
+ refs_to_drop);
+ goto err_dump_abort;
+ }
if (iref) {
- BUG_ON(path->slots[0] != extent_slot);
+ if (path->slots[0] != extent_slot) {
+ btrfs_crit(info,
+"invalid iref, extent item key (%llu %u %llu) doesn't has wanted iref",
+ key.objectid, key.type,
+ key.offset);
+ goto err_dump_abort;
+ }
} else {
- BUG_ON(path->slots[0] != extent_slot + 1);
+ /*
+ * No inline ref, we must at SHARED_* item,
+ * And it's single ref, it must be:
+ * | extent_slot ||extent_slot + 1|
+ * [ EXTENT/METADATA_ITEM ][ SHARED_* ITEM ]
+ */
+ if (path->slots[0] != extent_slot + 1) {
+ btrfs_crit(info,
+ "invalid SHARED_* item, previous item is not EXTENT/METADATA_ITEM");
+ goto err_dump_abort;
+ }
path->slots[0] = extent_slot;
num_to_del = 2;
}
@@ -7082,6 +7185,21 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
out:
btrfs_free_path(path);
return ret;
+err_dump_abort:
+ /*
+ * Leaf dump can take up a lot of dmesg buffer since default nodesize
+ * is already 16K.
+ * So we only do full leaf dump for debug build.
+ */
+ if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
+ btrfs_crit(info, "path->slots[0]=%d extent_slot=%d",
+ path->slots[0], extent_slot);
+ btrfs_print_leaf(path->nodes[0]);
+ }
+
+ btrfs_abort_transaction(trans, -EUCLEAN);
+ btrfs_free_path(path);
+ return -EUCLEAN;
}
/*
__btrfs_free_extent() is one of the best cases to show how optimization could make a function hard to read. In fact __btrfs_free_extent() is only doing two major works: 1. Reduce the refs number of an extent backref Either it's an inlined extent backref (inside EXTENT/METADATA item) or a keyed extent backref (SHARED_* item). We only need to locate that backref line, either reduce the number or remove the backref line completely. 2. Update the refs count in EXTENT/METADATA_ITEM But in real world, we do it in a complex but somewhat efficient way. During step 1), we will try to locate the EXTENT/METADATA_ITEM without triggering another btrfs_search_slot() as fast path. Only when we failed to locate that item, we will trigger another btrfs_search_slot() to get that EXTENT/METADATA_ITEM after we updated/deleted the backref line. And we have a lot of restrict check on things like refs_to_drop against extent refs and special case check for single ref extent. All of these results: - 7 BUG_ON()s in a single function Although all these BUG_ON() are doing correct check, they're super easy to get triggered for fuzzed images. It's never a good idea to piss the end user. - Near 300 lines without much useful comments but a lot of hidden conditions I believe even the author needs several minutes to recall what the code is doing Not to mention a lot of BUG_ON() conditions needs to go back tens of lines to find out why. This patch address all these problems by: - Introduce two examples to show what __btrfs_free_extent() is doing One inlined backref case and one keyed case. Should cover most cases. - Kill all BUG_ON()s with proper error message and optional leaf dump - Add comment to show the overall workflow Link: https://bugzilla.kernel.org/show_bug.cgi?id=202819 [ The report triggers one BUG_ON() in __btrfs_free_extent() ] Signed-off-by: Qu Wenruo <wqu@suse.com> --- fs/btrfs/extent-tree.c | 144 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 131 insertions(+), 13 deletions(-)