diff mbox series

btrfs: make sure that WRITTEN is set on all metadata blocks

Message ID d82bd6cef76e7beaa0d33ef48f9292f3779d015c.1714395805.git.josef@toxicpanda.com (mailing list archive)
State New, archived
Headers show
Series btrfs: make sure that WRITTEN is set on all metadata blocks | expand

Commit Message

Josef Bacik April 29, 2024, 1:03 p.m. UTC
We previously would call btrfs_check_leaf() if we had the check
integrity code enabled, which meant that we could only run the extended
leaf checks if we had WRITTEN set on the header flags.

This leaves a gap in our checking: if corruption on disk leaves WRITTEN
unset on a leaf, the extended leaf checks don't get run, even though we
rely on them to validate all of the item pointers and make sure we don't
access memory outside of the extent buffer.

However, since 732fab95abe2 ("btrfs: check-integrity: remove
CONFIG_BTRFS_FS_CHECK_INTEGRITY option") we no longer call
btrfs_check_leaf() from btrfs_mark_buffer_dirty(), which means we only
ever call it on blocks that are being written out, and thus have WRITTEN
set, or that are being read in, which should have WRITTEN set.

Add checks to make sure we have WRITTEN set appropriately, and then make
sure __btrfs_check_leaf() always does the item checking.  This will
protect us from file systems that have been corrupted and no longer have
WRITTEN set on some of the blocks.

Reported-by: lei lu <llfamsec@gmail.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/tree-checker.c | 30 +++++++++++++++---------------
 fs/btrfs/tree-checker.h |  1 +
 2 files changed, 16 insertions(+), 15 deletions(-)

Comments

David Sterba April 30, 2024, 8:37 p.m. UTC | #1
On Mon, Apr 29, 2024 at 09:03:35AM -0400, Josef Bacik wrote:
> We previously would call btrfs_check_leaf() if we had the check
> integrity code enabled, which meant that we could only run the extended
> leaf checks if we had WRITTEN set on the header flags.
> 
> This leaves a gap in our checking, because we could end up with
> corruption on disk where WRITTEN isn't set on the leaf, and then the
> extended leaf checks don't get run which we rely on to validate all of
> the item pointers to make sure we don't access memory outside of the
> extent buffer.
> 
> However, since 732fab95abe2 ("btrfs: check-integrity: remove
> CONFIG_BTRFS_FS_CHECK_INTEGRITY option") we no longer call
> btrfs_check_leaf() from btrfs_mark_buffer_dirty(), which means we only
> ever call it on blocks that are being written out, and thus have WRITTEN
> set, or that are being read in, which should have WRITTEN set.
> 
> Add checks to make sure we have WRITTEN set appropriately, and then make
> sure __btrfs_check_leaf() always does the item checking.  This will
> protect us from file systems that have been corrupted and no longer have
> WRITTEN set on some of the blocks.
> 
> Reported-by: lei lu <llfamsec@gmail.com>
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>

Reviewed-by: David Sterba <dsterba@suse.com>
Qu Wenruo May 1, 2024, 9:45 p.m. UTC | #2
在 2024/4/29 22:33, Josef Bacik 写道:
> We previously would call btrfs_check_leaf() if we had the check
> integrity code enabled, which meant that we could only run the extended
> leaf checks if we had WRITTEN set on the header flags.
>
> This leaves a gap in our checking, because we could end up with
> corruption on disk where WRITTEN isn't set on the leaf, and then the
> extended leaf checks don't get run which we rely on to validate all of
> the item pointers to make sure we don't access memory outside of the
> extent buffer.
>
> However, since 732fab95abe2 ("btrfs: check-integrity: remove
> CONFIG_BTRFS_FS_CHECK_INTEGRITY option") we no longer call
> btrfs_check_leaf() from btrfs_mark_buffer_dirty(), which means we only
> ever call it on blocks that are being written out, and thus have WRITTEN
> set, or that are being read in, which should have WRITTEN set.
>
> Add checks to make sure we have WRITTEN set appropriately, and then make
> sure __btrfs_check_leaf() always does the item checking.  This will
> protect us from file systems that have been corrupted and no longer have
> WRITTEN set on some of the blocks.
>
> Reported-by: lei lu <llfamsec@gmail.com>
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>

Reviewed-by: Qu Wenruo <wqu@suse.com>

Is there a real-world bug report for this, or was the problem found
just by reading the code?

Thanks,
Qu
> ---
>   fs/btrfs/tree-checker.c | 30 +++++++++++++++---------------
>   fs/btrfs/tree-checker.h |  1 +
>   2 files changed, 16 insertions(+), 15 deletions(-)
>
> diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
> index a127abbc09c3..5a7e869da230 100644
> --- a/fs/btrfs/tree-checker.c
> +++ b/fs/btrfs/tree-checker.c
> @@ -1797,6 +1797,11 @@ enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
>   		return BTRFS_TREE_BLOCK_INVALID_LEVEL;
>   	}
>
> +	if (unlikely(!btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN))) {
> +		generic_err(leaf, 0, "invalid flag for leaf, WRITTEN not set");
> +		return BTRFS_TREE_BLOCK_WRITTEN_NOT_SET;
> +	}
> +
>   	/*
>   	 * Extent buffers from a relocation tree have a owner field that
>   	 * corresponds to the subvolume tree they are based on. So just from an
> @@ -1858,6 +1863,7 @@ enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
>   	for (slot = 0; slot < nritems; slot++) {
>   		u32 item_end_expected;
>   		u64 item_data_end;
> +		enum btrfs_tree_block_status ret;
>
>   		btrfs_item_key_to_cpu(leaf, &key, slot);
>
> @@ -1913,21 +1919,10 @@ enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
>   			return BTRFS_TREE_BLOCK_INVALID_OFFSETS;
>   		}
>
> -		/*
> -		 * We only want to do this if WRITTEN is set, otherwise the leaf
> -		 * may be in some intermediate state and won't appear valid.
> -		 */
> -		if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN)) {
> -			enum btrfs_tree_block_status ret;
> -
> -			/*
> -			 * Check if the item size and content meet other
> -			 * criteria
> -			 */
> -			ret = check_leaf_item(leaf, &key, slot, &prev_key);
> -			if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
> -				return ret;
> -		}
> +		/* Check if the item size and content meet other criteria */
> +		ret = check_leaf_item(leaf, &key, slot, &prev_key);
> +		if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
> +			return ret;
>
>   		prev_key.objectid = key.objectid;
>   		prev_key.type = key.type;
> @@ -1957,6 +1952,11 @@ enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node)
>   	int level = btrfs_header_level(node);
>   	u64 bytenr;
>
> +	if (unlikely(!btrfs_header_flag(node, BTRFS_HEADER_FLAG_WRITTEN))) {
> +		generic_err(node, 0, "invalid flag for node, WRITTEN not set");
> +		return BTRFS_TREE_BLOCK_WRITTEN_NOT_SET;
> +	}
> +
>   	if (unlikely(level <= 0 || level >= BTRFS_MAX_LEVEL)) {
>   		generic_err(node, 0,
>   			"invalid level for node, have %d expect [1, %d]",
> diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h
> index 5c809b50b2d0..01669cfa6578 100644
> --- a/fs/btrfs/tree-checker.h
> +++ b/fs/btrfs/tree-checker.h
> @@ -53,6 +53,7 @@ enum btrfs_tree_block_status {
>   	BTRFS_TREE_BLOCK_INVALID_BLOCKPTR,
>   	BTRFS_TREE_BLOCK_INVALID_ITEM,
>   	BTRFS_TREE_BLOCK_INVALID_OWNER,
> +	BTRFS_TREE_BLOCK_WRITTEN_NOT_SET,
>   };
>
>   /*
David Sterba May 2, 2024, 12:27 p.m. UTC | #3
On Thu, May 02, 2024 at 07:15:36AM +0930, Qu Wenruo wrote:
> 
> 
> 在 2024/4/29 22:33, Josef Bacik 写道:
> > We previously would call btrfs_check_leaf() if we had the check
> > integrity code enabled, which meant that we could only run the extended
> > leaf checks if we had WRITTEN set on the header flags.
> >
> > This leaves a gap in our checking, because we could end up with
> > corruption on disk where WRITTEN isn't set on the leaf, and then the
> > extended leaf checks don't get run which we rely on to validate all of
> > the item pointers to make sure we don't access memory outside of the
> > extent buffer.
> >
> > However, since 732fab95abe2 ("btrfs: check-integrity: remove
> > CONFIG_BTRFS_FS_CHECK_INTEGRITY option") we no longer call
> > btrfs_check_leaf() from btrfs_mark_buffer_dirty(), which means we only
> > ever call it on blocks that are being written out, and thus have WRITTEN
> > set, or that are being read in, which should have WRITTEN set.
> >
> > Add checks to make sure we have WRITTEN set appropriately, and then make
> > sure __btrfs_check_leaf() always does the item checking.  This will
> > protect us from file systems that have been corrupted and no longer have
> > WRITTEN set on some of the blocks.
> >
> > Reported-by: lei lu <llfamsec@gmail.com>
> > Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> 
> Reviewed-by: Qu Wenruo <wqu@suse.com>
> 
> Is there any real world bug report on this? Or just some code reading
> exposed this problem?

There is a report.
Qu Wenruo May 2, 2024, 9:32 p.m. UTC | #4
在 2024/5/2 21:57, David Sterba 写道:
> On Thu, May 02, 2024 at 07:15:36AM +0930, Qu Wenruo wrote:
>>
>>
>> 在 2024/4/29 22:33, Josef Bacik 写道:
>>> We previously would call btrfs_check_leaf() if we had the check
>>> integrity code enabled, which meant that we could only run the extended
>>> leaf checks if we had WRITTEN set on the header flags.
>>>
>>> This leaves a gap in our checking, because we could end up with
>>> corruption on disk where WRITTEN isn't set on the leaf, and then the
>>> extended leaf checks don't get run which we rely on to validate all of
>>> the item pointers to make sure we don't access memory outside of the
>>> extent buffer.
>>>
>>> However, since 732fab95abe2 ("btrfs: check-integrity: remove
>>> CONFIG_BTRFS_FS_CHECK_INTEGRITY option") we no longer call
>>> btrfs_check_leaf() from btrfs_mark_buffer_dirty(), which means we only
>>> ever call it on blocks that are being written out, and thus have WRITTEN
>>> set, or that are being read in, which should have WRITTEN set.
>>>
>>> Add checks to make sure we have WRITTEN set appropriately, and then make
>>> sure __btrfs_check_leaf() always does the item checking.  This will
>>> protect us from file systems that have been corrupted and no longer have
>>> WRITTEN set on some of the blocks.
>>>
>>> Reported-by: lei lu <llfamsec@gmail.com>
>>> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
>>
>> Reviewed-by: Qu Wenruo <wqu@suse.com>
>>
>> Is there any real world bug report on this? Or just some code reading
>> exposed this problem?
>
> There is a report.
>
Where?

I searched the btrfs mailing list for this name, but got no hits at all.

Thanks,
Qu
diff mbox series

Patch

diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index a127abbc09c3..5a7e869da230 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1797,6 +1797,11 @@  enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
 		return BTRFS_TREE_BLOCK_INVALID_LEVEL;
 	}
 
+	if (unlikely(!btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN))) {
+		generic_err(leaf, 0, "invalid flag for leaf, WRITTEN not set");
+		return BTRFS_TREE_BLOCK_WRITTEN_NOT_SET;
+	}
+
 	/*
 	 * Extent buffers from a relocation tree have a owner field that
 	 * corresponds to the subvolume tree they are based on. So just from an
@@ -1858,6 +1863,7 @@  enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
 	for (slot = 0; slot < nritems; slot++) {
 		u32 item_end_expected;
 		u64 item_data_end;
+		enum btrfs_tree_block_status ret;
 
 		btrfs_item_key_to_cpu(leaf, &key, slot);
 
@@ -1913,21 +1919,10 @@  enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
 			return BTRFS_TREE_BLOCK_INVALID_OFFSETS;
 		}
 
-		/*
-		 * We only want to do this if WRITTEN is set, otherwise the leaf
-		 * may be in some intermediate state and won't appear valid.
-		 */
-		if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN)) {
-			enum btrfs_tree_block_status ret;
-
-			/*
-			 * Check if the item size and content meet other
-			 * criteria
-			 */
-			ret = check_leaf_item(leaf, &key, slot, &prev_key);
-			if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
-				return ret;
-		}
+		/* Check if the item size and content meet other criteria */
+		ret = check_leaf_item(leaf, &key, slot, &prev_key);
+		if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
+			return ret;
 
 		prev_key.objectid = key.objectid;
 		prev_key.type = key.type;
@@ -1957,6 +1952,11 @@  enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node)
 	int level = btrfs_header_level(node);
 	u64 bytenr;
 
+	if (unlikely(!btrfs_header_flag(node, BTRFS_HEADER_FLAG_WRITTEN))) {
+		generic_err(node, 0, "invalid flag for node, WRITTEN not set");
+		return BTRFS_TREE_BLOCK_WRITTEN_NOT_SET;
+	}
+
 	if (unlikely(level <= 0 || level >= BTRFS_MAX_LEVEL)) {
 		generic_err(node, 0,
 			"invalid level for node, have %d expect [1, %d]",
diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h
index 5c809b50b2d0..01669cfa6578 100644
--- a/fs/btrfs/tree-checker.h
+++ b/fs/btrfs/tree-checker.h
@@ -53,6 +53,7 @@  enum btrfs_tree_block_status {
 	BTRFS_TREE_BLOCK_INVALID_BLOCKPTR,
 	BTRFS_TREE_BLOCK_INVALID_ITEM,
 	BTRFS_TREE_BLOCK_INVALID_OWNER,
+	BTRFS_TREE_BLOCK_WRITTEN_NOT_SET,
 };
 
 /*