diff mbox series

[2/2] Btrfs: report and handle error on unexpected first key on extent buffer

Message ID 20190218165826.23549-1-fdmanana@kernel.org (mailing list archive)
State Superseded, archived
Headers show
Series [1/2] Btrfs: add missing error handling after doing leaf/node binary search | expand

Commit Message

Filipe Manana Feb. 18, 2019, 4:58 p.m. UTC
From: Filipe Manana <fdmanana@suse.com>

When there is a kind of corruption in an extent buffer such that its first
key does not match the key at the respective parent slot, one of two things
happens:

1) When assertions are enabled, we effectively hit a BUG_ON() which
   requires rebooting the machine later. This also does not provide any
   information about which extent buffer is affected, which root it
   belongs to, the expected and found keys, etc.

2) When assertions are disabled, we just ignore the mismatch and assume
   everything is ok, which can potentially lead to all sorts of unexpected
   problems later after a tree search (in the worst case, could lead to
   further silent corruption).

So improve this by always checking if the first key of an extent buffer is
what it's supposed to be when doing a key search in key_search(), and
report and return an appropriate error. The overhead is just comparing one
key, which is minimal, and it is only incurred in the special case where we
skip the more expensive binary search (i.e. the binary search in the parent
node returned 0, an exact key match).

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/ctree.c | 38 +++++++++++++++++---------------------
 1 file changed, 17 insertions(+), 21 deletions(-)

Comments

Qu Wenruo Feb. 19, 2019, 12:53 a.m. UTC | #1
On 2019/2/19 上午12:58, fdmanana@kernel.org wrote:
> From: Filipe Manana <fdmanana@suse.com>
> 
> When there is a kind of corruption in an extent buffer such that its first
> key does not match the key at the respective parent slot, one of two things
> happens:

Isn't that handled by read_tree_block() already?

Thanks,
Qu

> 
> 1) When assertions are enabled, we effectively hit a BUG_ON() which
>    requires rebooting the machine later. This also does not tell any
>    information about which extent buffer is affected, from which root,
>    the expected and found keys, etc.
> 
> 2) When assertions are disabled, we just ignore the mismatch and assume
>    everything is ok, which can potentially lead to all sorts of unexpected
>    problems later after a tree search (in the worst case, could lead to
>    further silent corruption).
> 
> So improve this by always checking if the first key of an extent buffer is
> what it's supposed to be, when doing a key search at key_search(), and
> report and return an appropriate error. The overhead is just comparing one
> key, which is minimal and is anyway just done in a special case where we
> skip the more expensive binary search (the binary search in the parent
> node returned 0, exact key match).
> 
> Signed-off-by: Filipe Manana <fdmanana@suse.com>
> ---
>  fs/btrfs/ctree.c | 38 +++++++++++++++++---------------------
>  1 file changed, 17 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
> index 5b9f602fb9e2..a0bd0278208d 100644
> --- a/fs/btrfs/ctree.c
> +++ b/fs/btrfs/ctree.c
> @@ -2529,35 +2529,31 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
>  	return ret;
>  }
>  
> -static void key_search_validate(struct extent_buffer *b,
> -				const struct btrfs_key *key,
> -				int level)
> -{
> -#ifdef CONFIG_BTRFS_ASSERT
> -	struct btrfs_disk_key disk_key;
> -
> -	btrfs_cpu_key_to_disk(&disk_key, key);
> -
> -	if (level == 0)
> -		ASSERT(!memcmp_extent_buffer(b, &disk_key,
> -		    offsetof(struct btrfs_leaf, items[0].key),
> -		    sizeof(disk_key)));
> -	else
> -		ASSERT(!memcmp_extent_buffer(b, &disk_key,
> -		    offsetof(struct btrfs_node, ptrs[0].key),
> -		    sizeof(disk_key)));
> -#endif
> -}
> -
>  static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
>  		      int level, int *prev_cmp, int *slot)
>  {
> +	struct btrfs_key found_key;
> +
>  	if (*prev_cmp != 0) {
>  		*prev_cmp = btrfs_bin_search(b, key, level, slot);
>  		return *prev_cmp;
>  	}
>  
> -	key_search_validate(b, key, level);
> +	if (level == 0)
> +		btrfs_item_key_to_cpu(b, &found_key, 0);
> +	else
> +		btrfs_node_key_to_cpu(b, &found_key, 0);
> +
> +	if (btrfs_comp_cpu_keys(&found_key, key) != 0) {
> +		btrfs_crit(b->fs_info,
> +"unexpected first key for extent buffer: bytenr=%llu level=%d root=%llu expected key=(%llu %u %llu) found key=(%llu %u %llu)",
> +			   btrfs_header_bytenr(b), level, btrfs_header_owner(b),
> +			   key->objectid, key->type, key->offset,
> +			   found_key.objectid, found_key.type,
> +			   found_key.offset);
> +		return -EUCLEAN;
> +	}
> +
>  	*slot = 0;
>  
>  	return 0;
>
Filipe Manana Feb. 19, 2019, 11:59 a.m. UTC | #2
On Tue, Feb 19, 2019 at 12:54 AM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote:
>
>
>
> On 2019/2/19 上午12:58, fdmanana@kernel.org wrote:
> > From: Filipe Manana <fdmanana@suse.com>
> >
> > When there is a kind of corruption in an extent buffer such that its first
> > key does not match the key at the respective parent slot, one of two things
> > happens:
>
> Isn't that handled by read_tree_block() already?

It is, but only at the time we read a node/leaf from disk.
By doing the check here we can actually catch other types of bugs and
memory corruption.

To be honest, I missed that, since this was motivated by a report on an
older kernel (SLE12 SP3).
I still find the check useful to have for the reason pointed out above;
however, I'm not against simply removing it from key_search().

> Thanks,
> Qu
>
> >
> > 1) When assertions are enabled, we effectively hit a BUG_ON() which
> >    requires rebooting the machine later. This also does not tell any
> >    information about which extent buffer is affected, from which root,
> >    the expected and found keys, etc.
> >
> > 2) When assertions are disabled, we just ignore the mismatch and assume
> >    everything is ok, which can potentially lead to all sorts of unexpected
> >    problems later after a tree search (in the worst case, could lead to
> >    further silent corruption).
> >
> > So improve this by always checking if the first key of an extent buffer is
> > what it's supposed to be, when doing a key search at key_search(), and
> > report and return an appropriate error. The overhead is just comparing one
> > key, which is minimal and is anyway just done in a special case where we
> > skip the more expensive binary search (the binary search in the parent
> > node returned 0, exact key match).
> >
> > Signed-off-by: Filipe Manana <fdmanana@suse.com>
> > ---
> >  fs/btrfs/ctree.c | 38 +++++++++++++++++---------------------
> >  1 file changed, 17 insertions(+), 21 deletions(-)
> >
> > diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
> > index 5b9f602fb9e2..a0bd0278208d 100644
> > --- a/fs/btrfs/ctree.c
> > +++ b/fs/btrfs/ctree.c
> > @@ -2529,35 +2529,31 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
> >       return ret;
> >  }
> >
> > -static void key_search_validate(struct extent_buffer *b,
> > -                             const struct btrfs_key *key,
> > -                             int level)
> > -{
> > -#ifdef CONFIG_BTRFS_ASSERT
> > -     struct btrfs_disk_key disk_key;
> > -
> > -     btrfs_cpu_key_to_disk(&disk_key, key);
> > -
> > -     if (level == 0)
> > -             ASSERT(!memcmp_extent_buffer(b, &disk_key,
> > -                 offsetof(struct btrfs_leaf, items[0].key),
> > -                 sizeof(disk_key)));
> > -     else
> > -             ASSERT(!memcmp_extent_buffer(b, &disk_key,
> > -                 offsetof(struct btrfs_node, ptrs[0].key),
> > -                 sizeof(disk_key)));
> > -#endif
> > -}
> > -
> >  static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
> >                     int level, int *prev_cmp, int *slot)
> >  {
> > +     struct btrfs_key found_key;
> > +
> >       if (*prev_cmp != 0) {
> >               *prev_cmp = btrfs_bin_search(b, key, level, slot);
> >               return *prev_cmp;
> >       }
> >
> > -     key_search_validate(b, key, level);
> > +     if (level == 0)
> > +             btrfs_item_key_to_cpu(b, &found_key, 0);
> > +     else
> > +             btrfs_node_key_to_cpu(b, &found_key, 0);
> > +
> > +     if (btrfs_comp_cpu_keys(&found_key, key) != 0) {
> > +             btrfs_crit(b->fs_info,
> > +"unexpected first key for extent buffer: bytenr=%llu level=%d root=%llu expected key=(%llu %u %llu) found key=(%llu %u %llu)",
> > +                        btrfs_header_bytenr(b), level, btrfs_header_owner(b),
> > +                        key->objectid, key->type, key->offset,
> > +                        found_key.objectid, found_key.type,
> > +                        found_key.offset);
> > +             return -EUCLEAN;
> > +     }
> > +
> >       *slot = 0;
> >
> >       return 0;
> >
>
Qu Wenruo Feb. 19, 2019, 12:24 p.m. UTC | #3
On 2019/2/19 下午7:59, Filipe Manana wrote:
> On Tue, Feb 19, 2019 at 12:54 AM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote:
>>
>>
>>
>> On 2019/2/19 上午12:58, fdmanana@kernel.org wrote:
>>> From: Filipe Manana <fdmanana@suse.com>
>>>
>>> When there is a kind of corruption in an extent buffer such that its first
>>> key does not match the key at the respective parent slot, one of two things
>>> happens:
>>
>> Isn't that handled by read_tree_block() already?
> 
> It is, but only at the time we read a node/leaf from disk.
> By doing the check here we can actually catch other types of bugs and
> memory corruption.

Although when memory corruption happens, that is more concerning than
mismatched keys.

> 
> To be honest I missed that since this is motivated by a report on
> older kernel (SLE12 SP3).
> So I still find it useful to have due to the reason pointed above,
> however I'm not against simply removing the check from key_search().

Removing the check looks good to me.
Especially since we're going to have a mandatory write-time tree checker,
it should be mostly fine.

Thanks,
Qu

> 
>> Thanks,
>> Qu
>>
>>>
>>> 1) When assertions are enabled, we effectively hit a BUG_ON() which
>>>    requires rebooting the machine later. This also does not tell any
>>>    information about which extent buffer is affected, from which root,
>>>    the expected and found keys, etc.
>>>
>>> 2) When assertions are disabled, we just ignore the mismatch and assume
>>>    everything is ok, which can potentially lead to all sorts of unexpected
>>>    problems later after a tree search (in the worst case, could lead to
>>>    further silent corruption).
>>>
>>> So improve this by always checking if the first key of an extent buffer is
>>> what it's supposed to be, when doing a key search at key_search(), and
>>> report and return an appropriate error. The overhead is just comparing one
>>> key, which is minimal and is anyway just done in a special case where we
>>> skip the more expensive binary search (the binary search in the parent
>>> node returned 0, exact key match).
>>>
>>> Signed-off-by: Filipe Manana <fdmanana@suse.com>
>>> ---
>>>  fs/btrfs/ctree.c | 38 +++++++++++++++++---------------------
>>>  1 file changed, 17 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
>>> index 5b9f602fb9e2..a0bd0278208d 100644
>>> --- a/fs/btrfs/ctree.c
>>> +++ b/fs/btrfs/ctree.c
>>> @@ -2529,35 +2529,31 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
>>>       return ret;
>>>  }
>>>
>>> -static void key_search_validate(struct extent_buffer *b,
>>> -                             const struct btrfs_key *key,
>>> -                             int level)
>>> -{
>>> -#ifdef CONFIG_BTRFS_ASSERT
>>> -     struct btrfs_disk_key disk_key;
>>> -
>>> -     btrfs_cpu_key_to_disk(&disk_key, key);
>>> -
>>> -     if (level == 0)
>>> -             ASSERT(!memcmp_extent_buffer(b, &disk_key,
>>> -                 offsetof(struct btrfs_leaf, items[0].key),
>>> -                 sizeof(disk_key)));
>>> -     else
>>> -             ASSERT(!memcmp_extent_buffer(b, &disk_key,
>>> -                 offsetof(struct btrfs_node, ptrs[0].key),
>>> -                 sizeof(disk_key)));
>>> -#endif
>>> -}
>>> -
>>>  static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
>>>                     int level, int *prev_cmp, int *slot)
>>>  {
>>> +     struct btrfs_key found_key;
>>> +
>>>       if (*prev_cmp != 0) {
>>>               *prev_cmp = btrfs_bin_search(b, key, level, slot);
>>>               return *prev_cmp;
>>>       }
>>>
>>> -     key_search_validate(b, key, level);
>>> +     if (level == 0)
>>> +             btrfs_item_key_to_cpu(b, &found_key, 0);
>>> +     else
>>> +             btrfs_node_key_to_cpu(b, &found_key, 0);
>>> +
>>> +     if (btrfs_comp_cpu_keys(&found_key, key) != 0) {
>>> +             btrfs_crit(b->fs_info,
>>> +"unexpected first key for extent buffer: bytenr=%llu level=%d root=%llu expected key=(%llu %u %llu) found key=(%llu %u %llu)",
>>> +                        btrfs_header_bytenr(b), level, btrfs_header_owner(b),
>>> +                        key->objectid, key->type, key->offset,
>>> +                        found_key.objectid, found_key.type,
>>> +                        found_key.offset);
>>> +             return -EUCLEAN;
>>> +     }
>>> +
>>>       *slot = 0;
>>>
>>>       return 0;
>>>
>>
Filipe Manana Feb. 20, 2019, 11:12 a.m. UTC | #4
On Tue, Feb 19, 2019 at 12:26 PM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote:
>
>
>
> On 2019/2/19 下午7:59, Filipe Manana wrote:
> > On Tue, Feb 19, 2019 at 12:54 AM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote:
> >>
> >>
> >>
> >> On 2019/2/19 上午12:58, fdmanana@kernel.org wrote:
> >>> From: Filipe Manana <fdmanana@suse.com>
> >>>
> >>> When there is a kind of corruption in an extent buffer such that its first
> >>> key does not match the key at the respective parent slot, one of two things
> >>> happens:
> >>
> >> Isn't that handled by read_tree_block() already?
> >
> > It is, but only at the time we read a node/leaf from disk.
> > By doing the check here we can actually catch other types of bugs and
> > memory corruption.
>
> Although when memory corruption happens it's more concerning than
> mismatch keys.
>
> >
> > To be honest I missed that since this is motivated by a report on
> > older kernel (SLE12 SP3).
> > So I still find it useful to have due to the reason pointed above,
> > however I'm not against simply removing the check from key_search().
>
> Removing the check looks good to me.
> Especially since we're going to have mandatory write time tree checker,
> it should be mostly fine.

Looks reasonable.
Sent as https://patchwork.kernel.org/patch/10821851/ and replaces the
patch from this thread.

>
> Thanks,
> Qu
>
> >
> >> Thanks,
> >> Qu
> >>
> >>>
> >>> 1) When assertions are enabled, we effectively hit a BUG_ON() which
> >>>    requires rebooting the machine later. This also does not tell any
> >>>    information about which extent buffer is affected, from which root,
> >>>    the expected and found keys, etc.
> >>>
> >>> 2) When assertions are disabled, we just ignore the mismatch and assume
> >>>    everything is ok, which can potentially lead to all sorts of unexpected
> >>>    problems later after a tree search (in the worst case, could lead to
> >>>    further silent corruption).
> >>>
> >>> So improve this by always checking if the first key of an extent buffer is
> >>> what it's supposed to be, when doing a key search at key_search(), and
> >>> report and return an appropriate error. The overhead is just comparing one
> >>> key, which is minimal and is anyway just done in a special case where we
> >>> skip the more expensive binary search (the binary search in the parent
> >>> node returned 0, exact key match).
> >>>
> >>> Signed-off-by: Filipe Manana <fdmanana@suse.com>
> >>> ---
> >>>  fs/btrfs/ctree.c | 38 +++++++++++++++++---------------------
> >>>  1 file changed, 17 insertions(+), 21 deletions(-)
> >>>
> >>> diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
> >>> index 5b9f602fb9e2..a0bd0278208d 100644
> >>> --- a/fs/btrfs/ctree.c
> >>> +++ b/fs/btrfs/ctree.c
> >>> @@ -2529,35 +2529,31 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
> >>>       return ret;
> >>>  }
> >>>
> >>> -static void key_search_validate(struct extent_buffer *b,
> >>> -                             const struct btrfs_key *key,
> >>> -                             int level)
> >>> -{
> >>> -#ifdef CONFIG_BTRFS_ASSERT
> >>> -     struct btrfs_disk_key disk_key;
> >>> -
> >>> -     btrfs_cpu_key_to_disk(&disk_key, key);
> >>> -
> >>> -     if (level == 0)
> >>> -             ASSERT(!memcmp_extent_buffer(b, &disk_key,
> >>> -                 offsetof(struct btrfs_leaf, items[0].key),
> >>> -                 sizeof(disk_key)));
> >>> -     else
> >>> -             ASSERT(!memcmp_extent_buffer(b, &disk_key,
> >>> -                 offsetof(struct btrfs_node, ptrs[0].key),
> >>> -                 sizeof(disk_key)));
> >>> -#endif
> >>> -}
> >>> -
> >>>  static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
> >>>                     int level, int *prev_cmp, int *slot)
> >>>  {
> >>> +     struct btrfs_key found_key;
> >>> +
> >>>       if (*prev_cmp != 0) {
> >>>               *prev_cmp = btrfs_bin_search(b, key, level, slot);
> >>>               return *prev_cmp;
> >>>       }
> >>>
> >>> -     key_search_validate(b, key, level);
> >>> +     if (level == 0)
> >>> +             btrfs_item_key_to_cpu(b, &found_key, 0);
> >>> +     else
> >>> +             btrfs_node_key_to_cpu(b, &found_key, 0);
> >>> +
> >>> +     if (btrfs_comp_cpu_keys(&found_key, key) != 0) {
> >>> +             btrfs_crit(b->fs_info,
> >>> +"unexpected first key for extent buffer: bytenr=%llu level=%d root=%llu expected key=(%llu %u %llu) found key=(%llu %u %llu)",
> >>> +                        btrfs_header_bytenr(b), level, btrfs_header_owner(b),
> >>> +                        key->objectid, key->type, key->offset,
> >>> +                        found_key.objectid, found_key.type,
> >>> +                        found_key.offset);
> >>> +             return -EUCLEAN;
> >>> +     }
> >>> +
> >>>       *slot = 0;
> >>>
> >>>       return 0;
> >>>
> >>
>
diff mbox series

Patch

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 5b9f602fb9e2..a0bd0278208d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2529,35 +2529,31 @@  setup_nodes_for_search(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-static void key_search_validate(struct extent_buffer *b,
-				const struct btrfs_key *key,
-				int level)
-{
-#ifdef CONFIG_BTRFS_ASSERT
-	struct btrfs_disk_key disk_key;
-
-	btrfs_cpu_key_to_disk(&disk_key, key);
-
-	if (level == 0)
-		ASSERT(!memcmp_extent_buffer(b, &disk_key,
-		    offsetof(struct btrfs_leaf, items[0].key),
-		    sizeof(disk_key)));
-	else
-		ASSERT(!memcmp_extent_buffer(b, &disk_key,
-		    offsetof(struct btrfs_node, ptrs[0].key),
-		    sizeof(disk_key)));
-#endif
-}
-
 static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
 		      int level, int *prev_cmp, int *slot)
 {
+	struct btrfs_key found_key;
+
 	if (*prev_cmp != 0) {
 		*prev_cmp = btrfs_bin_search(b, key, level, slot);
 		return *prev_cmp;
 	}
 
-	key_search_validate(b, key, level);
+	if (level == 0)
+		btrfs_item_key_to_cpu(b, &found_key, 0);
+	else
+		btrfs_node_key_to_cpu(b, &found_key, 0);
+
+	if (btrfs_comp_cpu_keys(&found_key, key) != 0) {
+		btrfs_crit(b->fs_info,
+"unexpected first key for extent buffer: bytenr=%llu level=%d root=%llu expected key=(%llu %u %llu) found key=(%llu %u %llu)",
+			   btrfs_header_bytenr(b), level, btrfs_header_owner(b),
+			   key->objectid, key->type, key->offset,
+			   found_key.objectid, found_key.type,
+			   found_key.offset);
+		return -EUCLEAN;
+	}
+
 	*slot = 0;
 
 	return 0;