Message ID | 20190218165826.23549-1-fdmanana@kernel.org (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | [1/2] Btrfs: add missing error handling after doing leaf/node binary search | expand |
On 2019/2/19 上午12:58, fdmanana@kernel.org wrote: > From: Filipe Manana <fdmanana@suse.com> > > When there is a kind of corruption in an extent buffer such that its first > key does not match the key at the respective parent slot, one of two things > happens: Isn't that handled by read_tree_block() already? Thanks, Qu > > 1) When assertions are enabled, we effectively hit a BUG_ON() which > requires rebooting the machine later. This also does not tell any > information about which extent buffer is affected, from which root, > the expected and found keys, etc. > > 2) When assertions are disabled, we just ignore the mismatch and assume > everything is ok, which can potentially lead to all sorts of unexpected > problems later after a tree search (in the worst case, could lead to > further silent corruption). > > So improve this by always checking if the first key of an extent buffer is > what it's supposed to be, when doing a key search at key_search(), and > report and return an appropriate error. The overhead is just comparing one > key, which is minimal and is anyway just done in a special case where we > skip the more expensive binary search (the binary search in the parent > node returned 0, exact key match). > > Signed-off-by: Filipe Manana <fdmanana@suse.com> > --- > fs/btrfs/ctree.c | 38 +++++++++++++++++--------------------- > 1 file changed, 17 insertions(+), 21 deletions(-) > > diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c > index 5b9f602fb9e2..a0bd0278208d 100644 > --- a/fs/btrfs/ctree.c > +++ b/fs/btrfs/ctree.c > @@ -2529,35 +2529,31 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans, > return ret; > } > > -static void key_search_validate(struct extent_buffer *b, > - const struct btrfs_key *key, > - int level) > -{ > -#ifdef CONFIG_BTRFS_ASSERT > - struct btrfs_disk_key disk_key; > - > - btrfs_cpu_key_to_disk(&disk_key, key); > - > - if (level == 0) > - ASSERT(!memcmp_extent_buffer(b, &disk_key, > - offsetof(struct btrfs_leaf, items[0].key), > - sizeof(disk_key))); > - else > - ASSERT(!memcmp_extent_buffer(b, &disk_key, > - offsetof(struct btrfs_node, ptrs[0].key), > - sizeof(disk_key))); > -#endif > -} > - > static int key_search(struct extent_buffer *b, const struct btrfs_key *key, > int level, int *prev_cmp, int *slot) > { > + struct btrfs_key found_key; > + > if (*prev_cmp != 0) { > *prev_cmp = btrfs_bin_search(b, key, level, slot); > return *prev_cmp; > } > > - key_search_validate(b, key, level); > + if (level == 0) > + btrfs_item_key_to_cpu(b, &found_key, 0); > + else > + btrfs_node_key_to_cpu(b, &found_key, 0); > + > + if (btrfs_comp_cpu_keys(&found_key, key) != 0) { > + btrfs_crit(b->fs_info, > +"unexpected first key for extent buffer: bytenr=%llu level=%d root=%llu expected key=(%llu %u %llu) found key=(%llu %u %llu)", > + btrfs_header_bytenr(b), level, btrfs_header_owner(b), > + key->objectid, key->type, key->offset, > + found_key.objectid, found_key.type, > + found_key.offset); > + return -EUCLEAN; > + } > + > *slot = 0; > > return 0; >
On Tue, Feb 19, 2019 at 12:54 AM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote: > > > > On 2019/2/19 上午12:58, fdmanana@kernel.org wrote: > > From: Filipe Manana <fdmanana@suse.com> > > > > When there is a kind of corruption in an extent buffer such that its first > > key does not match the key at the respective parent slot, one of two things > > happens: > > Isn't that handled by read_tree_block() already? It is, but only at the time we read a node/leaf from disk. By doing the check here we can actually catch other types of bugs and memory corruption. To be honest I missed that since this is motivated by a report on older kernel (SLE12 SP3). So I still find it useful to have due to the reason pointed above, however I'm not against simply removing the check from key_search(). > Thanks, > Qu > > > > > 1) When assertions are enabled, we effectively hit a BUG_ON() which > > requires rebooting the machine later. This also does not tell any > > information about which extent buffer is affected, from which root, > > the expected and found keys, etc. > > > > 2) When assertions are disabled, we just ignore the mismatch and assume > > everything is ok, which can potentially lead to all sorts of unexpected > > problems later after a tree search (in the worst case, could lead to > > further silent corruption). > > > > So improve this by always checking if the first key of an extent buffer is > > what it's supposed to be, when doing a key search at key_search(), and > > report and return an appropriate error. The overhead is just comparing one > > key, which is minimal and is anyway just done in a special case where we > > skip the more expensive binary search (the binary search in the parent > > node returned 0, exact key match). > > > > Signed-off-by: Filipe Manana <fdmanana@suse.com> > > --- > > fs/btrfs/ctree.c | 38 +++++++++++++++++--------------------- > > 1 file changed, 17 insertions(+), 21 deletions(-) > > > > diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c > > index 5b9f602fb9e2..a0bd0278208d 100644 > > --- a/fs/btrfs/ctree.c > > +++ b/fs/btrfs/ctree.c > > @@ -2529,35 +2529,31 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans, > > return ret; > > } > > > > -static void key_search_validate(struct extent_buffer *b, > > - const struct btrfs_key *key, > > - int level) > > -{ > > -#ifdef CONFIG_BTRFS_ASSERT > > - struct btrfs_disk_key disk_key; > > - > > - btrfs_cpu_key_to_disk(&disk_key, key); > > - > > - if (level == 0) > > - ASSERT(!memcmp_extent_buffer(b, &disk_key, > > - offsetof(struct btrfs_leaf, items[0].key), > > - sizeof(disk_key))); > > - else > > - ASSERT(!memcmp_extent_buffer(b, &disk_key, > > - offsetof(struct btrfs_node, ptrs[0].key), > > - sizeof(disk_key))); > > -#endif > > -} > > - > > static int key_search(struct extent_buffer *b, const struct btrfs_key *key, > > int level, int *prev_cmp, int *slot) > > { > > + struct btrfs_key found_key; > > + > > if (*prev_cmp != 0) { > > *prev_cmp = btrfs_bin_search(b, key, level, slot); > > return *prev_cmp; > > } > > > > - key_search_validate(b, key, level); > > + if (level == 0) > > + btrfs_item_key_to_cpu(b, &found_key, 0); > > + else > > + btrfs_node_key_to_cpu(b, &found_key, 0); > > + > > + if (btrfs_comp_cpu_keys(&found_key, key) != 0) { > > + btrfs_crit(b->fs_info, > > +"unexpected first key for extent buffer: bytenr=%llu level=%d root=%llu expected key=(%llu %u %llu) found key=(%llu %u %llu)", > > + btrfs_header_bytenr(b), level, btrfs_header_owner(b), > > + key->objectid, key->type, key->offset, > > + found_key.objectid, found_key.type, > > + found_key.offset); > > + return -EUCLEAN; > > + } > > + > > *slot = 0; > > > > return 0; > > >
On 2019/2/19 下午7:59, Filipe Manana wrote: > On Tue, Feb 19, 2019 at 12:54 AM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote: >> >> >> >> On 2019/2/19 上午12:58, fdmanana@kernel.org wrote: >>> From: Filipe Manana <fdmanana@suse.com> >>> >>> When there is a kind of corruption in an extent buffer such that its first >>> key does not match the key at the respective parent slot, one of two things >>> happens: >> >> Isn't that handled by read_tree_block() already? > > It is, but only at the time we read a node/leaf from disk. > By doing the check here we can actually catch other types of bugs and > memory corruption. Although when memory corruption happens it's more concerning than mismatch keys. > > To be honest I missed that since this is motivated by a report on > older kernel (SLE12 SP3). > So I still find it useful to have due to the reason pointed above, > however I'm not against simply removing the check from key_search(). Removing the check looks good to me. Especially since we're going to have mandatory write time tree checker, it should be mostly fine. Thanks, Qu > >> Thanks, >> Qu >> >>> >>> 1) When assertions are enabled, we effectively hit a BUG_ON() which >>> requires rebooting the machine later. This also does not tell any >>> information about which extent buffer is affected, from which root, >>> the expected and found keys, etc. >>> >>> 2) When assertions are disabled, we just ignore the mismatch and assume >>> everything is ok, which can potentially lead to all sorts of unexpected >>> problems later after a tree search (in the worst case, could lead to >>> further silent corruption). >>> >>> So improve this by always checking if the first key of an extent buffer is >>> what it's supposed to be, when doing a key search at key_search(), and >>> report and return an appropriate error. The overhead is just comparing one >>> key, which is minimal and is anyway just done in a special case where we >>> skip the more expensive binary search (the binary search in the parent >>> node returned 0, exact key match). >>> >>> Signed-off-by: Filipe Manana <fdmanana@suse.com> >>> --- >>> fs/btrfs/ctree.c | 38 +++++++++++++++++--------------------- >>> 1 file changed, 17 insertions(+), 21 deletions(-) >>> >>> diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c >>> index 5b9f602fb9e2..a0bd0278208d 100644 >>> --- a/fs/btrfs/ctree.c >>> +++ b/fs/btrfs/ctree.c >>> @@ -2529,35 +2529,31 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans, >>> return ret; >>> } >>> >>> -static void key_search_validate(struct extent_buffer *b, >>> - const struct btrfs_key *key, >>> - int level) >>> -{ >>> -#ifdef CONFIG_BTRFS_ASSERT >>> - struct btrfs_disk_key disk_key; >>> - >>> - btrfs_cpu_key_to_disk(&disk_key, key); >>> - >>> - if (level == 0) >>> - ASSERT(!memcmp_extent_buffer(b, &disk_key, >>> - offsetof(struct btrfs_leaf, items[0].key), >>> - sizeof(disk_key))); >>> - else >>> - ASSERT(!memcmp_extent_buffer(b, &disk_key, >>> - offsetof(struct btrfs_node, ptrs[0].key), >>> - sizeof(disk_key))); >>> -#endif >>> -} >>> - >>> static int key_search(struct extent_buffer *b, const struct btrfs_key *key, >>> int level, int *prev_cmp, int *slot) >>> { >>> + struct btrfs_key found_key; >>> + >>> if (*prev_cmp != 0) { >>> *prev_cmp = btrfs_bin_search(b, key, level, slot); >>> return *prev_cmp; >>> } >>> >>> - key_search_validate(b, key, level); >>> + if (level == 0) >>> + btrfs_item_key_to_cpu(b, &found_key, 0); >>> + else >>> + btrfs_node_key_to_cpu(b, &found_key, 0); >>> + >>> + if (btrfs_comp_cpu_keys(&found_key, key) != 0) { >>> + btrfs_crit(b->fs_info, >>> +"unexpected first key for extent buffer: bytenr=%llu level=%d root=%llu expected key=(%llu %u %llu) found key=(%llu %u %llu)", >>> + btrfs_header_bytenr(b), level, btrfs_header_owner(b), >>> + key->objectid, key->type, key->offset, >>> + found_key.objectid, found_key.type, >>> + found_key.offset); >>> + return -EUCLEAN; >>> + } >>> + >>> *slot = 0; >>> >>> return 0; >>> >>
On Tue, Feb 19, 2019 at 12:26 PM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote: > > > > On 2019/2/19 下午7:59, Filipe Manana wrote: > > On Tue, Feb 19, 2019 at 12:54 AM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote: > >> > >> > >> > >> On 2019/2/19 上午12:58, fdmanana@kernel.org wrote: > >>> From: Filipe Manana <fdmanana@suse.com> > >>> > >>> When there is a kind of corruption in an extent buffer such that its first > >>> key does not match the key at the respective parent slot, one of two things > >>> happens: > >> > >> Isn't that handled by read_tree_block() already? > > > > It is, but only at the time we read a node/leaf from disk. > > By doing the check here we can actually catch other types of bugs and > > memory corruption. > > Although when memory corruption happens it's more concerning than > mismatch keys. > > > > > To be honest I missed that since this is motivated by a report on > > older kernel (SLE12 SP3). > > So I still find it useful to have due to the reason pointed above, > > however I'm not against simply removing the check from key_search(). > > Removing the check looks good to me. > Especially since we're going to have mandatory write time tree checker, > it should be mostly fine. Looks reasonable. Sent as https://patchwork.kernel.org/patch/10821851/ and replaces the patch from this thread. > > Thanks, > Qu > > > > >> Thanks, > >> Qu > >> > >>> > >>> 1) When assertions are enabled, we effectively hit a BUG_ON() which > >>> requires rebooting the machine later. This also does not tell any > >>> information about which extent buffer is affected, from which root, > >>> the expected and found keys, etc. > >>> > >>> 2) When assertions are disabled, we just ignore the mismatch and assume > >>> everything is ok, which can potentially lead to all sorts of unexpected > >>> problems later after a tree search (in the worst case, could lead to > >>> further silent corruption). > >>> > >>> So improve this by always checking if the first key of an extent buffer is > >>> what it's supposed to be, when doing a key search at key_search(), and > >>> report and return an appropriate error. The overhead is just comparing one > >>> key, which is minimal and is anyway just done in a special case where we > >>> skip the more expensive binary search (the binary search in the parent > >>> node returned 0, exact key match). > >>> > >>> Signed-off-by: Filipe Manana <fdmanana@suse.com> > >>> --- > >>> fs/btrfs/ctree.c | 38 +++++++++++++++++--------------------- > >>> 1 file changed, 17 insertions(+), 21 deletions(-) > >>> > >>> diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c > >>> index 5b9f602fb9e2..a0bd0278208d 100644 > >>> --- a/fs/btrfs/ctree.c > >>> +++ b/fs/btrfs/ctree.c > >>> @@ -2529,35 +2529,31 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans, > >>> return ret; > >>> } > >>> > >>> -static void key_search_validate(struct extent_buffer *b, > >>> - const struct btrfs_key *key, > >>> - int level) > >>> -{ > >>> -#ifdef CONFIG_BTRFS_ASSERT > >>> - struct btrfs_disk_key disk_key; > >>> - > >>> - btrfs_cpu_key_to_disk(&disk_key, key); > >>> - > >>> - if (level == 0) > >>> - ASSERT(!memcmp_extent_buffer(b, &disk_key, > >>> - offsetof(struct btrfs_leaf, items[0].key), > >>> - sizeof(disk_key))); > >>> - else > >>> - ASSERT(!memcmp_extent_buffer(b, &disk_key, > >>> - offsetof(struct btrfs_node, ptrs[0].key), > >>> - sizeof(disk_key))); > >>> -#endif > >>> -} > >>> - > >>> static int key_search(struct extent_buffer *b, const struct btrfs_key *key, > >>> int level, int *prev_cmp, int *slot) > >>> { > >>> + struct btrfs_key found_key; > >>> + > >>> if (*prev_cmp != 0) { > >>> *prev_cmp = btrfs_bin_search(b, key, level, slot); > >>> return *prev_cmp; > >>> } > >>> > >>> - key_search_validate(b, key, level); > >>> + if (level == 0) > >>> + btrfs_item_key_to_cpu(b, &found_key, 0); > >>> + else > >>> + btrfs_node_key_to_cpu(b, &found_key, 0); > >>> + > >>> + if (btrfs_comp_cpu_keys(&found_key, key) != 0) { > >>> + btrfs_crit(b->fs_info, > >>> +"unexpected first key for extent buffer: bytenr=%llu level=%d root=%llu expected key=(%llu %u %llu) found key=(%llu %u %llu)", > >>> + btrfs_header_bytenr(b), level, btrfs_header_owner(b), > >>> + key->objectid, key->type, key->offset, > >>> + found_key.objectid, found_key.type, > >>> + found_key.offset); > >>> + return -EUCLEAN; > >>> + } > >>> + > >>> *slot = 0; > >>> > >>> return 0; > >>> > >> >
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5b9f602fb9e2..a0bd0278208d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2529,35 +2529,31 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans, return ret; } -static void key_search_validate(struct extent_buffer *b, - const struct btrfs_key *key, - int level) -{ -#ifdef CONFIG_BTRFS_ASSERT - struct btrfs_disk_key disk_key; - - btrfs_cpu_key_to_disk(&disk_key, key); - - if (level == 0) - ASSERT(!memcmp_extent_buffer(b, &disk_key, - offsetof(struct btrfs_leaf, items[0].key), - sizeof(disk_key))); - else - ASSERT(!memcmp_extent_buffer(b, &disk_key, - offsetof(struct btrfs_node, ptrs[0].key), - sizeof(disk_key))); -#endif -} - static int key_search(struct extent_buffer *b, const struct btrfs_key *key, int level, int *prev_cmp, int *slot) { + struct btrfs_key found_key; + if (*prev_cmp != 0) { *prev_cmp = btrfs_bin_search(b, key, level, slot); return *prev_cmp; } - key_search_validate(b, key, level); + if (level == 0) + btrfs_item_key_to_cpu(b, &found_key, 0); + else + btrfs_node_key_to_cpu(b, &found_key, 0); + + if (btrfs_comp_cpu_keys(&found_key, key) != 0) { + btrfs_crit(b->fs_info, +"unexpected first key for extent buffer: bytenr=%llu level=%d root=%llu expected key=(%llu %u %llu) found key=(%llu %u %llu)", + btrfs_header_bytenr(b), level, btrfs_header_owner(b), + key->objectid, key->type, key->offset, + found_key.objectid, found_key.type, + found_key.offset); + return -EUCLEAN; + } + *slot = 0; return 0;