Message ID | 20180801023721.32143-5-wqu@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: Enhanced validation check for fuzzed images | expand |
On 08/01/2018 10:37 AM, Qu Wenruo wrote: > This patch will introduce chunk <-> dev extent mapping check, to protect > us against invalid dev extents or chunks. > > Since chunk mapping is the fundamental infrastructure of btrfs, extra > check at mount time could prevent a lot of unexpected behavior (BUG_ON). > > Reported-by: Xu Wen <wen.xu@gatech.edu> > Link: https://bugzilla.kernel.org/show_bug.cgi?id=200403 > Link: https://bugzilla.kernel.org/show_bug.cgi?id=200407 > Signed-off-by: Qu Wenruo <wqu@suse.com> LGTM. Reviewed-by: Su Yue <suy.fnst@cn.fujitsu.com> > --- > fs/btrfs/disk-io.c | 7 ++ > fs/btrfs/volumes.c | 183 +++++++++++++++++++++++++++++++++++++++++++++ > fs/btrfs/volumes.h | 2 + > 3 files changed, 192 insertions(+) > > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c > index 205092dc9390..068ca7498e94 100644 > --- a/fs/btrfs/disk-io.c > +++ b/fs/btrfs/disk-io.c > @@ -3075,6 +3075,13 @@ int open_ctree(struct super_block *sb, > fs_info->generation = generation; > fs_info->last_trans_committed = generation; > > + ret = btrfs_verify_dev_extents(fs_info); > + if (ret) { > + btrfs_err(fs_info, > + "failed to verify dev extents against chunks: %d", > + ret); > + goto fail_block_groups; > + } > ret = btrfs_recover_balance(fs_info); > if (ret) { > btrfs_err(fs_info, "failed to recover balance: %d", ret); > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > index e6a8e4aabc66..467a589854fa 100644 > --- a/fs/btrfs/volumes.c > +++ b/fs/btrfs/volumes.c > @@ -6440,6 +6440,7 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, > map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); > map->type = btrfs_chunk_type(leaf, chunk); > map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); > + map->verified_stripes = 0; > for (i = 0; i < num_stripes; i++) { > map->stripes[i].physical = > btrfs_stripe_offset_nr(leaf, chunk, i); > @@ -7295,3 +7296,185 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info) > fs_devices = 
fs_devices->seed; > } > } > + > +static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes) > +{ > + int index = btrfs_bg_flags_to_raid_index(type); > + int ncopies = btrfs_raid_array[index].ncopies; > + int data_stripes; > + > + switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { > + case BTRFS_BLOCK_GROUP_RAID5: > + data_stripes = num_stripes - 1; > + break; > + case BTRFS_BLOCK_GROUP_RAID6: > + data_stripes = num_stripes - 2; > + break; > + default: > + data_stripes = num_stripes / ncopies; > + break; > + } > + return div_u64(chunk_len, data_stripes); > +} > +static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, > + u64 chunk_offset, u64 devid, > + u64 physical_offset, u64 physical_len) > +{ > + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; > + struct extent_map *em; > + struct map_lookup *map; > + u64 stripe_len; > + bool found = false; > + int ret = 0; > + int i; > + > + read_lock(&em_tree->lock); > + em = lookup_extent_mapping(em_tree, chunk_offset, 1); > + read_unlock(&em_tree->lock); > + > + if (!em) { > + ret = -EUCLEAN; > + btrfs_err(fs_info, > + "dev extent (%llu, %llu) doesn't have corresponding chunk", > + devid, physical_offset); > + goto out; > + } > + > + map = em->map_lookup; > + stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes); > + if (physical_len != stripe_len) { > + btrfs_err(fs_info, > +"dev extent (%llu, %llu) length doesn't match with chunk %llu, have %llu expect %llu", > + devid, physical_offset, em->start, physical_len, > + stripe_len); > + ret = -EUCLEAN; > + goto out; > + } > + > + for (i = 0; i < map->num_stripes; i++) { > + if (map->stripes[i].dev->devid == devid && > + map->stripes[i].physical == physical_offset) { > + found = true; > + if (map->verified_stripes >= map->num_stripes) { > + btrfs_err(fs_info, > + "too many dev extent for chunk %llu is detected", > + em->start); > + ret = -EUCLEAN; > + goto out; > + } > + map->verified_stripes++; > + break; > + } > + } > + if 
(!found) { > + ret = -EUCLEAN; > + btrfs_err(fs_info, > + "dev extent (%llu, %llu) has no corresponding chunk", > + devid, physical_offset); > + } > +out: > + free_extent_map(em); > + return ret; > +} > + > +static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info) > +{ > + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; > + struct extent_map *em; > + struct rb_node *node; > + int ret = 0; > + > + read_lock(&em_tree->lock); > + for (node = rb_first(&em_tree->map); node; node = rb_next(node)) { > + em = rb_entry(node, struct extent_map, rb_node); > + if (em->map_lookup->num_stripes != > + em->map_lookup->verified_stripes) { > + btrfs_err(fs_info, > + "chunk %llu has missing dev extent, have %d expect %d", > + em->start, em->map_lookup->verified_stripes, > + em->map_lookup->num_stripes); > + ret = -EUCLEAN; > + goto out; > + } > + } > +out: > + read_unlock(&em_tree->lock); > + return ret; > +} > + > +/* > + * Ensure all dev extents are mapped to correct chunk. > + * Or later chunk allocation/free would cause unexpected behavior. > + * > + * NOTE: This will iterate through the whole device tree, which should be > + * at the same size level of chunk tree. > + * This would increase mount time by a tiny fraction. > + */ > +int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info) > +{ > + struct btrfs_path *path; > + struct btrfs_root *root = fs_info->dev_root; > + struct btrfs_key key; > + int ret = 0; > + > + key.objectid = 1; > + key.type = BTRFS_DEV_EXTENT_KEY; > + key.offset = 0; > + > + path = btrfs_alloc_path(); > + if (!path) > + return -ENOMEM; > + > + path->reada = READA_FORWARD; > + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); > + if (ret < 0) > + goto out; > + > + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { > + ret = btrfs_next_item(root, path); > + if (ret < 0) > + goto out; > + /* No dev extents at all? 
Not good */ > + if (ret > 0) { > + ret = -EUCLEAN; > + goto out; > + } > + } > + while (1) { > + struct extent_buffer *leaf = path->nodes[0]; > + struct btrfs_dev_extent *dext; > + int slot = path->slots[0]; > + u64 chunk_offset; > + u64 physical_offset; > + u64 physical_len; > + u64 devid; > + > + btrfs_item_key_to_cpu(leaf, &key, slot); > + if (key.type != BTRFS_DEV_EXTENT_KEY) > + break; > + devid = key.objectid; > + physical_offset = key.offset; > + > + dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent); > + chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext); > + physical_len = btrfs_dev_extent_length(leaf, dext); > + > + ret = verify_one_dev_extent(fs_info, chunk_offset, devid, > + physical_offset, physical_len); > + if (ret < 0) > + goto out; > + ret = btrfs_next_item(root, path); > + if (ret < 0) > + goto out; > + if (ret > 0) { > + ret = 0; > + break; > + } > + } > + > + /* Ensure all chunks have corresponding dev extents */ > + ret = verify_chunk_dev_extent_mapping(fs_info); > +out: > + btrfs_free_path(path); > + return ret; > +} > diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h > index 6d4f38ad9f5c..4301bf2d0534 100644 > --- a/fs/btrfs/volumes.h > +++ b/fs/btrfs/volumes.h > @@ -345,6 +345,7 @@ struct map_lookup { > u64 stripe_len; > int num_stripes; > int sub_stripes; > + int verified_stripes; /* For mount time dev extent verification */ > struct btrfs_bio_stripe stripes[]; > }; > > @@ -559,5 +560,6 @@ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); > void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); > bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, > struct btrfs_device *failing_dev); > +int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info); > > #endif > -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Aug 1, 2018 at 3:39 AM Qu Wenruo <wqu@suse.com> wrote: > > This patch will introduce chunk <-> dev extent mapping check, to protect > us against invalid dev extents or chunks. > > Since chunk mapping is the fundamental infrastructure of btrfs, extra > check at mount time could prevent a lot of unexpected behavior (BUG_ON). > > Reported-by: Xu Wen <wen.xu@gatech.edu> > Link: https://bugzilla.kernel.org/show_bug.cgi?id=200403 > Link: https://bugzilla.kernel.org/show_bug.cgi?id=200407 > Signed-off-by: Qu Wenruo <wqu@suse.com> Btw, this makes at least one test case from btrfs-progs fail: root 17:12:02 /home/fdmanana/git/hub/btrfs-progs/tests ((v4.19.1))> TEST=021\* ./misc-tests.sh [TEST/misc] 021-image-multi-devices failed: mount /dev/loop2 /home/fdmanana/git/hub/btrfs-progs/tests//mnt test failed for case 021-image-multi-devices dmesg/syslog has: [432229.206699] BTRFS error (device loop0): dev extent physical offset 22020096 devid 1 has no corresponding chunk [432229.207497] BTRFS error (device loop0): failed to find devid 1 [432229.208281] BTRFS error (device loop0): failed to verify dev extents against chunks: -117 [432229.246286] BTRFS error (device loop0): open_ctree failed Thanks. 
> --- > fs/btrfs/disk-io.c | 7 ++ > fs/btrfs/volumes.c | 183 +++++++++++++++++++++++++++++++++++++++++++++ > fs/btrfs/volumes.h | 2 + > 3 files changed, 192 insertions(+) > > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c > index 205092dc9390..068ca7498e94 100644 > --- a/fs/btrfs/disk-io.c > +++ b/fs/btrfs/disk-io.c > @@ -3075,6 +3075,13 @@ int open_ctree(struct super_block *sb, > fs_info->generation = generation; > fs_info->last_trans_committed = generation; > > + ret = btrfs_verify_dev_extents(fs_info); > + if (ret) { > + btrfs_err(fs_info, > + "failed to verify dev extents against chunks: %d", > + ret); > + goto fail_block_groups; > + } > ret = btrfs_recover_balance(fs_info); > if (ret) { > btrfs_err(fs_info, "failed to recover balance: %d", ret); > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > index e6a8e4aabc66..467a589854fa 100644 > --- a/fs/btrfs/volumes.c > +++ b/fs/btrfs/volumes.c > @@ -6440,6 +6440,7 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, > map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); > map->type = btrfs_chunk_type(leaf, chunk); > map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); > + map->verified_stripes = 0; > for (i = 0; i < num_stripes; i++) { > map->stripes[i].physical = > btrfs_stripe_offset_nr(leaf, chunk, i); > @@ -7295,3 +7296,185 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info) > fs_devices = fs_devices->seed; > } > } > + > +static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes) > +{ > + int index = btrfs_bg_flags_to_raid_index(type); > + int ncopies = btrfs_raid_array[index].ncopies; > + int data_stripes; > + > + switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { > + case BTRFS_BLOCK_GROUP_RAID5: > + data_stripes = num_stripes - 1; > + break; > + case BTRFS_BLOCK_GROUP_RAID6: > + data_stripes = num_stripes - 2; > + break; > + default: > + data_stripes = num_stripes / ncopies; > + break; > + } > + return div_u64(chunk_len, data_stripes); > +} 
> +static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, > + u64 chunk_offset, u64 devid, > + u64 physical_offset, u64 physical_len) > +{ > + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; > + struct extent_map *em; > + struct map_lookup *map; > + u64 stripe_len; > + bool found = false; > + int ret = 0; > + int i; > + > + read_lock(&em_tree->lock); > + em = lookup_extent_mapping(em_tree, chunk_offset, 1); > + read_unlock(&em_tree->lock); > + > + if (!em) { > + ret = -EUCLEAN; > + btrfs_err(fs_info, > + "dev extent (%llu, %llu) doesn't have corresponding chunk", > + devid, physical_offset); > + goto out; > + } > + > + map = em->map_lookup; > + stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes); > + if (physical_len != stripe_len) { > + btrfs_err(fs_info, > +"dev extent (%llu, %llu) length doesn't match with chunk %llu, have %llu expect %llu", > + devid, physical_offset, em->start, physical_len, > + stripe_len); > + ret = -EUCLEAN; > + goto out; > + } > + > + for (i = 0; i < map->num_stripes; i++) { > + if (map->stripes[i].dev->devid == devid && > + map->stripes[i].physical == physical_offset) { > + found = true; > + if (map->verified_stripes >= map->num_stripes) { > + btrfs_err(fs_info, > + "too many dev extent for chunk %llu is detected", > + em->start); > + ret = -EUCLEAN; > + goto out; > + } > + map->verified_stripes++; > + break; > + } > + } > + if (!found) { > + ret = -EUCLEAN; > + btrfs_err(fs_info, > + "dev extent (%llu, %llu) has no corresponding chunk", > + devid, physical_offset); > + } > +out: > + free_extent_map(em); > + return ret; > +} > + > +static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info) > +{ > + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; > + struct extent_map *em; > + struct rb_node *node; > + int ret = 0; > + > + read_lock(&em_tree->lock); > + for (node = rb_first(&em_tree->map); node; node = rb_next(node)) { > + em = rb_entry(node, struct extent_map, 
rb_node); > + if (em->map_lookup->num_stripes != > + em->map_lookup->verified_stripes) { > + btrfs_err(fs_info, > + "chunk %llu has missing dev extent, have %d expect %d", > + em->start, em->map_lookup->verified_stripes, > + em->map_lookup->num_stripes); > + ret = -EUCLEAN; > + goto out; > + } > + } > +out: > + read_unlock(&em_tree->lock); > + return ret; > +} > + > +/* > + * Ensure all dev extents are mapped to correct chunk. > + * Or later chunk allocation/free would cause unexpected behavior. > + * > + * NOTE: This will iterate through the whole device tree, which should be > + * at the same size level of chunk tree. > + * This would increase mount time by a tiny fraction. > + */ > +int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info) > +{ > + struct btrfs_path *path; > + struct btrfs_root *root = fs_info->dev_root; > + struct btrfs_key key; > + int ret = 0; > + > + key.objectid = 1; > + key.type = BTRFS_DEV_EXTENT_KEY; > + key.offset = 0; > + > + path = btrfs_alloc_path(); > + if (!path) > + return -ENOMEM; > + > + path->reada = READA_FORWARD; > + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); > + if (ret < 0) > + goto out; > + > + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { > + ret = btrfs_next_item(root, path); > + if (ret < 0) > + goto out; > + /* No dev extents at all? 
Not good */ > + if (ret > 0) { > + ret = -EUCLEAN; > + goto out; > + } > + } > + while (1) { > + struct extent_buffer *leaf = path->nodes[0]; > + struct btrfs_dev_extent *dext; > + int slot = path->slots[0]; > + u64 chunk_offset; > + u64 physical_offset; > + u64 physical_len; > + u64 devid; > + > + btrfs_item_key_to_cpu(leaf, &key, slot); > + if (key.type != BTRFS_DEV_EXTENT_KEY) > + break; > + devid = key.objectid; > + physical_offset = key.offset; > + > + dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent); > + chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext); > + physical_len = btrfs_dev_extent_length(leaf, dext); > + > + ret = verify_one_dev_extent(fs_info, chunk_offset, devid, > + physical_offset, physical_len); > + if (ret < 0) > + goto out; > + ret = btrfs_next_item(root, path); > + if (ret < 0) > + goto out; > + if (ret > 0) { > + ret = 0; > + break; > + } > + } > + > + /* Ensure all chunks have corresponding dev extents */ > + ret = verify_chunk_dev_extent_mapping(fs_info); > +out: > + btrfs_free_path(path); > + return ret; > +} > diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h > index 6d4f38ad9f5c..4301bf2d0534 100644 > --- a/fs/btrfs/volumes.h > +++ b/fs/btrfs/volumes.h > @@ -345,6 +345,7 @@ struct map_lookup { > u64 stripe_len; > int num_stripes; > int sub_stripes; > + int verified_stripes; /* For mount time dev extent verification */ > struct btrfs_bio_stripe stripes[]; > }; > > @@ -559,5 +560,6 @@ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); > void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); > bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, > struct btrfs_device *failing_dev); > +int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info); > > #endif > -- > 2.18.0 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2019/1/14 下午7:09, Filipe Manana wrote: > On Wed, Aug 1, 2018 at 3:39 AM Qu Wenruo <wqu@suse.com> wrote: >> >> This patch will introduce chunk <-> dev extent mapping check, to protect >> us against invalid dev extents or chunks. >> >> Since chunk mapping is the fundamental infrastructure of btrfs, extra >> check at mount time could prevent a lot of unexpected behavior (BUG_ON). >> >> Reported-by: Xu Wen <wen.xu@gatech.edu> >> Link: https://bugzilla.kernel.org/show_bug.cgi?id=200403 >> Link: https://bugzilla.kernel.org/show_bug.cgi?id=200407 >> Signed-off-by: Qu Wenruo <wqu@suse.com> > > Btw, this makes at least one test case from btrfs-progs fail: > > root 17:12:02 /home/fdmanana/git/hub/btrfs-progs/tests ((v4.19.1))> > TEST=021\* ./misc-tests.sh > [TEST/misc] 021-image-multi-devices > failed: mount /dev/loop2 /home/fdmanana/git/hub/btrfs-progs/tests//mnt > test failed for case 021-image-multi-devices That is fixed by the following commits already in devel: 9996feb94d btrfs-progs: misc-tests/021: Do extra btrfs check before mounting a1a98ee7a8 btrfs-progs: image: Remove all existing dev extents for later rebuild e6c1fa297a btrfs-progs: volumes: Refactor btrfs_alloc_dev_extent() into two functions 9a65b425bb btrfs-progs: image: Fix block group item flags when restoring multi-device image to single device ca73162b48 btrfs-progs: image: Refactor fixup_devices() to fixup_chunks_and_devices() They were detected and merged quite early, just after v4.19.1. Thanks, Qu > > dmesg/syslog has: > > [432229.206699] BTRFS error (device loop0): dev extent physical offset > 22020096 devid 1 has no corresponding chunk > [432229.207497] BTRFS error (device loop0): failed to find devid 1 > [432229.208281] BTRFS error (device loop0): failed to verify dev > extents against chunks: -117 > [432229.246286] BTRFS error (device loop0): open_ctree failed > > Thanks. 
> >> --- >> fs/btrfs/disk-io.c | 7 ++ >> fs/btrfs/volumes.c | 183 +++++++++++++++++++++++++++++++++++++++++++++ >> fs/btrfs/volumes.h | 2 + >> 3 files changed, 192 insertions(+) >> >> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c >> index 205092dc9390..068ca7498e94 100644 >> --- a/fs/btrfs/disk-io.c >> +++ b/fs/btrfs/disk-io.c >> @@ -3075,6 +3075,13 @@ int open_ctree(struct super_block *sb, >> fs_info->generation = generation; >> fs_info->last_trans_committed = generation; >> >> + ret = btrfs_verify_dev_extents(fs_info); >> + if (ret) { >> + btrfs_err(fs_info, >> + "failed to verify dev extents against chunks: %d", >> + ret); >> + goto fail_block_groups; >> + } >> ret = btrfs_recover_balance(fs_info); >> if (ret) { >> btrfs_err(fs_info, "failed to recover balance: %d", ret); >> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c >> index e6a8e4aabc66..467a589854fa 100644 >> --- a/fs/btrfs/volumes.c >> +++ b/fs/btrfs/volumes.c >> @@ -6440,6 +6440,7 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, >> map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); >> map->type = btrfs_chunk_type(leaf, chunk); >> map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); >> + map->verified_stripes = 0; >> for (i = 0; i < num_stripes; i++) { >> map->stripes[i].physical = >> btrfs_stripe_offset_nr(leaf, chunk, i); >> @@ -7295,3 +7296,185 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info) >> fs_devices = fs_devices->seed; >> } >> } >> + >> +static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes) >> +{ >> + int index = btrfs_bg_flags_to_raid_index(type); >> + int ncopies = btrfs_raid_array[index].ncopies; >> + int data_stripes; >> + >> + switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { >> + case BTRFS_BLOCK_GROUP_RAID5: >> + data_stripes = num_stripes - 1; >> + break; >> + case BTRFS_BLOCK_GROUP_RAID6: >> + data_stripes = num_stripes - 2; >> + break; >> + default: >> + data_stripes = num_stripes / ncopies; >> + 
break; >> + } >> + return div_u64(chunk_len, data_stripes); >> +} >> +static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, >> + u64 chunk_offset, u64 devid, >> + u64 physical_offset, u64 physical_len) >> +{ >> + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; >> + struct extent_map *em; >> + struct map_lookup *map; >> + u64 stripe_len; >> + bool found = false; >> + int ret = 0; >> + int i; >> + >> + read_lock(&em_tree->lock); >> + em = lookup_extent_mapping(em_tree, chunk_offset, 1); >> + read_unlock(&em_tree->lock); >> + >> + if (!em) { >> + ret = -EUCLEAN; >> + btrfs_err(fs_info, >> + "dev extent (%llu, %llu) doesn't have corresponding chunk", >> + devid, physical_offset); >> + goto out; >> + } >> + >> + map = em->map_lookup; >> + stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes); >> + if (physical_len != stripe_len) { >> + btrfs_err(fs_info, >> +"dev extent (%llu, %llu) length doesn't match with chunk %llu, have %llu expect %llu", >> + devid, physical_offset, em->start, physical_len, >> + stripe_len); >> + ret = -EUCLEAN; >> + goto out; >> + } >> + >> + for (i = 0; i < map->num_stripes; i++) { >> + if (map->stripes[i].dev->devid == devid && >> + map->stripes[i].physical == physical_offset) { >> + found = true; >> + if (map->verified_stripes >= map->num_stripes) { >> + btrfs_err(fs_info, >> + "too many dev extent for chunk %llu is detected", >> + em->start); >> + ret = -EUCLEAN; >> + goto out; >> + } >> + map->verified_stripes++; >> + break; >> + } >> + } >> + if (!found) { >> + ret = -EUCLEAN; >> + btrfs_err(fs_info, >> + "dev extent (%llu, %llu) has no corresponding chunk", >> + devid, physical_offset); >> + } >> +out: >> + free_extent_map(em); >> + return ret; >> +} >> + >> +static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info) >> +{ >> + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; >> + struct extent_map *em; >> + struct rb_node *node; >> + int ret = 0; >> + >> + 
read_lock(&em_tree->lock); >> + for (node = rb_first(&em_tree->map); node; node = rb_next(node)) { >> + em = rb_entry(node, struct extent_map, rb_node); >> + if (em->map_lookup->num_stripes != >> + em->map_lookup->verified_stripes) { >> + btrfs_err(fs_info, >> + "chunk %llu has missing dev extent, have %d expect %d", >> + em->start, em->map_lookup->verified_stripes, >> + em->map_lookup->num_stripes); >> + ret = -EUCLEAN; >> + goto out; >> + } >> + } >> +out: >> + read_unlock(&em_tree->lock); >> + return ret; >> +} >> + >> +/* >> + * Ensure all dev extents are mapped to correct chunk. >> + * Or later chunk allocation/free would cause unexpected behavior. >> + * >> + * NOTE: This will iterate through the whole device tree, which should be >> + * at the same size level of chunk tree. >> + * This would increase mount time by a tiny fraction. >> + */ >> +int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info) >> +{ >> + struct btrfs_path *path; >> + struct btrfs_root *root = fs_info->dev_root; >> + struct btrfs_key key; >> + int ret = 0; >> + >> + key.objectid = 1; >> + key.type = BTRFS_DEV_EXTENT_KEY; >> + key.offset = 0; >> + >> + path = btrfs_alloc_path(); >> + if (!path) >> + return -ENOMEM; >> + >> + path->reada = READA_FORWARD; >> + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); >> + if (ret < 0) >> + goto out; >> + >> + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { >> + ret = btrfs_next_item(root, path); >> + if (ret < 0) >> + goto out; >> + /* No dev extents at all? 
Not good */ >> + if (ret > 0) { >> + ret = -EUCLEAN; >> + goto out; >> + } >> + } >> + while (1) { >> + struct extent_buffer *leaf = path->nodes[0]; >> + struct btrfs_dev_extent *dext; >> + int slot = path->slots[0]; >> + u64 chunk_offset; >> + u64 physical_offset; >> + u64 physical_len; >> + u64 devid; >> + >> + btrfs_item_key_to_cpu(leaf, &key, slot); >> + if (key.type != BTRFS_DEV_EXTENT_KEY) >> + break; >> + devid = key.objectid; >> + physical_offset = key.offset; >> + >> + dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent); >> + chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext); >> + physical_len = btrfs_dev_extent_length(leaf, dext); >> + >> + ret = verify_one_dev_extent(fs_info, chunk_offset, devid, >> + physical_offset, physical_len); >> + if (ret < 0) >> + goto out; >> + ret = btrfs_next_item(root, path); >> + if (ret < 0) >> + goto out; >> + if (ret > 0) { >> + ret = 0; >> + break; >> + } >> + } >> + >> + /* Ensure all chunks have corresponding dev extents */ >> + ret = verify_chunk_dev_extent_mapping(fs_info); >> +out: >> + btrfs_free_path(path); >> + return ret; >> +} >> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h >> index 6d4f38ad9f5c..4301bf2d0534 100644 >> --- a/fs/btrfs/volumes.h >> +++ b/fs/btrfs/volumes.h >> @@ -345,6 +345,7 @@ struct map_lookup { >> u64 stripe_len; >> int num_stripes; >> int sub_stripes; >> + int verified_stripes; /* For mount time dev extent verification */ >> struct btrfs_bio_stripe stripes[]; >> }; >> >> @@ -559,5 +560,6 @@ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); >> void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); >> bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, >> struct btrfs_device *failing_dev); >> +int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info); >> >> #endif >> -- >> 2.18.0 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at 
http://vger.kernel.org/majordomo-info.html > > >
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 205092dc9390..068ca7498e94 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3075,6 +3075,13 @@ int open_ctree(struct super_block *sb, fs_info->generation = generation; fs_info->last_trans_committed = generation; + ret = btrfs_verify_dev_extents(fs_info); + if (ret) { + btrfs_err(fs_info, + "failed to verify dev extents against chunks: %d", + ret); + goto fail_block_groups; + } ret = btrfs_recover_balance(fs_info); if (ret) { btrfs_err(fs_info, "failed to recover balance: %d", ret); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e6a8e4aabc66..467a589854fa 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6440,6 +6440,7 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); map->type = btrfs_chunk_type(leaf, chunk); map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); + map->verified_stripes = 0; for (i = 0; i < num_stripes; i++) { map->stripes[i].physical = btrfs_stripe_offset_nr(leaf, chunk, i); @@ -7295,3 +7296,185 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info) fs_devices = fs_devices->seed; } } + +static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes) +{ + int index = btrfs_bg_flags_to_raid_index(type); + int ncopies = btrfs_raid_array[index].ncopies; + int data_stripes; + + switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + case BTRFS_BLOCK_GROUP_RAID5: + data_stripes = num_stripes - 1; + break; + case BTRFS_BLOCK_GROUP_RAID6: + data_stripes = num_stripes - 2; + break; + default: + data_stripes = num_stripes / ncopies; + break; + } + return div_u64(chunk_len, data_stripes); +} +static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, + u64 chunk_offset, u64 devid, + u64 physical_offset, u64 physical_len) +{ + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map *em; + struct map_lookup *map; + u64 stripe_len; + 
bool found = false; + int ret = 0; + int i; + + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, chunk_offset, 1); + read_unlock(&em_tree->lock); + + if (!em) { + ret = -EUCLEAN; + btrfs_err(fs_info, + "dev extent (%llu, %llu) doesn't have corresponding chunk", + devid, physical_offset); + goto out; + } + + map = em->map_lookup; + stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes); + if (physical_len != stripe_len) { + btrfs_err(fs_info, +"dev extent (%llu, %llu) length doesn't match with chunk %llu, have %llu expect %llu", + devid, physical_offset, em->start, physical_len, + stripe_len); + ret = -EUCLEAN; + goto out; + } + + for (i = 0; i < map->num_stripes; i++) { + if (map->stripes[i].dev->devid == devid && + map->stripes[i].physical == physical_offset) { + found = true; + if (map->verified_stripes >= map->num_stripes) { + btrfs_err(fs_info, + "too many dev extent for chunk %llu is detected", + em->start); + ret = -EUCLEAN; + goto out; + } + map->verified_stripes++; + break; + } + } + if (!found) { + ret = -EUCLEAN; + btrfs_err(fs_info, + "dev extent (%llu, %llu) has no corresponding chunk", + devid, physical_offset); + } +out: + free_extent_map(em); + return ret; +} + +static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info) +{ + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map *em; + struct rb_node *node; + int ret = 0; + + read_lock(&em_tree->lock); + for (node = rb_first(&em_tree->map); node; node = rb_next(node)) { + em = rb_entry(node, struct extent_map, rb_node); + if (em->map_lookup->num_stripes != + em->map_lookup->verified_stripes) { + btrfs_err(fs_info, + "chunk %llu has missing dev extent, have %d expect %d", + em->start, em->map_lookup->verified_stripes, + em->map_lookup->num_stripes); + ret = -EUCLEAN; + goto out; + } + } +out: + read_unlock(&em_tree->lock); + return ret; +} + +/* + * Ensure all dev extents are mapped to correct chunk. 
+ * Or later chunk allocation/free would cause unexpected behavior. + * + * NOTE: This will iterate through the whole device tree, which should be + * at the same size level of chunk tree. + * This would increase mount time by a tiny fraction. + */ +int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info) +{ + struct btrfs_path *path; + struct btrfs_root *root = fs_info->dev_root; + struct btrfs_key key; + int ret = 0; + + key.objectid = 1; + key.type = BTRFS_DEV_EXTENT_KEY; + key.offset = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + path->reada = READA_FORWARD; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_item(root, path); + if (ret < 0) + goto out; + /* No dev extents at all? Not good */ + if (ret > 0) { + ret = -EUCLEAN; + goto out; + } + } + while (1) { + struct extent_buffer *leaf = path->nodes[0]; + struct btrfs_dev_extent *dext; + int slot = path->slots[0]; + u64 chunk_offset; + u64 physical_offset; + u64 physical_len; + u64 devid; + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.type != BTRFS_DEV_EXTENT_KEY) + break; + devid = key.objectid; + physical_offset = key.offset; + + dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent); + chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext); + physical_len = btrfs_dev_extent_length(leaf, dext); + + ret = verify_one_dev_extent(fs_info, chunk_offset, devid, + physical_offset, physical_len); + if (ret < 0) + goto out; + ret = btrfs_next_item(root, path); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + break; + } + } + + /* Ensure all chunks have corresponding dev extents */ + ret = verify_chunk_dev_extent_mapping(fs_info); +out: + btrfs_free_path(path); + return ret; +} diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 6d4f38ad9f5c..4301bf2d0534 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -345,6 +345,7 @@ struct 
map_lookup { u64 stripe_len; int num_stripes; int sub_stripes; + int verified_stripes; /* For mount time dev extent verification */ struct btrfs_bio_stripe stripes[]; }; @@ -559,5 +560,6 @@ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, struct btrfs_device *failing_dev); +int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info); #endif
This patch will introduce a chunk <-> dev extent mapping check, to protect us against invalid dev extents or chunks. Since chunk mapping is the fundamental infrastructure of btrfs, an extra check at mount time could prevent a lot of unexpected behavior (BUG_ON). Reported-by: Xu Wen <wen.xu@gatech.edu> Link: https://bugzilla.kernel.org/show_bug.cgi?id=200403 Link: https://bugzilla.kernel.org/show_bug.cgi?id=200407 Signed-off-by: Qu Wenruo <wqu@suse.com> --- fs/btrfs/disk-io.c | 7 ++ fs/btrfs/volumes.c | 183 +++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.h | 2 + 3 files changed, 192 insertions(+)