Message ID | 9b24d30f3703d7d714c4bb37ce817d1eaa92980b.1629322156.git.josef@toxicpanda.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs-progs: make check handle invalid bg items | expand |
On 2021/8/19 上午5:33, Josef Bacik wrote: > The lowmem mode validates the used field of the block group item, but > the normal mode does not. Fix this by keeping a running tally of what > we think the used value for the block group should be, and then if it > mismatches report an error and fix the problem if we have repair set. > We have to keep track of pending extents because we process leaves as we > see them, so it could be much later in the process that we find the > block group item to associate the extents with. > > Signed-off-by: Josef Bacik <josef@toxicpanda.com> > --- > check/common.h | 5 +++ > check/main.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++--- > 2 files changed, 90 insertions(+), 4 deletions(-) > > diff --git a/check/common.h b/check/common.h > index e72379a0..ba4e291e 100644 > --- a/check/common.h > +++ b/check/common.h > @@ -37,10 +37,14 @@ struct block_group_record { > u64 offset; > > u64 flags; > + > + u64 disk_used; > + u64 actual_used; > }; > > struct block_group_tree { > struct cache_tree tree; > + struct extent_io_tree pending_extents; > struct list_head block_groups; > }; > > @@ -141,6 +145,7 @@ u64 calc_stripe_length(u64 type, u64 length, int num_stripes); > static inline void block_group_tree_init(struct block_group_tree *tree) > { > cache_tree_init(&tree->tree); > + extent_io_tree_init(&tree->pending_extents); > INIT_LIST_HEAD(&tree->block_groups); > } > > diff --git a/check/main.c b/check/main.c > index 3f6db8f8..af9e0ff3 100644 > --- a/check/main.c > +++ b/check/main.c > @@ -5083,9 +5083,27 @@ static void free_block_group_record(struct cache_extent *cache) > > void free_block_group_tree(struct block_group_tree *tree) > { > + extent_io_tree_cleanup(&tree->pending_extents); > cache_tree_free_extents(&tree->tree, free_block_group_record); > } > > +static void update_block_group_used(struct block_group_tree *tree, > + u64 bytenr, u64 num_bytes) > +{ > + struct cache_extent *bg_item; > + struct block_group_record *bg_rec; > + > + bg_item = lookup_cache_extent(&tree->tree, bytenr, num_bytes); > + if (!bg_item) { > + set_extent_dirty(&tree->pending_extents, bytenr, > + bytenr + num_bytes - 1); > + return; > + } I guess this is to handle cases where the extent item shows up before we reached the block group item. So we set the pending_extents range dirty, and waiting for the block group item to show up. But I'm not sure if we really need to handle them like this. Can't we just set the range dirty and call it a day, then check the tree to calculate the actual used space for each block group item after we iterated the whole extent tree? Thanks, Qu > + bg_rec = container_of(bg_item, struct block_group_record, > + cache); > + bg_rec->actual_used += num_bytes; > +} > + > int insert_device_extent_record(struct device_extent_tree *tree, > struct device_extent_record *de_rec) > { > @@ -5270,6 +5288,7 @@ btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key, > > ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item); > rec->flags = btrfs_block_group_flags(leaf, ptr); > + rec->disk_used = btrfs_block_group_used(leaf, ptr); > > INIT_LIST_HEAD(&rec->list); > > @@ -5281,6 +5300,7 @@ static int process_block_group_item(struct block_group_tree *block_group_cache, > struct extent_buffer *eb, int slot) > { > struct block_group_record *rec; > + u64 start, end; > int ret = 0; > > rec = btrfs_new_block_group_record(eb, key, slot); > @@ -5289,6 +5309,22 @@ static int process_block_group_item(struct block_group_tree *block_group_cache, > fprintf(stderr, "Block Group[%llu, %llu] existed.\n", > rec->objectid, rec->offset); > free(rec); > + return ret; > + } > + > + while (!find_first_extent_bit(&block_group_cache->pending_extents, > + rec->objectid, &start, &end, > + EXTENT_DIRTY)) { > + u64 len; > + > + if (start >= rec->objectid + rec->offset) > + break; > + start = max(start, rec->objectid); > + len = min(end - start + 1, > + rec->objectid + rec->offset - start); > + rec->actual_used += len; > + clear_extent_dirty(&block_group_cache->pending_extents, start, > + start + len - 1); > } > > return ret; > @@ -5352,6 +5388,7 @@ process_device_extent_item(struct device_extent_tree *dev_extent_cache, > > static int process_extent_item(struct btrfs_root *root, > struct cache_tree *extent_cache, > + struct block_group_tree *block_group_cache, > struct extent_buffer *eb, int slot) > { > struct btrfs_extent_item *ei; > @@ -5380,6 +5417,8 @@ static int process_extent_item(struct btrfs_root *root, > num_bytes = key.offset; > } > > + update_block_group_used(block_group_cache, key.objectid, num_bytes); > + > if (!IS_ALIGNED(key.objectid, gfs_info->sectorsize)) { > error("ignoring invalid extent, bytenr %llu is not aligned to %u", > key.objectid, gfs_info->sectorsize); > @@ -6348,13 +6387,13 @@ static int run_next_block(struct btrfs_root *root, > continue; > } > if (key.type == BTRFS_EXTENT_ITEM_KEY) { > - process_extent_item(root, extent_cache, buf, > - i); > + process_extent_item(root, extent_cache, > + block_group_cache, buf, i); > continue; > } > if (key.type == BTRFS_METADATA_ITEM_KEY) { > - process_extent_item(root, extent_cache, buf, > - i); > + process_extent_item(root, extent_cache, > + block_group_cache, buf, i); > continue; > } > if (key.type == BTRFS_EXTENT_CSUM_KEY) { > @@ -8619,6 +8658,41 @@ static int deal_root_from_list(struct list_head *list, > return ret; > } > > +static int check_block_groups(struct block_group_tree *bg_cache) > +{ > + struct btrfs_trans_handle *trans; > + struct cache_extent *item; > + struct block_group_record *bg_rec; > + int ret = 0; > + > + for (item = first_cache_extent(&bg_cache->tree); item; > + item = next_cache_extent(item)) { > + bg_rec = container_of(item, struct block_group_record, > + cache); > + if (bg_rec->disk_used == bg_rec->actual_used) > + continue; > + fprintf(stderr, > + "block group[%llu %llu] used %llu but extent items used %llu\n", > + bg_rec->objectid, bg_rec->offset, bg_rec->disk_used, > + bg_rec->actual_used); > + ret = -1; > + } > + > + if (!repair || !ret) > + return ret; > + > + trans = btrfs_start_transaction(gfs_info->extent_root, 1); > + if (IS_ERR(trans)) { > + ret = PTR_ERR(trans); > + fprintf(stderr, "Failed to start a transaction\n"); > + return ret; > + } > + > + ret = btrfs_fix_block_accounting(trans); > + btrfs_commit_transaction(trans, gfs_info->extent_root); > + return ret ? ret : -EAGAIN; > +} > + > /** > * parse_tree_roots - Go over all roots in the tree root and add each one to > * a list. > @@ -8910,6 +8984,13 @@ again: > goto out; > } > > + ret = check_block_groups(&block_group_cache); > + if (ret) { > + if (ret == -EAGAIN) > + goto loop; > + goto out; > + } > + > ret = check_devices(&dev_cache, &dev_extent_cache); > if (ret && err) > ret = err; >
diff --git a/check/common.h b/check/common.h index e72379a0..ba4e291e 100644 --- a/check/common.h +++ b/check/common.h @@ -37,10 +37,14 @@ struct block_group_record { u64 offset; u64 flags; + + u64 disk_used; + u64 actual_used; }; struct block_group_tree { struct cache_tree tree; + struct extent_io_tree pending_extents; struct list_head block_groups; }; @@ -141,6 +145,7 @@ u64 calc_stripe_length(u64 type, u64 length, int num_stripes); static inline void block_group_tree_init(struct block_group_tree *tree) { cache_tree_init(&tree->tree); + extent_io_tree_init(&tree->pending_extents); INIT_LIST_HEAD(&tree->block_groups); } diff --git a/check/main.c b/check/main.c index 3f6db8f8..af9e0ff3 100644 --- a/check/main.c +++ b/check/main.c @@ -5083,9 +5083,27 @@ static void free_block_group_record(struct cache_extent *cache) void free_block_group_tree(struct block_group_tree *tree) { + extent_io_tree_cleanup(&tree->pending_extents); cache_tree_free_extents(&tree->tree, free_block_group_record); } +static void update_block_group_used(struct block_group_tree *tree, + u64 bytenr, u64 num_bytes) +{ + struct cache_extent *bg_item; + struct block_group_record *bg_rec; + + bg_item = lookup_cache_extent(&tree->tree, bytenr, num_bytes); + if (!bg_item) { + set_extent_dirty(&tree->pending_extents, bytenr, + bytenr + num_bytes - 1); + return; + } + bg_rec = container_of(bg_item, struct block_group_record, + cache); + bg_rec->actual_used += num_bytes; +} + int insert_device_extent_record(struct device_extent_tree *tree, struct device_extent_record *de_rec) { @@ -5270,6 +5288,7 @@ btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key, ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item); rec->flags = btrfs_block_group_flags(leaf, ptr); + rec->disk_used = btrfs_block_group_used(leaf, ptr); INIT_LIST_HEAD(&rec->list); @@ -5281,6 +5300,7 @@ static int process_block_group_item(struct block_group_tree *block_group_cache, struct extent_buffer *eb, int slot) { struct block_group_record *rec; + u64 start, end; int ret = 0; rec = btrfs_new_block_group_record(eb, key, slot); @@ -5289,6 +5309,22 @@ static int process_block_group_item(struct block_group_tree *block_group_cache, fprintf(stderr, "Block Group[%llu, %llu] existed.\n", rec->objectid, rec->offset); free(rec); + return ret; + } + + while (!find_first_extent_bit(&block_group_cache->pending_extents, + rec->objectid, &start, &end, + EXTENT_DIRTY)) { + u64 len; + + if (start >= rec->objectid + rec->offset) + break; + start = max(start, rec->objectid); + len = min(end - start + 1, + rec->objectid + rec->offset - start); + rec->actual_used += len; + clear_extent_dirty(&block_group_cache->pending_extents, start, + start + len - 1); } return ret; @@ -5352,6 +5388,7 @@ process_device_extent_item(struct device_extent_tree *dev_extent_cache, static int process_extent_item(struct btrfs_root *root, struct cache_tree *extent_cache, + struct block_group_tree *block_group_cache, struct extent_buffer *eb, int slot) { struct btrfs_extent_item *ei; @@ -5380,6 +5417,8 @@ static int process_extent_item(struct btrfs_root *root, num_bytes = key.offset; } + update_block_group_used(block_group_cache, key.objectid, num_bytes); + if (!IS_ALIGNED(key.objectid, gfs_info->sectorsize)) { error("ignoring invalid extent, bytenr %llu is not aligned to %u", key.objectid, gfs_info->sectorsize); @@ -6348,13 +6387,13 @@ static int run_next_block(struct btrfs_root *root, continue; } if (key.type == BTRFS_EXTENT_ITEM_KEY) { - process_extent_item(root, extent_cache, buf, - i); + process_extent_item(root, extent_cache, + block_group_cache, buf, i); continue; } if (key.type == BTRFS_METADATA_ITEM_KEY) { - process_extent_item(root, extent_cache, buf, - i); + process_extent_item(root, extent_cache, + block_group_cache, buf, i); continue; } if (key.type == BTRFS_EXTENT_CSUM_KEY) { @@ -8619,6 +8658,41 @@ static int deal_root_from_list(struct list_head *list, return ret; } +static int check_block_groups(struct block_group_tree *bg_cache) +{ + struct btrfs_trans_handle *trans; + struct cache_extent *item; + struct block_group_record *bg_rec; + int ret = 0; + + for (item = first_cache_extent(&bg_cache->tree); item; + item = next_cache_extent(item)) { + bg_rec = container_of(item, struct block_group_record, + cache); + if (bg_rec->disk_used == bg_rec->actual_used) + continue; + fprintf(stderr, + "block group[%llu %llu] used %llu but extent items used %llu\n", + bg_rec->objectid, bg_rec->offset, bg_rec->disk_used, + bg_rec->actual_used); + ret = -1; + } + + if (!repair || !ret) + return ret; + + trans = btrfs_start_transaction(gfs_info->extent_root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + fprintf(stderr, "Failed to start a transaction\n"); + return ret; + } + + ret = btrfs_fix_block_accounting(trans); + btrfs_commit_transaction(trans, gfs_info->extent_root); + return ret ? ret : -EAGAIN; +} + /** * parse_tree_roots - Go over all roots in the tree root and add each one to * a list. @@ -8910,6 +8984,13 @@ again: goto out; } + ret = check_block_groups(&block_group_cache); + if (ret) { + if (ret == -EAGAIN) + goto loop; + goto out; + } + ret = check_devices(&dev_cache, &dev_extent_cache); if (ret && err) ret = err;
The lowmem mode validates the used field of the block group item, but the normal mode does not. Fix this by keeping a running tally of what we think the used value for the block group should be, and then if it mismatches report an error and fix the problem if we have repair set. We have to keep track of pending extents because we process leaves as we see them, so it could be much later in the process that we find the block group item to associate the extents with. Signed-off-by: Josef Bacik <josef@toxicpanda.com> --- check/common.h | 5 +++ check/main.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 90 insertions(+), 4 deletions(-)