Message ID | af1830174f9dd9e2651dab213c0b984d90811138.1604065695.git.naohiro.aota@wdc.com
---|---
State | New, archived
Series | btrfs: zoned block device support
On 10/30/20 9:51 AM, Naohiro Aota wrote:
> Conventional zones do not have a write pointer, so we cannot use it to
> determine the allocation offset if a block group contains a conventional
> zone.
>
> But instead, we can consider the end of the last allocated extent in the
> block group as an allocation offset.
>
> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
> ---
>  fs/btrfs/zoned.c | 119 ++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 113 insertions(+), 6 deletions(-)
>
> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> index 0aa821893a51..8f58d0853cc3 100644
> --- a/fs/btrfs/zoned.c
> +++ b/fs/btrfs/zoned.c
> @@ -740,6 +740,104 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
>  	return 0;
>  }
>
> +static int emulate_write_pointer(struct btrfs_block_group *cache,
> +				 u64 *offset_ret)
> +{
> +	struct btrfs_fs_info *fs_info = cache->fs_info;
> +	struct btrfs_root *root = fs_info->extent_root;
> +	struct btrfs_path *path;
> +	struct extent_buffer *leaf;
> +	struct btrfs_key search_key;
> +	struct btrfs_key found_key;
> +	int slot;
> +	int ret;
> +	u64 length;
> +
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		return -ENOMEM;
> +
> +	search_key.objectid = cache->start + cache->length;
> +	search_key.type = 0;
> +	search_key.offset = 0;
> +

You can just use 'key', you don't have to use 'search_key'.

Also you don't check for things like BTRFS_TREE_BLOCK_REF_KEY or whatever
in the case that we don't have an inline extent ref, so this could error
out with a fs with lots of snapshots and different references. What you
need is to search back until you hit a BTRFS_METADATA_ITEM_KEY or a
BTRFS_EXTENT_ITEM_KEY, and then check the offset of that thing. Otherwise
this will screw up. Thanks,

Josef
On Mon, Nov 02, 2020 at 03:37:35PM -0500, Josef Bacik wrote:
>On 10/30/20 9:51 AM, Naohiro Aota wrote:
>>Conventional zones do not have a write pointer, so we cannot use it to
>>determine the allocation offset if a block group contains a conventional
>>zone.
>>
>>But instead, we can consider the end of the last allocated extent in the
>>block group as an allocation offset.
>>
>>Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
>>---
>> fs/btrfs/zoned.c | 119 ++++++++++++++++++++++++++++++++++++++++++++---
>> 1 file changed, 113 insertions(+), 6 deletions(-)
>>
>>diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
>>index 0aa821893a51..8f58d0853cc3 100644
>>--- a/fs/btrfs/zoned.c
>>+++ b/fs/btrfs/zoned.c
>>@@ -740,6 +740,104 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
>> 	return 0;
>> }
>>+static int emulate_write_pointer(struct btrfs_block_group *cache,
>>+				 u64 *offset_ret)
>>+{
>>+	struct btrfs_fs_info *fs_info = cache->fs_info;
>>+	struct btrfs_root *root = fs_info->extent_root;
>>+	struct btrfs_path *path;
>>+	struct extent_buffer *leaf;
>>+	struct btrfs_key search_key;
>>+	struct btrfs_key found_key;
>>+	int slot;
>>+	int ret;
>>+	u64 length;
>>+
>>+	path = btrfs_alloc_path();
>>+	if (!path)
>>+		return -ENOMEM;
>>+
>>+	search_key.objectid = cache->start + cache->length;
>>+	search_key.type = 0;
>>+	search_key.offset = 0;
>>+
>
>You can just use 'key', you don't have to use 'search_key'.

Fixed.

>
>Also you don't check for things like BTRFS_TREE_BLOCK_REF_KEY or
>whatever in the case that we don't have an inline extent ref, so this
>could error out with a fs with lots of snapshots and different
>references. What you need is to search back until you hit a
>BTRFS_METADATA_ITEM_KEY or a BTRFS_EXTENT_ITEM_KEY, and then check the
>offset of that thing. Otherwise this will screw up. Thanks,
>
>Josef

I overlooked that case. And I just noticed we can use
btrfs_previous_extent_item() here. It looks much simpler now. Thanks.
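For context, a rough sketch of how the helper might look once rebuilt
around btrfs_previous_extent_item(), which walks backwards through the
extent tree until it lands on a BTRFS_EXTENT_ITEM_KEY or
BTRFS_METADATA_ITEM_KEY item and so sidesteps the reference-key problem
raised above. This is only an illustration of the idea under discussion,
not the code from a later revision; the error handling is assumed, and
the variable is renamed to 'key' per the first review comment.

static int emulate_write_pointer(struct btrfs_block_group *cache,
				 u64 *offset_ret)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	int ret;
	u64 length;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = cache->start + cache->length;
	key.type = 0;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	/* There cannot be an exact match for such a key. */
	if (!ret)
		ret = -EUCLEAN;
	if (ret < 0)
		goto out;

	ret = btrfs_previous_extent_item(root, path, cache->start);
	if (ret) {
		if (ret == 1) {
			/* No extent before us: the block group is empty. */
			ret = 0;
			*offset_ret = 0;
		}
		goto out;
	}

	btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);

	if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
		length = found_key.offset;
	else
		length = fs_info->nodesize;

	if (!(found_key.objectid >= cache->start &&
	      found_key.objectid + length <= cache->start + cache->length)) {
		ret = -EUCLEAN;
		goto out;
	}
	*offset_ret = found_key.objectid + length - cache->start;
	ret = 0;

out:
	btrfs_free_path(path);
	return ret;
}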
On Fri, Oct 30, 2020 at 10:51:22PM +0900, Naohiro Aota wrote:
> +	struct btrfs_key found_key;
> +	int slot;
> +	int ret;
> +	u64 length;
> +
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		return -ENOMEM;
> +
> +	search_key.objectid = cache->start + cache->length;
> +	search_key.type = 0;
> +	search_key.offset = 0;
> +
> +	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
> +	if (ret < 0)
> +		goto out;
> +	ASSERT(ret != 0);

This should be a runtime check, not an assert; if this happens it's
likely a corruption.

> +	slot = path->slots[0];
> +	leaf = path->nodes[0];
> +	ASSERT(slot != 0);

Same.

> +	slot--;
> +	btrfs_item_key_to_cpu(leaf, &found_key, slot);
> +
> +	if (found_key.objectid < cache->start) {
> +		*offset_ret = 0;
> +	} else if (found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
> +		struct btrfs_key extent_item_key;
> +
> +		if (found_key.objectid != cache->start) {
> +			ret = -EUCLEAN;
> +			goto out;
> +		}
> +
> +		length = 0;
> +
> +		/* metadata may have METADATA_ITEM_KEY */

	/* Metadata ... */

> +		if (slot == 0) {
> +			btrfs_set_path_blocking(path);
> +			ret = btrfs_prev_leaf(root, path);
> +			if (ret < 0)
> +				goto out;
> +			if (ret == 0) {
> +				slot = btrfs_header_nritems(leaf) - 1;
> +				btrfs_item_key_to_cpu(leaf, &extent_item_key,
> +						      slot);
> +			}
> +		} else {
> +			btrfs_item_key_to_cpu(leaf, &extent_item_key, slot - 1);
> +			ret = 0;
> +		}
> +
> +		if (ret == 0 &&
> +		    extent_item_key.objectid == cache->start) {
> +			if (extent_item_key.type == BTRFS_METADATA_ITEM_KEY)

	if (...) {

> +				length = fs_info->nodesize;

	} else if (...) {

> +			else if (extent_item_key.type == BTRFS_EXTENT_ITEM_KEY)
> +				length = extent_item_key.offset;

	} else {

> +			else {
> +				ret = -EUCLEAN;
> +				goto out;
> +			}
> +		}
> +
> +		*offset_ret = length;
> +	} else if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
> +		   found_key.type == BTRFS_METADATA_ITEM_KEY) {
> +
> +		if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
> +			length = found_key.offset;
> +		else
> +			length = fs_info->nodesize;
> +
> +		if (!(found_key.objectid >= cache->start &&
> +		      found_key.objectid + length <=
> +		      cache->start + cache->length)) {
> +			ret = -EUCLEAN;
> +			goto out;
> +		}
> +		*offset_ret = found_key.objectid + length - cache->start;
> +	} else {
> +		ret = -EUCLEAN;
> +		goto out;
> +	}
> +	ret = 0;
> +
> +out:
> +	btrfs_free_path(path);
> +	return ret;
> +}
> +
> int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
> {
> 	struct btrfs_fs_info *fs_info = cache->fs_info;
> @@ -754,6 +852,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
> 	int i;
> 	unsigned int nofs_flag;
> 	u64 *alloc_offsets = NULL;
> +	u64 emulated_offset = 0;
> 	u32 num_sequential = 0, num_conventional = 0;
>
> 	if (!btrfs_is_zoned(fs_info))
> @@ -854,12 +953,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
> 	}
>
> 	if (num_conventional > 0) {
> -		/*
> -		 * Since conventional zones does not have write pointer, we
> -		 * cannot determine alloc_offset from the pointer
> -		 */
> -		ret = -EINVAL;
> -		goto out;
> +		ret = emulate_write_pointer(cache, &emulated_offset);
> +		if (ret || map->num_stripes == num_conventional) {
> +			if (!ret)
> +				cache->alloc_offset = emulated_offset;
> +			goto out;
> +		}
> 	}
>
> 	switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
> @@ -881,6 +980,14 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
> 	}
>
> out:
> +	/* an extent is allocated after the write pointer */

	/* An ... */

> +	if (num_conventional && emulated_offset > cache->alloc_offset) {
> +		btrfs_err(fs_info,
> +			  "got wrong write pointer in BG %llu: %llu > %llu",

	"zoned: got wrong write pointer in block group %llu found %llu emulated %llu"

> +			  logical, emulated_offset, cache->alloc_offset);
> +		ret = -EIO;
> +	}
> +
> 	kfree(alloc_offsets);
> 	free_extent_map(em);
>
> --
> 2.27.0
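To make the first review point concrete, a minimal sketch of the two
asserts turned into runtime checks (the -EUCLEAN error code is an
assumption, chosen to match the other corruption paths in the function):

	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
	if (ret < 0)
		goto out;
	/*
	 * An exact match means an item sits at cache->start + cache->length,
	 * which points at corruption rather than a programming error, so
	 * fail gracefully instead of asserting.
	 */
	if (ret == 0) {
		ret = -EUCLEAN;
		goto out;
	}
	slot = path->slots[0];
	leaf = path->nodes[0];
	/* Likewise, slot == 0 here would indicate a corrupted tree. */
	if (slot == 0) {
		ret = -EUCLEAN;
		goto out;
	}
	slot--;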
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 0aa821893a51..8f58d0853cc3 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -740,6 +740,104 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
 	return 0;
 }
 
+static int emulate_write_pointer(struct btrfs_block_group *cache,
+				 u64 *offset_ret)
+{
+	struct btrfs_fs_info *fs_info = cache->fs_info;
+	struct btrfs_root *root = fs_info->extent_root;
+	struct btrfs_path *path;
+	struct extent_buffer *leaf;
+	struct btrfs_key search_key;
+	struct btrfs_key found_key;
+	int slot;
+	int ret;
+	u64 length;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	search_key.objectid = cache->start + cache->length;
+	search_key.type = 0;
+	search_key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	ASSERT(ret != 0);
+	slot = path->slots[0];
+	leaf = path->nodes[0];
+	ASSERT(slot != 0);
+	slot--;
+	btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+	if (found_key.objectid < cache->start) {
+		*offset_ret = 0;
+	} else if (found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
+		struct btrfs_key extent_item_key;
+
+		if (found_key.objectid != cache->start) {
+			ret = -EUCLEAN;
+			goto out;
+		}
+
+		length = 0;
+
+		/* metadata may have METADATA_ITEM_KEY */
+		if (slot == 0) {
+			btrfs_set_path_blocking(path);
+			ret = btrfs_prev_leaf(root, path);
+			if (ret < 0)
+				goto out;
+			if (ret == 0) {
+				slot = btrfs_header_nritems(leaf) - 1;
+				btrfs_item_key_to_cpu(leaf, &extent_item_key,
+						      slot);
+			}
+		} else {
+			btrfs_item_key_to_cpu(leaf, &extent_item_key, slot - 1);
+			ret = 0;
+		}
+
+		if (ret == 0 &&
+		    extent_item_key.objectid == cache->start) {
+			if (extent_item_key.type == BTRFS_METADATA_ITEM_KEY)
+				length = fs_info->nodesize;
+			else if (extent_item_key.type == BTRFS_EXTENT_ITEM_KEY)
+				length = extent_item_key.offset;
+			else {
+				ret = -EUCLEAN;
+				goto out;
+			}
+		}
+
+		*offset_ret = length;
+	} else if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
+		   found_key.type == BTRFS_METADATA_ITEM_KEY) {
+
+		if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
+			length = found_key.offset;
+		else
+			length = fs_info->nodesize;
+
+		if (!(found_key.objectid >= cache->start &&
+		      found_key.objectid + length <=
+		      cache->start + cache->length)) {
+			ret = -EUCLEAN;
+			goto out;
+		}
+		*offset_ret = found_key.objectid + length - cache->start;
+	} else {
+		ret = -EUCLEAN;
+		goto out;
+	}
+	ret = 0;
+
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
 int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
@@ -754,6 +852,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
 	int i;
 	unsigned int nofs_flag;
 	u64 *alloc_offsets = NULL;
+	u64 emulated_offset = 0;
 	u32 num_sequential = 0, num_conventional = 0;
 
 	if (!btrfs_is_zoned(fs_info))
@@ -854,12 +953,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
 	}
 
 	if (num_conventional > 0) {
-		/*
-		 * Since conventional zones does not have write pointer, we
-		 * cannot determine alloc_offset from the pointer
-		 */
-		ret = -EINVAL;
-		goto out;
+		ret = emulate_write_pointer(cache, &emulated_offset);
+		if (ret || map->num_stripes == num_conventional) {
+			if (!ret)
+				cache->alloc_offset = emulated_offset;
+			goto out;
+		}
 	}
 
 	switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
@@ -881,6 +980,14 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
 	}
 
 out:
+	/* an extent is allocated after the write pointer */
+	if (num_conventional && emulated_offset > cache->alloc_offset) {
+		btrfs_err(fs_info,
+			  "got wrong write pointer in BG %llu: %llu > %llu",
+			  logical, emulated_offset, cache->alloc_offset);
+		ret = -EIO;
+	}
+
 	kfree(alloc_offsets);
 	free_extent_map(em);
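Folding the review feedback into the final hunk, the consistency check
under the out: label might end up looking like this (a sketch; the
message wording follows the reviewer's suggestion, and the argument
order pairing "found" with the on-disk write pointer is an assumption):

out:
	/* An extent is allocated after the write pointer. */
	if (num_conventional && emulated_offset > cache->alloc_offset) {
		btrfs_err(fs_info,
"zoned: got wrong write pointer in block group %llu found %llu emulated %llu",
			  logical, cache->alloc_offset, emulated_offset);
		ret = -EIO;
	}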
Conventional zones do not have a write pointer, so we cannot use it to
determine the allocation offset if a block group contains a conventional
zone.

But instead, we can consider the end of the last allocated extent in the
block group as an allocation offset.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 fs/btrfs/zoned.c | 119 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 113 insertions(+), 6 deletions(-)
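As a toy illustration of the emulation (all names and numbers
hypothetical, not from the patch): for a block group starting at logical
offset bg_start whose last allocated extent covers [extent_start,
extent_start + extent_len), the emulated write pointer within the block
group is the end of that extent relative to the block group start.

/* Hypothetical helper, for illustration only; not part of the patch. */
static inline u64 emulated_write_pointer(u64 bg_start, u64 extent_start,
					 u64 extent_len)
{
	/*
	 * E.g. bg_start = 1 GiB, extent_start = bg_start + 96 MiB,
	 * extent_len = 8 MiB => emulated offset = 104 MiB.
	 */
	return extent_start + extent_len - bg_start;
}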