@@ -2337,7 +2337,7 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
* 1, 2 2nd and 3rd backup copy
* -1 skip bytenr check
*/
-int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
+int btrfs_validate_super(struct btrfs_fs_info *fs_info,
const struct btrfs_super_block *sb, int mirror_num)
{
u64 nodesize = btrfs_super_nodesize(sb);
@@ -2495,24 +2495,7 @@ int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
- /*
- * Obvious sys_chunk_array corruptions, it must hold at least one key
- * and one chunk
- */
- if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
- btrfs_err(fs_info, "system chunk array too big %u > %u",
- btrfs_super_sys_array_size(sb),
- BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
- ret = -EINVAL;
- }
- if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
- + sizeof(struct btrfs_chunk)) {
- btrfs_err(fs_info, "system chunk array too small %u < %zu",
- btrfs_super_sys_array_size(sb),
- sizeof(struct btrfs_disk_key)
- + sizeof(struct btrfs_chunk));
- ret = -EINVAL;
- }
+ ret = btrfs_check_system_chunk_array(fs_info, sb);
/*
* The generation is a global counter, we'll trust it more than the others
@@ -54,7 +54,7 @@ int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
const struct btrfs_super_block *disk_sb);
int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices);
void __cold close_ctree(struct btrfs_fs_info *fs_info);
-int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
+int btrfs_validate_super(struct btrfs_fs_info *fs_info,
const struct btrfs_super_block *sb, int mirror_num);
int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount);
int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
@@ -973,6 +973,105 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
return 0;
}
+int btrfs_check_system_chunk_array(struct btrfs_fs_info *fs_info,
+ const struct btrfs_super_block *sb)
+{
+ struct extent_buffer *dummy;
+ u32 array_size;
+ u32 cur_offset = 0;
+ u32 len;
+ int ret = 0;
+
+ /*
+ * We allocated a dummy extent, just to use extent buffer accessors.
+ * There will be unused space after BTRFS_SUPER_INFO_SIZE, but
+ * that's fine, we will not go beyond system chunk array anyway.
+ */
+ dummy = alloc_dummy_extent_buffer(fs_info, BTRFS_SUPER_INFO_OFFSET);
+ if (!dummy)
+ return -ENOMEM;
+ set_extent_buffer_uptodate(dummy);
+ write_extent_buffer(dummy, sb, 0, BTRFS_SUPER_INFO_SIZE);
+
+ array_size = btrfs_super_sys_array_size(sb);
+ if (array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
+ btrfs_crit(fs_info,
+ "superblock syschunk too large, have %u expect <=%u",
+ array_size, BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+ ret = -EUCLEAN;
+ goto out;
+ }
+
+ while (cur_offset < array_size) {
+ struct btrfs_disk_key *disk_key;
+ struct btrfs_key key;
+ struct btrfs_chunk *chunk;
+ u32 num_stripes;
+ u64 type;
+
+ len = sizeof(*disk_key);
+ if (cur_offset + len > array_size)
+ goto out_short_read;
+ disk_key = (struct btrfs_disk_key *)(sb->sys_chunk_array + cur_offset);
+ btrfs_disk_key_to_cpu(&key, disk_key);
+ cur_offset += len;
+
+ if (key.type != BTRFS_CHUNK_ITEM_KEY) {
+ btrfs_crit(fs_info,
+ "unexpected item type %u in sys_array at offset %u",
+ (u32)key.type, cur_offset);
+ ret = -EUCLEAN;
+ goto out;
+ }
+ /*
+ * At least one btrfs_chunk with one stripe must be present,
+ * exact stripe count check comes afterwards
+ */
+ len = btrfs_chunk_item_size(1);
+ if (cur_offset + len > array_size)
+ goto out_short_read;
+
+ chunk = (struct btrfs_chunk *)
+ (offsetof(struct btrfs_super_block, sys_chunk_array) +
+ cur_offset);
+ num_stripes = btrfs_chunk_num_stripes(dummy, chunk);
+ if (!num_stripes) {
+ btrfs_crit(fs_info,
+ "invalid number of stripes %u in sys_array at offset %u",
+ num_stripes, cur_offset);
+ ret = -EUCLEAN;
+ goto out;
+ }
+ type = btrfs_chunk_type(dummy, chunk);
+ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
+ btrfs_err(fs_info,
+ "invalid chunk type %llu in sys_array at offset %u",
+ type, cur_offset);
+ ret = -EUCLEAN;
+ goto out;
+ }
+
+ len = btrfs_chunk_item_size(num_stripes);
+ if (cur_offset + len > array_size)
+ goto out_short_read;
+
+ ret = btrfs_check_chunk_valid(dummy, chunk, key.offset);
+ if (ret)
+ goto out;
+ cur_offset += len;
+ }
+out:
+ free_extent_buffer_stale(dummy);
+ return ret;
+
+out_short_read:
+ btrfs_crit(fs_info,
+ "sys_array too short to read %u bytes at offset %u array size %u",
+ len, cur_offset, array_size);
+ free_extent_buffer_stale(dummy);
+ return ret;
+}
+
/*
* Enhanced version of chunk item checker.
*
@@ -8,6 +8,7 @@
#include <linux/types.h>
#include <uapi/linux/btrfs_tree.h>
+#include "fs.h"
struct extent_buffer;
struct btrfs_chunk;
@@ -68,6 +69,8 @@ int btrfs_check_node(struct extent_buffer *node);
int btrfs_check_chunk_valid(struct extent_buffer *leaf,
struct btrfs_chunk *chunk, u64 logical);
+int btrfs_check_system_chunk_array(struct btrfs_fs_info *fs_info,
+ const struct btrfs_super_block *sb);
int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner);
int btrfs_verify_level_key(struct extent_buffer *eb,
const struct btrfs_tree_parent_check *check);
@@ -7002,16 +7002,6 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
warn_32bit_meta_chunk(fs_info, logical, length, type);
#endif
- /*
- * Only need to verify chunk item if we're reading from sys chunk array,
- * as chunk item in tree block is already verified by tree-checker.
- */
- if (leaf->start == BTRFS_SUPER_INFO_OFFSET) {
- ret = btrfs_check_chunk_valid(leaf, chunk, logical);
- if (ret)
- return ret;
- }
-
map = btrfs_find_chunk_map(fs_info, logical, 1);
/* already mapped? */
@@ -7274,11 +7264,9 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
u8 *array_ptr;
unsigned long sb_array_offset;
int ret = 0;
- u32 num_stripes;
u32 array_size;
u32 len = 0;
u32 cur_offset;
- u64 type;
struct btrfs_key key;
ASSERT(BTRFS_SUPER_INFO_SIZE <= fs_info->nodesize);
@@ -7301,10 +7289,17 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
cur_offset = 0;
while (cur_offset < array_size) {
+ u32 num_stripes;
+
disk_key = (struct btrfs_disk_key *)array_ptr;
len = sizeof(*disk_key);
- if (cur_offset + len > array_size)
- goto out_short_read;
+
+ /*
+ * The super block should have passed
+ * btrfs_check_system_chunk_array(), thus we only do
+ * ASSERT() for those sanity checks.
+ */
+ ASSERT(cur_offset + len <= array_size);
btrfs_disk_key_to_cpu(&key, disk_key);
@@ -7312,44 +7307,24 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
sb_array_offset += len;
cur_offset += len;
- if (key.type != BTRFS_CHUNK_ITEM_KEY) {
- btrfs_err(fs_info,
- "unexpected item type %u in sys_array at offset %u",
- (u32)key.type, cur_offset);
- ret = -EIO;
- break;
- }
+ ASSERT(key.type == BTRFS_CHUNK_ITEM_KEY);
chunk = (struct btrfs_chunk *)sb_array_offset;
/*
* At least one btrfs_chunk with one stripe must be present,
* exact stripe count check comes afterwards
*/
- len = btrfs_chunk_item_size(1);
- if (cur_offset + len > array_size)
- goto out_short_read;
+ ASSERT(cur_offset + btrfs_chunk_item_size(1) <= array_size);
num_stripes = btrfs_chunk_num_stripes(sb, chunk);
- if (!num_stripes) {
- btrfs_err(fs_info,
- "invalid number of stripes %u in sys_array at offset %u",
- num_stripes, cur_offset);
- ret = -EIO;
- break;
- }
+ /* Should have at least one stripe. */
+ ASSERT(num_stripes);
- type = btrfs_chunk_type(sb, chunk);
- if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
- btrfs_err(fs_info,
- "invalid chunk type %llu in sys_array at offset %u",
- type, cur_offset);
- ret = -EIO;
- break;
- }
+ /* Only system chunks are allowed in system chunk array. */
+ ASSERT(btrfs_chunk_type(sb, chunk) & BTRFS_BLOCK_GROUP_SYSTEM);
len = btrfs_chunk_item_size(num_stripes);
- if (cur_offset + len > array_size)
- goto out_short_read;
+ ASSERT(cur_offset + len <= array_size);
ret = read_one_chunk(&key, sb, chunk);
if (ret)
@@ -7362,13 +7337,6 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
clear_extent_buffer_uptodate(sb);
free_extent_buffer_stale(sb);
return ret;
-
-out_short_read:
- btrfs_err(fs_info, "sys_array too short to read %u bytes at offset %u",
- len, cur_offset);
- clear_extent_buffer_uptodate(sb);
- free_extent_buffer_stale(sb);
- return -EIO;
}
/*
Currently btrfs_validate_super() only does a very basic check on the array chunk size (not too large than the available space, but not too small to contain no chunk). The more comprehensive checks (the regular chunk checks and size check inside the system chunk array) is all done inside btrfs_read_sys_array(). It's not a big deal, but for the sake of concentrated verification, we should validate the system chunk array at the time of super block validation. So this patch does the following modification: - Introduce a helper btrfs_check_system_chunk_array() * Validate the disk key * Validate the size before we access the full chunk/stripe items. * Do the full chunk item validation - Call btrfs_check_system_chunk_array() at btrfs_validate_super() - Simplify the checks inside btrfs_read_sys_array() Now the checks will be converted to an ASSERT(). - Simplify the checks inside read_one_chunk() Now all chunk items inside system chunk array and chunk tree is verified, there is no need to verify it again inside read_one_chunk(). This change has the following advantages: - More comprehensive checks at write time Although this also means extra memcpy() for the superblocks at write time, due to the limits that we need a dummy extent buffer to utilize all the extent buffer helpers. - Slightly improved readablity when iterating the system chunk array Signed-off-by: Qu Wenruo <wqu@suse.com> --- fs/btrfs/disk-io.c | 21 +-------- fs/btrfs/disk-io.h | 2 +- fs/btrfs/tree-checker.c | 99 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/tree-checker.h | 3 ++ fs/btrfs/volumes.c | 64 +++++++------------------- 5 files changed, 121 insertions(+), 68 deletions(-)