@@ -3563,6 +3563,77 @@ static int open_ctree_features_init(struct btrfs_fs_info *fs_info)
return 0;
}
+static int open_ctree_chunk_tree_init(struct btrfs_fs_info *fs_info)
+{
+ u64 generation;
+ int level;
+ int ret;
+
+ mutex_lock(&fs_info->chunk_mutex);
+ ret = btrfs_read_sys_array(fs_info);
+ mutex_unlock(&fs_info->chunk_mutex);
+ if (ret) {
+ btrfs_err(fs_info, "failed to read the system array: %d", ret);
+ goto free_mapping;
+ }
+
+ generation = btrfs_super_chunk_root_generation(fs_info->super_copy);
+ level = btrfs_super_chunk_root_level(fs_info->super_copy);
+ ret = load_super_root(fs_info->chunk_root,
+ btrfs_super_chunk_root(fs_info->super_copy),
+ generation, level);
+ if (ret) {
+ btrfs_err(fs_info, "failed to read chunk root");
+ goto free_root;
+ }
+
+ read_extent_buffer(fs_info->chunk_root->node, fs_info->chunk_tree_uuid,
+ offsetof(struct btrfs_header, chunk_tree_uuid),
+ BTRFS_UUID_SIZE);
+
+ ret = btrfs_read_chunk_tree(fs_info);
+ if (ret) {
+ btrfs_err(fs_info, "failed to read chunk tree: %d", ret);
+ goto free_root;
+ }
+
+ /*
+ * At this point we know all the devices that make this filesystem,
+ * including the seed devices but we don't know yet if the replace
+ * target is required. So free devices that are not part of this
+ * filesystem but skip the replace target device which is checked
+ * below in btrfs_init_dev_replace().
+ */
+ btrfs_free_extra_devids(fs_info->fs_devices);
+ if (!fs_info->fs_devices->latest_dev->bdev) {
+ btrfs_err(fs_info, "failed to read devices");
+ goto free_root;
+ }
+
+ /* We have full chunk tree loaded, can do degradable check now. */
+ if (!sb_rdonly(fs_info->sb) && fs_info->fs_devices->missing_devices &&
+ !btrfs_check_rw_degradable(fs_info, NULL)) {
+ btrfs_warn(fs_info,
+ "writable mount is not allowed due to too many missing devices");
+ ret = -EIO;
+ goto free_root;
+ }
+
+ return 0;
+
+free_root:
+ free_root_extent_buffers(fs_info->chunk_root);
+free_mapping:
+ btrfs_mapping_tree_free(&fs_info->mapping_tree);
+ return ret;
+}
+
+static void open_ctree_chunk_tree_exit(struct btrfs_fs_info *fs_info)
+{
+ free_root_extent_buffers(fs_info->chunk_root);
+ btrfs_mapping_tree_free(&fs_info->mapping_tree);
+}
+
struct init_sequence {
int (*init_func)(struct btrfs_fs_info *fs_info);
void (*exit_func)(struct btrfs_fs_info *fs_info);
@@ -3581,18 +3652,19 @@ static const struct init_sequence open_ctree_seq[] = {
}, {
.init_func = btrfs_init_workqueues,
.exit_func = btrfs_stop_all_workers,
+ }, {
+ .init_func = open_ctree_chunk_tree_init,
+ .exit_func = open_ctree_chunk_tree_exit,
}
};
int __cold open_ctree(struct super_block *sb, char *options)
{
- u64 generation;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
bool open_ctree_res[ARRAY_SIZE(open_ctree_seq)] = {0};
int ret;
int err = -EINVAL;
- int level;
int i;
fs_info->sb = sb;
@@ -3608,47 +3680,6 @@ int __cold open_ctree(struct super_block *sb, char *options)
open_ctree_res[i] = true;
}
- mutex_lock(&fs_info->chunk_mutex);
- ret = btrfs_read_sys_array(fs_info);
- mutex_unlock(&fs_info->chunk_mutex);
- if (ret) {
- btrfs_err(fs_info, "failed to read the system array: %d", ret);
- goto fail_sb_buffer;
- }
-
- generation = btrfs_super_chunk_root_generation(fs_info->super_copy);
- level = btrfs_super_chunk_root_level(fs_info->super_copy);
- ret = load_super_root(fs_info->chunk_root,
- btrfs_super_chunk_root(fs_info->super_copy),
- generation, level);
- if (ret) {
- btrfs_err(fs_info, "failed to read chunk root");
- goto fail_tree_roots;
- }
-
- read_extent_buffer(fs_info->chunk_root->node, fs_info->chunk_tree_uuid,
- offsetof(struct btrfs_header, chunk_tree_uuid),
- BTRFS_UUID_SIZE);
-
- ret = btrfs_read_chunk_tree(fs_info);
- if (ret) {
- btrfs_err(fs_info, "failed to read chunk tree: %d", ret);
- goto fail_tree_roots;
- }
-
- /*
- * At this point we know all the devices that make this filesystem,
- * including the seed devices but we don't know yet if the replace
- * target is required. So free devices that are not part of this
- * filesystem but skip the replace target device which is checked
- * below in btrfs_init_dev_replace().
- */
- btrfs_free_extra_devids(fs_devices);
- if (!fs_devices->latest_dev->bdev) {
- btrfs_err(fs_info, "failed to read devices");
- goto fail_tree_roots;
- }
-
ret = init_tree_roots(fs_info);
if (ret)
goto fail_tree_roots;
@@ -3738,13 +3769,6 @@ int __cold open_ctree(struct super_block *sb, char *options)
btrfs_free_zone_cache(fs_info);
- if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices &&
- !btrfs_check_rw_degradable(fs_info, NULL)) {
- btrfs_warn(fs_info,
- "writable mount is not allowed due to too many missing devices");
- goto fail_sysfs;
- }
-
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, fs_info,
"btrfs-cleaner");
if (IS_ERR(fs_info->cleaner_kthread))
@@ -3850,10 +3874,7 @@ int __cold open_ctree(struct super_block *sb, char *options)
if (fs_info->data_reloc_root)
btrfs_drop_and_free_fs_root(fs_info, fs_info->data_reloc_root);
free_root_pointers(fs_info, true);
-
-fail_sb_buffer:
btrfs_free_block_groups(fs_info);
- btrfs_mapping_tree_free(&fs_info->mapping_tree);
fail:
for (i = ARRAY_SIZE(open_ctree_seq) - 1; i >= 0; i--) {
if (!open_ctree_res[i] || !open_ctree_seq[i].exit_func)
Involved functional changes: - Properly free the chunk map and chunk root ebs at error handling Previously we rely the final close_ctree() to properly free the chunk root extent buffers. With the more strict open_ctree_seq[] requirement, since we're the first one to fully populate chunk root extent buffers, at error we should also free the extent buffers. Note, the tree root and chunk root themselves are first allocated by open_ctree_btree_inode_init(), thus we should not free the chunk_root pointer, but just the extent buffers. - Do degradable check immediately after loading chunk tree The degradable check only requires the full chunk mapping, can be done immediately after btrfs_read_chunk_tree(). This also exposed one exiting label mismatch, at chunk tree read, we didn't create block group items at all, but at the old fail_sb_buffer: label we call btrfs_free_block_groups(). It doesn't hurt but just shows how bad the original code labels are managed. Signed-off-by: Qu Wenruo <wqu@suse.com> --- fs/btrfs/disk-io.c | 127 ++++++++++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 53 deletions(-)