diff mbox

[7/7] btrfs-progs: mkfs: Cleanup temporary chunk to avoid strange balance behavior.

Message ID 1436256928-23812-8-git-send-email-quwenruo@cn.fujitsu.com (mailing list archive)
State Accepted
Headers show

Commit Message

Qu Wenruo July 7, 2015, 8:15 a.m. UTC
[BUG]
 # mkfs.btrfs /dev/sdb /dev/sdd -m raid0 -d raid0
 # mount /dev/sdb /mnt/btrfs
 # btrfs balance start /mnt/btrfs
 # btrfs fi df /mnt/btrfs
 Data, single: total=1.00GiB, used=320.00KiB
 System, single: total=32.00MiB, used=16.00KiB
 Metadata, RAID0: total=256.00MiB, used=112.00KiB
 GlobalReserve, single: total=16.00MiB, used=0.00B

Only metadata stay RAID0. Data and system goes from RAID0 to single.

[REASON]
The problem is caused by the temporary single chunk.
In mkfs, it will always create single data/metadata/sys chunk and them
add device into the temporary btrfs.

When doing all chunk balance, for data and syschunk, they are almost
empty, so balance will move them into the single chunk and remove the
old RAID0 chunk.
For metadata, it has more data and will kick the metadata chunk pre
alloc, so new RAID0 chunk is allocated and the old metadata is move
there. Old RAID0 and single chunks are removed.

[FIX]
Now we add a new function to cleanup the temporary chunks at the end of
mkfs routine.
It will cleanup the chunks which is empty and its profile differs from
the mkfs profile.
So in balance, btrfs will always alloc a new chunk to keep the profile,
other than moving data into the single chunk.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
 mkfs.c | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 150 insertions(+)

Comments

David Sterba July 14, 2015, 5:33 p.m. UTC | #1
On Tue, Jul 07, 2015 at 04:15:28PM +0800, Qu Wenruo wrote:
[...]
> 
> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>

Applied, thanks a lot. I've tested several data/metadata combinations
and the resulting 'fi df' looks ok.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/mkfs.c b/mkfs.c
index b60fc5a..ee8a3cb 100644
--- a/mkfs.c
+++ b/mkfs.c
@@ -1182,6 +1182,149 @@  static void list_all_devices(struct btrfs_root *root)
 	printf("\n");
 }
 
+static int is_temp_block_group(struct extent_buffer *node,
+			       struct btrfs_block_group_item *bgi,
+			       u64 data_profile, u64 meta_profile,
+			       u64 sys_profile)
+{
+	u64 flag = btrfs_disk_block_group_flags(node, bgi);
+	u64 flag_type = flag & BTRFS_BLOCK_GROUP_TYPE_MASK;
+	u64 flag_profile = flag & BTRFS_BLOCK_GROUP_PROFILE_MASK;
+	u64 used = btrfs_disk_block_group_used(node, bgi);
+
+	/*
+	 * Chunks meets all the following conditions is a temp chunk
+	 * 1) Empty chunk
+	 * Temp chunk is always empty.
+	 *
+	 * 2) profile dismatch with mkfs profile.
+	 * Temp chunk is always in SINGLE
+	 *
+	 * 3) Size differs with mkfs_alloc
+	 * Special case for SINGLE/SINGLE btrfs.
+	 * In that case, temp data chunk and real data chunk are always empty.
+	 * So we need to use mkfs_alloc to be sure which chunk is the newly
+	 * allocated.
+	 *
+	 * Normally, new chunk size is equal to mkfs one (One chunk)
+	 * If it has multiple chunks, we just refuse to delete any one.
+	 * As they are all single, so no real problem will happen.
+	 * So only use condition 1) and 2) to judge them.
+	 */
+	if (used != 0)
+		return 0;
+	switch (flag_type) {
+	case BTRFS_BLOCK_GROUP_DATA:
+	case BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA:
+		data_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK;
+		if (flag_profile != data_profile)
+			return 1;
+		break;
+	case BTRFS_BLOCK_GROUP_METADATA:
+		meta_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK;
+		if (flag_profile != meta_profile)
+			return 1;
+		break;
+	case BTRFS_BLOCK_GROUP_SYSTEM:
+		sys_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK;
+		if (flag_profile != sys_profile)
+			return 1;
+		break;
+	}
+	return 0;
+}
+
+/* Note: if current is a block group, it will skip it anyway */
+static int next_block_group(struct btrfs_root *root,
+			    struct btrfs_path *path)
+{
+	struct btrfs_key key;
+	int ret = 0;
+
+	while (1) {
+		ret = btrfs_next_item(root, path);
+		if (ret)
+			goto out;
+
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+		if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
+			goto out;
+	}
+out:
+	return ret;
+}
+
+/* This function will cleanup  */
+static int cleanup_temp_chunks(struct btrfs_fs_info *fs_info,
+			       struct mkfs_allocation *alloc,
+			       u64 data_profile, u64 meta_profile,
+			       u64 sys_profile)
+{
+	struct btrfs_trans_handle *trans = NULL;
+	struct btrfs_block_group_item *bgi;
+	struct btrfs_root *root = fs_info->extent_root;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct btrfs_path *path;
+	int ret = 0;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	trans = btrfs_start_transaction(root, 1);
+
+	key.objectid = 0;
+	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+	key.offset = 0;
+
+	while (1) {
+		/*
+		 * as the rest of the loop may modify the tree, we need to
+		 * start a new search each time.
+		 */
+		ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
+		if (ret < 0)
+			goto out;
+
+		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+				      path->slots[0]);
+		if (found_key.objectid < key.objectid)
+			goto out;
+		if (found_key.type != BTRFS_BLOCK_GROUP_ITEM_KEY) {
+			ret = next_block_group(root, path);
+			if (ret < 0)
+				goto out;
+			if (ret > 0) {
+				ret = 0;
+				goto out;
+			}
+			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+					      path->slots[0]);
+		}
+
+		bgi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				     struct btrfs_block_group_item);
+		if (is_temp_block_group(path->nodes[0], bgi,
+					data_profile, meta_profile,
+					sys_profile)) {
+			ret = btrfs_free_block_group(trans, fs_info,
+					found_key.objectid, found_key.offset);
+			if (ret < 0)
+				goto out;
+		}
+		btrfs_release_path(path);
+		key.objectid = found_key.objectid + found_key.offset;
+	}
+out:
+	if (trans)
+		btrfs_commit_transaction(trans, root);
+	btrfs_free_path(path);
+	return ret;
+}
+
 int main(int ac, char **av)
 {
 	char *file;
@@ -1669,6 +1812,12 @@  skip_multidev:
 		ret = make_image(source_dir, root, fd);
 		BUG_ON(ret);
 	}
+	ret = cleanup_temp_chunks(root->fs_info, &allocation, data_profile,
+				  metadata_profile, metadata_profile);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to cleanup temporary chunks\n");
+		goto out;
+	}
 
 	if (verbose) {
 		char features_buf[64];
@@ -1703,6 +1852,7 @@  skip_multidev:
 		list_all_devices(root);
 	}
 
+out:
 	ret = close_ctree(root);
 	BUG_ON(ret);
 	free(label);