diff mbox

[3/9] btrfs-progs: mkfs/rootdir: Use over-reserve method to make size estimate easier

Message ID 20171129091604.2194-4-wqu@suse.com (mailing list archive)
State New, archived
Headers show

Commit Message

Qu Wenruo Nov. 29, 2017, 9:15 a.m. UTC
Use an easier method to calculate the estimated device size for
mkfs.btrfs --rootdir.

The new method will over-estimate, but should ensure we won't encounter
ENOSPC.

It relies on the following data to estimate:
1) number of inodes
   for metadata chunk size
2) rounded up data size of each regular inode
   for data chunk size.

Total meta chunk size = round_up(nr_inode * (PATH_MAX * 3 + sectorsize),
min_chunk_size) * profile_multiplier

PATH_MAX is the maximum size possible for INODE_REF/DIR_INDEX/DIR_ITEM.
Sectorsize is the maximum size possible for inline extent.
min_chunk_size is 8M for SINGLE and 32M for DUP, taken from
btrfs_alloc_chunk().
profile_multiplier is 1 for Single, 2 for DUP.

Total data chunk size is much easier.
Total data chunk size = round_up(total_data_usage, min_chunk_size) *
profile_multiplier

Total_data_usage is the sum of the *rounded up* sizes of all regular
inodes.
min_chunk_size is 8M for SINGLE and 64M for DUP, taken from
btrfs_alloc_chunk().
The profile_multiplier is the same as for metadata.

This calculation is, of course, an over-estimate.
But since we will later shrink the fs to its real usage, it doesn't
matter much now.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 mkfs/main.c    | 109 ++++++++++++++++++++++++++--------------------------
 mkfs/rootdir.c | 119 +++++++++++++++++++++++++++++++++++++++------------------
 mkfs/rootdir.h |   5 +--
 3 files changed, 139 insertions(+), 94 deletions(-)
diff mbox

Patch

diff --git a/mkfs/main.c b/mkfs/main.c
index 716395c4b6b4..eb49182ebe81 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -747,8 +747,6 @@  int main(int argc, char **argv)
 	int subvol_name_set = 0;
 	char *source_dir = NULL;
 	int source_dir_set = 0;
-	u64 num_of_meta_chunks = 0;
-	u64 size_of_data = 0;
 	u64 source_dir_size = 0;
 	u64 min_dev_size;
 	int dev_cnt = 0;
@@ -977,6 +975,34 @@  int main(int argc, char **argv)
 
 	min_dev_size = btrfs_min_dev_size(nodesize, mixed, metadata_profile,
 					  data_profile);
+	/*
+	 * Enlarge the destination file or create new one, using the
+	 * size calculated from source dir.
+	 *
+	 * This must be done before minimal device size check.
+	 */
+	if (source_dir_set) {
+		fd = open(file, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP |
+			  S_IWGRP | S_IROTH);
+		if (fd < 0) {
+			error("unable to open %s: %s", file, strerror(errno));
+			goto error;
+		}
+
+		source_dir_size = btrfs_mkfs_size_dir(source_dir, sectorsize,
+				min_dev_size, metadata_profile, data_profile);
+		if (block_count < source_dir_size)
+			block_count = source_dir_size;
+		ret = zero_output_file(fd, block_count);
+		if (ret) {
+			error("unable to zero the output file");
+			close(fd);
+			goto error;
+		}
+		/* our "device" is the new image file */
+		dev_block_count = block_count;
+		close(fd);
+	}
 	/* Check device/block_count after the nodesize is determined */
 	if (block_count && block_count < min_dev_size) {
 		error("size %llu is too small to make a usable filesystem",
@@ -1010,51 +1036,28 @@  int main(int argc, char **argv)
 
 	dev_cnt--;
 
-	if (!source_dir_set) {
-		/*
-		 * open without O_EXCL so that the problem should not
-		 * occur by the following processing.
-		 * (btrfs_register_one_device() fails if O_EXCL is on)
-		 */
-		fd = open(file, O_RDWR);
-		if (fd < 0) {
-			error("unable to open %s: %s", file, strerror(errno));
-			goto error;
-		}
-		ret = btrfs_prepare_device(fd, file, &dev_block_count,
-				block_count,
-				(zero_end ? PREP_DEVICE_ZERO_END : 0) |
-				(discard ? PREP_DEVICE_DISCARD : 0) |
-				(verbose ? PREP_DEVICE_VERBOSE : 0));
-		if (ret) {
-			goto error;
-		}
-		if (block_count && block_count > dev_block_count) {
-			error("%s is smaller than requested size, expected %llu, found %llu",
-					file,
-					(unsigned long long)block_count,
-					(unsigned long long)dev_block_count);
-			goto error;
-		}
-	} else {
-		fd = open(file, O_CREAT | O_RDWR,
-				S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH);
-		if (fd < 0) {
-			error("unable to open %s: %s", file, strerror(errno));
-			goto error;
-		}
-
-		source_dir_size = btrfs_mkfs_size_dir(source_dir, sectorsize,
-					&num_of_meta_chunks, &size_of_data);
-		if(block_count < source_dir_size)
-			block_count = source_dir_size;
-		ret = zero_output_file(fd, block_count);
-		if (ret) {
-			error("unable to zero the output file");
-			goto error;
-		}
-		/* our "device" is the new image file */
-		dev_block_count = block_count;
+	/*
+	 * open without O_EXCL so that the problem should not
+	 * occur by the following processing.
+	 * (btrfs_register_one_device() fails if O_EXCL is on)
+	 */
+	fd = open(file, O_RDWR);
+	if (fd < 0) {
+		error("unable to open %s: %s", file, strerror(errno));
+		goto error;
+	}
+	ret = btrfs_prepare_device(fd, file, &dev_block_count,
+			block_count,
+			(zero_end ? PREP_DEVICE_ZERO_END : 0) |
+			(discard ? PREP_DEVICE_DISCARD : 0) |
+			(verbose ? PREP_DEVICE_VERBOSE : 0));
+	if (ret)
+		goto error;
+	if (block_count && block_count > dev_block_count) {
+		error("%s is smaller than requested size, expected %llu, found %llu",
+		      file, (unsigned long long)block_count,
+		      (unsigned long long)dev_block_count);
+		goto error;
 	}
 
 	/* To create the first block group and chunk 0 in make_btrfs */
@@ -1180,13 +1183,11 @@  int main(int argc, char **argv)
 	}
 
 raid_groups:
-	if (!source_dir_set) {
-		ret = create_raid_groups(trans, root, data_profile,
-				 metadata_profile, mixed, &allocation);
-		if (ret) {
-			error("unable to create raid groups: %d", ret);
-			goto out;
-		}
+	ret = create_raid_groups(trans, root, data_profile,
+			 metadata_profile, mixed, &allocation);
+	if (ret) {
+		error("unable to create raid groups: %d", ret);
+		goto out;
 	}
 
 	ret = create_tree(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
diff --git a/mkfs/rootdir.c b/mkfs/rootdir.c
index 0eb3f8f26139..f8cff44d06eb 100644
--- a/mkfs/rootdir.c
+++ b/mkfs/rootdir.c
@@ -33,19 +33,29 @@ 
 #include "transaction.h"
 #include "utils.h"
 #include "mkfs/rootdir.h"
+#include "mkfs/common.h"
 #include "send-utils.h"
 
-/*
- * This ignores symlinks with unreadable targets and subdirs that can't
- * be read.  It's a best-effort to give a rough estimate of the size of
- * a subdir.  It doesn't guarantee that prepopulating btrfs from this
- * tree won't still run out of space.
- */
-static u64 global_total_size;
-static u64 fs_block_size;
+static u32 fs_block_size;
 
 static u64 index_cnt = 2;
 
+/*
+ * Size estimate will be done using the following data:
+ * 1) Number of inodes
+ *    Since we will later shrink the fs, over-estimate is completely fine here
+ *    as long as our estimate ensures we can populate the image without ENOSPC.
+ *    So we only record how many inodes there are, and use the maximum space
+ *    usage for every inode.
+ *
+ * 2) Data space each (regular) inode uses
+ *    To estimate data chunk size.
+ *    Don't care if it can fit as inline extent.
+ *    Always round them up to sectorsize.
+ */
+static u64 ftw_meta_nr_inode;
+static u64 ftw_data_size;
+
 static int add_directory_items(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root, u64 objectid,
 			       ino_t parent_inum, const char *name,
@@ -685,53 +695,88 @@  out:
 static int ftw_add_entry_size(const char *fpath, const struct stat *st,
 			      int type)
 {
-	if (type == FTW_F || type == FTW_D)
-		global_total_size += round_up(st->st_size, fs_block_size);
+	/*
+	 * Failed to read dir, mostly due to EPERM.
+	 * Abort ASAP, so we don't need to populate the fs
+	 */
+	if (type == FTW_DNR || type == FTW_NS)
+		return -EPERM;
+
+	if (S_ISREG(st->st_mode))
+		ftw_data_size += round_up(st->st_size, fs_block_size);
+	ftw_meta_nr_inode++;
 
 	return 0;
 }
 
-u64 btrfs_mkfs_size_dir(const char *dir_name, u64 sectorsize,
-			u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret)
+u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
+			u64 meta_profile, u64 data_profile)
 {
-	u64 dir_size = 0;
 	u64 total_size = 0;
 	int ret;
-	u64 default_chunk_size = SZ_8M;
-	u64 allocated_meta_size = SZ_8M;
-	u64 allocated_total_size = 20 * SZ_1M;	/* 20MB */
-	u64 num_of_meta_chunks = 0;
-	u64 num_of_data_chunks = 0;
-	u64 num_of_allocated_meta_chunks =
-			allocated_meta_size / default_chunk_size;
-
-	global_total_size = 0;
+
+	u64 meta_size = 0;	/* Based on @ftw_meta_nr_inode */
+	u64 meta_chunk_size = 0;/* Based on @meta_size */
+	u64 data_chunk_size = 0;/* Based on @ftw_data_size */
+
+	u64 meta_threshold = SZ_8M;
+	u64 data_threshold = SZ_8M;
+
+	float data_multipler = 1;
+	float meta_multipler = 1;
+
 	fs_block_size = sectorsize;
+	ftw_data_size = 0;
+	ftw_meta_nr_inode = 0;
 	ret = ftw(dir_name, ftw_add_entry_size, 10);
-	dir_size = global_total_size;
 	if (ret < 0) {
 		error("ftw subdir walk of %s failed: %s", dir_name,
 			strerror(errno));
 		exit(1);
 	}
 
-	num_of_data_chunks = (dir_size + default_chunk_size - 1) /
-		default_chunk_size;
 
-	num_of_meta_chunks = (dir_size / 2) / default_chunk_size;
-	if (((dir_size / 2) % default_chunk_size) != 0)
-		num_of_meta_chunks++;
-	if (num_of_meta_chunks <= num_of_allocated_meta_chunks)
-		num_of_meta_chunks = 0;
-	else
-		num_of_meta_chunks -= num_of_allocated_meta_chunks;
+	/*
+	 * Maximum metadata usage for every inode, which will be PATH_MAX
+	 * for the following items:
+	 * 1) DIR_ITEM
+	 * 2) DIR_INDEX
+	 * 3) INODE_REF
+	 *
+	 * Plus possible inline extent size, which is sectorsize.
+	 *
+	 * And finally, allow metadata usage to increase with data size.
+	 * Follow the old kernel 8:1 data:meta ratio.
+	 * This is especially important for --rootdir, as the file extent size
+	 * upper limit is 1M, instead of 128M in kernel.
+	 * This can bump meta usage easily.
+	 */
+	meta_size = ftw_meta_nr_inode * (PATH_MAX * 3 + sectorsize) +
+		    ftw_data_size / 8;
 
-	total_size = allocated_total_size +
-		     (num_of_data_chunks * default_chunk_size) +
-		     (num_of_meta_chunks * default_chunk_size);
+	/* Minimal chunk size from btrfs_alloc_chunk(). */
+	if (meta_profile & BTRFS_BLOCK_GROUP_DUP) {
+		meta_threshold = SZ_32M;
+		meta_multipler = 2;
+	}
+	if (data_profile & BTRFS_BLOCK_GROUP_DUP) {
+		data_threshold = SZ_64M;
+		data_multipler = 2;
+	}
 
-	*num_of_meta_chunks_ret = num_of_meta_chunks;
-	*size_of_data_ret = num_of_data_chunks * default_chunk_size;
+	/*
+	 * Only when the usage is larger than the minimal chunk size (threshold)
+	 * we need to allocate new chunk, or the initial chunk in the image is
+	 * large enough.
+	 */
+	if (meta_size > meta_threshold)
+		meta_chunk_size = (round_up(meta_size, meta_threshold) -
+				   meta_threshold) * meta_multipler;
+	if (ftw_data_size > data_threshold)
+		data_chunk_size = (round_up(ftw_data_size, data_threshold) -
+				   data_threshold) * data_multipler;
+
+	total_size = data_chunk_size + meta_chunk_size + min_dev_size;
 	return total_size;
 }
 
diff --git a/mkfs/rootdir.h b/mkfs/rootdir.h
index c4b120c5d1e4..ada50ccb6ac4 100644
--- a/mkfs/rootdir.h
+++ b/mkfs/rootdir.h
@@ -30,7 +30,6 @@  struct directory_name_entry {
 
 int btrfs_mkfs_fill_dir(const char *source_dir, struct btrfs_root *root,
 			bool verbose);
-u64 btrfs_mkfs_size_dir(const char *dir_name, u64 sectorsize,
-			u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret);
-
+u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
+			u64 meta_profile, u64 data_profile);
 #endif