@@ -747,8 +747,6 @@ int main(int argc, char **argv)
int subvol_name_set = 0;
char *source_dir = NULL;
int source_dir_set = 0;
- u64 num_of_meta_chunks = 0;
- u64 size_of_data = 0;
u64 source_dir_size = 0;
u64 min_dev_size;
int dev_cnt = 0;
@@ -977,6 +975,34 @@ int main(int argc, char **argv)
min_dev_size = btrfs_min_dev_size(nodesize, mixed, metadata_profile,
data_profile);
+ /*
+ * Enlarge the destination file or create new one, using the
+ * size calculated from source dir.
+ *
+ * This must be done before minimal device size check.
+ */
+ if (source_dir_set) {
+ fd = open(file, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP |
+ S_IWGRP | S_IROTH);
+ if (fd < 0) {
+ error("unable to open %s: %s", file, strerror(errno));
+ goto error;
+ }
+
+ source_dir_size = btrfs_mkfs_size_dir(source_dir, sectorsize,
+ min_dev_size, metadata_profile, data_profile);
+ if (block_count < source_dir_size)
+ block_count = source_dir_size;
+ ret = zero_output_file(fd, block_count);
+ if (ret) {
+ error("unable to zero the output file");
+ close(fd);
+ goto error;
+ }
+ /* our "device" is the new image file */
+ dev_block_count = block_count;
+ close(fd);
+ }
/* Check device/block_count after the nodesize is determined */
if (block_count && block_count < min_dev_size) {
error("size %llu is too small to make a usable filesystem",
@@ -1010,51 +1036,28 @@ int main(int argc, char **argv)
dev_cnt--;
- if (!source_dir_set) {
- /*
- * open without O_EXCL so that the problem should not
- * occur by the following processing.
- * (btrfs_register_one_device() fails if O_EXCL is on)
- */
- fd = open(file, O_RDWR);
- if (fd < 0) {
- error("unable to open %s: %s", file, strerror(errno));
- goto error;
- }
- ret = btrfs_prepare_device(fd, file, &dev_block_count,
- block_count,
- (zero_end ? PREP_DEVICE_ZERO_END : 0) |
- (discard ? PREP_DEVICE_DISCARD : 0) |
- (verbose ? PREP_DEVICE_VERBOSE : 0));
- if (ret) {
- goto error;
- }
- if (block_count && block_count > dev_block_count) {
- error("%s is smaller than requested size, expected %llu, found %llu",
- file,
- (unsigned long long)block_count,
- (unsigned long long)dev_block_count);
- goto error;
- }
- } else {
- fd = open(file, O_CREAT | O_RDWR,
- S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH);
- if (fd < 0) {
- error("unable to open %s: %s", file, strerror(errno));
- goto error;
- }
-
- source_dir_size = btrfs_mkfs_size_dir(source_dir, sectorsize,
- &num_of_meta_chunks, &size_of_data);
- if(block_count < source_dir_size)
- block_count = source_dir_size;
- ret = zero_output_file(fd, block_count);
- if (ret) {
- error("unable to zero the output file");
- goto error;
- }
- /* our "device" is the new image file */
- dev_block_count = block_count;
+ /*
+ * open without O_EXCL so that the problem should not
+ * occur by the following processing.
+ * (btrfs_register_one_device() fails if O_EXCL is on)
+ */
+ fd = open(file, O_RDWR);
+ if (fd < 0) {
+ error("unable to open %s: %s", file, strerror(errno));
+ goto error;
+ }
+ ret = btrfs_prepare_device(fd, file, &dev_block_count,
+ block_count,
+ (zero_end ? PREP_DEVICE_ZERO_END : 0) |
+ (discard ? PREP_DEVICE_DISCARD : 0) |
+ (verbose ? PREP_DEVICE_VERBOSE : 0));
+ if (ret)
+ goto error;
+ if (block_count && block_count > dev_block_count) {
+ error("%s is smaller than requested size, expected %llu, found %llu",
+ file, (unsigned long long)block_count,
+ (unsigned long long)dev_block_count);
+ goto error;
}
/* To create the first block group and chunk 0 in make_btrfs */
@@ -1180,13 +1183,11 @@ int main(int argc, char **argv)
}
raid_groups:
- if (!source_dir_set) {
- ret = create_raid_groups(trans, root, data_profile,
- metadata_profile, mixed, &allocation);
- if (ret) {
- error("unable to create raid groups: %d", ret);
- goto out;
- }
+ ret = create_raid_groups(trans, root, data_profile,
+ metadata_profile, mixed, &allocation);
+ if (ret) {
+ error("unable to create raid groups: %d", ret);
+ goto out;
}
ret = create_tree(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
@@ -33,19 +33,29 @@
#include "transaction.h"
#include "utils.h"
#include "mkfs/rootdir.h"
+#include "mkfs/common.h"
#include "send-utils.h"
-/*
- * This ignores symlinks with unreadable targets and subdirs that can't
- * be read. It's a best-effort to give a rough estimate of the size of
- * a subdir. It doesn't guarantee that prepopulating btrfs from this
- * tree won't still run out of space.
- */
-static u64 global_total_size;
-static u64 fs_block_size;
+static u32 fs_block_size;
static u64 index_cnt = 2;
+/*
+ * Size estimate will be done using the following data:
+ * 1) Number of inodes
+ * Since we will later shrink the fs, over-estimate is completely fine here
+ * as long as our estimate ensure we can populate the image without ENOSPC.
+ * So we only records how many inodes there is, and use the maximum space
+ * usage for every inode.
+ *
+ * 2) Data space each (regular) inode uses
+ * To estimate data chunk size.
+ * Don't care if it can fit as inline extent.
+ * Always round them up to sectorsize.
+ */
+static u64 ftw_meta_nr_inode;
+static u64 ftw_data_size;
+
static int add_directory_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 objectid,
ino_t parent_inum, const char *name,
@@ -685,53 +695,88 @@ out:
static int ftw_add_entry_size(const char *fpath, const struct stat *st,
int type)
{
- if (type == FTW_F || type == FTW_D)
- global_total_size += round_up(st->st_size, fs_block_size);
+ /*
+ * Failed to read dir, mostly due to EPERM.
+ * Abort ASAP, so we don't need to populate the fs
+ */
+ if (type == FTW_DNR || type == FTW_NS)
+ return -EPERM;
+
+ if (S_ISREG(st->st_mode))
+ ftw_data_size += round_up(st->st_size, fs_block_size);
+ ftw_meta_nr_inode++;
return 0;
}
-u64 btrfs_mkfs_size_dir(const char *dir_name, u64 sectorsize,
- u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret)
+u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
+ u64 meta_profile, u64 data_profile)
{
- u64 dir_size = 0;
u64 total_size = 0;
int ret;
- u64 default_chunk_size = SZ_8M;
- u64 allocated_meta_size = SZ_8M;
- u64 allocated_total_size = 20 * SZ_1M; /* 20MB */
- u64 num_of_meta_chunks = 0;
- u64 num_of_data_chunks = 0;
- u64 num_of_allocated_meta_chunks =
- allocated_meta_size / default_chunk_size;
-
- global_total_size = 0;
+
+ u64 meta_size = 0; /* Based on @ftw_meta_nr_inode */
+ u64 meta_chunk_size = 0;/* Based on @meta_size */
+ u64 data_chunk_size = 0;/* Based on @ftw_data_size */
+
+ u64 meta_threshold = SZ_8M;
+ u64 data_threshold = SZ_8M;
+
+ float data_multipler = 1;
+ float meta_multipler = 1;
+
fs_block_size = sectorsize;
+ ftw_data_size = 0;
+ ftw_meta_nr_inode = 0;
ret = ftw(dir_name, ftw_add_entry_size, 10);
- dir_size = global_total_size;
if (ret < 0) {
error("ftw subdir walk of %s failed: %s", dir_name,
strerror(errno));
exit(1);
}
- num_of_data_chunks = (dir_size + default_chunk_size - 1) /
- default_chunk_size;
- num_of_meta_chunks = (dir_size / 2) / default_chunk_size;
- if (((dir_size / 2) % default_chunk_size) != 0)
- num_of_meta_chunks++;
- if (num_of_meta_chunks <= num_of_allocated_meta_chunks)
- num_of_meta_chunks = 0;
- else
- num_of_meta_chunks -= num_of_allocated_meta_chunks;
+ /*
+ * Maximum metadata useage for every inode, which will be PATH_MAX
+ * for the following items:
+ * 1) DIR_ITEM
+ * 2) DIR_INDEX
+ * 3) INODE_REF
+ *
+ * Plus possible inline extent size, which is sectorsize.
+ *
+ * And finally, allow metadata usage to increase with data size.
+ * Follow the old kernel 8:1 data:meta ratio.
+ * This is especially important for --rootdir, as the file extent size
+ * up limit is 1M, instead of 128M in kernel.
+ * This can bump meta usage easily.
+ */
+ meta_size = ftw_meta_nr_inode * (PATH_MAX * 3 + sectorsize) +
+ ftw_data_size / 8;
- total_size = allocated_total_size +
- (num_of_data_chunks * default_chunk_size) +
- (num_of_meta_chunks * default_chunk_size);
+ /* Minimal chunk size from btrfs_alloc_chunk(). */
+ if (meta_profile & BTRFS_BLOCK_GROUP_DUP) {
+ meta_threshold = SZ_32M;
+ meta_multipler = 2;
+ }
+ if (data_profile & BTRFS_BLOCK_GROUP_DUP) {
+ data_threshold = SZ_64M;
+ data_multipler = 2;
+ }
- *num_of_meta_chunks_ret = num_of_meta_chunks;
- *size_of_data_ret = num_of_data_chunks * default_chunk_size;
+ /*
+ * Only when the usage is larger than the minimal chunk size (threshold)
+ * we need to allocate new chunk, or the initial chunk in the image is
+ * large enough.
+ */
+ if (meta_size > meta_threshold)
+ meta_chunk_size = (round_up(meta_size, meta_threshold) -
+ meta_threshold) * meta_multipler;
+ if (ftw_data_size > data_threshold)
+ data_chunk_size = (round_up(ftw_data_size, data_threshold) -
+ data_threshold) * data_multipler;
+
+ total_size = data_chunk_size + meta_chunk_size + min_dev_size;
return total_size;
}
@@ -30,7 +30,6 @@ struct directory_name_entry {
int btrfs_mkfs_fill_dir(const char *source_dir, struct btrfs_root *root,
bool verbose);
-u64 btrfs_mkfs_size_dir(const char *dir_name, u64 sectorsize,
- u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret);
-
+u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
+ u64 meta_profile, u64 data_profile);
#endif
Use an easier method to calculate the estimate device for mkfs.btrfs --rootdir. The new method will over-estimate, but should ensure we won't encounter ENOSPC. It relies on the following data to estimate: 1) number of inodes for metadata chunk size 2) rounded up data size of each regular inode for data chunk size. Total meta chunk size = round_up(nr_inode * (PATH_MAX * 3 + sectorsize), min_chunk_size) * profile_multiplier PATH_MAX is the maximum size possible for INODE_REF/DIR_INDEX/DIR_ITEM. Sectorsize is the maximum size possible for inline extent. min_chunk_size is 8M for SINGLE, and 32M for DUP, get from btrfs_alloc_chunk(). profile_multiplier is 1 for Single, 2 for DUP. Total data chunk size is much easier. Total data chunk size = round_up(total_data_usage, min_chunk_size) * profile_multiplier Total_data_usage is the sum of *rounded up* size of each regular inode use. min_chunk_size is 8M for SINGLE, 64M for DUP, get from btrfS_alloc_chunk(). Same profile_multiplier for meta. This over-estimate calculate is, of course, over-estimate. But since we will later shrink the fs to its real usage, it doesn't matter much now. Signed-off-by: Qu Wenruo <wqu@suse.com> --- mkfs/main.c | 109 ++++++++++++++++++++++++++-------------------------- mkfs/rootdir.c | 119 +++++++++++++++++++++++++++++++++++++++------------------ mkfs/rootdir.h | 5 +-- 3 files changed, 139 insertions(+), 94 deletions(-)