@@ -145,20 +145,87 @@ static void verify_cur_segs(void)
c.cur_seg[i] = next_zone(i - 1);
}
+static u_int32_t get_meta_zones(u_int32_t main_zones)
+{
+ u_int32_t main_segments, meta_segments;
+ u_int32_t sit_segments, blocks_for_nat;
+ u_int32_t max_sit_bitmap_size, max_nat_bitmap_size;
+ u_int32_t nat_segments, ssa_segments;
+
+ main_segments = c.segs_per_zone * main_zones;
+
+ sit_segments = SEG_ALIGN(SIZE_ALIGN(main_segments,
+ SIT_ENTRY_PER_BLOCK) /* blocks_for_sit */);
+
+ blocks_for_nat = SIZE_ALIGN((u_int64_t)
+ main_segments * c.blks_per_seg,
+ NAT_ENTRY_PER_BLOCK);
+
+ max_sit_bitmap_size = min((u_int32_t)MAX_SIT_BITMAP_SIZE,
+ sit_segments * c.blks_per_seg / 8);
+
+ /*
+ * it's weird because for 1TB storage, payload is still not
+ * used and max_nat_bitmap_blks is only 21472, which means
+ * the total number of nodes is 21472 * 409 = 8782048
+ */
+ if (max_sit_bitmap_size > MAX_SIT_BITMAP_SIZE_IN_CKPT)
+ max_nat_bitmap_size = CHECKSUM_OFFSET -
+ sizeof(struct f2fs_checkpoint) + 1;
+ else
+ max_nat_bitmap_size =
+ CHECKSUM_OFFSET - sizeof(struct f2fs_checkpoint) + 1
+ - max_sit_bitmap_size;
+
+ nat_segments = min(SEG_ALIGN(blocks_for_nat),
+ (max_nat_bitmap_size * 8) / c.blks_per_seg);
+
+ /* each main segment has a ssa block */
+ ssa_segments = SEG_ALIGN(main_segments);
+
+ meta_segments = (get_sb(segment_count_ckpt) +
+ sit_segments * 2 + nat_segments * 2 +
+ ssa_segments);
+
+ return ZONE_ALIGN((u_int64_t)meta_segments * c.blks_per_seg);
+}
+
+static u_int32_t get_best_main_zones(void)
+{
+ u_int32_t total_zones = get_sb(segment_count) / (c.segs_per_zone);
+ u_int32_t left = total_zones - get_meta_zones(total_zones),
+ right = total_zones - /* 2(CP + SIT + NAT) + SSA */
+ ZONE_ALIGN(7);
+ u_int32_t candicate = 0;
+
+ while (left <= right) {
+ u_int32_t main_zones = left + (right - left) / 2;
+ u_int32_t meta_zones = get_meta_zones(main_zones);
+
+ if (meta_zones + main_zones == total_zones)
+ return main_zones;
+
+ if (meta_zones + main_zones < total_zones) {
+ left = main_zones + 1;
+ candicate = main_zones;
+ } else
+ right = main_zones - 1;
+ }
+ return candicate;
+}
+
static int f2fs_prepare_super_block(void)
{
u_int32_t blk_size_bytes;
u_int32_t log_sectorsize, log_sectors_per_block;
u_int32_t log_blocksize, log_blks_per_seg;
u_int32_t segment_size_bytes, zone_size_bytes;
- u_int32_t sit_segments;
- u_int32_t blocks_for_sit, blocks_for_nat, blocks_for_ssa;
- u_int32_t total_valid_blks_available;
+ u_int32_t blocks_for_sit, blocks_for_nat;
u_int64_t zone_align_start_offset, diff;
u_int64_t total_meta_zones, total_meta_segments;
u_int32_t sit_bitmap_size, max_sit_bitmap_size;
u_int32_t max_nat_bitmap_size, max_nat_segments;
- u_int32_t total_zones;
+ u_int32_t main_zones, main_segments;
u_int32_t next_ino;
enum quota_type qtype;
int i;
@@ -256,22 +323,19 @@ static int f2fs_prepare_super_block(void)
set_sb(sit_blkaddr, get_sb(segment0_blkaddr) +
get_sb(segment_count_ckpt) * c.blks_per_seg);
- blocks_for_sit = SIZE_ALIGN(get_sb(segment_count), SIT_ENTRY_PER_BLOCK);
+ /* try to do binary decision to get main_zones */
+ main_zones = get_best_main_zones();
+ main_segments = c.segs_per_zone * main_zones;
- sit_segments = SEG_ALIGN(blocks_for_sit);
-
- set_sb(segment_count_sit, sit_segments * 2);
+ blocks_for_sit = SIZE_ALIGN(main_segments, SIT_ENTRY_PER_BLOCK);
+ set_sb(segment_count_sit, SEG_ALIGN(blocks_for_sit) * 2);
set_sb(nat_blkaddr, get_sb(sit_blkaddr) + get_sb(segment_count_sit) *
c.blks_per_seg);
- total_valid_blks_available = (get_sb(segment_count) -
- (get_sb(segment_count_ckpt) +
- get_sb(segment_count_sit))) * c.blks_per_seg;
-
- blocks_for_nat = SIZE_ALIGN(total_valid_blks_available,
+ blocks_for_nat = SIZE_ALIGN((u_int64_t)
+ main_segments * c.blks_per_seg,
NAT_ENTRY_PER_BLOCK);
-
set_sb(segment_count_nat, SEG_ALIGN(blocks_for_nat));
/*
* The number of node segments should not be exceeded a "Threshold".
@@ -312,16 +376,7 @@ static int f2fs_prepare_super_block(void)
set_sb(ssa_blkaddr, get_sb(nat_blkaddr) + get_sb(segment_count_nat) *
c.blks_per_seg);
- total_valid_blks_available = (get_sb(segment_count) -
- (get_sb(segment_count_ckpt) +
- get_sb(segment_count_sit) +
- get_sb(segment_count_nat))) *
- c.blks_per_seg;
-
- blocks_for_ssa = total_valid_blks_available /
- c.blks_per_seg + 1;
-
- set_sb(segment_count_ssa, SEG_ALIGN(blocks_for_ssa));
+ set_sb(segment_count_ssa, SEG_ALIGN(main_segments));
total_meta_segments = get_sb(segment_count_ckpt) +
get_sb(segment_count_sit) +
@@ -354,10 +409,7 @@ static int f2fs_prepare_super_block(void)
}
}
- total_zones = get_sb(segment_count) / (c.segs_per_zone) -
- total_meta_zones;
-
- set_sb(section_count, total_zones * c.secs_per_zone);
+ set_sb(section_count, main_zones * c.secs_per_zone);
set_sb(segment_count_main, get_sb(section_count) * c.segs_per_sec);
@@ -373,6 +425,11 @@ static int f2fs_prepare_super_block(void)
return -1;
}
+ /* temporarily add this ASSERT for testing */
+ ASSERT(total_meta_zones == get_meta_zones(main_zones) &&
+ (total_meta_zones + main_zones) * c.segs_per_zone
+ <= get_sb(segment_count));
+
c.reserved_segments =
(2 * (100 / c.overprovision + 1) + 6)
* c.segs_per_sec;
@@ -404,15 +461,15 @@ static int f2fs_prepare_super_block(void)
qtype, next_ino - 1);
}
- if (total_zones <= 6) {
+ if (main_zones <= 6) {
MSG(1, "\tError: %d zones: Need more zones "
- "by shrinking zone size\n", total_zones);
+ "by shrinking zone size\n", main_zones);
return -1;
}
if (c.heap) {
c.cur_seg[CURSEG_HOT_NODE] =
- last_section(last_zone(total_zones));
+ last_section(last_zone(main_zones));
c.cur_seg[CURSEG_WARM_NODE] = prev_zone(CURSEG_HOT_NODE);
c.cur_seg[CURSEG_COLD_NODE] = prev_zone(CURSEG_WARM_NODE);
c.cur_seg[CURSEG_HOT_DATA] = prev_zone(CURSEG_COLD_NODE);
@@ -424,10 +481,10 @@ static int f2fs_prepare_super_block(void)
c.cur_seg[CURSEG_COLD_NODE] = next_zone(CURSEG_WARM_NODE);
c.cur_seg[CURSEG_HOT_DATA] = next_zone(CURSEG_COLD_NODE);
c.cur_seg[CURSEG_COLD_DATA] =
- max(last_zone((total_zones >> 2)),
+ max(last_zone((main_zones >> 2)),
next_zone(CURSEG_COLD_NODE));
c.cur_seg[CURSEG_WARM_DATA] =
- max(last_zone((total_zones >> 1)),
+ max(last_zone((main_zones >> 1)),
next_zone(CURSEG_COLD_DATA));
}
Use binary decision approach to calculate SIT/NAT/SSA segments it has some benefits when the partition size >= 512G: (bsearch) psize main_segments main_seaments ... 512G 261509 261510 1 T 523141 523143 2 T 1046405 1046409 4 T 2092783 2092791 ... and also has some difference for some specific partition sizes, such as 928M, 1844M, 2760M: 928M (original) main zones: 454 total_meta_zones: 9 (2 CP + 2 SIT + 4 NAT + 1 SSA) (the patch) main_zones: 455 total_meta_zones: 7 (2 CP + 2 SIT + 2 NAT + 1 SSA) *rather than* main_zones: 456 total_meta_zones: 9 = 465 > 464 (928M) since blocks_for_nat is related to main_segments now (there is no need to using 4 NATs since no more main segments). It also clarify that SIT/NAT/SSA are used for main segment area only. Signed-off-by: Gao Xiang <gaoxiang25@huawei.com> --- Change log from v3: - fix 32-bit integer overflow (eg. main_segments * c.blks_per_seg) - fix ASSERT in f2fs_prepare_super_block, eg. For 928M mentioned in the commit message: main_zones: 455 total_meta_zones: 7 == 462 != 464 (2 CP + 2 SIT + 2 NAT + 1 SSA) - test adds in this version: i=0; while true; do dd if=/dev/zero of=test.img bs=1048576 seek=$((38+i)) count=0; mkfs.f2fs test.img || break; fsck.f2fs test.img || break; i=$((i+2)); done Change log from v2: - tighten binary search boundary (the lower bound is not as tight as the original one, but very close and no more redundant code by this way.) - some tests for this patch: 1) both can mkfs at the minimum partition size 38M and main segments are 11. 2) total binary search count optimize: v2 v3 main_segments_count 512M 8 2 248 1G 9 3 502 2G 8 3 1011 4G 10 4 2029 8G 11 4 4065 16G 10 4 8135 32G 12 5 16275 64G 14 7 32581 128G 15 7 65285 256G 16 8 130693 512G 18 8 261510 1024G 16 10 523143 2048G 19 11 1026409 Change log from v1: - use align_down instead of align_up in get_best_main_zones mkfs/f2fs_format.c | 123 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 90 insertions(+), 33 deletions(-)