diff mbox

[f2fs-dev,RFC,v4] mkfs.f2fs: binary decision to calculate SIT/NAT/SSA

Message ID 9047C53C18267742AB12E43B65C7F9F70BCC8966@dggemi505-mbs.china.huawei.com (mailing list archive)
State New, archived
Headers show

Commit Message

Gao Xiang Jan. 11, 2018, 11:19 a.m. UTC
Use binary decision approach to calculate SIT/NAT/SSA segments
it has some benefits when the partition size >= 512G:

                        (bsearch)
psize   main_segments   main_seaments
...
512G    261509          261510
 1 T    523141          523143
 2 T    1046405         1046409
 4 T    2092783         2092791
...

and also has some difference for some specific partition sizes,
such as 928M, 1844M, 2760M:

928M  (original) main zones: 454 total_meta_zones: 9
                                 (2 CP + 2 SIT + 4 NAT + 1 SSA)
     (the patch) main_zones: 455 total_meta_zones: 7
                                 (2 CP + 2 SIT + 2 NAT + 1 SSA)
   *rather than* main_zones: 456 total_meta_zones: 9
                    = 465 > 464 (928M)

since blocks_for_nat is related to main_segments now
(there is no need to using 4 NATs since no more main segments).

It also clarify that SIT/NAT/SSA are used for main segment area only.

Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
---
Change log from v3:
    - fix 32-bit integer overflow (eg. main_segments * c.blks_per_seg)
    - fix ASSERT in f2fs_prepare_super_block, eg. For 928M mentioned in the commit message:
                    main_zones: 455 total_meta_zones: 7   == 462 != 464
                                 (2 CP + 2 SIT + 2 NAT + 1 SSA)
    - test adds in this version:
       i=0; while true; do dd if=/dev/zero of=test.img bs=1048576 seek=$((38+i)) count=0;
       mkfs.f2fs test.img || break; fsck.f2fs test.img || break; i=$((i+2)); done
Change log from v2:
    - tighten binary search boundary (the lower bound is not as tight as the original one,
      but very close and no more redundant code by this way.)
    - some tests for this patch:
       1) both can mkfs at the minimum partition size 38M and main segments are 11.
       2) total binary search count optimize:
                 v2       v3       main_segments_count
         512M    8        2        248
         1G      9        3        502
         2G      8        3        1011
         4G      10       4        2029
         8G      11       4        4065
         16G     10       4        8135
         32G     12       5        16275
         64G     14       7        32581
         128G    15       7        65285
         256G    16       8        130693
         512G    18       8        261510
         1024G   16       10       523143
         2048G   19       11       1026409
Change log from v1:
    - use align_down instead of align_up in get_best_main_zones
 mkfs/f2fs_format.c | 123 +++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 90 insertions(+), 33 deletions(-)
diff mbox

Patch

diff --git a/mkfs/f2fs_format.c b/mkfs/f2fs_format.c
index a130001..b4d560e 100644
--- a/mkfs/f2fs_format.c
+++ b/mkfs/f2fs_format.c
@@ -145,20 +145,87 @@  static void verify_cur_segs(void)
 		c.cur_seg[i] = next_zone(i - 1);
 }
 
+static u_int32_t get_meta_zones(u_int32_t main_zones)
+{
+	u_int32_t main_segments, meta_segments;
+	u_int32_t sit_segments, blocks_for_nat;
+	u_int32_t max_sit_bitmap_size, max_nat_bitmap_size;
+	u_int32_t nat_segments, ssa_segments;
+
+	main_segments = c.segs_per_zone * main_zones;
+
+	sit_segments = SEG_ALIGN(SIZE_ALIGN(main_segments,
+		SIT_ENTRY_PER_BLOCK) /* blocks_for_sit */);
+
+	blocks_for_nat = SIZE_ALIGN((u_int64_t)
+		main_segments * c.blks_per_seg,
+		NAT_ENTRY_PER_BLOCK);
+
+	max_sit_bitmap_size = min((u_int32_t)MAX_SIT_BITMAP_SIZE,
+		sit_segments * c.blks_per_seg / 8);
+
+	/*
+	 * it's weird because for 1TB storage, payload is still not
+	 * used and max_nat_bitmap_blks is only 21472, which means
+	 * the total number of nodes is 21472 * 409 = 8782048
+	 */
+	if (max_sit_bitmap_size > MAX_SIT_BITMAP_SIZE_IN_CKPT)
+		max_nat_bitmap_size = CHECKSUM_OFFSET -
+				sizeof(struct f2fs_checkpoint) + 1;
+	else
+		max_nat_bitmap_size =
+			CHECKSUM_OFFSET - sizeof(struct f2fs_checkpoint) + 1
+			- max_sit_bitmap_size;
+
+	nat_segments = min(SEG_ALIGN(blocks_for_nat),
+		(max_nat_bitmap_size * 8) / c.blks_per_seg);
+
+	/* each main segment has a ssa block */
+	ssa_segments = SEG_ALIGN(main_segments);
+
+	meta_segments = (get_sb(segment_count_ckpt) +
+		sit_segments * 2 + nat_segments * 2 +
+		ssa_segments);
+
+	return ZONE_ALIGN((u_int64_t)meta_segments * c.blks_per_seg);
+}
+
+static u_int32_t get_best_main_zones(void)
+{
+	u_int32_t total_zones = get_sb(segment_count) / (c.segs_per_zone);
+	u_int32_t left = total_zones - get_meta_zones(total_zones),
+		right = total_zones - /* 2(CP + SIT + NAT) + SSA */
+			ZONE_ALIGN(7);
+	u_int32_t candicate = 0;
+
+	while (left <= right) {
+		u_int32_t main_zones = left + (right - left) / 2;
+		u_int32_t meta_zones = get_meta_zones(main_zones);
+
+		if (meta_zones + main_zones == total_zones)
+			return main_zones;
+
+		if (meta_zones + main_zones < total_zones) {
+			left = main_zones + 1;
+			candicate = main_zones;
+		} else
+			right = main_zones - 1;
+	}
+	return candicate;
+}
+
 static int f2fs_prepare_super_block(void)
 {
 	u_int32_t blk_size_bytes;
 	u_int32_t log_sectorsize, log_sectors_per_block;
 	u_int32_t log_blocksize, log_blks_per_seg;
 	u_int32_t segment_size_bytes, zone_size_bytes;
-	u_int32_t sit_segments;
-	u_int32_t blocks_for_sit, blocks_for_nat, blocks_for_ssa;
-	u_int32_t total_valid_blks_available;
+	u_int32_t blocks_for_sit, blocks_for_nat;
 	u_int64_t zone_align_start_offset, diff;
 	u_int64_t total_meta_zones, total_meta_segments;
 	u_int32_t sit_bitmap_size, max_sit_bitmap_size;
 	u_int32_t max_nat_bitmap_size, max_nat_segments;
-	u_int32_t total_zones;
+	u_int32_t main_zones, main_segments;
 	u_int32_t next_ino;
 	enum quota_type qtype;
 	int i;
@@ -256,22 +323,19 @@  static int f2fs_prepare_super_block(void)
 	set_sb(sit_blkaddr, get_sb(segment0_blkaddr) +
 			get_sb(segment_count_ckpt) * c.blks_per_seg);
 
-	blocks_for_sit = SIZE_ALIGN(get_sb(segment_count), SIT_ENTRY_PER_BLOCK);
+	/* try to do binary decision to get main_zones */
+	main_zones = get_best_main_zones();
+	main_segments = c.segs_per_zone * main_zones;
 
-	sit_segments = SEG_ALIGN(blocks_for_sit);
-
-	set_sb(segment_count_sit, sit_segments * 2);
+	blocks_for_sit = SIZE_ALIGN(main_segments, SIT_ENTRY_PER_BLOCK);
+	set_sb(segment_count_sit, SEG_ALIGN(blocks_for_sit) * 2);
 
 	set_sb(nat_blkaddr, get_sb(sit_blkaddr) + get_sb(segment_count_sit) *
 			c.blks_per_seg);
 
-	total_valid_blks_available = (get_sb(segment_count) -
-			(get_sb(segment_count_ckpt) +
-			get_sb(segment_count_sit))) * c.blks_per_seg;
-
-	blocks_for_nat = SIZE_ALIGN(total_valid_blks_available,
+	blocks_for_nat = SIZE_ALIGN((u_int64_t)
+			main_segments * c.blks_per_seg,
 			NAT_ENTRY_PER_BLOCK);
-
 	set_sb(segment_count_nat, SEG_ALIGN(blocks_for_nat));
 	/*
 	 * The number of node segments should not be exceeded a "Threshold".
@@ -312,16 +376,7 @@  static int f2fs_prepare_super_block(void)
 	set_sb(ssa_blkaddr, get_sb(nat_blkaddr) + get_sb(segment_count_nat) *
 			c.blks_per_seg);
 
-	total_valid_blks_available = (get_sb(segment_count) -
-			(get_sb(segment_count_ckpt) +
-			get_sb(segment_count_sit) +
-			get_sb(segment_count_nat))) *
-			c.blks_per_seg;
-
-	blocks_for_ssa = total_valid_blks_available /
-				c.blks_per_seg + 1;
-
-	set_sb(segment_count_ssa, SEG_ALIGN(blocks_for_ssa));
+	set_sb(segment_count_ssa, SEG_ALIGN(main_segments));
 
 	total_meta_segments = get_sb(segment_count_ckpt) +
 		get_sb(segment_count_sit) +
@@ -354,10 +409,7 @@  static int f2fs_prepare_super_block(void)
 		}
 	}
 
-	total_zones = get_sb(segment_count) / (c.segs_per_zone) -
-							total_meta_zones;
-
-	set_sb(section_count, total_zones * c.secs_per_zone);
+	set_sb(section_count, main_zones * c.secs_per_zone);
 
 	set_sb(segment_count_main, get_sb(section_count) * c.segs_per_sec);
 
@@ -373,6 +425,11 @@  static int f2fs_prepare_super_block(void)
 		return -1;
 	}
 
+	/* temporarily add this ASSERT for testing */
+	ASSERT(total_meta_zones == get_meta_zones(main_zones) &&
+		(total_meta_zones + main_zones) * c.segs_per_zone
+			<= get_sb(segment_count));
+
 	c.reserved_segments =
 			(2 * (100 / c.overprovision + 1) + 6)
 			* c.segs_per_sec;
@@ -404,15 +461,15 @@  static int f2fs_prepare_super_block(void)
 					qtype, next_ino - 1);
 	}
 
-	if (total_zones <= 6) {
+	if (main_zones <= 6) {
 		MSG(1, "\tError: %d zones: Need more zones "
-			"by shrinking zone size\n", total_zones);
+			"by shrinking zone size\n", main_zones);
 		return -1;
 	}
 
 	if (c.heap) {
 		c.cur_seg[CURSEG_HOT_NODE] =
-				last_section(last_zone(total_zones));
+				last_section(last_zone(main_zones));
 		c.cur_seg[CURSEG_WARM_NODE] = prev_zone(CURSEG_HOT_NODE);
 		c.cur_seg[CURSEG_COLD_NODE] = prev_zone(CURSEG_WARM_NODE);
 		c.cur_seg[CURSEG_HOT_DATA] = prev_zone(CURSEG_COLD_NODE);
@@ -424,10 +481,10 @@  static int f2fs_prepare_super_block(void)
 		c.cur_seg[CURSEG_COLD_NODE] = next_zone(CURSEG_WARM_NODE);
 		c.cur_seg[CURSEG_HOT_DATA] = next_zone(CURSEG_COLD_NODE);
 		c.cur_seg[CURSEG_COLD_DATA] =
-				max(last_zone((total_zones >> 2)),
+				max(last_zone((main_zones >> 2)),
 					next_zone(CURSEG_COLD_NODE));
 		c.cur_seg[CURSEG_WARM_DATA] =
-				max(last_zone((total_zones >> 1)),
+				max(last_zone((main_zones >> 1)),
 					next_zone(CURSEG_COLD_DATA));
 	}