Message ID | 9047C53C18267742AB12E43B65C7F9F70BCB4020@dggemi505-mbs.china.huawei.com (mailing list archive) |
---|---|
State | New, archived |
On 12/17, gaoxiang (P) wrote: > clean up some redundant repeat code in build_sit_entries > and seperate some logic into new functions: > - build_discard_entries > - build_sec_entries This patch makes another big loop redundantly? > > Note that "if (f2fs_discard_en(sbi))" and "if (sbi->segs_per_sec > 1)" > are all taken out of the loops they are unchangable. > > Signed-off-by: Gao Xiang <gaoxiang25@huawei.com> > --- > fs/f2fs/segment.c | 80 +++++++++++++++++++++++++------------------------------ > 1 file changed, 37 insertions(+), 43 deletions(-) > > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > index 40e1d20..bcd8a40 100644 > --- a/fs/f2fs/segment.c > +++ b/fs/f2fs/segment.c > @@ -3476,12 +3476,39 @@ static int build_curseg(struct f2fs_sb_info *sbi) > return restore_curseg_summaries(sbi); > } > > +static void build_discard_entries(struct f2fs_sb_info *sbi) > +{ > + unsigned segno; > + > + for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) { > + struct seg_entry *se = get_seg_entry(sbi, segno); > + > + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) > + memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); > + else { > + memcpy(se->discard_map, se->cur_valid_map, > + SIT_VBLOCK_MAP_SIZE); > + > + sbi->discard_blks += sbi->blocks_per_seg - > + se->valid_blocks; > + } > + } > +} > + > +static void build_sec_entries(struct f2fs_sb_info *sbi) > +{ > + unsigned segno; > + > + for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) > + get_sec_entry(sbi, segno)->valid_blocks += > + get_seg_entry(sbi, segno)->valid_blocks; > +} > + > static void build_sit_entries(struct f2fs_sb_info *sbi) > { > struct sit_info *sit_i = SIT_I(sbi); > struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); > struct f2fs_journal *journal = curseg->journal; > - struct seg_entry *se; > struct f2fs_sit_entry sit; > int sit_blk_cnt = SIT_BLK_CNT(sbi); > unsigned int i, start, end; > @@ -3498,67 +3525,34 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) > struct f2fs_sit_block *sit_blk; > struct page *page; > > - se = &sit_i->sentries[start]; > page = get_current_sit_page(sbi, start); > sit_blk = (struct f2fs_sit_block *)page_address(page); > sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; > f2fs_put_page(page, 1); > > check_block_count(sbi, start, &sit); > - seg_info_from_raw_sit(se, &sit); > - > - /* build discard map only one time */ > - if (f2fs_discard_en(sbi)) { > - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { > - memset(se->discard_map, 0xff, > - SIT_VBLOCK_MAP_SIZE); > - } else { > - memcpy(se->discard_map, > - se->cur_valid_map, > - SIT_VBLOCK_MAP_SIZE); > - sbi->discard_blks += > - sbi->blocks_per_seg - > - se->valid_blocks; > - } > - } > > - if (sbi->segs_per_sec > 1) > - get_sec_entry(sbi, start)->valid_blocks += > - se->valid_blocks; > + seg_info_from_raw_sit(&sit_i->sentries[start], &sit); > } > start_blk += readed; > } while (start_blk < sit_blk_cnt); > > down_read(&curseg->journal_rwsem); > for (i = 0; i < sits_in_cursum(journal); i++) { > - unsigned int old_valid_blocks; > - > start = le32_to_cpu(segno_in_journal(journal, i)); > - se = &sit_i->sentries[start]; > sit = sit_in_journal(journal, i); > > - old_valid_blocks = se->valid_blocks; > - > check_block_count(sbi, start, &sit); > - seg_info_from_raw_sit(se, &sit); > - > - if (f2fs_discard_en(sbi)) { > - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { > - memset(se->discard_map, 0xff, > - SIT_VBLOCK_MAP_SIZE); > - } else { > - memcpy(se->discard_map, se->cur_valid_map, > - SIT_VBLOCK_MAP_SIZE); > - sbi->discard_blks += old_valid_blocks - 
> - se->valid_blocks; > - } > - } > - > - if (sbi->segs_per_sec > 1) > - get_sec_entry(sbi, start)->valid_blocks += > - se->valid_blocks - old_valid_blocks; > + seg_info_from_raw_sit(&sit_i->sentries[start], &sit); > } > up_read(&curseg->journal_rwsem); > + > + /* build discard map only one time */ > + if (f2fs_discard_en(sbi)) > + build_discard_entries(sbi); > + > + if (sbi->segs_per_sec > 1) > + build_sec_entries(sbi); > } > > static void init_free_segmap(struct f2fs_sb_info *sbi) > -- > 2.1.4
Hi Jaegeuk, On 2017/12/20 7:40, Jaegeuk Kim wrote: > On 12/17, gaoxiang (P) wrote: >> clean up some redundant repeat code in build_sit_entries >> and seperate some logic into new functions: >> - build_discard_entries >> - build_sec_entries > > This patch makes another big loop redundantly? > "condition & jump" is the fundamental of "if" and loop. In the worst case, both f2fs_discard_en(sbi) and sbi->segs_per_sec > 1 are true, and I think, we have: - the original approach: MAIN_SEGS <-- loop condition MAIN_SEGS <-- f2fs_discard_en(sbi) MAIN_SEGS <-- sbi->segs_per_sec > 1 sits_in_cursum(journal) <-- loop_condition sits_in_cursum(journal) <-- f2fs_discard_en(sbi) sits_in_cursum(journal) <-- sbi->segs_per_sec > 1 it takes about 3*(MAIN_SEGS + sits_in_cursum(journal)) jumps effort. - this patch: MAIN_SEGS <-- loop condition sits_in_cursum(journal) <-- loop_condition MAIN_SEGS <-- f2fs_discard_en(sbi) MAIN_SEGS <-- sbi->segs_per_sec > 1 it takes abourt 3*MAIN_SEGS + sits_in_cursum(journal) jumps effort. and in the best case, both f2fs_discard_en(sbi) and sbi->segs_per_sec > 1 are false, and we have: - this patch MAIN_SEGS <-- loop condition sits_in_cursum(journal) <-- loop_condition See https://stackoverflow.com/questions/1462710/can-c-compilers-optimize-if-statements-inside-for-loops In addtion, this approach is easier to read than the original one, so splitting the loop would be beneficial. Thanks >> >> Note that "if (f2fs_discard_en(sbi))" and "if (sbi->segs_per_sec > 1)" >> are all taken out of the loops they are unchangable. >> >> Signed-off-by: Gao Xiang <gaoxiang25@huawei.com> >> --- >> fs/f2fs/segment.c | 80 +++++++++++++++++++++++++------------------------------ >> 1 file changed, 37 insertions(+), 43 deletions(-) >> >> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c >> index 40e1d20..bcd8a40 100644 >> --- a/fs/f2fs/segment.c >> +++ b/fs/f2fs/segment.c >> @@ -3476,12 +3476,39 @@ static int build_curseg(struct f2fs_sb_info *sbi) >> return restore_curseg_summaries(sbi); >> } >> >> +static void build_discard_entries(struct f2fs_sb_info *sbi) >> +{ >> + unsigned segno; >> + >> + for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) { >> + struct seg_entry *se = get_seg_entry(sbi, segno); >> + >> + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) >> + memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); >> + else { >> + memcpy(se->discard_map, se->cur_valid_map, >> + SIT_VBLOCK_MAP_SIZE); >> + >> + sbi->discard_blks += sbi->blocks_per_seg - >> + se->valid_blocks; >> + } >> + } >> +} >> + >> +static void build_sec_entries(struct f2fs_sb_info *sbi) >> +{ >> + unsigned segno; >> + >> + for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) >> + get_sec_entry(sbi, segno)->valid_blocks += >> + get_seg_entry(sbi, segno)->valid_blocks; >> +} >> + >> static void build_sit_entries(struct f2fs_sb_info *sbi) >> { >> struct sit_info *sit_i = SIT_I(sbi); >> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); >> struct f2fs_journal *journal = curseg->journal; >> - struct seg_entry *se; >> struct f2fs_sit_entry sit; >> int sit_blk_cnt = SIT_BLK_CNT(sbi); >> unsigned int i, start, end; >> @@ -3498,67 +3525,34 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) >> struct f2fs_sit_block *sit_blk; >> struct page *page; >> >> - se = &sit_i->sentries[start]; >> page = get_current_sit_page(sbi, start); >> sit_blk = (struct f2fs_sit_block *)page_address(page); >> sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; >> f2fs_put_page(page, 1); >> >> check_block_count(sbi, start, &sit); >> - 
seg_info_from_raw_sit(se, &sit); >> - >> - /* build discard map only one time */ >> - if (f2fs_discard_en(sbi)) { >> - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { >> - memset(se->discard_map, 0xff, >> - SIT_VBLOCK_MAP_SIZE); >> - } else { >> - memcpy(se->discard_map, >> - se->cur_valid_map, >> - SIT_VBLOCK_MAP_SIZE); >> - sbi->discard_blks += >> - sbi->blocks_per_seg - >> - se->valid_blocks; >> - } >> - } >> >> - if (sbi->segs_per_sec > 1) >> - get_sec_entry(sbi, start)->valid_blocks += >> - se->valid_blocks; >> + seg_info_from_raw_sit(&sit_i->sentries[start], &sit); >> } >> start_blk += readed; >> } while (start_blk < sit_blk_cnt); >> >> down_read(&curseg->journal_rwsem); >> for (i = 0; i < sits_in_cursum(journal); i++) { >> - unsigned int old_valid_blocks; >> - >> start = le32_to_cpu(segno_in_journal(journal, i)); >> - se = &sit_i->sentries[start]; >> sit = sit_in_journal(journal, i); >> >> - old_valid_blocks = se->valid_blocks; >> - >> check_block_count(sbi, start, &sit); >> - seg_info_from_raw_sit(se, &sit); >> - >> - if (f2fs_discard_en(sbi)) { >> - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { >> - memset(se->discard_map, 0xff, >> - SIT_VBLOCK_MAP_SIZE); >> - } else { >> - memcpy(se->discard_map, se->cur_valid_map, >> - SIT_VBLOCK_MAP_SIZE); >> - sbi->discard_blks += old_valid_blocks - >> - se->valid_blocks; >> - } >> - } >> - >> - if (sbi->segs_per_sec > 1) >> - get_sec_entry(sbi, start)->valid_blocks += >> - se->valid_blocks - old_valid_blocks; >> + seg_info_from_raw_sit(&sit_i->sentries[start], &sit); >> } >> up_read(&curseg->journal_rwsem); >> + >> + /* build discard map only one time */ >> + if (f2fs_discard_en(sbi)) >> + build_discard_entries(sbi); >> + >> + if (sbi->segs_per_sec > 1) >> + build_sec_entries(sbi); >> } >> >> static void init_free_segmap(struct f2fs_sb_info *sbi) >> -- >> 2.1.4
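For concreteness, the two loop shapes being compared in this exchange can be written as a small standalone C sketch. do_base(), do_discard() and do_sec() below are hypothetical stand-ins for the per-segment work in build_sit_entries(), not f2fs functions; the counters exist only to make the example compile and run.

    /* Standalone sketch of the two loop shapes under discussion. */
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    static unsigned long base_calls, discard_calls, sec_calls;

    /* Hypothetical stand-ins for the per-segment work; not f2fs code. */
    static void do_base(size_t i)    { (void)i; base_calls++; }
    static void do_discard(size_t i) { (void)i; discard_calls++; }
    static void do_sec(size_t i)     { (void)i; sec_calls++; }

    /* Original shape: both invariant flags are tested on every iteration. */
    static void build_original(size_t n, bool discard_en, bool multi_seg_sec)
    {
        size_t i;

        for (i = 0; i < n; i++) {
            do_base(i);
            if (discard_en)       /* does not change across iterations */
                do_discard(i);
            if (multi_seg_sec)    /* does not change across iterations */
                do_sec(i);
        }
    }

    /* Patched shape: each flag is tested once, at the cost of extra passes. */
    static void build_split(size_t n, bool discard_en, bool multi_seg_sec)
    {
        size_t i;

        for (i = 0; i < n; i++)
            do_base(i);

        if (discard_en)
            for (i = 0; i < n; i++)
                do_discard(i);

        if (multi_seg_sec)
            for (i = 0; i < n; i++)
                do_sec(i);
    }

    int main(void)
    {
        build_original(8, true, false);
        build_split(8, true, false);
        printf("base=%lu discard=%lu sec=%lu\n",
               base_calls, discard_calls, sec_calls);
        return 0;
    }

With both flags false, build_split() runs only the base pass, which is the "best case" Gao Xiang describes; with both flags true it pays two extra passes over the entries, which is the trade-off Jaegeuk questions.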
On 12/20, Gaoxiang (OS) wrote: > Hi Jaegeuk, > > On 2017/12/20 7:40, Jaegeuk Kim wrote: > > On 12/17, gaoxiang (P) wrote: > >> clean up some redundant repeat code in build_sit_entries > >> and seperate some logic into new functions: > >> - build_discard_entries > >> - build_sec_entries > > > > This patch makes another big loop redundantly? > > > "condition & jump" is the fundamental of "if" and loop. > In the worst case, both f2fs_discard_en(sbi) and sbi->segs_per_sec > 1 > are true, and I think, we have: > - the original approach: > MAIN_SEGS <-- loop condition > MAIN_SEGS <-- f2fs_discard_en(sbi) > MAIN_SEGS <-- sbi->segs_per_sec > 1 > > sits_in_cursum(journal) <-- loop_condition > sits_in_cursum(journal) <-- f2fs_discard_en(sbi) > sits_in_cursum(journal) <-- sbi->segs_per_sec > 1 > it takes about 3*(MAIN_SEGS + sits_in_cursum(journal)) jumps effort. > - this patch: > MAIN_SEGS <-- loop condition > sits_in_cursum(journal) <-- loop_condition > > MAIN_SEGS <-- f2fs_discard_en(sbi) > MAIN_SEGS <-- sbi->segs_per_sec > 1 > it takes abourt 3*MAIN_SEGS + sits_in_cursum(journal) jumps effort. > > and in the best case, both f2fs_discard_en(sbi) and sbi->segs_per_sec > > 1 are false, and we have: > - this patch > MAIN_SEGS <-- loop condition > > sits_in_cursum(journal) <-- loop_condition > See > https://stackoverflow.com/questions/1462710/can-c-compilers-optimize-if-statements-inside-for-loops Interesting! What I can think of the worst case looks like: In current code, for (N) { do_something; if (f2fs_discard_en()) do _something; if (sbi->segs_per_sec > 1) do_something; } for (sits_in_cursum()) { do_something; if (f2fs_discard_en()) do _something; if (sbi->segs_per_sec > 1) do_something; } => O(N + sits_in_cursum()) Once compiler unrolled the loop, we can expect most of jumps could be hit by branch predictor, since the loop does not have many branches. In the patch, for (N) do_something; for (sits_in_cursum()) do_something; if (f2fs_discard_en()) { for (N) do_something; } if (sbi->segs_per_sec > 1) { for (N) do_something; } => O(3*N + sits_in_cursum()) BTW, build_discard_entries(struct f2fs_sb_info *sbi) is more likely to split the loops, as you describe. In build_discard_entries(), for (N) { if (CP_TRIMMED_FLAG) do_something; else do_semething_else; } Thanks, > > In addtion, this approach is easier to read than the original > one, so splitting the loop would be beneficial. > > Thanks > > >> > >> Note that "if (f2fs_discard_en(sbi))" and "if (sbi->segs_per_sec > 1)" > >> are all taken out of the loops they are unchangable. 
> >> > >> Signed-off-by: Gao Xiang <gaoxiang25@huawei.com> > >> --- > >> fs/f2fs/segment.c | 80 +++++++++++++++++++++++++------------------------------ > >> 1 file changed, 37 insertions(+), 43 deletions(-) > >> > >> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > >> index 40e1d20..bcd8a40 100644 > >> --- a/fs/f2fs/segment.c > >> +++ b/fs/f2fs/segment.c > >> @@ -3476,12 +3476,39 @@ static int build_curseg(struct f2fs_sb_info *sbi) > >> return restore_curseg_summaries(sbi); > >> } > >> > >> +static void build_discard_entries(struct f2fs_sb_info *sbi) > >> +{ > >> + unsigned segno; > >> + > >> + for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) { > >> + struct seg_entry *se = get_seg_entry(sbi, segno); > >> + > >> + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) > >> + memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); > >> + else { > >> + memcpy(se->discard_map, se->cur_valid_map, > >> + SIT_VBLOCK_MAP_SIZE); > >> + > >> + sbi->discard_blks += sbi->blocks_per_seg - > >> + se->valid_blocks; > >> + } > >> + } > >> +} > >> + > >> +static void build_sec_entries(struct f2fs_sb_info *sbi) > >> +{ > >> + unsigned segno; > >> + > >> + for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) > >> + get_sec_entry(sbi, segno)->valid_blocks += > >> + get_seg_entry(sbi, segno)->valid_blocks; > >> +} > >> + > >> static void build_sit_entries(struct f2fs_sb_info *sbi) > >> { > >> struct sit_info *sit_i = SIT_I(sbi); > >> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); > >> struct f2fs_journal *journal = curseg->journal; > >> - struct seg_entry *se; > >> struct f2fs_sit_entry sit; > >> int sit_blk_cnt = SIT_BLK_CNT(sbi); > >> unsigned int i, start, end; > >> @@ -3498,67 +3525,34 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) > >> struct f2fs_sit_block *sit_blk; > >> struct page *page; > >> > >> - se = &sit_i->sentries[start]; > >> page = get_current_sit_page(sbi, start); > >> sit_blk = (struct f2fs_sit_block *)page_address(page); > >> sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; > >> f2fs_put_page(page, 1); > >> > >> check_block_count(sbi, start, &sit); > >> - seg_info_from_raw_sit(se, &sit); > >> - > >> - /* build discard map only one time */ > >> - if (f2fs_discard_en(sbi)) { > >> - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { > >> - memset(se->discard_map, 0xff, > >> - SIT_VBLOCK_MAP_SIZE); > >> - } else { > >> - memcpy(se->discard_map, > >> - se->cur_valid_map, > >> - SIT_VBLOCK_MAP_SIZE); > >> - sbi->discard_blks += > >> - sbi->blocks_per_seg - > >> - se->valid_blocks; > >> - } > >> - } > >> > >> - if (sbi->segs_per_sec > 1) > >> - get_sec_entry(sbi, start)->valid_blocks += > >> - se->valid_blocks; > >> + seg_info_from_raw_sit(&sit_i->sentries[start], &sit); > >> } > >> start_blk += readed; > >> } while (start_blk < sit_blk_cnt); > >> > >> down_read(&curseg->journal_rwsem); > >> for (i = 0; i < sits_in_cursum(journal); i++) { > >> - unsigned int old_valid_blocks; > >> - > >> start = le32_to_cpu(segno_in_journal(journal, i)); > >> - se = &sit_i->sentries[start]; > >> sit = sit_in_journal(journal, i); > >> > >> - old_valid_blocks = se->valid_blocks; > >> - > >> check_block_count(sbi, start, &sit); > >> - seg_info_from_raw_sit(se, &sit); > >> - > >> - if (f2fs_discard_en(sbi)) { > >> - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { > >> - memset(se->discard_map, 0xff, > >> - SIT_VBLOCK_MAP_SIZE); > >> - } else { > >> - memcpy(se->discard_map, se->cur_valid_map, > >> - SIT_VBLOCK_MAP_SIZE); > >> - sbi->discard_blks += old_valid_blocks - > >> - se->valid_blocks; > >> - } > 
>> - } > >> - > >> - if (sbi->segs_per_sec > 1) > >> - get_sec_entry(sbi, start)->valid_blocks += > >> - se->valid_blocks - old_valid_blocks; > >> + seg_info_from_raw_sit(&sit_i->sentries[start], &sit); > >> } > >> up_read(&curseg->journal_rwsem); > >> + > >> + /* build discard map only one time */ > >> + if (f2fs_discard_en(sbi)) > >> + build_discard_entries(sbi); > >> + > >> + if (sbi->segs_per_sec > 1) > >> + build_sec_entries(sbi); > >> } > >> > >> static void init_free_segmap(struct f2fs_sb_info *sbi) > >> -- > >> 2.1.4
Hi Jaegeuk, On 2017/12/20 10:31, Jaegeuk Kim wrote: > On 12/20, Gaoxiang (OS) wrote: >> Hi Jaegeuk, >> >> On 2017/12/20 7:40, Jaegeuk Kim wrote: >>> On 12/17, gaoxiang (P) wrote: >>>> clean up some redundant repeat code in build_sit_entries >>>> and seperate some logic into new functions: >>>> - build_discard_entries >>>> - build_sec_entries >>> >>> This patch makes another big loop redundantly? >>> >> "condition & jump" is the fundamental of "if" and loop. >> In the worst case, both f2fs_discard_en(sbi) and sbi->segs_per_sec > 1 >> are true, and I think, we have: >> - the original approach: >> MAIN_SEGS <-- loop condition >> MAIN_SEGS <-- f2fs_discard_en(sbi) >> MAIN_SEGS <-- sbi->segs_per_sec > 1 >> >> sits_in_cursum(journal) <-- loop_condition >> sits_in_cursum(journal) <-- f2fs_discard_en(sbi) >> sits_in_cursum(journal) <-- sbi->segs_per_sec > 1 >> it takes about 3*(MAIN_SEGS + sits_in_cursum(journal)) jumps effort. >> - this patch: >> MAIN_SEGS <-- loop condition >> sits_in_cursum(journal) <-- loop_condition >> >> MAIN_SEGS <-- f2fs_discard_en(sbi) >> MAIN_SEGS <-- sbi->segs_per_sec > 1 >> it takes abourt 3*MAIN_SEGS + sits_in_cursum(journal) jumps effort. >> >> and in the best case, both f2fs_discard_en(sbi) and sbi->segs_per_sec > >> 1 are false, and we have: >> - this patch >> MAIN_SEGS <-- loop condition >> >> sits_in_cursum(journal) <-- loop_condition >> See >> https://stackoverflow.com/questions/1462710/can-c-compilers-optimize-if-statements-inside-for-loops > > Interesting! > > What I can think of the worst case looks like: > > In current code, > for (N) { > do_something; > if (f2fs_discard_en()) > do _something; > if (sbi->segs_per_sec > 1) > do_something; > } > for (sits_in_cursum()) { > do_something; > if (f2fs_discard_en()) > do _something; > if (sbi->segs_per_sec > 1) > do_something; > } > => O(N + sits_in_cursum()) In the worst case Its 3(1 -- exit condition < MAIN_SEG, 2 -- f2fs_discard_en(), 3 -- sbi->segs_per_sec > 1) *N + 3*sits_in_cursum() if condition & jump instuctions at runtime. If we use Big O notion, yes I think O(N + sits_in_cursum()) > > Once compiler unrolled the loop, we can expect most of jumps could be hit by > branch predictor, since the loop does not have many branches. The current code, the compiler could unroll the loop into, I guess for (N) { do_something; } if (f2fs_discard_en()) { do _something; } if (sbi->segs_per_sec > 1) { do_something; } ... for (sits_in_cursum()) { do_something; } if (f2fs_discard_en()) { do _something; } if (sbi->segs_per_sec > 1) { do_something; } Taking branch predictor into consideration, I think the unrolled one is more easier to predict than the current rolled one. (as you say, the current has more conditions in the loop and a exit condition to predict) > > In the patch, > for (N) > do_something; > for (sits_in_cursum()) > do_something; > > if (f2fs_discard_en()) { > for (N) > do_something; > } > if (sbi->segs_per_sec > 1) { > for (N) > do_something; > } > => O(3*N + sits_in_cursum()) If we use Big O notion, I think O(N + sits_in_cursum()) too. > > BTW, build_discard_entries(struct f2fs_sb_info *sbi) is more likely to split > the loops, as you describe. > > In build_discard_entries(), > for (N) { > if (CP_TRIMMED_FLAG) > do_something; > else > do_semething_else; > } > Thanks, > I think so, yet CP_TRIMMED_FLAG comes from a function argument, so compiler cannot know whether it is constant. I think if we count the runtime condition & jump instruction at runtime. the unrolls above are beneficial, I think. Thanks. 
>> >> In addtion, this approach is easier to read than the original >> one, so splitting the loop would be beneficial. >> >> Thanks >> >>>> >>>> Note that "if (f2fs_discard_en(sbi))" and "if (sbi->segs_per_sec > 1)" >>>> are all taken out of the loops they are unchangable. >>>> >>>> Signed-off-by: Gao Xiang <gaoxiang25@huawei.com> >>>> --- >>>> fs/f2fs/segment.c | 80 +++++++++++++++++++++++++------------------------------ >>>> 1 file changed, 37 insertions(+), 43 deletions(-) >>>> >>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c >>>> index 40e1d20..bcd8a40 100644 >>>> --- a/fs/f2fs/segment.c >>>> +++ b/fs/f2fs/segment.c >>>> @@ -3476,12 +3476,39 @@ static int build_curseg(struct f2fs_sb_info *sbi) >>>> return restore_curseg_summaries(sbi); >>>> } >>>> >>>> +static void build_discard_entries(struct f2fs_sb_info *sbi) >>>> +{ >>>> + unsigned segno; >>>> + >>>> + for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) { >>>> + struct seg_entry *se = get_seg_entry(sbi, segno); >>>> + >>>> + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) >>>> + memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); >>>> + else { >>>> + memcpy(se->discard_map, se->cur_valid_map, >>>> + SIT_VBLOCK_MAP_SIZE); >>>> + >>>> + sbi->discard_blks += sbi->blocks_per_seg - >>>> + se->valid_blocks; >>>> + } >>>> + } >>>> +} >>>> + >>>> +static void build_sec_entries(struct f2fs_sb_info *sbi) >>>> +{ >>>> + unsigned segno; >>>> + >>>> + for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) >>>> + get_sec_entry(sbi, segno)->valid_blocks += >>>> + get_seg_entry(sbi, segno)->valid_blocks; >>>> +} >>>> + >>>> static void build_sit_entries(struct f2fs_sb_info *sbi) >>>> { >>>> struct sit_info *sit_i = SIT_I(sbi); >>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); >>>> struct f2fs_journal *journal = curseg->journal; >>>> - struct seg_entry *se; >>>> struct f2fs_sit_entry sit; >>>> int sit_blk_cnt = SIT_BLK_CNT(sbi); >>>> unsigned int i, start, end; >>>> @@ -3498,67 +3525,34 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) >>>> struct f2fs_sit_block *sit_blk; >>>> struct page *page; >>>> >>>> - se = &sit_i->sentries[start]; >>>> page = get_current_sit_page(sbi, start); >>>> sit_blk = (struct f2fs_sit_block *)page_address(page); >>>> sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; >>>> f2fs_put_page(page, 1); >>>> >>>> check_block_count(sbi, start, &sit); >>>> - seg_info_from_raw_sit(se, &sit); >>>> - >>>> - /* build discard map only one time */ >>>> - if (f2fs_discard_en(sbi)) { >>>> - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { >>>> - memset(se->discard_map, 0xff, >>>> - SIT_VBLOCK_MAP_SIZE); >>>> - } else { >>>> - memcpy(se->discard_map, >>>> - se->cur_valid_map, >>>> - SIT_VBLOCK_MAP_SIZE); >>>> - sbi->discard_blks += >>>> - sbi->blocks_per_seg - >>>> - se->valid_blocks; >>>> - } >>>> - } >>>> >>>> - if (sbi->segs_per_sec > 1) >>>> - get_sec_entry(sbi, start)->valid_blocks += >>>> - se->valid_blocks; >>>> + seg_info_from_raw_sit(&sit_i->sentries[start], &sit); >>>> } >>>> start_blk += readed; >>>> } while (start_blk < sit_blk_cnt); >>>> >>>> down_read(&curseg->journal_rwsem); >>>> for (i = 0; i < sits_in_cursum(journal); i++) { >>>> - unsigned int old_valid_blocks; >>>> - >>>> start = le32_to_cpu(segno_in_journal(journal, i)); >>>> - se = &sit_i->sentries[start]; >>>> sit = sit_in_journal(journal, i); >>>> >>>> - old_valid_blocks = se->valid_blocks; >>>> - >>>> check_block_count(sbi, start, &sit); >>>> - seg_info_from_raw_sit(se, &sit); >>>> - >>>> - if (f2fs_discard_en(sbi)) { >>>> - if 
(is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { >>>> - memset(se->discard_map, 0xff, >>>> - SIT_VBLOCK_MAP_SIZE); >>>> - } else { >>>> - memcpy(se->discard_map, se->cur_valid_map, >>>> - SIT_VBLOCK_MAP_SIZE); >>>> - sbi->discard_blks += old_valid_blocks - >>>> - se->valid_blocks; >>>> - } >>>> - } >>>> - >>>> - if (sbi->segs_per_sec > 1) >>>> - get_sec_entry(sbi, start)->valid_blocks += >>>> - se->valid_blocks - old_valid_blocks; >>>> + seg_info_from_raw_sit(&sit_i->sentries[start], &sit); >>>> } >>>> up_read(&curseg->journal_rwsem); >>>> + >>>> + /* build discard map only one time */ >>>> + if (f2fs_discard_en(sbi)) >>>> + build_discard_entries(sbi); >>>> + >>>> + if (sbi->segs_per_sec > 1) >>>> + build_sec_entries(sbi); >>>> } >>>> >>>> static void init_free_segmap(struct f2fs_sb_info *sbi) >>>> -- >>>> 2.1.4
Hi Jaegeuk, On 2017/12/20 10:31, Jaegeuk Kim wrote: > On 12/20, Gaoxiang (OS) wrote: >> Hi Jaegeuk, >> >> On 2017/12/20 7:40, Jaegeuk Kim wrote: >>> On 12/17, gaoxiang (P) wrote: >>>> clean up some redundant repeat code in build_sit_entries >>>> and seperate some logic into new functions: >>>> - build_discard_entries >>>> - build_sec_entries >>> >>> This patch makes another big loop redundantly? >>> >> "condition & jump" is the fundamental of "if" and loop. >> In the worst case, both f2fs_discard_en(sbi) and sbi->segs_per_sec > 1 >> are true, and I think, we have: >> - the original approach: >> MAIN_SEGS <-- loop condition >> MAIN_SEGS <-- f2fs_discard_en(sbi) >> MAIN_SEGS <-- sbi->segs_per_sec > 1 >> >> sits_in_cursum(journal) <-- loop_condition >> sits_in_cursum(journal) <-- f2fs_discard_en(sbi) >> sits_in_cursum(journal) <-- sbi->segs_per_sec > 1 >> it takes about 3*(MAIN_SEGS + sits_in_cursum(journal)) jumps effort. >> - this patch: >> MAIN_SEGS <-- loop condition >> sits_in_cursum(journal) <-- loop_condition >> >> MAIN_SEGS <-- f2fs_discard_en(sbi) >> MAIN_SEGS <-- sbi->segs_per_sec > 1 >> it takes abourt 3*MAIN_SEGS + sits_in_cursum(journal) jumps effort. >> >> and in the best case, both f2fs_discard_en(sbi) and sbi->segs_per_sec > >> 1 are false, and we have: >> - this patch >> MAIN_SEGS <-- loop condition >> >> sits_in_cursum(journal) <-- loop_condition >> See >> https://stackoverflow.com/questions/1462710/can-c-compilers-optimize-if-statements-inside-for-loops > > Interesting! > > What I can think of the worst case looks like: > > In current code, > for (N) { > do_something; > if (f2fs_discard_en()) > do _something; > if (sbi->segs_per_sec > 1) > do_something; > } > for (sits_in_cursum()) { > do_something; > if (f2fs_discard_en()) > do _something; > if (sbi->segs_per_sec > 1) > do_something; > } > => O(N + sits_in_cursum()) In the worst case Its 3(1 -- exit condition < MAIN_SEG, 2 -- f2fs_discard_en(), 3 -- sbi->segs_per_sec > 1) *N + 3*sits_in_cursum() if condition & jump instuctions at runtime. If we use Big O notion, yes I think O(N + sits_in_cursum()) > > Once compiler unrolled the loop, we can expect most of jumps could be hit by > branch predictor, since the loop does not have many branches. The current code, if the compiler decides to unroll the loop, it could unroll it into, I guess for (N) { do_something; } if (f2fs_discard_en()) { for(N) { do _something; } } if (sbi->segs_per_sec > 1) { for(N) { do_something; } } ... for (sits_in_cursum()) { do_something; } if (f2fs_discard_en()) { for (N) { do _something; } } if (sbi->segs_per_sec > 1) { for (N) { do_something; } } Taking branch predictor into consideration, I think the unrolled one is more easier to predict than the current rolled one. (as you say, the current has more conditions in the loop and a exit condition to predict) > > In the patch, > for (N) > do_something; > for (sits_in_cursum()) > do_something; > > if (f2fs_discard_en()) { > for (N) > do_something; > } > if (sbi->segs_per_sec > 1) { > for (N) > do_something; > } > => O(3*N + sits_in_cursum()) If we use Big O notion, I think O(N + sits_in_cursum()) too. > > BTW, build_discard_entries(struct f2fs_sb_info *sbi) is more likely to split > the loops, as you describe. > > In build_discard_entries(), > for (N) { > if (CP_TRIMMED_FLAG) > do_something; > else > do_semething_else; > } > Thanks, > I think so, yet CP_TRIMMED_FLAG comes from a function argument, so I think compiler cannot decide whether it is constant. 
I think if we count the runtime condition & jump instruction at runtime. the unrolls above are beneficial, I think. Thanks. >> >> In addtion, this approach is easier to read than the original >> one, so splitting the loop would be beneficial. >> >> Thanks >> >>>> >>>> Note that "if (f2fs_discard_en(sbi))" and "if (sbi->segs_per_sec > 1)" >>>> are all taken out of the loops they are unchangable. >>>> >>>> Signed-off-by: Gao Xiang <gaoxiang25@huawei.com> >>>> --- >>>> fs/f2fs/segment.c | 80 +++++++++++++++++++++++++------------------------------ >>>> 1 file changed, 37 insertions(+), 43 deletions(-) >>>> >>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c >>>> index 40e1d20..bcd8a40 100644 >>>> --- a/fs/f2fs/segment.c >>>> +++ b/fs/f2fs/segment.c >>>> @@ -3476,12 +3476,39 @@ static int build_curseg(struct f2fs_sb_info *sbi) >>>> return restore_curseg_summaries(sbi); >>>> } >>>> >>>> +static void build_discard_entries(struct f2fs_sb_info *sbi) >>>> +{ >>>> + unsigned segno; >>>> + >>>> + for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) { >>>> + struct seg_entry *se = get_seg_entry(sbi, segno); >>>> + >>>> + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) >>>> + memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); >>>> + else { >>>> + memcpy(se->discard_map, se->cur_valid_map, >>>> + SIT_VBLOCK_MAP_SIZE); >>>> + >>>> + sbi->discard_blks += sbi->blocks_per_seg - >>>> + se->valid_blocks; >>>> + } >>>> + } >>>> +} >>>> + >>>> +static void build_sec_entries(struct f2fs_sb_info *sbi) >>>> +{ >>>> + unsigned segno; >>>> + >>>> + for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) >>>> + get_sec_entry(sbi, segno)->valid_blocks += >>>> + get_seg_entry(sbi, segno)->valid_blocks; >>>> +} >>>> + >>>> static void build_sit_entries(struct f2fs_sb_info *sbi) >>>> { >>>> struct sit_info *sit_i = SIT_I(sbi); >>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); >>>> struct f2fs_journal *journal = curseg->journal; >>>> - struct seg_entry *se; >>>> struct f2fs_sit_entry sit; >>>> int sit_blk_cnt = SIT_BLK_CNT(sbi); >>>> unsigned int i, start, end; >>>> @@ -3498,67 +3525,34 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) >>>> struct f2fs_sit_block *sit_blk; >>>> struct page *page; >>>> >>>> - se = &sit_i->sentries[start]; >>>> page = get_current_sit_page(sbi, start); >>>> sit_blk = (struct f2fs_sit_block *)page_address(page); >>>> sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; >>>> f2fs_put_page(page, 1); >>>> >>>> check_block_count(sbi, start, &sit); >>>> - seg_info_from_raw_sit(se, &sit); >>>> - >>>> - /* build discard map only one time */ >>>> - if (f2fs_discard_en(sbi)) { >>>> - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { >>>> - memset(se->discard_map, 0xff, >>>> - SIT_VBLOCK_MAP_SIZE); >>>> - } else { >>>> - memcpy(se->discard_map, >>>> - se->cur_valid_map, >>>> - SIT_VBLOCK_MAP_SIZE); >>>> - sbi->discard_blks += >>>> - sbi->blocks_per_seg - >>>> - se->valid_blocks; >>>> - } >>>> - } >>>> >>>> - if (sbi->segs_per_sec > 1) >>>> - get_sec_entry(sbi, start)->valid_blocks += >>>> - se->valid_blocks; >>>> + seg_info_from_raw_sit(&sit_i->sentries[start], &sit); >>>> } >>>> start_blk += readed; >>>> } while (start_blk < sit_blk_cnt); >>>> >>>> down_read(&curseg->journal_rwsem); >>>> for (i = 0; i < sits_in_cursum(journal); i++) { >>>> - unsigned int old_valid_blocks; >>>> - >>>> start = le32_to_cpu(segno_in_journal(journal, i)); >>>> - se = &sit_i->sentries[start]; >>>> sit = sit_in_journal(journal, i); >>>> >>>> - old_valid_blocks = se->valid_blocks; >>>> - >>>> 
check_block_count(sbi, start, &sit); >>>> - seg_info_from_raw_sit(se, &sit); >>>> - >>>> - if (f2fs_discard_en(sbi)) { >>>> - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { >>>> - memset(se->discard_map, 0xff, >>>> - SIT_VBLOCK_MAP_SIZE); >>>> - } else { >>>> - memcpy(se->discard_map, se->cur_valid_map, >>>> - SIT_VBLOCK_MAP_SIZE); >>>> - sbi->discard_blks += old_valid_blocks - >>>> - se->valid_blocks; >>>> - } >>>> - } >>>> - >>>> - if (sbi->segs_per_sec > 1) >>>> - get_sec_entry(sbi, start)->valid_blocks += >>>> - se->valid_blocks - old_valid_blocks; >>>> + seg_info_from_raw_sit(&sit_i->sentries[start], &sit); >>>> } >>>> up_read(&curseg->journal_rwsem); >>>> + >>>> + /* build discard map only one time */ >>>> + if (f2fs_discard_en(sbi)) >>>> + build_discard_entries(sbi); >>>> + >>>> + if (sbi->segs_per_sec > 1) >>>> + build_sec_entries(sbi); >>>> } >>>> >>>> static void init_free_segmap(struct f2fs_sb_info *sbi) >>>> -- >>>> 2.1.4
Hi Jaegeuk, For compiler loop optimization, refer to "loop unswitching" section in https://zhuanlan.zhihu.com/p/23326776 (Chinese stack overflow-like website) https://translate.google.com/translate?sl=zh-CN&tl=en&js=y&prev=_t&hl=zh-CN&ie=UTF-8&u=https%3A%2F%2Fzhuanlan.zhihu.com%2Fp%2F23326776&edit-text=&act=url or https://en.wikipedia.org/wiki/Loop_unswitching http://www.compileroptimizations.com/category/unswitching.htm https://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Optimize-Options.html -O3 Optimize yet more. -O3 turns on all optimizations specified by -O2 and also turns on the -finline-functions, -funswitch-loops, -fpredictive-commoning, -fgcse-after-reload and -ftree-vectorize options. For branch predictor, I think it is much easier to predict and analyze to parallelize while (A){ // only one miss ... } if (B) { while (A) { // only one miss ... } } Than while (A) { ... if (B) { // A->B->A->B crossing ... } } For most of processors. On 2017/12/20 10:31, Jaegeuk Kim wrote: > On 12/20, Gaoxiang (OS) wrote: >> Hi Jaegeuk, >> >> On 2017/12/20 7:40, Jaegeuk Kim wrote: >>> On 12/17, gaoxiang (P) wrote: >>>> clean up some redundant repeat code in build_sit_entries >>>> and seperate some logic into new functions: >>>> - build_discard_entries >>>> - build_sec_entries >>> >>> This patch makes another big loop redundantly? >>> >> "condition & jump" is the fundamental of "if" and loop. >> In the worst case, both f2fs_discard_en(sbi) and sbi->segs_per_sec > 1 >> are true, and I think, we have: >> - the original approach: >> MAIN_SEGS <-- loop condition >> MAIN_SEGS <-- f2fs_discard_en(sbi) >> MAIN_SEGS <-- sbi->segs_per_sec > 1 >> >> sits_in_cursum(journal) <-- loop_condition >> sits_in_cursum(journal) <-- f2fs_discard_en(sbi) >> sits_in_cursum(journal) <-- sbi->segs_per_sec > 1 >> it takes about 3*(MAIN_SEGS + sits_in_cursum(journal)) jumps effort. >> - this patch: >> MAIN_SEGS <-- loop condition >> sits_in_cursum(journal) <-- loop_condition >> >> MAIN_SEGS <-- f2fs_discard_en(sbi) >> MAIN_SEGS <-- sbi->segs_per_sec > 1 >> it takes abourt 3*MAIN_SEGS + sits_in_cursum(journal) jumps effort. >> >> and in the best case, both f2fs_discard_en(sbi) and sbi->segs_per_sec > >> 1 are false, and we have: >> - this patch >> MAIN_SEGS <-- loop condition >> >> sits_in_cursum(journal) <-- loop_condition >> See >> https://stackoverflow.com/questions/1462710/can-c-compilers-optimize-if-statements-inside-for-loops > > Interesting! > > What I can think of the worst case looks like: > > In current code, > for (N) { > do_something; > if (f2fs_discard_en()) > do _something; > if (sbi->segs_per_sec > 1) > do_something; > } > for (sits_in_cursum()) { > do_something; > if (f2fs_discard_en()) > do _something; > if (sbi->segs_per_sec > 1) > do_something; > } > => O(N + sits_in_cursum()) In the worst case Its 3(1 -- exit condition < MAIN_SEG, 2 -- f2fs_discard_en(), 3 -- sbi->segs_per_sec > 1) *N + 3*sits_in_cursum() if condition & jump instuctions at runtime. If we use Big O notion, yes I think O(N + sits_in_cursum()) > > Once compiler unrolled the loop, we can expect most of jumps could be hit by > branch predictor, since the loop does not have many branches. The current code, if the compiler decides to unroll the loop, it could unroll it into, I guess for (N) { do_something; } if (f2fs_discard_en()) { for(N) { do _something; } } if (sbi->segs_per_sec > 1) { for(N) { do_something; } } ... 
/* extra */ for (sits_in_cursum()) { do_something; } if (f2fs_discard_en()) { for (sits_in_cursum()) { do _something; } } if (sbi->segs_per_sec > 1) { for (sits_in_cursum()) { do_something; } } Taking branch predictor into consideration, I think the unrolled one is more easier to predict than the current rolled one. (as you say, the current has more conditions in the loop and a exit condition to predict) > > In the patch, > for (N) > do_something; > for (sits_in_cursum()) > do_something; > > if (f2fs_discard_en()) { > for (N) > do_something; > } > if (sbi->segs_per_sec > 1) { > for (N) > do_something; > } > => O(3*N + sits_in_cursum()) If we use Big O notion, I think O(N + sits_in_cursum()) too. > > BTW, build_discard_entries(struct f2fs_sb_info *sbi) is more likely to split > the loops, as you describe. > > In build_discard_entries(), > for (N) { > if (CP_TRIMMED_FLAG) > do_something; > else > do_semething_else; > } > Thanks, > I think so, yet CP_TRIMMED_FLAG comes from a function argument, so I think compiler cannot decide whether it is constant. I think if we count the runtime condition & jump instruction at runtime. the unrolls above are beneficial, I think. Thanks. >> >> In addtion, this approach is easier to read than the original >> one, so splitting the loop would be beneficial. >> >> Thanks >> >>>> >>>> Note that "if (f2fs_discard_en(sbi))" and "if (sbi->segs_per_sec > 1)" >>>> are all taken out of the loops they are unchangable. >>>> >>>> Signed-off-by: Gao Xiang <gaoxiang25@huawei.com> >>>> --- >>>> fs/f2fs/segment.c | 80 +++++++++++++++++++++++++------------------------------ >>>> 1 file changed, 37 insertions(+), 43 deletions(-) >>>> >>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c >>>> index 40e1d20..bcd8a40 100644 >>>> --- a/fs/f2fs/segment.c >>>> +++ b/fs/f2fs/segment.c >>>> @@ -3476,12 +3476,39 @@ static int build_curseg(struct f2fs_sb_info *sbi) >>>> return restore_curseg_summaries(sbi); >>>> } >>>> >>>> +static void build_discard_entries(struct f2fs_sb_info *sbi) >>>> +{ >>>> + unsigned segno; >>>> + >>>> + for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) { >>>> + struct seg_entry *se = get_seg_entry(sbi, segno); >>>> + >>>> + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) >>>> + memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); >>>> + else { >>>> + memcpy(se->discard_map, se->cur_valid_map, >>>> + SIT_VBLOCK_MAP_SIZE); >>>> + >>>> + sbi->discard_blks += sbi->blocks_per_seg - >>>> + se->valid_blocks; >>>> + } >>>> + } >>>> +} >>>> + >>>> +static void build_sec_entries(struct f2fs_sb_info *sbi) >>>> +{ >>>> + unsigned segno; >>>> + >>>> + for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) >>>> + get_sec_entry(sbi, segno)->valid_blocks += >>>> + get_seg_entry(sbi, segno)->valid_blocks; >>>> +} >>>> + >>>> static void build_sit_entries(struct f2fs_sb_info *sbi) >>>> { >>>> struct sit_info *sit_i = SIT_I(sbi); >>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); >>>> struct f2fs_journal *journal = curseg->journal; >>>> - struct seg_entry *se; >>>> struct f2fs_sit_entry sit; >>>> int sit_blk_cnt = SIT_BLK_CNT(sbi); >>>> unsigned int i, start, end; >>>> @@ -3498,67 +3525,34 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) >>>> struct f2fs_sit_block *sit_blk; >>>> struct page *page; >>>> >>>> - se = &sit_i->sentries[start]; >>>> page = get_current_sit_page(sbi, start); >>>> sit_blk = (struct f2fs_sit_block *)page_address(page); >>>> sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; >>>> f2fs_put_page(page, 1); >>>> >>>> check_block_count(sbi, 
start, &sit); >>>> - seg_info_from_raw_sit(se, &sit); >>>> - >>>> - /* build discard map only one time */ >>>> - if (f2fs_discard_en(sbi)) { >>>> - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { >>>> - memset(se->discard_map, 0xff, >>>> - SIT_VBLOCK_MAP_SIZE); >>>> - } else { >>>> - memcpy(se->discard_map, >>>> - se->cur_valid_map, >>>> - SIT_VBLOCK_MAP_SIZE); >>>> - sbi->discard_blks += >>>> - sbi->blocks_per_seg - >>>> - se->valid_blocks; >>>> - } >>>> - } >>>> >>>> - if (sbi->segs_per_sec > 1) >>>> - get_sec_entry(sbi, start)->valid_blocks += >>>> - se->valid_blocks; >>>> + seg_info_from_raw_sit(&sit_i->sentries[start], &sit); >>>> } >>>> start_blk += readed; >>>> } while (start_blk < sit_blk_cnt); >>>> >>>> down_read(&curseg->journal_rwsem); >>>> for (i = 0; i < sits_in_cursum(journal); i++) { >>>> - unsigned int old_valid_blocks; >>>> - >>>> start = le32_to_cpu(segno_in_journal(journal, i)); >>>> - se = &sit_i->sentries[start]; >>>> sit = sit_in_journal(journal, i); >>>> >>>> - old_valid_blocks = se->valid_blocks; >>>> - >>>> check_block_count(sbi, start, &sit); >>>> - seg_info_from_raw_sit(se, &sit); >>>> - >>>> - if (f2fs_discard_en(sbi)) { >>>> - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { >>>> - memset(se->discard_map, 0xff, >>>> - SIT_VBLOCK_MAP_SIZE); >>>> - } else { >>>> - memcpy(se->discard_map, se->cur_valid_map, >>>> - SIT_VBLOCK_MAP_SIZE); >>>> - sbi->discard_blks += old_valid_blocks - >>>> - se->valid_blocks; >>>> - } >>>> - } >>>> - >>>> - if (sbi->segs_per_sec > 1) >>>> - get_sec_entry(sbi, start)->valid_blocks += >>>> - se->valid_blocks - old_valid_blocks; >>>> + seg_info_from_raw_sit(&sit_i->sentries[start], &sit); >>>> } >>>> up_read(&curseg->journal_rwsem); >>>> + >>>> + /* build discard map only one time */ >>>> + if (f2fs_discard_en(sbi)) >>>> + build_discard_entries(sbi); >>>> + >>>> + if (sbi->segs_per_sec > 1) >>>> + build_sec_entries(sbi); >>>> } >>>> >>>> static void init_free_segmap(struct f2fs_sb_info *sbi) >>>> -- >>>> 2.1.4
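As a concrete illustration of the loop-unswitching transformation referenced above (the kind of rewrite GCC's -funswitch-loops performs), here is a tiny before/after example in C. It is generic sample code written for this thread, not taken from f2fs.

    #include <stddef.h>

    /* Before: the loop-invariant flag is re-tested on every iteration. */
    void scale_before(double *v, size_t n, int absolute)
    {
        size_t i;

        for (i = 0; i < n; i++) {
            if (absolute)               /* invariant inside the loop */
                v[i] = v[i] < 0 ? -v[i] : v[i];
            else
                v[i] = 2.0 * v[i];
        }
    }

    /* After unswitching: one test, two specialized copies of the loop. */
    void scale_after(double *v, size_t n, int absolute)
    {
        size_t i;

        if (absolute) {
            for (i = 0; i < n; i++)
                v[i] = v[i] < 0 ? -v[i] : v[i];
        } else {
            for (i = 0; i < n; i++)
                v[i] = 2.0 * v[i];
        }
    }

Whether the compiler actually applies this transformation depends on the optimization level and its cost model (it is enabled by -O3 via -funswitch-loops, per the GCC documentation linked above), which is part of why the patch performs the split by hand.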
On 12/20, Gaoxiang (OS) wrote: > Hi Jaegeuk, > > On 2017/12/20 10:31, Jaegeuk Kim wrote: > > On 12/20, Gaoxiang (OS) wrote: > >> Hi Jaegeuk, > >> > >> On 2017/12/20 7:40, Jaegeuk Kim wrote: > >>> On 12/17, gaoxiang (P) wrote: > >>>> clean up some redundant repeat code in build_sit_entries > >>>> and seperate some logic into new functions: > >>>> - build_discard_entries > >>>> - build_sec_entries > >>> > >>> This patch makes another big loop redundantly? > >>> > >> "condition & jump" is the fundamental of "if" and loop. > >> In the worst case, both f2fs_discard_en(sbi) and sbi->segs_per_sec > 1 > >> are true, and I think, we have: > >> - the original approach: > >> MAIN_SEGS <-- loop condition > >> MAIN_SEGS <-- f2fs_discard_en(sbi) > >> MAIN_SEGS <-- sbi->segs_per_sec > 1 > >> > >> sits_in_cursum(journal) <-- loop_condition > >> sits_in_cursum(journal) <-- f2fs_discard_en(sbi) > >> sits_in_cursum(journal) <-- sbi->segs_per_sec > 1 > >> it takes about 3*(MAIN_SEGS + sits_in_cursum(journal)) jumps effort. > >> - this patch: > >> MAIN_SEGS <-- loop condition > >> sits_in_cursum(journal) <-- loop_condition > >> > >> MAIN_SEGS <-- f2fs_discard_en(sbi) > >> MAIN_SEGS <-- sbi->segs_per_sec > 1 > >> it takes abourt 3*MAIN_SEGS + sits_in_cursum(journal) jumps effort. > >> > >> and in the best case, both f2fs_discard_en(sbi) and sbi->segs_per_sec > > >> 1 are false, and we have: > >> - this patch > >> MAIN_SEGS <-- loop condition > >> > >> sits_in_cursum(journal) <-- loop_condition > >> See > >> https://stackoverflow.com/questions/1462710/can-c-compilers-optimize-if-statements-inside-for-loops > > > > Interesting! > > > > What I can think of the worst case looks like: > > > > In current code, > > for (N) { > > do_something; > > if (f2fs_discard_en()) > > do _something; > > if (sbi->segs_per_sec > 1) > > do_something; > > } > > for (sits_in_cursum()) { > > do_something; > > if (f2fs_discard_en()) > > do _something; > > if (sbi->segs_per_sec > 1) > > do_something; > > } > > => O(N + sits_in_cursum()) > > In the worst case Its 3(1 -- exit condition < MAIN_SEG, 2 -- f2fs_discard_en(), 3 -- > sbi->segs_per_sec > 1) *N + 3*sits_in_cursum() if condition & jump > instuctions at runtime. > > If we use Big O notion, yes I think O(N + sits_in_cursum()) > > > > > Once compiler unrolled the loop, we can expect most of jumps could be hit by > > branch predictor, since the loop does not have many branches. > > The current code, if the compiler decides to unroll the loop, it could unroll it into, I guess > for (N) { > do_something; > } > if (f2fs_discard_en()) { > for(N) { > do _something; > } > } > if (sbi->segs_per_sec > 1) { > for(N) { > do_something; > } > } > ... > for (sits_in_cursum()) { > do_something; > } > if (f2fs_discard_en()) { > for (N) { > do _something; > } > } > if (sbi->segs_per_sec > 1) { > for (N) { > do_something; > } > } Unrolled loop would be something like: for (N/n) { do_something #1; if (f2fs_discard_en()) do_something; if (sbi->segs_per_sec > 1) do_something; ... do_something #n; if (f2fs_discard_en()) do_something; if (sbi->segs_per_sec > 1) do_something; } And, branch predictor can avoid three branch conditions by BTB. do_something #1; if (f2fs_discard_en()) do _something; if (sbi->segs_per_sec > 1) do_something; do_something #2; if (f2fs_discard_en()) do _something; if (sbi->segs_per_sec > 1) do_something; ... 
do_something #N; if (f2fs_discard_en()) do _something; if (sbi->segs_per_sec > 1) do_something; Anyway, I don't see huge benefit regarding to performance and code readability, but do worry about potential patch conflicts when backporting this. > Taking branch predictor into consideration, I think the unrolled one is more easier to predict > than the current rolled one. (as you say, the current has more conditions in the loop > and a exit condition to predict) > > > > > In the patch, > > for (N) > > do_something; > > for (sits_in_cursum()) > > do_something; > > > > if (f2fs_discard_en()) { > > for (N) > > do_something; > > } > > if (sbi->segs_per_sec > 1) { > > for (N) > > do_something; > > } > > => O(3*N + sits_in_cursum()) > > If we use Big O notion, I think O(N + sits_in_cursum()) too. > > > > BTW, build_discard_entries(struct f2fs_sb_info *sbi) is more likely to split > > the loops, as you describe. > > > > In build_discard_entries(), > > for (N) { > > if (CP_TRIMMED_FLAG) > > do_something; > > else > > do_semething_else; > > } > > Thanks, > > > > I think so, yet CP_TRIMMED_FLAG comes from a function argument, so I think compiler > cannot decide whether it is constant. > > I think if we count the runtime condition & jump instruction at runtime. > the unrolls above are beneficial, I think. > > Thanks. > > >> > >> In addtion, this approach is easier to read than the original > >> one, so splitting the loop would be beneficial. > >> > >> Thanks > >> > >>>> > >>>> Note that "if (f2fs_discard_en(sbi))" and "if (sbi->segs_per_sec > 1)" > >>>> are all taken out of the loops they are unchangable. > >>>> > >>>> Signed-off-by: Gao Xiang <gaoxiang25@huawei.com> > >>>> --- > >>>> fs/f2fs/segment.c | 80 +++++++++++++++++++++++++------------------------------ > >>>> 1 file changed, 37 insertions(+), 43 deletions(-) > >>>> > >>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > >>>> index 40e1d20..bcd8a40 100644 > >>>> --- a/fs/f2fs/segment.c > >>>> +++ b/fs/f2fs/segment.c > >>>> @@ -3476,12 +3476,39 @@ static int build_curseg(struct f2fs_sb_info *sbi) > >>>> return restore_curseg_summaries(sbi); > >>>> } > >>>> > >>>> +static void build_discard_entries(struct f2fs_sb_info *sbi) > >>>> +{ > >>>> + unsigned segno; > >>>> + > >>>> + for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) { > >>>> + struct seg_entry *se = get_seg_entry(sbi, segno); > >>>> + > >>>> + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) > >>>> + memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); > >>>> + else { > >>>> + memcpy(se->discard_map, se->cur_valid_map, > >>>> + SIT_VBLOCK_MAP_SIZE); > >>>> + > >>>> + sbi->discard_blks += sbi->blocks_per_seg - > >>>> + se->valid_blocks; > >>>> + } > >>>> + } > >>>> +} > >>>> + > >>>> +static void build_sec_entries(struct f2fs_sb_info *sbi) > >>>> +{ > >>>> + unsigned segno; > >>>> + > >>>> + for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) > >>>> + get_sec_entry(sbi, segno)->valid_blocks += > >>>> + get_seg_entry(sbi, segno)->valid_blocks; > >>>> +} > >>>> + > >>>> static void build_sit_entries(struct f2fs_sb_info *sbi) > >>>> { > >>>> struct sit_info *sit_i = SIT_I(sbi); > >>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); > >>>> struct f2fs_journal *journal = curseg->journal; > >>>> - struct seg_entry *se; > >>>> struct f2fs_sit_entry sit; > >>>> int sit_blk_cnt = SIT_BLK_CNT(sbi); > >>>> unsigned int i, start, end; > >>>> @@ -3498,67 +3525,34 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) > >>>> struct f2fs_sit_block *sit_blk; > >>>> struct page *page; > >>>> 
> >>>> - se = &sit_i->sentries[start]; > >>>> page = get_current_sit_page(sbi, start); > >>>> sit_blk = (struct f2fs_sit_block *)page_address(page); > >>>> sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; > >>>> f2fs_put_page(page, 1); > >>>> > >>>> check_block_count(sbi, start, &sit); > >>>> - seg_info_from_raw_sit(se, &sit); > >>>> - > >>>> - /* build discard map only one time */ > >>>> - if (f2fs_discard_en(sbi)) { > >>>> - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { > >>>> - memset(se->discard_map, 0xff, > >>>> - SIT_VBLOCK_MAP_SIZE); > >>>> - } else { > >>>> - memcpy(se->discard_map, > >>>> - se->cur_valid_map, > >>>> - SIT_VBLOCK_MAP_SIZE); > >>>> - sbi->discard_blks += > >>>> - sbi->blocks_per_seg - > >>>> - se->valid_blocks; > >>>> - } > >>>> - } > >>>> > >>>> - if (sbi->segs_per_sec > 1) > >>>> - get_sec_entry(sbi, start)->valid_blocks += > >>>> - se->valid_blocks; > >>>> + seg_info_from_raw_sit(&sit_i->sentries[start], &sit); > >>>> } > >>>> start_blk += readed; > >>>> } while (start_blk < sit_blk_cnt); > >>>> > >>>> down_read(&curseg->journal_rwsem); > >>>> for (i = 0; i < sits_in_cursum(journal); i++) { > >>>> - unsigned int old_valid_blocks; > >>>> - > >>>> start = le32_to_cpu(segno_in_journal(journal, i)); > >>>> - se = &sit_i->sentries[start]; > >>>> sit = sit_in_journal(journal, i); > >>>> > >>>> - old_valid_blocks = se->valid_blocks; > >>>> - > >>>> check_block_count(sbi, start, &sit); > >>>> - seg_info_from_raw_sit(se, &sit); > >>>> - > >>>> - if (f2fs_discard_en(sbi)) { > >>>> - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { > >>>> - memset(se->discard_map, 0xff, > >>>> - SIT_VBLOCK_MAP_SIZE); > >>>> - } else { > >>>> - memcpy(se->discard_map, se->cur_valid_map, > >>>> - SIT_VBLOCK_MAP_SIZE); > >>>> - sbi->discard_blks += old_valid_blocks - > >>>> - se->valid_blocks; > >>>> - } > >>>> - } > >>>> - > >>>> - if (sbi->segs_per_sec > 1) > >>>> - get_sec_entry(sbi, start)->valid_blocks += > >>>> - se->valid_blocks - old_valid_blocks; > >>>> + seg_info_from_raw_sit(&sit_i->sentries[start], &sit); > >>>> } > >>>> up_read(&curseg->journal_rwsem); > >>>> + > >>>> + /* build discard map only one time */ > >>>> + if (f2fs_discard_en(sbi)) > >>>> + build_discard_entries(sbi); > >>>> + > >>>> + if (sbi->segs_per_sec > 1) > >>>> + build_sec_entries(sbi); > >>>> } > >>>> > >>>> static void init_free_segmap(struct f2fs_sb_info *sbi) > >>>> -- > >>>> 2.1.4
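The partial unrolling Jaegeuk describes can be sketched as follows (an unroll factor of 2, reusing the hypothetical do_base()/do_discard()/do_sec() stubs from the earlier sketch; this only approximates what a compiler might emit, and the invariant checks are simply repeated in each unrolled step where the branch predictor/BTB can learn them cheaply).

    /* Original loop unrolled by a factor of 2; compiles when appended to
     * the earlier sketch that defines do_base()/do_discard()/do_sec(). */
    static void build_original_unrolled2(size_t n, bool discard_en,
                                         bool multi_seg_sec)
    {
        size_t i = 0;

        for (; i + 1 < n; i += 2) {
            do_base(i);
            if (discard_en)
                do_discard(i);
            if (multi_seg_sec)
                do_sec(i);

            do_base(i + 1);
            if (discard_en)
                do_discard(i + 1);
            if (multi_seg_sec)
                do_sec(i + 1);
        }

        for (; i < n; i++) {            /* remainder when n is odd */
            do_base(i);
            if (discard_en)
                do_discard(i);
            if (multi_seg_sec)
                do_sec(i);
        }
    }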
Hi Jaegeuk, On 2017/12/21 4:39, Jaegeuk Kim wrote: > On 12/20, Gaoxiang (OS) wrote: >> Hi Jaegeuk, >> >> On 2017/12/20 10:31, Jaegeuk Kim wrote: >>> On 12/20, Gaoxiang (OS) wrote: >>>> Hi Jaegeuk, >>>> >>>> On 2017/12/20 7:40, Jaegeuk Kim wrote: >>>>> On 12/17, gaoxiang (P) wrote: >>>>>> clean up some redundant repeat code in build_sit_entries >>>>>> and seperate some logic into new functions: >>>>>> - build_discard_entries >>>>>> - build_sec_entries >>>>> >>>>> This patch makes another big loop redundantly? >>>>> >>>> "condition & jump" is the fundamental of "if" and loop. >>>> In the worst case, both f2fs_discard_en(sbi) and sbi->segs_per_sec > 1 >>>> are true, and I think, we have: >>>> - the original approach: >>>> MAIN_SEGS <-- loop condition >>>> MAIN_SEGS <-- f2fs_discard_en(sbi) >>>> MAIN_SEGS <-- sbi->segs_per_sec > 1 >>>> >>>> sits_in_cursum(journal) <-- loop_condition >>>> sits_in_cursum(journal) <-- f2fs_discard_en(sbi) >>>> sits_in_cursum(journal) <-- sbi->segs_per_sec > 1 >>>> it takes about 3*(MAIN_SEGS + sits_in_cursum(journal)) jumps effort. >>>> - this patch: >>>> MAIN_SEGS <-- loop condition >>>> sits_in_cursum(journal) <-- loop_condition >>>> >>>> MAIN_SEGS <-- f2fs_discard_en(sbi) >>>> MAIN_SEGS <-- sbi->segs_per_sec > 1 >>>> it takes abourt 3*MAIN_SEGS + sits_in_cursum(journal) jumps effort. >>>> >>>> and in the best case, both f2fs_discard_en(sbi) and sbi->segs_per_sec > >>>> 1 are false, and we have: >>>> - this patch >>>> MAIN_SEGS <-- loop condition >>>> >>>> sits_in_cursum(journal) <-- loop_condition >>>> See >>>> https://stackoverflow.com/questions/1462710/can-c-compilers-optimize-if-statements-inside-for-loops >>> >>> Interesting! >>> >>> What I can think of the worst case looks like: >>> >>> In current code, >>> for (N) { >>> do_something; >>> if (f2fs_discard_en()) >>> do _something; >>> if (sbi->segs_per_sec > 1) >>> do_something; >>> } >>> for (sits_in_cursum()) { >>> do_something; >>> if (f2fs_discard_en()) >>> do _something; >>> if (sbi->segs_per_sec > 1) >>> do_something; >>> } >>> => O(N + sits_in_cursum()) >> >> In the worst case Its 3(1 -- exit condition < MAIN_SEG, 2 -- f2fs_discard_en(), 3 -- >> sbi->segs_per_sec > 1) *N + 3*sits_in_cursum() if condition & jump >> instuctions at runtime. >> >> If we use Big O notion, yes I think O(N + sits_in_cursum()) >> >>> >>> Once compiler unrolled the loop, we can expect most of jumps could be hit by >>> branch predictor, since the loop does not have many branches. >> >> The current code, if the compiler decides to unroll the loop, it could unroll it into, I guess >> for (N) { >> do_something; >> } >> if (f2fs_discard_en()) { >> for(N) { >> do _something; >> } >> } >> if (sbi->segs_per_sec > 1) { >> for(N) { >> do_something; >> } >> } >> ... >> for (sits_in_cursum()) { >> do_something; >> } >> if (f2fs_discard_en()) { >> for (N) { >> do _something; >> } >> } >> if (sbi->segs_per_sec > 1) { >> for (N) { >> do_something; >> } >> } > > Unrolled loop would be something like: > for (N/n) { > do_something #1; > if (f2fs_discard_en()) > do_something; > if (sbi->segs_per_sec > 1) > do_something; > ... > do_something #n; > if (f2fs_discard_en()) > do_something; > if (sbi->segs_per_sec > 1) > do_something; > } > > And, branch predictor can avoid three branch conditions by BTB. > > do_something #1; > if (f2fs_discard_en()) > do _something; > if (sbi->segs_per_sec > 1) > do_something; > do_something #2; > if (f2fs_discard_en()) > do _something; > if (sbi->segs_per_sec > 1) > do_something; > ... 
> do_something #N; > if (f2fs_discard_en()) > do _something; > if (sbi->segs_per_sec > 1) > do_something; > > Anyway, I don't see huge benefit regarding to performance and code readability, > but do worry about potential patch conflicts when backporting this. > ok, I think it depends on the kind and complexity of BTB the target processor or architecture uses and how the target architecture enhances parallelization. I am new to f2fs, curious about the f2fs detailed implementation. I'm more focused on the code readability since some code with jumps are a little bit hacky. :) Thanks, >> Taking branch predictor into consideration, I think the unrolled one is more easier to predict >> than the current rolled one. (as you say, the current has more conditions in the loop >> and a exit condition to predict) >> >>> >>> In the patch, >>> for (N) >>> do_something; >>> for (sits_in_cursum()) >>> do_something; >>> >>> if (f2fs_discard_en()) { >>> for (N) >>> do_something; >>> } >>> if (sbi->segs_per_sec > 1) { >>> for (N) >>> do_something; >>> } >>> => O(3*N + sits_in_cursum()) >> >> If we use Big O notion, I think O(N + sits_in_cursum()) too. >>> >>> BTW, build_discard_entries(struct f2fs_sb_info *sbi) is more likely to split >>> the loops, as you describe. >>> >>> In build_discard_entries(), >>> for (N) { >>> if (CP_TRIMMED_FLAG) >>> do_something; >>> else >>> do_semething_else; >>> } >>> Thanks, >>> >> >> I think so, yet CP_TRIMMED_FLAG comes from a function argument, so I think compiler >> cannot decide whether it is constant. >> >> I think if we count the runtime condition & jump instruction at runtime. >> the unrolls above are beneficial, I think. >> >> Thanks. >> >>>> >>>> In addtion, this approach is easier to read than the original >>>> one, so splitting the loop would be beneficial. >>>> >>>> Thanks >>>> >>>>>> >>>>>> Note that "if (f2fs_discard_en(sbi))" and "if (sbi->segs_per_sec > 1)" >>>>>> are all taken out of the loops they are unchangable. 
>>>>>> >>>>>> Signed-off-by: Gao Xiang <gaoxiang25@huawei.com> >>>>>> --- >>>>>> fs/f2fs/segment.c | 80 +++++++++++++++++++++++++------------------------------ >>>>>> 1 file changed, 37 insertions(+), 43 deletions(-) >>>>>> >>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c >>>>>> index 40e1d20..bcd8a40 100644 >>>>>> --- a/fs/f2fs/segment.c >>>>>> +++ b/fs/f2fs/segment.c >>>>>> @@ -3476,12 +3476,39 @@ static int build_curseg(struct f2fs_sb_info *sbi) >>>>>> return restore_curseg_summaries(sbi); >>>>>> } >>>>>> >>>>>> +static void build_discard_entries(struct f2fs_sb_info *sbi) >>>>>> +{ >>>>>> + unsigned segno; >>>>>> + >>>>>> + for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) { >>>>>> + struct seg_entry *se = get_seg_entry(sbi, segno); >>>>>> + >>>>>> + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) >>>>>> + memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); >>>>>> + else { >>>>>> + memcpy(se->discard_map, se->cur_valid_map, >>>>>> + SIT_VBLOCK_MAP_SIZE); >>>>>> + >>>>>> + sbi->discard_blks += sbi->blocks_per_seg - >>>>>> + se->valid_blocks; >>>>>> + } >>>>>> + } >>>>>> +} >>>>>> + >>>>>> +static void build_sec_entries(struct f2fs_sb_info *sbi) >>>>>> +{ >>>>>> + unsigned segno; >>>>>> + >>>>>> + for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) >>>>>> + get_sec_entry(sbi, segno)->valid_blocks += >>>>>> + get_seg_entry(sbi, segno)->valid_blocks; >>>>>> +} >>>>>> + >>>>>> static void build_sit_entries(struct f2fs_sb_info *sbi) >>>>>> { >>>>>> struct sit_info *sit_i = SIT_I(sbi); >>>>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); >>>>>> struct f2fs_journal *journal = curseg->journal; >>>>>> - struct seg_entry *se; >>>>>> struct f2fs_sit_entry sit; >>>>>> int sit_blk_cnt = SIT_BLK_CNT(sbi); >>>>>> unsigned int i, start, end; >>>>>> @@ -3498,67 +3525,34 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) >>>>>> struct f2fs_sit_block *sit_blk; >>>>>> struct page *page; >>>>>> >>>>>> - se = &sit_i->sentries[start]; >>>>>> page = get_current_sit_page(sbi, start); >>>>>> sit_blk = (struct f2fs_sit_block *)page_address(page); >>>>>> sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; >>>>>> f2fs_put_page(page, 1); >>>>>> >>>>>> check_block_count(sbi, start, &sit); >>>>>> - seg_info_from_raw_sit(se, &sit); >>>>>> - >>>>>> - /* build discard map only one time */ >>>>>> - if (f2fs_discard_en(sbi)) { >>>>>> - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { >>>>>> - memset(se->discard_map, 0xff, >>>>>> - SIT_VBLOCK_MAP_SIZE); >>>>>> - } else { >>>>>> - memcpy(se->discard_map, >>>>>> - se->cur_valid_map, >>>>>> - SIT_VBLOCK_MAP_SIZE); >>>>>> - sbi->discard_blks += >>>>>> - sbi->blocks_per_seg - >>>>>> - se->valid_blocks; >>>>>> - } >>>>>> - } >>>>>> >>>>>> - if (sbi->segs_per_sec > 1) >>>>>> - get_sec_entry(sbi, start)->valid_blocks += >>>>>> - se->valid_blocks; >>>>>> + seg_info_from_raw_sit(&sit_i->sentries[start], &sit); >>>>>> } >>>>>> start_blk += readed; >>>>>> } while (start_blk < sit_blk_cnt); >>>>>> >>>>>> down_read(&curseg->journal_rwsem); >>>>>> for (i = 0; i < sits_in_cursum(journal); i++) { >>>>>> - unsigned int old_valid_blocks; >>>>>> - >>>>>> start = le32_to_cpu(segno_in_journal(journal, i)); >>>>>> - se = &sit_i->sentries[start]; >>>>>> sit = sit_in_journal(journal, i); >>>>>> >>>>>> - old_valid_blocks = se->valid_blocks; >>>>>> - >>>>>> check_block_count(sbi, start, &sit); >>>>>> - seg_info_from_raw_sit(se, &sit); >>>>>> - >>>>>> - if (f2fs_discard_en(sbi)) { >>>>>> - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { >>>>>> - memset(se->discard_map, 0xff, 
>>>>>> - SIT_VBLOCK_MAP_SIZE); >>>>>> - } else { >>>>>> - memcpy(se->discard_map, se->cur_valid_map, >>>>>> - SIT_VBLOCK_MAP_SIZE); >>>>>> - sbi->discard_blks += old_valid_blocks - >>>>>> - se->valid_blocks; >>>>>> - } >>>>>> - } >>>>>> - >>>>>> - if (sbi->segs_per_sec > 1) >>>>>> - get_sec_entry(sbi, start)->valid_blocks += >>>>>> - se->valid_blocks - old_valid_blocks; >>>>>> + seg_info_from_raw_sit(&sit_i->sentries[start], &sit); >>>>>> } >>>>>> up_read(&curseg->journal_rwsem); >>>>>> + >>>>>> + /* build discard map only one time */ >>>>>> + if (f2fs_discard_en(sbi)) >>>>>> + build_discard_entries(sbi); >>>>>> + >>>>>> + if (sbi->segs_per_sec > 1) >>>>>> + build_sec_entries(sbi); >>>>>> } >>>>>> >>>>>> static void init_free_segmap(struct f2fs_sb_info *sbi) >>>>>> -- >>>>>> 2.1.4
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 40e1d20..bcd8a40 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3476,12 +3476,39 @@ static int build_curseg(struct f2fs_sb_info *sbi)
         return restore_curseg_summaries(sbi);
 }
 
+static void build_discard_entries(struct f2fs_sb_info *sbi)
+{
+        unsigned segno;
+
+        for (segno = 0; segno < MAIN_SEGS(sbi); ++segno) {
+                struct seg_entry *se = get_seg_entry(sbi, segno);
+
+                if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG))
+                        memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
+                else {
+                        memcpy(se->discard_map, se->cur_valid_map,
+                                SIT_VBLOCK_MAP_SIZE);
+
+                        sbi->discard_blks += sbi->blocks_per_seg -
+                                se->valid_blocks;
+                }
+        }
+}
+
+static void build_sec_entries(struct f2fs_sb_info *sbi)
+{
+        unsigned segno;
+
+        for (segno = 0; segno < MAIN_SEGS(sbi); ++segno)
+                get_sec_entry(sbi, segno)->valid_blocks +=
+                        get_seg_entry(sbi, segno)->valid_blocks;
+}
+
 static void build_sit_entries(struct f2fs_sb_info *sbi)
 {
         struct sit_info *sit_i = SIT_I(sbi);
         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
         struct f2fs_journal *journal = curseg->journal;
-        struct seg_entry *se;
         struct f2fs_sit_entry sit;
         int sit_blk_cnt = SIT_BLK_CNT(sbi);
         unsigned int i, start, end;
@@ -3498,67 +3525,34 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
                         struct f2fs_sit_block *sit_blk;
                         struct page *page;
 
-                        se = &sit_i->sentries[start];
                         page = get_current_sit_page(sbi, start);
                         sit_blk = (struct f2fs_sit_block *)page_address(page);
                         sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
                         f2fs_put_page(page, 1);
 
                         check_block_count(sbi, start, &sit);
-                        seg_info_from_raw_sit(se, &sit);
-
-                        /* build discard map only one time */
-                        if (f2fs_discard_en(sbi)) {
-                                if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
-                                        memset(se->discard_map, 0xff,
-                                                SIT_VBLOCK_MAP_SIZE);
-                                } else {
-                                        memcpy(se->discard_map,
-                                                se->cur_valid_map,
-                                                SIT_VBLOCK_MAP_SIZE);
-                                        sbi->discard_blks +=
-                                                sbi->blocks_per_seg -
-                                                se->valid_blocks;
-                                }
-                        }
 
-                        if (sbi->segs_per_sec > 1)
-                                get_sec_entry(sbi, start)->valid_blocks +=
-                                        se->valid_blocks;
+                        seg_info_from_raw_sit(&sit_i->sentries[start], &sit);
                 }
                 start_blk += readed;
         } while (start_blk < sit_blk_cnt);
 
         down_read(&curseg->journal_rwsem);
         for (i = 0; i < sits_in_cursum(journal); i++) {
-                unsigned int old_valid_blocks;
-
                 start = le32_to_cpu(segno_in_journal(journal, i));
-                se = &sit_i->sentries[start];
                 sit = sit_in_journal(journal, i);
 
-                old_valid_blocks = se->valid_blocks;
-
                 check_block_count(sbi, start, &sit);
-                seg_info_from_raw_sit(se, &sit);
-
-                if (f2fs_discard_en(sbi)) {
-                        if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
-                                memset(se->discard_map, 0xff,
-                                        SIT_VBLOCK_MAP_SIZE);
-                        } else {
-                                memcpy(se->discard_map, se->cur_valid_map,
-                                        SIT_VBLOCK_MAP_SIZE);
-                                sbi->discard_blks += old_valid_blocks -
-                                        se->valid_blocks;
-                        }
-                }
-
-                if (sbi->segs_per_sec > 1)
-                        get_sec_entry(sbi, start)->valid_blocks +=
-                                se->valid_blocks - old_valid_blocks;
+                seg_info_from_raw_sit(&sit_i->sentries[start], &sit);
         }
         up_read(&curseg->journal_rwsem);
+
+        /* build discard map only one time */
+        if (f2fs_discard_en(sbi))
+                build_discard_entries(sbi);
+
+        if (sbi->segs_per_sec > 1)
+                build_sec_entries(sbi);
 }
 
 static void init_free_segmap(struct f2fs_sb_info *sbi)
Clean up some redundant, repetitive code in build_sit_entries and separate some of its logic into new functions:
- build_discard_entries
- build_sec_entries

Note that the "if (f2fs_discard_en(sbi))" and "if (sbi->segs_per_sec > 1)" checks are both taken out of the loops, since they are unchangeable inside them.

Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
---
 fs/f2fs/segment.c | 80 +++++++++++++++++++++++++------------------------------
 1 file changed, 37 insertions(+), 43 deletions(-)