Message ID | 1509368658-60355-1-git-send-email-yunlong.song@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
ping... On 2017/10/30 21:04, Yunlong Song wrote: > f2fs_balance_fs only actives once in the commit_inmem_pages, but there > are more than one page to commit, so all the other pages will miss the > check. This will lead to out-of-free problem when commit a very large > file. However, we cannot do f2fs_balance_fs for each inmem page, since > this will break atomicity. As a result, we should collect prefree > segments if needed and stop atomic commit when there are not enough > available blocks to write atomic pages. > > Signed-off-by: Yunlong Song <yunlong.song@huawei.com> > --- > fs/f2fs/f2fs.h | 1 + > fs/f2fs/segment.c | 29 ++++++++++++++++++++++++++++- > 2 files changed, 29 insertions(+), 1 deletion(-) > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index 13a96b8..04ce48f 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -610,6 +610,7 @@ struct f2fs_inode_info { > struct list_head inmem_pages; /* inmemory pages managed by f2fs */ > struct task_struct *inmem_task; /* store inmemory task */ > struct mutex inmem_lock; /* lock for inmemory pages */ > + unsigned long inmem_blocks; /* inmemory blocks */ > struct extent_tree *extent_tree; /* cached extent_tree entry */ > struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ > struct rw_semaphore i_mmap_sem; > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > index 46dfbca..813c110 100644 > --- a/fs/f2fs/segment.c > +++ b/fs/f2fs/segment.c > @@ -210,6 +210,7 @@ void register_inmem_page(struct inode *inode, struct page *page) > list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); > spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); > inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); > + fi->inmem_blocks++; > mutex_unlock(&fi->inmem_lock); > > trace_f2fs_register_inmem_page(page, INMEM); > @@ -221,6 +222,7 @@ static int __revoke_inmem_pages(struct inode *inode, > struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > struct inmem_pages *cur, *tmp; > int err = 0; > + struct f2fs_inode_info *fi = F2FS_I(inode); > > list_for_each_entry_safe(cur, tmp, head, list) { > struct page *page = cur->page; > @@ -263,6 +265,7 @@ static int __revoke_inmem_pages(struct inode *inode, > list_del(&cur->list); > kmem_cache_free(inmem_entry_slab, cur); > dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); > + fi->inmem_blocks--; > } > return err; > } > @@ -302,6 +305,10 @@ void drop_inmem_pages(struct inode *inode) > if (!list_empty(&fi->inmem_ilist)) > list_del_init(&fi->inmem_ilist); > spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); > + if (fi->inmem_blocks) { > + f2fs_bug_on(sbi, 1); > + fi->inmem_blocks = 0; > + } > mutex_unlock(&fi->inmem_lock); > > clear_inode_flag(inode, FI_ATOMIC_FILE); > @@ -326,6 +333,7 @@ void drop_inmem_page(struct inode *inode, struct page *page) > > f2fs_bug_on(sbi, !cur || cur->page != page); > list_del(&cur->list); > + fi->inmem_blocks--; > mutex_unlock(&fi->inmem_lock); > > dec_page_count(sbi, F2FS_INMEM_PAGES); > @@ -410,11 +418,26 @@ int commit_inmem_pages(struct inode *inode) > > INIT_LIST_HEAD(&revoke_list); > f2fs_balance_fs(sbi, true); > + if (prefree_segments(sbi) > + && has_not_enough_free_secs(sbi, 0, > + fi->inmem_blocks / BLKS_PER_SEC(sbi))) { > + struct cp_control cpc; > + > + cpc.reason = __get_cp_reason(sbi); > + err = write_checkpoint(sbi, &cpc); > + if (err) > + goto drop; > + } > f2fs_lock_op(sbi); > > set_inode_flag(inode, FI_ATOMIC_COMMIT); > > mutex_lock(&fi->inmem_lock); > + if ((sbi->user_block_count - valid_user_blocks(sbi)) < > + fi->inmem_blocks) { > + err = -ENOSPC; > + goto drop; > + } > err = __commit_inmem_pages(inode, &revoke_list); > if (err) { > int ret; > @@ -429,7 +452,7 @@ int commit_inmem_pages(struct inode *inode) > ret = __revoke_inmem_pages(inode, &revoke_list, false, true); > if (ret) > err = ret; > - > +drop: > /* drop all uncommitted pages */ > __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); > } > @@ -437,6 +460,10 @@ int commit_inmem_pages(struct inode *inode) > if (!list_empty(&fi->inmem_ilist)) > list_del_init(&fi->inmem_ilist); > spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); > + if (fi->inmem_blocks) { > + f2fs_bug_on(sbi, 1); > + fi->inmem_blocks = 0; > + } > mutex_unlock(&fi->inmem_lock); > > clear_inode_flag(inode, FI_ATOMIC_COMMIT);
On 10/30, Yunlong Song wrote: > f2fs_balance_fs only actives once in the commit_inmem_pages, but there > are more than one page to commit, so all the other pages will miss the > check. This will lead to out-of-free problem when commit a very large > file. However, we cannot do f2fs_balance_fs for each inmem page, since > this will break atomicity. As a result, we should collect prefree > segments if needed and stop atomic commit when there are not enough > available blocks to write atomic pages. > > Signed-off-by: Yunlong Song <yunlong.song@huawei.com> > --- > fs/f2fs/f2fs.h | 1 + > fs/f2fs/segment.c | 29 ++++++++++++++++++++++++++++- > 2 files changed, 29 insertions(+), 1 deletion(-) > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index 13a96b8..04ce48f 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -610,6 +610,7 @@ struct f2fs_inode_info { > struct list_head inmem_pages; /* inmemory pages managed by f2fs */ > struct task_struct *inmem_task; /* store inmemory task */ > struct mutex inmem_lock; /* lock for inmemory pages */ > + unsigned long inmem_blocks; /* inmemory blocks */ > struct extent_tree *extent_tree; /* cached extent_tree entry */ > struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ > struct rw_semaphore i_mmap_sem; > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > index 46dfbca..813c110 100644 > --- a/fs/f2fs/segment.c > +++ b/fs/f2fs/segment.c > @@ -210,6 +210,7 @@ void register_inmem_page(struct inode *inode, struct page *page) > list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); > spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); > inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); > + fi->inmem_blocks++; > mutex_unlock(&fi->inmem_lock); > > trace_f2fs_register_inmem_page(page, INMEM); > @@ -221,6 +222,7 @@ static int __revoke_inmem_pages(struct inode *inode, > struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > struct inmem_pages *cur, *tmp; > int err = 0; > + struct f2fs_inode_info *fi = F2FS_I(inode); > > list_for_each_entry_safe(cur, tmp, head, list) { > struct page *page = cur->page; > @@ -263,6 +265,7 @@ static int __revoke_inmem_pages(struct inode *inode, > list_del(&cur->list); > kmem_cache_free(inmem_entry_slab, cur); > dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); > + fi->inmem_blocks--; > } > return err; > } > @@ -302,6 +305,10 @@ void drop_inmem_pages(struct inode *inode) > if (!list_empty(&fi->inmem_ilist)) > list_del_init(&fi->inmem_ilist); > spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); > + if (fi->inmem_blocks) { > + f2fs_bug_on(sbi, 1); > + fi->inmem_blocks = 0; > + } > mutex_unlock(&fi->inmem_lock); > > clear_inode_flag(inode, FI_ATOMIC_FILE); > @@ -326,6 +333,7 @@ void drop_inmem_page(struct inode *inode, struct page *page) > > f2fs_bug_on(sbi, !cur || cur->page != page); > list_del(&cur->list); > + fi->inmem_blocks--; > mutex_unlock(&fi->inmem_lock); > > dec_page_count(sbi, F2FS_INMEM_PAGES); > @@ -410,11 +418,26 @@ int commit_inmem_pages(struct inode *inode) > > INIT_LIST_HEAD(&revoke_list); > f2fs_balance_fs(sbi, true); > + if (prefree_segments(sbi) > + && has_not_enough_free_secs(sbi, 0, > + fi->inmem_blocks / BLKS_PER_SEC(sbi))) { > + struct cp_control cpc; > + > + cpc.reason = __get_cp_reason(sbi); > + err = write_checkpoint(sbi, &cpc); > + if (err) > + goto drop; > + } > f2fs_lock_op(sbi); > > set_inode_flag(inode, FI_ATOMIC_COMMIT); > > mutex_lock(&fi->inmem_lock); > + if ((sbi->user_block_count - valid_user_blocks(sbi)) < What does this mean? We already allocated blocks successfully? > + fi->inmem_blocks) { > + err = -ENOSPC; > + goto drop; > + } > err = __commit_inmem_pages(inode, &revoke_list); > if (err) { > int ret; > @@ -429,7 +452,7 @@ int commit_inmem_pages(struct inode *inode) > ret = __revoke_inmem_pages(inode, &revoke_list, false, true); > if (ret) > err = ret; > - > +drop: > /* drop all uncommitted pages */ > __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); > } > @@ -437,6 +460,10 @@ int commit_inmem_pages(struct inode *inode) > if (!list_empty(&fi->inmem_ilist)) > list_del_init(&fi->inmem_ilist); > spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); > + if (fi->inmem_blocks) { > + f2fs_bug_on(sbi, 1); > + fi->inmem_blocks = 0; > + } > mutex_unlock(&fi->inmem_lock); > > clear_inode_flag(inode, FI_ATOMIC_COMMIT); > -- > 1.8.5.2
Because I found that it will still lead to out-of-free problem with out that check. I trace and find that it is possible that the committing date pages of the atomic file is bigger than the sbi->user_block_count - valid_user_blocks(sbi), so I add this check. On 2017/11/3 11:46, Jaegeuk Kim wrote: > On 10/30, Yunlong Song wrote: >> f2fs_balance_fs only actives once in the commit_inmem_pages, but there >> are more than one page to commit, so all the other pages will miss the >> check. This will lead to out-of-free problem when commit a very large >> file. However, we cannot do f2fs_balance_fs for each inmem page, since >> this will break atomicity. As a result, we should collect prefree >> segments if needed and stop atomic commit when there are not enough >> available blocks to write atomic pages. >> >> Signed-off-by: Yunlong Song <yunlong.song@huawei.com> >> --- >> fs/f2fs/f2fs.h | 1 + >> fs/f2fs/segment.c | 29 ++++++++++++++++++++++++++++- >> 2 files changed, 29 insertions(+), 1 deletion(-) >> >> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h >> index 13a96b8..04ce48f 100644 >> --- a/fs/f2fs/f2fs.h >> +++ b/fs/f2fs/f2fs.h >> @@ -610,6 +610,7 @@ struct f2fs_inode_info { >> struct list_head inmem_pages; /* inmemory pages managed by f2fs */ >> struct task_struct *inmem_task; /* store inmemory task */ >> struct mutex inmem_lock; /* lock for inmemory pages */ >> + unsigned long inmem_blocks; /* inmemory blocks */ >> struct extent_tree *extent_tree; /* cached extent_tree entry */ >> struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ >> struct rw_semaphore i_mmap_sem; >> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c >> index 46dfbca..813c110 100644 >> --- a/fs/f2fs/segment.c >> +++ b/fs/f2fs/segment.c >> @@ -210,6 +210,7 @@ void register_inmem_page(struct inode *inode, struct page *page) >> list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); >> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); >> inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); >> + fi->inmem_blocks++; >> mutex_unlock(&fi->inmem_lock); >> >> trace_f2fs_register_inmem_page(page, INMEM); >> @@ -221,6 +222,7 @@ static int __revoke_inmem_pages(struct inode *inode, >> struct f2fs_sb_info *sbi = F2FS_I_SB(inode); >> struct inmem_pages *cur, *tmp; >> int err = 0; >> + struct f2fs_inode_info *fi = F2FS_I(inode); >> >> list_for_each_entry_safe(cur, tmp, head, list) { >> struct page *page = cur->page; >> @@ -263,6 +265,7 @@ static int __revoke_inmem_pages(struct inode *inode, >> list_del(&cur->list); >> kmem_cache_free(inmem_entry_slab, cur); >> dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); >> + fi->inmem_blocks--; >> } >> return err; >> } >> @@ -302,6 +305,10 @@ void drop_inmem_pages(struct inode *inode) >> if (!list_empty(&fi->inmem_ilist)) >> list_del_init(&fi->inmem_ilist); >> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); >> + if (fi->inmem_blocks) { >> + f2fs_bug_on(sbi, 1); >> + fi->inmem_blocks = 0; >> + } >> mutex_unlock(&fi->inmem_lock); >> >> clear_inode_flag(inode, FI_ATOMIC_FILE); >> @@ -326,6 +333,7 @@ void drop_inmem_page(struct inode *inode, struct page *page) >> >> f2fs_bug_on(sbi, !cur || cur->page != page); >> list_del(&cur->list); >> + fi->inmem_blocks--; >> mutex_unlock(&fi->inmem_lock); >> >> dec_page_count(sbi, F2FS_INMEM_PAGES); >> @@ -410,11 +418,26 @@ int commit_inmem_pages(struct inode *inode) >> >> INIT_LIST_HEAD(&revoke_list); >> f2fs_balance_fs(sbi, true); >> + if (prefree_segments(sbi) >> + && has_not_enough_free_secs(sbi, 0, >> + fi->inmem_blocks / BLKS_PER_SEC(sbi))) { >> + struct cp_control cpc; >> + >> + cpc.reason = __get_cp_reason(sbi); >> + err = write_checkpoint(sbi, &cpc); >> + if (err) >> + goto drop; >> + } >> f2fs_lock_op(sbi); >> >> set_inode_flag(inode, FI_ATOMIC_COMMIT); >> >> mutex_lock(&fi->inmem_lock); >> + if ((sbi->user_block_count - valid_user_blocks(sbi)) < > What does this mean? We already allocated blocks successfully? > >> + fi->inmem_blocks) { >> + err = -ENOSPC; >> + goto drop; >> + } >> err = __commit_inmem_pages(inode, &revoke_list); >> if (err) { >> int ret; >> @@ -429,7 +452,7 @@ int commit_inmem_pages(struct inode *inode) >> ret = __revoke_inmem_pages(inode, &revoke_list, false, true); >> if (ret) >> err = ret; >> - >> +drop: >> /* drop all uncommitted pages */ >> __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); >> } >> @@ -437,6 +460,10 @@ int commit_inmem_pages(struct inode *inode) >> if (!list_empty(&fi->inmem_ilist)) >> list_del_init(&fi->inmem_ilist); >> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); >> + if (fi->inmem_blocks) { >> + f2fs_bug_on(sbi, 1); >> + fi->inmem_blocks = 0; >> + } >> mutex_unlock(&fi->inmem_lock); >> >> clear_inode_flag(inode, FI_ATOMIC_COMMIT); >> -- >> 1.8.5.2 > . >
Test: Newest kernel source code from f2fs-dev 1G zram with f2fs 8 threads to atomic write one same file on zram there are four kinds of atomic write at the same time: 1 no atomic start, with atomic commit 2 no atomic start, no atomic commit 3 atomic start, with atomic commit 4 atomic start, no atomic commit And I add dump_stack after the check as following, + if ((sbi->user_block_count - valid_user_blocks(sbi)) < + fi->inmem_blocks) { + dump_stack(); + err = -ENOSPC; + goto drop; + } then we have: [ 136.237247] F2FS-fs (zram1): Unexpected flush for atomic writes: ino=4, npages=8193 [ 136.952469] CPU: 1 PID: 1274 Comm: atomic_t2 Not tainted 4.14.0-rc4+ #109 [ 136.952947] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014 [ 136.953162] Call Trace: [ 136.953162] dump_stack+0x4d/0x6e [ 136.953162] commit_inmem_pages+0x258/0x270 [ 136.953162] ? __sb_start_write+0x48/0x80 [ 136.953162] ? __mnt_want_write_file+0x18/0x30 [ 136.953162] f2fs_ioctl+0x1025/0x1e30 [ 136.953162] ? up_write+0x25/0x30 [ 136.953162] ? f2fs_file_write_iter+0xf3/0x1e0 [ 136.953162] ? selinux_file_ioctl+0x114/0x1e0 [ 136.953162] do_vfs_ioctl+0x96/0x5a0 [ 136.953162] SyS_ioctl+0x79/0x90 [ 136.953162] ? SyS_lseek+0x87/0xb0 [ 136.953162] entry_SYSCALL_64_fastpath+0x13/0x94 [ 136.953162] RIP: 0033:0x434b97 [ 136.953162] RSP: 002b:00007ffc68859de8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [ 136.953162] RAX: ffffffffffffffda RBX: 00000000006b78e0 RCX: 0000000000434b97 [ 136.953162] RDX: 00000000006b70e8 RSI: 000000000000f502 RDI: 0000000000000003 [ 136.953162] RBP: 0000000002000010 R08: 00000000006b70e8 R09: 00000000006b7160 [ 136.953162] R10: 0000000000000022 R11: 0000000000000202 R12: 00007f491a1c4010 [ 136.953162] R13: 0000000002001000 R14: 0000000002000000 R15: 00000000006b7938 So I think we should add the check code. On 2017/11/3 12:48, Yunlong Song wrote: > Because I found that it will still lead to out-of-free problem with > out that check. > I trace and find that it is possible that the committing date pages of > the atomic > file is bigger than the sbi->user_block_count - > valid_user_blocks(sbi), so I add > this check. > > On 2017/11/3 11:46, Jaegeuk Kim wrote: >> On 10/30, Yunlong Song wrote: >>> f2fs_balance_fs only actives once in the commit_inmem_pages, but there >>> are more than one page to commit, so all the other pages will miss the >>> check. This will lead to out-of-free problem when commit a very large >>> file. However, we cannot do f2fs_balance_fs for each inmem page, since >>> this will break atomicity. As a result, we should collect prefree >>> segments if needed and stop atomic commit when there are not enough >>> available blocks to write atomic pages. >>> >>> Signed-off-by: Yunlong Song <yunlong.song@huawei.com> >>> --- >>> fs/f2fs/f2fs.h | 1 + >>> fs/f2fs/segment.c | 29 ++++++++++++++++++++++++++++- >>> 2 files changed, 29 insertions(+), 1 deletion(-) >>> >>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h >>> index 13a96b8..04ce48f 100644 >>> --- a/fs/f2fs/f2fs.h >>> +++ b/fs/f2fs/f2fs.h >>> @@ -610,6 +610,7 @@ struct f2fs_inode_info { >>> struct list_head inmem_pages; /* inmemory pages managed by >>> f2fs */ >>> struct task_struct *inmem_task; /* store inmemory task */ >>> struct mutex inmem_lock; /* lock for inmemory pages */ >>> + unsigned long inmem_blocks; /* inmemory blocks */ >>> struct extent_tree *extent_tree; /* cached extent_tree >>> entry */ >>> struct rw_semaphore dio_rwsem[2];/* avoid racing between dio >>> and gc */ >>> struct rw_semaphore i_mmap_sem; >>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c >>> index 46dfbca..813c110 100644 >>> --- a/fs/f2fs/segment.c >>> +++ b/fs/f2fs/segment.c >>> @@ -210,6 +210,7 @@ void register_inmem_page(struct inode *inode, >>> struct page *page) >>> list_add_tail(&fi->inmem_ilist, >>> &sbi->inode_list[ATOMIC_FILE]); >>> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); >>> inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); >>> + fi->inmem_blocks++; >>> mutex_unlock(&fi->inmem_lock); >>> trace_f2fs_register_inmem_page(page, INMEM); >>> @@ -221,6 +222,7 @@ static int __revoke_inmem_pages(struct inode >>> *inode, >>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode); >>> struct inmem_pages *cur, *tmp; >>> int err = 0; >>> + struct f2fs_inode_info *fi = F2FS_I(inode); >>> list_for_each_entry_safe(cur, tmp, head, list) { >>> struct page *page = cur->page; >>> @@ -263,6 +265,7 @@ static int __revoke_inmem_pages(struct inode >>> *inode, >>> list_del(&cur->list); >>> kmem_cache_free(inmem_entry_slab, cur); >>> dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); >>> + fi->inmem_blocks--; >>> } >>> return err; >>> } >>> @@ -302,6 +305,10 @@ void drop_inmem_pages(struct inode *inode) >>> if (!list_empty(&fi->inmem_ilist)) >>> list_del_init(&fi->inmem_ilist); >>> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); >>> + if (fi->inmem_blocks) { >>> + f2fs_bug_on(sbi, 1); >>> + fi->inmem_blocks = 0; >>> + } >>> mutex_unlock(&fi->inmem_lock); >>> clear_inode_flag(inode, FI_ATOMIC_FILE); >>> @@ -326,6 +333,7 @@ void drop_inmem_page(struct inode *inode, struct >>> page *page) >>> f2fs_bug_on(sbi, !cur || cur->page != page); >>> list_del(&cur->list); >>> + fi->inmem_blocks--; >>> mutex_unlock(&fi->inmem_lock); >>> dec_page_count(sbi, F2FS_INMEM_PAGES); >>> @@ -410,11 +418,26 @@ int commit_inmem_pages(struct inode *inode) >>> INIT_LIST_HEAD(&revoke_list); >>> f2fs_balance_fs(sbi, true); >>> + if (prefree_segments(sbi) >>> + && has_not_enough_free_secs(sbi, 0, >>> + fi->inmem_blocks / BLKS_PER_SEC(sbi))) { >>> + struct cp_control cpc; >>> + >>> + cpc.reason = __get_cp_reason(sbi); >>> + err = write_checkpoint(sbi, &cpc); >>> + if (err) >>> + goto drop; >>> + } >>> f2fs_lock_op(sbi); >>> set_inode_flag(inode, FI_ATOMIC_COMMIT); >>> mutex_lock(&fi->inmem_lock); >>> + if ((sbi->user_block_count - valid_user_blocks(sbi)) < >> What does this mean? We already allocated blocks successfully? >> >>> + fi->inmem_blocks) { >>> + err = -ENOSPC; >>> + goto drop; >>> + } >>> err = __commit_inmem_pages(inode, &revoke_list); >>> if (err) { >>> int ret; >>> @@ -429,7 +452,7 @@ int commit_inmem_pages(struct inode *inode) >>> ret = __revoke_inmem_pages(inode, &revoke_list, false, true); >>> if (ret) >>> err = ret; >>> - >>> +drop: >>> /* drop all uncommitted pages */ >>> __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); >>> } >>> @@ -437,6 +460,10 @@ int commit_inmem_pages(struct inode *inode) >>> if (!list_empty(&fi->inmem_ilist)) >>> list_del_init(&fi->inmem_ilist); >>> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); >>> + if (fi->inmem_blocks) { >>> + f2fs_bug_on(sbi, 1); >>> + fi->inmem_blocks = 0; >>> + } >>> mutex_unlock(&fi->inmem_lock); >>> clear_inode_flag(inode, FI_ATOMIC_COMMIT); >>> -- >>> 1.8.5.2 >> . >> >
On 2017/11/3 22:40, Yunlong Song wrote: > Test: > Newest kernel source code from f2fs-dev > 1G zram with f2fs > 8 threads to atomic write one same file on zram > there are four kinds of atomic write at the same time: > 1 no atomic start, with atomic commit > 2 no atomic start, no atomic commit > 3 atomic start, with atomic commit > 4 atomic start, no atomic commit > > And I add dump_stack after the check as following, > + if ((sbi->user_block_count - valid_user_blocks(sbi)) < > + fi->inmem_blocks) { valid_user_blocks contains fi->inmem_blocks and all reserved new node blocks? Thanks, > + dump_stack(); > + err = -ENOSPC; > + goto drop; > + } > > then we have: > > [ 136.237247] F2FS-fs (zram1): Unexpected flush for atomic writes: ino=4, npages=8193 > [ 136.952469] CPU: 1 PID: 1274 Comm: atomic_t2 Not tainted 4.14.0-rc4+ #109 > [ 136.952947] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014 > [ 136.953162] Call Trace: > [ 136.953162] dump_stack+0x4d/0x6e > [ 136.953162] commit_inmem_pages+0x258/0x270 > [ 136.953162] ? __sb_start_write+0x48/0x80 > [ 136.953162] ? __mnt_want_write_file+0x18/0x30 > [ 136.953162] f2fs_ioctl+0x1025/0x1e30 > [ 136.953162] ? up_write+0x25/0x30 > [ 136.953162] ? f2fs_file_write_iter+0xf3/0x1e0 > [ 136.953162] ? selinux_file_ioctl+0x114/0x1e0 > [ 136.953162] do_vfs_ioctl+0x96/0x5a0 > [ 136.953162] SyS_ioctl+0x79/0x90 > [ 136.953162] ? SyS_lseek+0x87/0xb0 > [ 136.953162] entry_SYSCALL_64_fastpath+0x13/0x94 > [ 136.953162] RIP: 0033:0x434b97 > [ 136.953162] RSP: 002b:00007ffc68859de8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 > [ 136.953162] RAX: ffffffffffffffda RBX: 00000000006b78e0 RCX: 0000000000434b97 > [ 136.953162] RDX: 00000000006b70e8 RSI: 000000000000f502 RDI: 0000000000000003 > [ 136.953162] RBP: 0000000002000010 R08: 00000000006b70e8 R09: 00000000006b7160 > [ 136.953162] R10: 0000000000000022 R11: 0000000000000202 R12: 00007f491a1c4010 > [ 136.953162] R13: 0000000002001000 R14: 0000000002000000 R15: 00000000006b7938 > > So I think we should add the check code. > > On 2017/11/3 12:48, Yunlong Song wrote: >> Because I found that it will still lead to out-of-free problem with out that check. >> I trace and find that it is possible that the committing date pages of the atomic >> file is bigger than the sbi->user_block_count - valid_user_blocks(sbi), so I add >> this check. >> >> On 2017/11/3 11:46, Jaegeuk Kim wrote: >>> On 10/30, Yunlong Song wrote: >>>> f2fs_balance_fs only actives once in the commit_inmem_pages, but there >>>> are more than one page to commit, so all the other pages will miss the >>>> check. This will lead to out-of-free problem when commit a very large >>>> file. However, we cannot do f2fs_balance_fs for each inmem page, since >>>> this will break atomicity. As a result, we should collect prefree >>>> segments if needed and stop atomic commit when there are not enough >>>> available blocks to write atomic pages. >>>> >>>> Signed-off-by: Yunlong Song <yunlong.song@huawei.com> >>>> --- >>>> fs/f2fs/f2fs.h | 1 + >>>> fs/f2fs/segment.c | 29 ++++++++++++++++++++++++++++- >>>> 2 files changed, 29 insertions(+), 1 deletion(-) >>>> >>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h >>>> index 13a96b8..04ce48f 100644 >>>> --- a/fs/f2fs/f2fs.h >>>> +++ b/fs/f2fs/f2fs.h >>>> @@ -610,6 +610,7 @@ struct f2fs_inode_info { >>>> struct list_head inmem_pages; /* inmemory pages managed by f2fs */ >>>> struct task_struct *inmem_task; /* store inmemory task */ >>>> struct mutex inmem_lock; /* lock for inmemory pages */ >>>> + unsigned long inmem_blocks; /* inmemory blocks */ >>>> struct extent_tree *extent_tree; /* cached extent_tree entry */ >>>> struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ >>>> struct rw_semaphore i_mmap_sem; >>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c >>>> index 46dfbca..813c110 100644 >>>> --- a/fs/f2fs/segment.c >>>> +++ b/fs/f2fs/segment.c >>>> @@ -210,6 +210,7 @@ void register_inmem_page(struct inode *inode, struct page *page) >>>> list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); >>>> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); >>>> inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); >>>> + fi->inmem_blocks++; >>>> mutex_unlock(&fi->inmem_lock); >>>> trace_f2fs_register_inmem_page(page, INMEM); >>>> @@ -221,6 +222,7 @@ static int __revoke_inmem_pages(struct inode *inode, >>>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode); >>>> struct inmem_pages *cur, *tmp; >>>> int err = 0; >>>> + struct f2fs_inode_info *fi = F2FS_I(inode); >>>> list_for_each_entry_safe(cur, tmp, head, list) { >>>> struct page *page = cur->page; >>>> @@ -263,6 +265,7 @@ static int __revoke_inmem_pages(struct inode *inode, >>>> list_del(&cur->list); >>>> kmem_cache_free(inmem_entry_slab, cur); >>>> dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); >>>> + fi->inmem_blocks--; >>>> } >>>> return err; >>>> } >>>> @@ -302,6 +305,10 @@ void drop_inmem_pages(struct inode *inode) >>>> if (!list_empty(&fi->inmem_ilist)) >>>> list_del_init(&fi->inmem_ilist); >>>> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); >>>> + if (fi->inmem_blocks) { >>>> + f2fs_bug_on(sbi, 1); >>>> + fi->inmem_blocks = 0; >>>> + } >>>> mutex_unlock(&fi->inmem_lock); >>>> clear_inode_flag(inode, FI_ATOMIC_FILE); >>>> @@ -326,6 +333,7 @@ void drop_inmem_page(struct inode *inode, struct page *page) >>>> f2fs_bug_on(sbi, !cur || cur->page != page); >>>> list_del(&cur->list); >>>> + fi->inmem_blocks--; >>>> mutex_unlock(&fi->inmem_lock); >>>> dec_page_count(sbi, F2FS_INMEM_PAGES); >>>> @@ -410,11 +418,26 @@ int commit_inmem_pages(struct inode *inode) >>>> INIT_LIST_HEAD(&revoke_list); >>>> f2fs_balance_fs(sbi, true); >>>> + if (prefree_segments(sbi) >>>> + && has_not_enough_free_secs(sbi, 0, >>>> + fi->inmem_blocks / BLKS_PER_SEC(sbi))) { >>>> + struct cp_control cpc; >>>> + >>>> + cpc.reason = __get_cp_reason(sbi); >>>> + err = write_checkpoint(sbi, &cpc); >>>> + if (err) >>>> + goto drop; >>>> + } >>>> f2fs_lock_op(sbi); >>>> set_inode_flag(inode, FI_ATOMIC_COMMIT); >>>> mutex_lock(&fi->inmem_lock); >>>> + if ((sbi->user_block_count - valid_user_blocks(sbi)) < >>> What does this mean? We already allocated blocks successfully? >>> >>>> + fi->inmem_blocks) { >>>> + err = -ENOSPC; >>>> + goto drop; >>>> + } >>>> err = __commit_inmem_pages(inode, &revoke_list); >>>> if (err) { >>>> int ret; >>>> @@ -429,7 +452,7 @@ int commit_inmem_pages(struct inode *inode) >>>> ret = __revoke_inmem_pages(inode, &revoke_list, false, true); >>>> if (ret) >>>> err = ret; >>>> - >>>> +drop: >>>> /* drop all uncommitted pages */ >>>> __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); >>>> } >>>> @@ -437,6 +460,10 @@ int commit_inmem_pages(struct inode *inode) >>>> if (!list_empty(&fi->inmem_ilist)) >>>> list_del_init(&fi->inmem_ilist); >>>> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); >>>> + if (fi->inmem_blocks) { >>>> + f2fs_bug_on(sbi, 1); >>>> + fi->inmem_blocks = 0; >>>> + } >>>> mutex_unlock(&fi->inmem_lock); >>>> clear_inode_flag(inode, FI_ATOMIC_COMMIT); >>>> -- >>>> 1.8.5.2 >>> . >>> >> >
So there is no connection between sbi->user_block_count - valid_user_blocks(sbi) and fi->inmem_blocks. It is sensible that sbi->user_block_count - valid_user_blocks(sbi) is smaller than fi->inmem_blocks. On 2017/11/3 23:23, Chao Yu wrote: > On 2017/11/3 22:40, Yunlong Song wrote: >> Test: >> Newest kernel source code from f2fs-dev >> 1G zram with f2fs >> 8 threads to atomic write one same file on zram >> there are four kinds of atomic write at the same time: >> 1 no atomic start, with atomic commit >> 2 no atomic start, no atomic commit >> 3 atomic start, with atomic commit >> 4 atomic start, no atomic commit >> >> And I add dump_stack after the check as following, >> + if ((sbi->user_block_count - valid_user_blocks(sbi)) < >> + fi->inmem_blocks) { > valid_user_blocks contains fi->inmem_blocks and all reserved new node blocks? > > Thanks, > >> + dump_stack(); >> + err = -ENOSPC; >> + goto drop; >> + } >> >> then we have: >> >> [ 136.237247] F2FS-fs (zram1): Unexpected flush for atomic writes: ino=4, npages=8193 >> [ 136.952469] CPU: 1 PID: 1274 Comm: atomic_t2 Not tainted 4.14.0-rc4+ #109 >> [ 136.952947] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014 >> [ 136.953162] Call Trace: >> [ 136.953162] dump_stack+0x4d/0x6e >> [ 136.953162] commit_inmem_pages+0x258/0x270 >> [ 136.953162] ? __sb_start_write+0x48/0x80 >> [ 136.953162] ? __mnt_want_write_file+0x18/0x30 >> [ 136.953162] f2fs_ioctl+0x1025/0x1e30 >> [ 136.953162] ? up_write+0x25/0x30 >> [ 136.953162] ? f2fs_file_write_iter+0xf3/0x1e0 >> [ 136.953162] ? selinux_file_ioctl+0x114/0x1e0 >> [ 136.953162] do_vfs_ioctl+0x96/0x5a0 >> [ 136.953162] SyS_ioctl+0x79/0x90 >> [ 136.953162] ? SyS_lseek+0x87/0xb0 >> [ 136.953162] entry_SYSCALL_64_fastpath+0x13/0x94 >> [ 136.953162] RIP: 0033:0x434b97 >> [ 136.953162] RSP: 002b:00007ffc68859de8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 >> [ 136.953162] RAX: ffffffffffffffda RBX: 00000000006b78e0 RCX: 0000000000434b97 >> [ 136.953162] RDX: 00000000006b70e8 RSI: 000000000000f502 RDI: 0000000000000003 >> [ 136.953162] RBP: 0000000002000010 R08: 00000000006b70e8 R09: 00000000006b7160 >> [ 136.953162] R10: 0000000000000022 R11: 0000000000000202 R12: 00007f491a1c4010 >> [ 136.953162] R13: 0000000002001000 R14: 0000000002000000 R15: 00000000006b7938 >> >> So I think we should add the check code. >> >> On 2017/11/3 12:48, Yunlong Song wrote: >>> Because I found that it will still lead to out-of-free problem with out that check. >>> I trace and find that it is possible that the committing date pages of the atomic >>> file is bigger than the sbi->user_block_count - valid_user_blocks(sbi), so I add >>> this check. >>> >>> On 2017/11/3 11:46, Jaegeuk Kim wrote: >>>> On 10/30, Yunlong Song wrote: >>>>> f2fs_balance_fs only actives once in the commit_inmem_pages, but there >>>>> are more than one page to commit, so all the other pages will miss the >>>>> check. This will lead to out-of-free problem when commit a very large >>>>> file. However, we cannot do f2fs_balance_fs for each inmem page, since >>>>> this will break atomicity. As a result, we should collect prefree >>>>> segments if needed and stop atomic commit when there are not enough >>>>> available blocks to write atomic pages. >>>>> >>>>> Signed-off-by: Yunlong Song <yunlong.song@huawei.com> >>>>> --- >>>>> fs/f2fs/f2fs.h | 1 + >>>>> fs/f2fs/segment.c | 29 ++++++++++++++++++++++++++++- >>>>> 2 files changed, 29 insertions(+), 1 deletion(-) >>>>> >>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h >>>>> index 13a96b8..04ce48f 100644 >>>>> --- a/fs/f2fs/f2fs.h >>>>> +++ b/fs/f2fs/f2fs.h >>>>> @@ -610,6 +610,7 @@ struct f2fs_inode_info { >>>>> struct list_head inmem_pages; /* inmemory pages managed by f2fs */ >>>>> struct task_struct *inmem_task; /* store inmemory task */ >>>>> struct mutex inmem_lock; /* lock for inmemory pages */ >>>>> + unsigned long inmem_blocks; /* inmemory blocks */ >>>>> struct extent_tree *extent_tree; /* cached extent_tree entry */ >>>>> struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ >>>>> struct rw_semaphore i_mmap_sem; >>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c >>>>> index 46dfbca..813c110 100644 >>>>> --- a/fs/f2fs/segment.c >>>>> +++ b/fs/f2fs/segment.c >>>>> @@ -210,6 +210,7 @@ void register_inmem_page(struct inode *inode, struct page *page) >>>>> list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); >>>>> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); >>>>> inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); >>>>> + fi->inmem_blocks++; >>>>> mutex_unlock(&fi->inmem_lock); >>>>> trace_f2fs_register_inmem_page(page, INMEM); >>>>> @@ -221,6 +222,7 @@ static int __revoke_inmem_pages(struct inode *inode, >>>>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode); >>>>> struct inmem_pages *cur, *tmp; >>>>> int err = 0; >>>>> + struct f2fs_inode_info *fi = F2FS_I(inode); >>>>> list_for_each_entry_safe(cur, tmp, head, list) { >>>>> struct page *page = cur->page; >>>>> @@ -263,6 +265,7 @@ static int __revoke_inmem_pages(struct inode *inode, >>>>> list_del(&cur->list); >>>>> kmem_cache_free(inmem_entry_slab, cur); >>>>> dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); >>>>> + fi->inmem_blocks--; >>>>> } >>>>> return err; >>>>> } >>>>> @@ -302,6 +305,10 @@ void drop_inmem_pages(struct inode *inode) >>>>> if (!list_empty(&fi->inmem_ilist)) >>>>> list_del_init(&fi->inmem_ilist); >>>>> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); >>>>> + if (fi->inmem_blocks) { >>>>> + f2fs_bug_on(sbi, 1); >>>>> + fi->inmem_blocks = 0; >>>>> + } >>>>> mutex_unlock(&fi->inmem_lock); >>>>> clear_inode_flag(inode, FI_ATOMIC_FILE); >>>>> @@ -326,6 +333,7 @@ void drop_inmem_page(struct inode *inode, struct page *page) >>>>> f2fs_bug_on(sbi, !cur || cur->page != page); >>>>> list_del(&cur->list); >>>>> + fi->inmem_blocks--; >>>>> mutex_unlock(&fi->inmem_lock); >>>>> dec_page_count(sbi, F2FS_INMEM_PAGES); >>>>> @@ -410,11 +418,26 @@ int commit_inmem_pages(struct inode *inode) >>>>> INIT_LIST_HEAD(&revoke_list); >>>>> f2fs_balance_fs(sbi, true); >>>>> + if (prefree_segments(sbi) >>>>> + && has_not_enough_free_secs(sbi, 0, >>>>> + fi->inmem_blocks / BLKS_PER_SEC(sbi))) { >>>>> + struct cp_control cpc; >>>>> + >>>>> + cpc.reason = __get_cp_reason(sbi); >>>>> + err = write_checkpoint(sbi, &cpc); >>>>> + if (err) >>>>> + goto drop; >>>>> + } >>>>> f2fs_lock_op(sbi); >>>>> set_inode_flag(inode, FI_ATOMIC_COMMIT); >>>>> mutex_lock(&fi->inmem_lock); >>>>> + if ((sbi->user_block_count - valid_user_blocks(sbi)) < >>>> What does this mean? We already allocated blocks successfully? >>>> >>>>> + fi->inmem_blocks) { >>>>> + err = -ENOSPC; >>>>> + goto drop; >>>>> + } >>>>> err = __commit_inmem_pages(inode, &revoke_list); >>>>> if (err) { >>>>> int ret; >>>>> @@ -429,7 +452,7 @@ int commit_inmem_pages(struct inode *inode) >>>>> ret = __revoke_inmem_pages(inode, &revoke_list, false, true); >>>>> if (ret) >>>>> err = ret; >>>>> - >>>>> +drop: >>>>> /* drop all uncommitted pages */ >>>>> __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); >>>>> } >>>>> @@ -437,6 +460,10 @@ int commit_inmem_pages(struct inode *inode) >>>>> if (!list_empty(&fi->inmem_ilist)) >>>>> list_del_init(&fi->inmem_ilist); >>>>> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); >>>>> + if (fi->inmem_blocks) { >>>>> + f2fs_bug_on(sbi, 1); >>>>> + fi->inmem_blocks = 0; >>>>> + } >>>>> mutex_unlock(&fi->inmem_lock); >>>>> clear_inode_flag(inode, FI_ATOMIC_COMMIT); >>>>> -- >>>>> 1.8.5.2 >>>> . >>>> > . >
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 13a96b8..04ce48f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -610,6 +610,7 @@ struct f2fs_inode_info { struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct task_struct *inmem_task; /* store inmemory task */ struct mutex inmem_lock; /* lock for inmemory pages */ + unsigned long inmem_blocks; /* inmemory blocks */ struct extent_tree *extent_tree; /* cached extent_tree entry */ struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ struct rw_semaphore i_mmap_sem; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 46dfbca..813c110 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -210,6 +210,7 @@ void register_inmem_page(struct inode *inode, struct page *page) list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); + fi->inmem_blocks++; mutex_unlock(&fi->inmem_lock); trace_f2fs_register_inmem_page(page, INMEM); @@ -221,6 +222,7 @@ static int __revoke_inmem_pages(struct inode *inode, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct inmem_pages *cur, *tmp; int err = 0; + struct f2fs_inode_info *fi = F2FS_I(inode); list_for_each_entry_safe(cur, tmp, head, list) { struct page *page = cur->page; @@ -263,6 +265,7 @@ static int __revoke_inmem_pages(struct inode *inode, list_del(&cur->list); kmem_cache_free(inmem_entry_slab, cur); dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); + fi->inmem_blocks--; } return err; } @@ -302,6 +305,10 @@ void drop_inmem_pages(struct inode *inode) if (!list_empty(&fi->inmem_ilist)) list_del_init(&fi->inmem_ilist); spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + if (fi->inmem_blocks) { + f2fs_bug_on(sbi, 1); + fi->inmem_blocks = 0; + } mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_FILE); @@ -326,6 +333,7 @@ void drop_inmem_page(struct inode *inode, struct page *page) f2fs_bug_on(sbi, !cur || cur->page != page); list_del(&cur->list); + fi->inmem_blocks--; mutex_unlock(&fi->inmem_lock); dec_page_count(sbi, F2FS_INMEM_PAGES); @@ -410,11 +418,26 @@ int commit_inmem_pages(struct inode *inode) INIT_LIST_HEAD(&revoke_list); f2fs_balance_fs(sbi, true); + if (prefree_segments(sbi) + && has_not_enough_free_secs(sbi, 0, + fi->inmem_blocks / BLKS_PER_SEC(sbi))) { + struct cp_control cpc; + + cpc.reason = __get_cp_reason(sbi); + err = write_checkpoint(sbi, &cpc); + if (err) + goto drop; + } f2fs_lock_op(sbi); set_inode_flag(inode, FI_ATOMIC_COMMIT); mutex_lock(&fi->inmem_lock); + if ((sbi->user_block_count - valid_user_blocks(sbi)) < + fi->inmem_blocks) { + err = -ENOSPC; + goto drop; + } err = __commit_inmem_pages(inode, &revoke_list); if (err) { int ret; @@ -429,7 +452,7 @@ int commit_inmem_pages(struct inode *inode) ret = __revoke_inmem_pages(inode, &revoke_list, false, true); if (ret) err = ret; - +drop: /* drop all uncommitted pages */ __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); } @@ -437,6 +460,10 @@ int commit_inmem_pages(struct inode *inode) if (!list_empty(&fi->inmem_ilist)) list_del_init(&fi->inmem_ilist); spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + if (fi->inmem_blocks) { + f2fs_bug_on(sbi, 1); + fi->inmem_blocks = 0; + } mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_COMMIT);
f2fs_balance_fs only actives once in the commit_inmem_pages, but there are more than one page to commit, so all the other pages will miss the check. This will lead to out-of-free problem when commit a very large file. However, we cannot do f2fs_balance_fs for each inmem page, since this will break atomicity. As a result, we should collect prefree segments if needed and stop atomic commit when there are not enough available blocks to write atomic pages. Signed-off-by: Yunlong Song <yunlong.song@huawei.com> --- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-)