diff mbox series

[f2fs-dev] f2fs: fix to truncate meta inode pages forcely

Message ID 20240307151235.3864725-1-chao@kernel.org (mailing list archive)
State Superseded
Headers show
Series [f2fs-dev] f2fs: fix to truncate meta inode pages forcely | expand

Commit Message

Chao Yu March 7, 2024, 3:12 p.m. UTC
Below race case can cause data corruption:

Thread A				GC thread
- f2fs_inplace_write_data
					- gc_data_segment
					 - ra_data_block
					  - locked meta_inode page
 - invalidate_mapping_pages
 : fail to invalidate meta_inode page
   due to lock failure or dirty|writeback
   status
 - f2fs_submit_page_bio
 : write last dirty data to old blkaddr
					 - move_data_block
					  - load old data from meta_inode page
					  - f2fs_submit_page_write
					  : write old data to new blkaddr

Because invalidate_mapping_pages() will skip invalidating page when the
page has unclear status including locked, dirty, writeback and so on, so
we need to use truncate_inode_pages_range() instead of
invalidate_mapping_pages() to make sure meta_inode page will be dropped.

Fixes: 6aa58d8ad20a ("f2fs: readahead encrypted block during GC")
Fixes: e3b49ea36802 ("f2fs: invalidate META_MAPPING before IPU/DIO write")
Signed-off-by: Chao Yu <chao@kernel.org>
---
 fs/f2fs/checkpoint.c    |  5 +++--
 fs/f2fs/f2fs.h          | 28 +++++++++++++++++++++++++++-
 fs/f2fs/segment.c       |  5 ++---
 include/linux/f2fs_fs.h |  1 +
 4 files changed, 33 insertions(+), 6 deletions(-)

Comments

Jaegeuk Kim March 7, 2024, 6:37 p.m. UTC | #1
On 03/07, Chao Yu wrote:
> Below race case can cause data corruption:
> 
> Thread A				GC thread
> - f2fs_inplace_write_data
> 					- gc_data_segment
> 					 - ra_data_block
> 					  - locked meta_inode page
>  - invalidate_mapping_pages
>  : fail to invalidate meta_inode page
>    due to lock failure or dirty|writeback
>    status

Wasn't the original data page locked in both cases?

>  - f2fs_submit_page_bio
>  : write last dirty data to old blkaddr
> 					 - move_data_block
> 					  - load old data from meta_inode page
> 					  - f2fs_submit_page_write
> 					  : write old data to new blkaddr
> 
> Because invalidate_mapping_pages() will skip invalidating page when the
> page has unclear status including locked, dirty, writeback and so on, so
> we need to use truncate_inode_pages_range() instead of
> invalidate_mapping_pages() to make sure meta_inode page will be dropped.
> 
> Fixes: 6aa58d8ad20a ("f2fs: readahead encrypted block during GC")
> Fixes: e3b49ea36802 ("f2fs: invalidate META_MAPPING before IPU/DIO write")
> Signed-off-by: Chao Yu <chao@kernel.org>
> ---
>  fs/f2fs/checkpoint.c    |  5 +++--
>  fs/f2fs/f2fs.h          | 28 +++++++++++++++++++++++++++-
>  fs/f2fs/segment.c       |  5 ++---
>  include/linux/f2fs_fs.h |  1 +
>  4 files changed, 33 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index a09a9609e228..55b7d2cf030f 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1598,8 +1598,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>  	 */
>  	if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) ||
>  		f2fs_sb_has_compression(sbi))
> -		invalidate_mapping_pages(META_MAPPING(sbi),
> -				MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
> +		f2fs_bug_on(sbi,
> +			invalidate_inode_pages2_range(META_MAPPING(sbi),
> +				MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1));
>  
>  	f2fs_release_ino_entry(sbi, false);
>  
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 4836e7cb0efe..9814e5981a6a 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -4655,10 +4655,36 @@ static inline bool f2fs_is_readonly(struct f2fs_sb_info *sbi)
>  	return f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb);
>  }
>  
> +static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi,
> +					block_t blkaddr, unsigned int cnt)
> +{
> +	bool need_submit = false;
> +	int i = 0;
> +
> +	do {
> +		struct page *page;
> +
> +		page = find_get_page(META_MAPPING(sbi), blkaddr + i);
> +		if (page) {
> +			if (PageWriteback(page))
> +				need_submit = true;
> +			f2fs_put_page(page, 0);
> +		}
> +	} while (++i < cnt && !need_submit);
> +
> +	if (need_submit)
> +		f2fs_submit_merged_write_cond(sbi, sbi->meta_inode,
> +							NULL, 0, DATA);
> +
> +	truncate_inode_pages_range(META_MAPPING(sbi),
> +			F2FS_BLK_TO_BYTES((loff_t)blkaddr),
> +			F2FS_BLK_END_BYTES((loff_t)(blkaddr + cnt - 1)));
> +}
> +
>  static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi,
>  								block_t blkaddr)
>  {
> -	invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr);
> +	f2fs_truncate_meta_inode_pages(sbi, blkaddr, 1);
>  	f2fs_invalidate_compress_page(sbi, blkaddr);
>  }
>  
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 4ff3b2d14ddf..20af48d7f784 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -3741,8 +3741,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
>  	}
>  
>  	if (fio->post_read)
> -		invalidate_mapping_pages(META_MAPPING(sbi),
> -				fio->new_blkaddr, fio->new_blkaddr);
> +		f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1);
>  
>  	stat_inc_inplace_blocks(fio->sbi);
>  
> @@ -3932,7 +3931,7 @@ void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
>  	for (i = 0; i < len; i++)
>  		f2fs_wait_on_block_writeback(inode, blkaddr + i);
>  
> -	invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1);
> +	f2fs_truncate_meta_inode_pages(sbi, blkaddr, len);
>  }
>  
>  static int read_compacted_summaries(struct f2fs_sb_info *sbi)
> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> index 755e9a41b196..a357287eac1e 100644
> --- a/include/linux/f2fs_fs.h
> +++ b/include/linux/f2fs_fs.h
> @@ -27,6 +27,7 @@
>  
>  #define F2FS_BYTES_TO_BLK(bytes)	((bytes) >> F2FS_BLKSIZE_BITS)
>  #define F2FS_BLK_TO_BYTES(blk)		((blk) << F2FS_BLKSIZE_BITS)
> +#define F2FS_BLK_END_BYTES(blk)		(F2FS_BLK_TO_BYTES(blk + 1) - 1)
>  
>  /* 0, 1(node nid), 2(meta nid) are reserved node id */
>  #define F2FS_RESERVED_NODE_NUM		3
> -- 
> 2.40.1
Chao Yu March 8, 2024, 12:55 a.m. UTC | #2
On 2024/3/8 2:37, Jaegeuk Kim wrote:
> On 03/07, Chao Yu wrote:
>> Below race case can cause data corruption:
>>
>> Thread A				GC thread
>> - f2fs_inplace_write_data
>> 					- gc_data_segment
>> 					 - ra_data_block
>> 					  - locked meta_inode page
>>   - invalidate_mapping_pages
>>   : fail to invalidate meta_inode page
>>     due to lock failure or dirty|writeback
>>     status
> 
> Wasn't the original data page locked in both cases?

Oh, the race case needs to fixed as below:

Thread A				GC thread
					- gc_data_segment
					 - ra_data_block
					  - locked meta_inode page
- f2fs_inplace_write_data
  - invalidate_mapping_pages
  : fail to invalidate meta_inode page
    due to lock failure or dirty|writeback
    status
  - f2fs_submit_page_bio
  : write last dirty data to old blkaddr
					 - move_data_block
					  - load old data from meta_inode page
					  - f2fs_submit_page_write
					  : write old data to new blkaddr

There is a hole in between ra_data_block() and move_data_block(),
in where the data page is unlocked.

Thanks,

> 
>>   - f2fs_submit_page_bio
>>   : write last dirty data to old blkaddr
>> 					 - move_data_block
>> 					  - load old data from meta_inode page
>> 					  - f2fs_submit_page_write
>> 					  : write old data to new blkaddr
>>
>> Because invalidate_mapping_pages() will skip invalidating page when the
>> page has unclear status including locked, dirty, writeback and so on, so
>> we need to use truncate_inode_pages_range() instead of
>> invalidate_mapping_pages() to make sure meta_inode page will be dropped.
>>
>> Fixes: 6aa58d8ad20a ("f2fs: readahead encrypted block during GC")
>> Fixes: e3b49ea36802 ("f2fs: invalidate META_MAPPING before IPU/DIO write")
>> Signed-off-by: Chao Yu <chao@kernel.org>
>> ---
>>   fs/f2fs/checkpoint.c    |  5 +++--
>>   fs/f2fs/f2fs.h          | 28 +++++++++++++++++++++++++++-
>>   fs/f2fs/segment.c       |  5 ++---
>>   include/linux/f2fs_fs.h |  1 +
>>   4 files changed, 33 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>> index a09a9609e228..55b7d2cf030f 100644
>> --- a/fs/f2fs/checkpoint.c
>> +++ b/fs/f2fs/checkpoint.c
>> @@ -1598,8 +1598,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>   	 */
>>   	if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) ||
>>   		f2fs_sb_has_compression(sbi))
>> -		invalidate_mapping_pages(META_MAPPING(sbi),
>> -				MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
>> +		f2fs_bug_on(sbi,
>> +			invalidate_inode_pages2_range(META_MAPPING(sbi),
>> +				MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1));
>>   
>>   	f2fs_release_ino_entry(sbi, false);
>>   
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 4836e7cb0efe..9814e5981a6a 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -4655,10 +4655,36 @@ static inline bool f2fs_is_readonly(struct f2fs_sb_info *sbi)
>>   	return f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb);
>>   }
>>   
>> +static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi,
>> +					block_t blkaddr, unsigned int cnt)
>> +{
>> +	bool need_submit = false;
>> +	int i = 0;
>> +
>> +	do {
>> +		struct page *page;
>> +
>> +		page = find_get_page(META_MAPPING(sbi), blkaddr + i);
>> +		if (page) {
>> +			if (PageWriteback(page))
>> +				need_submit = true;
>> +			f2fs_put_page(page, 0);
>> +		}
>> +	} while (++i < cnt && !need_submit);
>> +
>> +	if (need_submit)
>> +		f2fs_submit_merged_write_cond(sbi, sbi->meta_inode,
>> +							NULL, 0, DATA);
>> +
>> +	truncate_inode_pages_range(META_MAPPING(sbi),
>> +			F2FS_BLK_TO_BYTES((loff_t)blkaddr),
>> +			F2FS_BLK_END_BYTES((loff_t)(blkaddr + cnt - 1)));
>> +}
>> +
>>   static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi,
>>   								block_t blkaddr)
>>   {
>> -	invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr);
>> +	f2fs_truncate_meta_inode_pages(sbi, blkaddr, 1);
>>   	f2fs_invalidate_compress_page(sbi, blkaddr);
>>   }
>>   
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index 4ff3b2d14ddf..20af48d7f784 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -3741,8 +3741,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
>>   	}
>>   
>>   	if (fio->post_read)
>> -		invalidate_mapping_pages(META_MAPPING(sbi),
>> -				fio->new_blkaddr, fio->new_blkaddr);
>> +		f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1);
>>   
>>   	stat_inc_inplace_blocks(fio->sbi);
>>   
>> @@ -3932,7 +3931,7 @@ void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
>>   	for (i = 0; i < len; i++)
>>   		f2fs_wait_on_block_writeback(inode, blkaddr + i);
>>   
>> -	invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1);
>> +	f2fs_truncate_meta_inode_pages(sbi, blkaddr, len);
>>   }
>>   
>>   static int read_compacted_summaries(struct f2fs_sb_info *sbi)
>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
>> index 755e9a41b196..a357287eac1e 100644
>> --- a/include/linux/f2fs_fs.h
>> +++ b/include/linux/f2fs_fs.h
>> @@ -27,6 +27,7 @@
>>   
>>   #define F2FS_BYTES_TO_BLK(bytes)	((bytes) >> F2FS_BLKSIZE_BITS)
>>   #define F2FS_BLK_TO_BYTES(blk)		((blk) << F2FS_BLKSIZE_BITS)
>> +#define F2FS_BLK_END_BYTES(blk)		(F2FS_BLK_TO_BYTES(blk + 1) - 1)
>>   
>>   /* 0, 1(node nid), 2(meta nid) are reserved node id */
>>   #define F2FS_RESERVED_NODE_NUM		3
>> -- 
>> 2.40.1
diff mbox series

Patch

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index a09a9609e228..55b7d2cf030f 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1598,8 +1598,9 @@  static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	 */
 	if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) ||
 		f2fs_sb_has_compression(sbi))
-		invalidate_mapping_pages(META_MAPPING(sbi),
-				MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
+		f2fs_bug_on(sbi,
+			invalidate_inode_pages2_range(META_MAPPING(sbi),
+				MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1));
 
 	f2fs_release_ino_entry(sbi, false);
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4836e7cb0efe..9814e5981a6a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -4655,10 +4655,36 @@  static inline bool f2fs_is_readonly(struct f2fs_sb_info *sbi)
 	return f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb);
 }
 
+static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi,
+					block_t blkaddr, unsigned int cnt)
+{
+	bool need_submit = false;
+	int i = 0;
+
+	do {
+		struct page *page;
+
+		page = find_get_page(META_MAPPING(sbi), blkaddr + i);
+		if (page) {
+			if (PageWriteback(page))
+				need_submit = true;
+			f2fs_put_page(page, 0);
+		}
+	} while (++i < cnt && !need_submit);
+
+	if (need_submit)
+		f2fs_submit_merged_write_cond(sbi, sbi->meta_inode,
+							NULL, 0, DATA);
+
+	truncate_inode_pages_range(META_MAPPING(sbi),
+			F2FS_BLK_TO_BYTES((loff_t)blkaddr),
+			F2FS_BLK_END_BYTES((loff_t)(blkaddr + cnt - 1)));
+}
+
 static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi,
 								block_t blkaddr)
 {
-	invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr);
+	f2fs_truncate_meta_inode_pages(sbi, blkaddr, 1);
 	f2fs_invalidate_compress_page(sbi, blkaddr);
 }
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4ff3b2d14ddf..20af48d7f784 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3741,8 +3741,7 @@  int f2fs_inplace_write_data(struct f2fs_io_info *fio)
 	}
 
 	if (fio->post_read)
-		invalidate_mapping_pages(META_MAPPING(sbi),
-				fio->new_blkaddr, fio->new_blkaddr);
+		f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1);
 
 	stat_inc_inplace_blocks(fio->sbi);
 
@@ -3932,7 +3931,7 @@  void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
 	for (i = 0; i < len; i++)
 		f2fs_wait_on_block_writeback(inode, blkaddr + i);
 
-	invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1);
+	f2fs_truncate_meta_inode_pages(sbi, blkaddr, len);
 }
 
 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 755e9a41b196..a357287eac1e 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -27,6 +27,7 @@ 
 
 #define F2FS_BYTES_TO_BLK(bytes)	((bytes) >> F2FS_BLKSIZE_BITS)
 #define F2FS_BLK_TO_BYTES(blk)		((blk) << F2FS_BLKSIZE_BITS)
+#define F2FS_BLK_END_BYTES(blk)		(F2FS_BLK_TO_BYTES(blk + 1) - 1)
 
 /* 0, 1(node nid), 2(meta nid) are reserved node id */
 #define F2FS_RESERVED_NODE_NUM		3