diff mbox series

[v2] btrfs: inode: Don't compress if NODATASUM or NODATACOW set

Message ID 20190701051225.17957-1-wqu@suse.com (mailing list archive)
State New, archived
Headers show
Series [v2] btrfs: inode: Don't compress if NODATASUM or NODATACOW set | expand

Commit Message

Qu Wenruo July 1, 2019, 5:12 a.m. UTC
As btrfs(5) specified:

	Note
	If nodatacow or nodatasum are enabled, compression is disabled.

If NODATASUM or NODATACOW set, we should not compress the extent.

Normally NODATACOW is detected properly in run_delalloc_range() so
compression won't happen for NODATACOW.

However for NODATASUM we don't have any check, and it can cause
compressed extent without csum pretty easily, just by:
  mkfs.btrfs -f $dev
  mount $dev $mnt -o nodatasum
  touch $mnt/foobar
  mount -o remount,datasum,compress $mnt
  xfs_io -f -c "pwrite 0 128K" $mnt/foobar

And in fact, we have a bug report about corrupted compressed extent
without proper data checksum so even RAID1 can't recover the corruption.
(https://bugzilla.kernel.org/show_bug.cgi?id=199707)

Running compression without proper checksum could cause more damage when
corruption happens, as compressed data could make the whole extent
unreadable, so there is no need to allow compression for
NODATACSUM.

The fix will refactor the inode compression check into two parts:
- inode_can_compress()
  As the hard requirement, checked at btrfs_run_delalloc_range(), so no
  compression will happen for NODATASUM inode at all.

- inode_need_compress()
  As the soft requirement, checked at btrfs_run_delalloc_range() and
  compress_file_range().

Reported-by: James Harvey <jamespharvey20@gmail.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
Changelog:
v2:
- Refactor inode_need_compress() into two functions
- Refactor inode_need_compress() to return bool
---
 fs/btrfs/inode.c | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

Comments

David Sterba July 4, 2019, 4:04 p.m. UTC | #1
On Mon, Jul 01, 2019 at 05:12:46AM +0000, Qu Wenruo wrote:
> As btrfs(5) specified:
> 
> 	Note
> 	If nodatacow or nodatasum are enabled, compression is disabled.
> 
> If NODATASUM or NODATACOW set, we should not compress the extent.
> 
> Normally NODATACOW is detected properly in run_delalloc_range() so
> compression won't happen for NODATACOW.
> 
> However for NODATASUM we don't have any check, and it can cause
> compressed extent without csum pretty easily, just by:
>   mkfs.btrfs -f $dev
>   mount $dev $mnt -o nodatasum
>   touch $mnt/foobar
>   mount -o remount,datasum,compress $mnt
>   xfs_io -f -c "pwrite 0 128K" $mnt/foobar
> 
> And in fact, we have a bug report about corrupted compressed extent
> without proper data checksum so even RAID1 can't recover the corruption.
> (https://bugzilla.kernel.org/show_bug.cgi?id=199707)
> 
> Running compression without proper checksum could cause more damage when
> corruption happens, as compressed data could make the whole extent
> unreadable, so there is no need to allow compression for
> NODATACSUM.
> 
> The fix will refactor the inode compression check into two parts:
> - inode_can_compress()
>   As the hard requirement, checked at btrfs_run_delalloc_range(), so no
>   compression will happen for NODATASUM inode at all.
> 
> - inode_need_compress()
>   As the soft requirement, checked at btrfs_run_delalloc_range() and
>   compress_file_range().
> 
> Reported-by: James Harvey <jamespharvey20@gmail.com>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
> Changelog:
> v2:
> - Refactor inode_need_compress() into two functions
> - Refactor inode_need_compress() to return bool
> ---
>  fs/btrfs/inode.c | 38 ++++++++++++++++++++++++++++++++------
>  1 file changed, 32 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index a2aabdb85226..be1cabf35680 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -394,24 +394,49 @@ static noinline int add_async_extent(struct async_chunk *cow,
>  	return 0;
>  }
>  
> -static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
> +/*
> + * Check if the inode can accept compression.
> + *
> + * This checks for the hard requirement of compression, including CoW and
> + * checksum requirement.
> + */
> +static inline bool inode_can_compress(struct inode *inode)
> +{
> +	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW ||
> +	    BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
> +		return false;
> +	return true;
> +}
> +
> +/*
> + * Check if the inode need compression.
> + *
> + * This checks for the soft requirement of compression.
> + */
> +static inline bool inode_need_compress(struct inode *inode, u64 start, u64 end)
>  {
>  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>  
> +	if (!inode_can_compress(inode)) {
> +		WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
> +			KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
> +			btrfs_ino(BTRFS_I(inode)));
> +		return false;
> +	}
>  	/* force compress */
>  	if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
> -		return 1;
> +		return true;
>  	/* defrag ioctl */
>  	if (BTRFS_I(inode)->defrag_compress)
> -		return 1;
> +		return true;
>  	/* bad compression ratios */
>  	if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
> -		return 0;
> +		return false;
>  	if (btrfs_test_opt(fs_info, COMPRESS) ||
>  	    BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
>  	    BTRFS_I(inode)->prop_compress)
>  		return btrfs_compress_heuristic(inode, start, end);
> -	return 0;
> +	return false;
>  }
>  
>  static inline void inode_should_defrag(struct btrfs_inode *inode,
> @@ -1630,7 +1655,8 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
>  	} else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
>  		ret = run_delalloc_nocow(inode, locked_page, start, end,
>  					 page_started, 0, nr_written);
> -	} else if (!inode_need_compress(inode, start, end)) {
> +	} else if (!inode_can_compress(inode) ||
> +		   !inode_need_compress(inode, start, end)) {

Well, that's not excatly what I expected, but because this is an
important fix I won't object now and add it to 5.3 queue.
Qu Wenruo July 4, 2019, 11:51 p.m. UTC | #2
On 2019/7/5 上午12:04, David Sterba wrote:
> On Mon, Jul 01, 2019 at 05:12:46AM +0000, Qu Wenruo wrote:
>> As btrfs(5) specified:
>>
>> 	Note
>> 	If nodatacow or nodatasum are enabled, compression is disabled.
>>
>> If NODATASUM or NODATACOW set, we should not compress the extent.
>>
>> Normally NODATACOW is detected properly in run_delalloc_range() so
>> compression won't happen for NODATACOW.
>>
>> However for NODATASUM we don't have any check, and it can cause
>> compressed extent without csum pretty easily, just by:
>>   mkfs.btrfs -f $dev
>>   mount $dev $mnt -o nodatasum
>>   touch $mnt/foobar
>>   mount -o remount,datasum,compress $mnt
>>   xfs_io -f -c "pwrite 0 128K" $mnt/foobar
>>
>> And in fact, we have a bug report about corrupted compressed extent
>> without proper data checksum so even RAID1 can't recover the corruption.
>> (https://bugzilla.kernel.org/show_bug.cgi?id=199707)
>>
>> Running compression without proper checksum could cause more damage when
>> corruption happens, as compressed data could make the whole extent
>> unreadable, so there is no need to allow compression for
>> NODATACSUM.
>>
>> The fix will refactor the inode compression check into two parts:
>> - inode_can_compress()
>>   As the hard requirement, checked at btrfs_run_delalloc_range(), so no
>>   compression will happen for NODATASUM inode at all.
>>
>> - inode_need_compress()
>>   As the soft requirement, checked at btrfs_run_delalloc_range() and
>>   compress_file_range().
>>
>> Reported-by: James Harvey <jamespharvey20@gmail.com>
>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>> ---
>> Changelog:
>> v2:
>> - Refactor inode_need_compress() into two functions
>> - Refactor inode_need_compress() to return bool
>> ---
>>  fs/btrfs/inode.c | 38 ++++++++++++++++++++++++++++++++------
>>  1 file changed, 32 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
>> index a2aabdb85226..be1cabf35680 100644
>> --- a/fs/btrfs/inode.c
>> +++ b/fs/btrfs/inode.c
>> @@ -394,24 +394,49 @@ static noinline int add_async_extent(struct async_chunk *cow,
>>  	return 0;
>>  }
>>  
>> -static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
>> +/*
>> + * Check if the inode can accept compression.
>> + *
>> + * This checks for the hard requirement of compression, including CoW and
>> + * checksum requirement.
>> + */
>> +static inline bool inode_can_compress(struct inode *inode)
>> +{
>> +	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW ||
>> +	    BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
>> +		return false;
>> +	return true;
>> +}
>> +
>> +/*
>> + * Check if the inode need compression.
>> + *
>> + * This checks for the soft requirement of compression.
>> + */
>> +static inline bool inode_need_compress(struct inode *inode, u64 start, u64 end)
>>  {
>>  	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>>  
>> +	if (!inode_can_compress(inode)) {
>> +		WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
>> +			KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
>> +			btrfs_ino(BTRFS_I(inode)));
>> +		return false;
>> +	}
>>  	/* force compress */
>>  	if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
>> -		return 1;
>> +		return true;
>>  	/* defrag ioctl */
>>  	if (BTRFS_I(inode)->defrag_compress)
>> -		return 1;
>> +		return true;
>>  	/* bad compression ratios */
>>  	if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
>> -		return 0;
>> +		return false;
>>  	if (btrfs_test_opt(fs_info, COMPRESS) ||
>>  	    BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
>>  	    BTRFS_I(inode)->prop_compress)
>>  		return btrfs_compress_heuristic(inode, start, end);
>> -	return 0;
>> +	return false;
>>  }
>>  
>>  static inline void inode_should_defrag(struct btrfs_inode *inode,
>> @@ -1630,7 +1655,8 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
>>  	} else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
>>  		ret = run_delalloc_nocow(inode, locked_page, start, end,
>>  					 page_started, 0, nr_written);
>> -	} else if (!inode_need_compress(inode, start, end)) {
>> +	} else if (!inode_can_compress(inode) ||
>> +		   !inode_need_compress(inode, start, end)) {
> 
> Well, that's not excatly what I expected, but because this is an
> important fix I won't object now and add it to 5.3 queue.
> 

I know what you expect, single inode_can_compress().

But still, we want to avoid hitting the compression routine, thus here
we do extra inode_need_compress() check other than exiting in
compress_file_extent().

Thanks,
Qu
David Sterba July 5, 2019, 4:38 p.m. UTC | #3
On Thu, Jul 04, 2019 at 11:51:38PM +0000, WenRuo Qu wrote:
> >> @@ -1630,7 +1655,8 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
> >>  	} else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
> >>  		ret = run_delalloc_nocow(inode, locked_page, start, end,
> >>  					 page_started, 0, nr_written);
> >> -	} else if (!inode_need_compress(inode, start, end)) {
> >> +	} else if (!inode_can_compress(inode) ||
> >> +		   !inode_need_compress(inode, start, end)) {
> > 
> > Well, that's not excatly what I expected, but because this is an
> > important fix I won't object now and add it to 5.3 queue.
> > 
> 
> I know what you expect, single inode_can_compress().

Yeah, because need_compress calls the heuristic, and then again inside
compress_file_range. I found a few problems in the compression decision
logic that will address that but for now that's how things have been so
the fix is not making it worse.

> But still, we want to avoid hitting the compression routine, thus here
> we do extra inode_need_compress() check other than exiting in
> compress_file_extent().

That's right, some of the compression decisions can be made out of
compress_file_range, like NOCOMPRESS. Even the heuristics can let it
skip compression completely but when this does not reach
compress_file_range the NOCOMPRESS flag has no chance to be set.
And there are more issues.
diff mbox series

Patch

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a2aabdb85226..be1cabf35680 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -394,24 +394,49 @@  static noinline int add_async_extent(struct async_chunk *cow,
 	return 0;
 }
 
-static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
+/*
+ * Check if the inode can accept compression.
+ *
+ * This checks for the hard requirement of compression, including CoW and
+ * checksum requirement.
+ */
+static inline bool inode_can_compress(struct inode *inode)
+{
+	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW ||
+	    BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
+		return false;
+	return true;
+}
+
+/*
+ * Check if the inode need compression.
+ *
+ * This checks for the soft requirement of compression.
+ */
+static inline bool inode_need_compress(struct inode *inode, u64 start, u64 end)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 
+	if (!inode_can_compress(inode)) {
+		WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
+			KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
+			btrfs_ino(BTRFS_I(inode)));
+		return false;
+	}
 	/* force compress */
 	if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
-		return 1;
+		return true;
 	/* defrag ioctl */
 	if (BTRFS_I(inode)->defrag_compress)
-		return 1;
+		return true;
 	/* bad compression ratios */
 	if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
-		return 0;
+		return false;
 	if (btrfs_test_opt(fs_info, COMPRESS) ||
 	    BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
 	    BTRFS_I(inode)->prop_compress)
 		return btrfs_compress_heuristic(inode, start, end);
-	return 0;
+	return false;
 }
 
 static inline void inode_should_defrag(struct btrfs_inode *inode,
@@ -1630,7 +1655,8 @@  int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
 	} else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
 		ret = run_delalloc_nocow(inode, locked_page, start, end,
 					 page_started, 0, nr_written);
-	} else if (!inode_need_compress(inode, start, end)) {
+	} else if (!inode_can_compress(inode) ||
+		   !inode_need_compress(inode, start, end)) {
 		ret = cow_file_range(inode, locked_page, start, end, end,
 				      page_started, nr_written, 1, NULL);
 	} else {