diff mbox series

[1/7] btrfs: Don't call readpage_end_io_hook for the btree inode

Message ID 20200918133439.23187-2-nborisov@suse.com (mailing list archive)
State New, archived
Headers show
Series Remove struct extent_io_ops | expand

Commit Message

Nikolay Borisov Sept. 18, 2020, 1:34 p.m. UTC
Instead of relying on indirect calls to implement metadata buffer
validation simply check if the inode whose page we are processing equals
the btree inode. If it does call the necessary function.

This is an improvement in 2 directions:
1. We aren't paying the penalty of indirect calls in a post-speculation
   attacks world.

2. The function is now named more explicitly so it's obvious what's
   going on

This is in preparation to removing struct extent_io_ops altogether.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
---
 fs/btrfs/ctree.h     | 2 ++
 fs/btrfs/disk-io.c   | 8 ++++----
 fs/btrfs/disk-io.h   | 4 +++-
 fs/btrfs/extent_io.c | 9 ++++++---
 fs/btrfs/inode.c     | 7 +++----
 5 files changed, 18 insertions(+), 12 deletions(-)

Comments

Nikolay Borisov Sept. 18, 2020, 1:41 p.m. UTC | #1
On 18.09.20 г. 16:34 ч., Nikolay Borisov wrote:
> Instead of relying on indirect calls to implement metadata buffer
> validation simply check if the inode whose page we are processing equals
> the btree inode. If it does call the necessary function.
> 
> This is an improvement in 2 directions:
> 1. We aren't paying the penalty of indirect calls in a post-speculation
>    attacks world.
> 
> 2. The function is now named more explicitly so it's obvious what's
>    going on
> 
> This is in preparation to removing struct extent_io_ops altogether.
> 
> Signed-off-by: Nikolay Borisov <nborisov@suse.com>
> ---

So this patch does a bit more than it states because it's also modifying
the readpage_end_io_hook for data nodes as well. Other than that the
code is correct. I'd reword the changelog to the following:

Subject: Call readpage_end_io_hook directly

"
Instead of relying on indirect calls to implement post-read processing
simply distinguish between data/metadata pages and call the
corresponding function. This patch also renames and exports the 2 hooks
giving them more clear names.

This is an improvement in 2 directions:
1. We aren't paying the penalty of indirect calls in a post-speculation
   attacks world.

2. The function is now named more explicitly so it's obvious what's
    going on

 This is in preparation to removing struct extent_io_ops altogether.
"

>  fs/btrfs/ctree.h     | 2 ++
>  fs/btrfs/disk-io.c   | 8 ++++----
>  fs/btrfs/disk-io.h   | 4 +++-
>  fs/btrfs/extent_io.c | 9 ++++++---
>  fs/btrfs/inode.c     | 7 +++----
>  5 files changed, 18 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 4e667b0565e0..0c58d96b9fb3 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -2962,6 +2962,8 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode,
>  u64 btrfs_file_extent_end(const struct btrfs_path *path);
>  
>  /* inode.c */
> +int btrfs_check_csum(struct btrfs_io_bio *io_bio, u64 phy_offset,
> +		     struct page *page, u64 start, u64 end, int mirror);
>  struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
>  					   u64 start, u64 len);
>  noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 160b485d2cc0..5ad11c38230f 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -524,9 +524,9 @@ static int check_tree_block_fsid(struct extent_buffer *eb)
>  	return 1;
>  }
>  
> -static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
> -				      u64 phy_offset, struct page *page,
> -				      u64 start, u64 end, int mirror)
> +int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio, u64 phy_offset,
> +				   struct page *page, u64 start, u64 end,
> +				   int mirror)
>  {
>  	u64 found_start;
>  	int found_level;
> @@ -4639,5 +4639,5 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
>  static const struct extent_io_ops btree_extent_io_ops = {
>  	/* mandatory callbacks */
>  	.submit_bio_hook = btree_submit_bio_hook,
> -	.readpage_end_io_hook = btree_readpage_end_io_hook,
> +	.readpage_end_io_hook = NULL
>  };
> diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
> index 89b6a709a184..bc2e49246199 100644
> --- a/fs/btrfs/disk-io.h
> +++ b/fs/btrfs/disk-io.h
> @@ -76,7 +76,9 @@ void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
>  void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
>  void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
>  				 struct btrfs_root *root);
> -
> +int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio, u64 phy_offset,
> +				   struct page *page, u64 start, u64 end,
> +				   int mirror);
>  #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
>  struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
>  #endif
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index afac70ef0cc5..5e47606f7786 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -2851,9 +2851,12 @@ static void end_bio_extent_readpage(struct bio *bio)
>  
>  		mirror = io_bio->mirror_num;
>  		if (likely(uptodate)) {
> -			ret = tree->ops->readpage_end_io_hook(io_bio, offset,
> -							      page, start, end,
> -							      mirror);
> +			if (data_inode)
> +				ret = btrfs_check_csum(io_bio, offset, page,
> +						       start, end, mirror);
> +			else
> +				ret = btrfs_validate_metadata_buffer(io_bio,
> +					offset, page, start, end, mirror);
>  			if (ret)
>  				uptodate = 0;
>  			else
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index cb3fdd0798c6..23ac09aa813e 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -2817,9 +2817,8 @@ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
>   * if there's a match, we allow the bio to finish.  If not, the code in
>   * extent_io.c will try to find good copies for us.
>   */
> -static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
> -				      u64 phy_offset, struct page *page,
> -				      u64 start, u64 end, int mirror)
> +int btrfs_check_csum(struct btrfs_io_bio *io_bio, u64 phy_offset,
> +		     struct page *page, u64 start, u64 end, int mirror)
>  {
>  	size_t offset = start - page_offset(page);
>  	struct inode *inode = page->mapping->host;
> @@ -10249,7 +10248,7 @@ static const struct file_operations btrfs_dir_file_operations = {
>  static const struct extent_io_ops btrfs_extent_io_ops = {
>  	/* mandatory callbacks */
>  	.submit_bio_hook = btrfs_submit_bio_hook,
> -	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
> +	.readpage_end_io_hook = NULL
>  };
>  
>  /*
>
Johannes Thumshirn Sept. 21, 2020, 2:54 p.m. UTC | #2
On 18/09/2020 15:34, Nikolay Borisov wrote:
>  static const struct extent_io_ops btree_extent_io_ops = {
>  	/* mandatory callbacks */
>  	.submit_bio_hook = btree_submit_bio_hook,
> -	.readpage_end_io_hook = btree_readpage_end_io_hook,
> +	.readpage_end_io_hook = NULL

[...]

> @@ -10249,7 +10248,7 @@ static const struct file_operations btrfs_dir_file_operations = {
>  static const struct extent_io_ops btrfs_extent_io_ops = {
>  	/* mandatory callbacks */
>  	.submit_bio_hook = btrfs_submit_bio_hook,
> -	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
> +	.readpage_end_io_hook = NULL

After your patch both implementations of 
extent_io_ops->readpage_end_io_hook() are gone, why don't you kill the
definition from struct extent_io_ops as well?
David Sterba Sept. 21, 2020, 5:45 p.m. UTC | #3
On Fri, Sep 18, 2020 at 04:34:33PM +0300, Nikolay Borisov wrote:
> Instead of relying on indirect calls to implement metadata buffer
> validation simply check if the inode whose page we are processing equals
> the btree inode. If it does call the necessary function.
> 
> This is an improvement in 2 directions:
> 1. We aren't paying the penalty of indirect calls in a post-speculation
>    attacks world.
> 
> 2. The function is now named more explicitly so it's obvious what's
>    going on

The new naming is not making things clear, btrfs_check_csum sounds very
generic while it does a very specific thing just by the number and type
of the parameters. Similar for btrfs_validate_metadata_buffer.

> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -2851,9 +2851,12 @@ static void end_bio_extent_readpage(struct bio *bio)
>  
>  		mirror = io_bio->mirror_num;
>  		if (likely(uptodate)) {
> -			ret = tree->ops->readpage_end_io_hook(io_bio, offset,
> -							      page, start, end,
> -							      mirror);
> +			if (data_inode)
> +				ret = btrfs_check_csum(io_bio, offset, page,
> +						       start, end, mirror);
> +			else
> +				ret = btrfs_validate_metadata_buffer(io_bio,
> +					offset, page, start, end, mirror);

In the context where the functions are used I'd expect some symmetry,
data/metadata. Something like btrfs_validate_data_bio.
Nikolay Borisov Sept. 23, 2020, 6:29 a.m. UTC | #4
On 21.09.20 г. 20:45 ч., David Sterba wrote:
> On Fri, Sep 18, 2020 at 04:34:33PM +0300, Nikolay Borisov wrote:
>> Instead of relying on indirect calls to implement metadata buffer
>> validation simply check if the inode whose page we are processing equals
>> the btree inode. If it does call the necessary function.
>>
>> This is an improvement in 2 directions:
>> 1. We aren't paying the penalty of indirect calls in a post-speculation
>>    attacks world.
>>
>> 2. The function is now named more explicitly so it's obvious what's
>>    going on
> 
> The new naming is not making things clear, btrfs_check_csum sounds very
> generic while it does a very specific thing just by the number and type
> of the parameters. Similar for btrfs_validate_metadata_buffer.
> 
>> --- a/fs/btrfs/extent_io.c
>> +++ b/fs/btrfs/extent_io.c
>> @@ -2851,9 +2851,12 @@ static void end_bio_extent_readpage(struct bio *bio)
>>  
>>  		mirror = io_bio->mirror_num;
>>  		if (likely(uptodate)) {
>> -			ret = tree->ops->readpage_end_io_hook(io_bio, offset,
>> -							      page, start, end,
>> -							      mirror);
>> +			if (data_inode)
>> +				ret = btrfs_check_csum(io_bio, offset, page,
>> +						       start, end, mirror);
>> +			else
>> +				ret = btrfs_validate_metadata_buffer(io_bio,
>> +					offset, page, start, end, mirror);
> 
> In the context where the functions are used I'd expect some symmetry,
> data/metadata. Something like btrfs_validate_data_bio.
> 

The reason for this naming is that btrfs_vlidate_metadata_buffer
actually validates as in "tree-checker style validation" of the extent
buffer not simply calculating the checksum. So to me it feels like a
more complete,heavyweight operations hence "validating", whlist
btrfs_check_csum just checks the csum of a single sector/blocksize in
the bio. I think the metadata function's name conveys what it's doing in
full:

1. It's doing validation as per aforementioned explanation
2. It's doing it for a whole extent buffer and not just a chunk of it.

I agree that the data function's name is somewhat generic, perhahps it
could be renamed so that it points to the fact it's validating a single
sector/blocksize? I.e btrfs_check_ blocksize_csum or something like that ?
David Sterba Sept. 23, 2020, 2:10 p.m. UTC | #5
On Wed, Sep 23, 2020 at 09:29:00AM +0300, Nikolay Borisov wrote:
> 
> 
> On 21.09.20 г. 20:45 ч., David Sterba wrote:
> > On Fri, Sep 18, 2020 at 04:34:33PM +0300, Nikolay Borisov wrote:
> >> Instead of relying on indirect calls to implement metadata buffer
> >> validation simply check if the inode whose page we are processing equals
> >> the btree inode. If it does call the necessary function.
> >>
> >> This is an improvement in 2 directions:
> >> 1. We aren't paying the penalty of indirect calls in a post-speculation
> >>    attacks world.
> >>
> >> 2. The function is now named more explicitly so it's obvious what's
> >>    going on
> > 
> > The new naming is not making things clear, btrfs_check_csum sounds very
> > generic while it does a very specific thing just by the number and type
> > of the parameters. Similar for btrfs_validate_metadata_buffer.
> > 
> >> --- a/fs/btrfs/extent_io.c
> >> +++ b/fs/btrfs/extent_io.c
> >> @@ -2851,9 +2851,12 @@ static void end_bio_extent_readpage(struct bio *bio)
> >>  
> >>  		mirror = io_bio->mirror_num;
> >>  		if (likely(uptodate)) {
> >> -			ret = tree->ops->readpage_end_io_hook(io_bio, offset,
> >> -							      page, start, end,
> >> -							      mirror);
> >> +			if (data_inode)
> >> +				ret = btrfs_check_csum(io_bio, offset, page,
> >> +						       start, end, mirror);
> >> +			else
> >> +				ret = btrfs_validate_metadata_buffer(io_bio,
> >> +					offset, page, start, end, mirror);
> > 
> > In the context where the functions are used I'd expect some symmetry,
> > data/metadata. Something like btrfs_validate_data_bio.
> > 
> 
> The reason for this naming is that btrfs_vlidate_metadata_buffer
> actually validates as in "tree-checker style validation" of the extent
> buffer not simply calculating the checksum. So to me it feels like a
> more complete,heavyweight operations hence "validating", whlist
> btrfs_check_csum just checks the csum of a single sector/blocksize in
> the bio. I think the metadata function's name conveys what it's doing in
> full:
> 
> 1. It's doing validation as per aforementioned explanation
> 2. It's doing it for a whole extent buffer and not just a chunk of it.

No problem with the metadata function name, I agree with the reasoning
above.

> I agree that the data function's name is somewhat generic, perhahps it
> could be renamed so that it points to the fact it's validating a single
> sector/blocksize? I.e btrfs_check_ blocksize_csum or something like that ?

Yeah, that the data have a simpler validation maybe does not deserve to
be called like that. We should not use 'sector' here as bios use that
too. So btrfs_check_data_block_csum or btrfs_check_block_csum?
diff mbox series

Patch

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4e667b0565e0..0c58d96b9fb3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2962,6 +2962,8 @@  void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode,
 u64 btrfs_file_extent_end(const struct btrfs_path *path);
 
 /* inode.c */
+int btrfs_check_csum(struct btrfs_io_bio *io_bio, u64 phy_offset,
+		     struct page *page, u64 start, u64 end, int mirror);
 struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
 					   u64 start, u64 len);
 noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 160b485d2cc0..5ad11c38230f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -524,9 +524,9 @@  static int check_tree_block_fsid(struct extent_buffer *eb)
 	return 1;
 }
 
-static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
-				      u64 phy_offset, struct page *page,
-				      u64 start, u64 end, int mirror)
+int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio, u64 phy_offset,
+				   struct page *page, u64 start, u64 end,
+				   int mirror)
 {
 	u64 found_start;
 	int found_level;
@@ -4639,5 +4639,5 @@  static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
 static const struct extent_io_ops btree_extent_io_ops = {
 	/* mandatory callbacks */
 	.submit_bio_hook = btree_submit_bio_hook,
-	.readpage_end_io_hook = btree_readpage_end_io_hook,
+	.readpage_end_io_hook = NULL
 };
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 89b6a709a184..bc2e49246199 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -76,7 +76,9 @@  void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
 void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
 void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
 				 struct btrfs_root *root);
-
+int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio, u64 phy_offset,
+				   struct page *page, u64 start, u64 end,
+				   int mirror);
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
 #endif
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index afac70ef0cc5..5e47606f7786 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2851,9 +2851,12 @@  static void end_bio_extent_readpage(struct bio *bio)
 
 		mirror = io_bio->mirror_num;
 		if (likely(uptodate)) {
-			ret = tree->ops->readpage_end_io_hook(io_bio, offset,
-							      page, start, end,
-							      mirror);
+			if (data_inode)
+				ret = btrfs_check_csum(io_bio, offset, page,
+						       start, end, mirror);
+			else
+				ret = btrfs_validate_metadata_buffer(io_bio,
+					offset, page, start, end, mirror);
 			if (ret)
 				uptodate = 0;
 			else
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index cb3fdd0798c6..23ac09aa813e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2817,9 +2817,8 @@  static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
  * if there's a match, we allow the bio to finish.  If not, the code in
  * extent_io.c will try to find good copies for us.
  */
-static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
-				      u64 phy_offset, struct page *page,
-				      u64 start, u64 end, int mirror)
+int btrfs_check_csum(struct btrfs_io_bio *io_bio, u64 phy_offset,
+		     struct page *page, u64 start, u64 end, int mirror)
 {
 	size_t offset = start - page_offset(page);
 	struct inode *inode = page->mapping->host;
@@ -10249,7 +10248,7 @@  static const struct file_operations btrfs_dir_file_operations = {
 static const struct extent_io_ops btrfs_extent_io_ops = {
 	/* mandatory callbacks */
 	.submit_bio_hook = btrfs_submit_bio_hook,
-	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
+	.readpage_end_io_hook = NULL
 };
 
 /*