btrfs: Exploit the fact pages passed to extent_readpages are always contiguous
diff mbox series

Message ID 20190311075538.19242-1-nborisov@suse.com
State New
Headers show
Series
  • btrfs: Exploit the fact pages passed to extent_readpages are always contiguous
Related show

Commit Message

Nikolay Borisov March 11, 2019, 7:55 a.m. UTC
Currently extent_readpages (called from btrfs_readpages) will always call
__extent_readpages which tries to create contiguous range of pages and
call __do_contiguous_readpages when such contiguous range is created.

It turns out this is unnecessary due to the fact that generic VFS code
always calls filesystem's ->readpages callback (btrfs_readpages in
this case) with already contiguous pages. Armed with this knowledge it's
possible to simplify extent_readpages by eliminating the call to
__extent_readpages and directly calling contiguous_readpages. The only
edge case that needs to be handled is when add_to_page_cache_lru
fails. This is easy as all that is needed is to submit whatever is the
number of pages successfully added to the lru.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
---

I've been running with this patch for the past 3 months and haven't encountered 
any issues with it.
 fs/btrfs/extent_io.c | 58 +++++++++++---------------------------------
 1 file changed, 14 insertions(+), 44 deletions(-)

Comments

David Sterba March 13, 2019, 5:27 p.m. UTC | #1
On Mon, Mar 11, 2019 at 09:55:38AM +0200, Nikolay Borisov wrote:
> Currently extent_readpages (called from btrfs_readpages) will always call
> __extent_readpages which tries to create contiguous range of pages and
> call __do_contiguous_readpages when such contiguous range is created.
> 
> It turns out this is unnecessary due to the fact that generic VFS code
> always calls filesystem's ->readpages callback (btrfs_readpages in
> this case) with already contiguous pages. Armed with this knowledge it's
> possible to simplify extent_readpages by eliminating the call to
> __extent_readpages and directly calling contiguous_readpages. The only
> edge case that needs to be handled is when add_to_page_cache_lru
> fails. This is easy as all that is needed is to submit whatever is the
> number of pages successfully added to the lru.

I'd like to have more details why this is correct. Submitting what we
have so far seems ok, but the reasons why add_to_page_cache_lru fails
are unclear and go back to xarray.

Possible error is EEXIST, here it's clear that we don't need to read the
pages (already there). A sequence of such pages will make it wildly hop
from 1st for cycle iteration, to the if (nr) check and back.

Previously this looped still in the for-cycle, which was easier to
follow but otherwise not due to __extent_readpages. So I think it's an
improvement in the end, though a few comments would be good there.

Reviewed-by: David Sterba <dsterba@suse.com>

> 
> Signed-off-by: Nikolay Borisov <nborisov@suse.com>
> ---
> 
> I've been running with this patch for the past 3 months and haven't encountered 
> any issues with it.
>  fs/btrfs/extent_io.c | 58 +++++++++++---------------------------------
>  1 file changed, 14 insertions(+), 44 deletions(-)
> 
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index b20700ad8752..551dd21d7351 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -3066,7 +3066,7 @@ static int __do_readpage(struct extent_io_tree *tree,
>  	return ret;
>  }
>  
> -static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
> +static inline void contiguous_readpages(struct extent_io_tree *tree,
>  					     struct page *pages[], int nr_pages,
>  					     u64 start, u64 end,
>  					     struct extent_map **em_cached,
> @@ -3097,46 +3097,6 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
>  	}
>  }
>  
> -static void __extent_readpages(struct extent_io_tree *tree,
> -			       struct page *pages[],
> -			       int nr_pages,
> -			       struct extent_map **em_cached,
> -			       struct bio **bio, unsigned long *bio_flags,
> -			       u64 *prev_em_start)
> -{
> -	u64 start = 0;
> -	u64 end = 0;
> -	u64 page_start;
> -	int index;
> -	int first_index = 0;
> -
> -	for (index = 0; index < nr_pages; index++) {
> -		page_start = page_offset(pages[index]);
> -		if (!end) {
> -			start = page_start;
> -			end = start + PAGE_SIZE - 1;
> -			first_index = index;
> -		} else if (end + 1 == page_start) {
> -			end += PAGE_SIZE;
> -		} else {
> -			__do_contiguous_readpages(tree, &pages[first_index],
> -						  index - first_index, start,
> -						  end, em_cached,
> -						  bio, bio_flags,
> -						  prev_em_start);
> -			start = page_start;
> -			end = start + PAGE_SIZE - 1;
> -			first_index = index;
> -		}
> -	}
> -
> -	if (end)
> -		__do_contiguous_readpages(tree, &pages[first_index],
> -					  index - first_index, start,
> -					  end, em_cached, bio,
> -					  bio_flags, prev_em_start);
> -}
> -
>  static int __extent_read_full_page(struct extent_io_tree *tree,
>  				   struct page *page,
>  				   get_extent_t *get_extent,
> @@ -4098,6 +4058,8 @@ int extent_readpages(struct address_space *mapping, struct list_head *pages,
>  	u64 prev_em_start = (u64)-1;
>  
>  	while (!list_empty(pages)) {
> +		u64 contig_end = 0;
> +
>  		for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
>  			struct page *page = lru_to_page(pages);
>  
> @@ -4106,14 +4068,22 @@ int extent_readpages(struct address_space *mapping, struct list_head *pages,
>  			if (add_to_page_cache_lru(page, mapping, page->index,
>  						readahead_gfp_mask(mapping))) {
>  				put_page(page);
> -				continue;
> +				break;
>  			}
>  
>  			pagepool[nr++] = page;
> +			contig_end = page_offset(page) + PAGE_SIZE - 1;
>  		}
>  
> -		__extent_readpages(tree, pagepool, nr, &em_cached, &bio,
> -				   &bio_flags, &prev_em_start);
> +		if (nr) {
> +			u64 contig_start = page_offset(pagepool[0]);
> +
> +			ASSERT(contig_start + (nr*PAGE_SIZE) - 1 == contig_end);
> +
> +			contiguous_readpages(tree, pagepool, nr, contig_start,
> +				     contig_end, &em_cached, &bio, &bio_flags,
> +				     &prev_em_start);
> +		}
>  	}
>  
>  	if (em_cached)
> -- 
> 2.17.1
Nikolay Borisov March 14, 2019, 7:17 a.m. UTC | #2
On 13.03.19 г. 19:27 ч., David Sterba wrote:
> On Mon, Mar 11, 2019 at 09:55:38AM +0200, Nikolay Borisov wrote:
>> Currently extent_readpages (called from btrfs_readpages) will always call
>> __extent_readpages which tries to create contiguous range of pages and
>> call __do_contiguous_readpages when such contiguous range is created.
>>
>> It turns out this is unnecessary due to the fact that generic VFS code
>> always calls filesystem's ->readpages callback (btrfs_readpages in
>> this case) with already contiguous pages. Armed with this knowledge it's
>> possible to simplify extent_readpages by eliminating the call to
>> __extent_readpages and directly calling contiguous_readpages. The only
>> edge case that needs to be handled is when add_to_page_cache_lru
>> fails. This is easy as all that is needed is to submit whatever is the
>> number of pages successfully added to the lru.
> 
> I'd like to have more details why this is correct. Submitting what we
> have so far seems ok, the reasons why add_to_page_cache_lru are unclear
> and go back to xarray.

I'm having a hard time parsing the sentence after the comma, could you
rephrase that?

> 
> Possible error is EEXIST, here it's clear that we don't need to read the

Not only EEXIST, the code can also fail due to memory pressure from
mem_cgroup_try_charge in __add_to_page_cache_locked. In fact this code
was triggered by some of the xfstests I can't remember which one hence I
added the if (nr) check.

> pages (already there). A sequence of such pages will make it wildly hop
> from 1st for cycle iteration, to the if (nr) check and back.
> 
> Previously this looped still in the for-cycle, which was easier to
> follow but otherwise not due to __extent_readpages. So I think it's an
> improvement in the end, though a few comments would be good there.
> 
> Reviewed-by: David Sterba <dsterba@suse.com>
> 

<snip>
David Sterba March 25, 2019, 4:41 p.m. UTC | #3
On Thu, Mar 14, 2019 at 09:17:14AM +0200, Nikolay Borisov wrote:
> 
> 
> On 13.03.19 г. 19:27 ч., David Sterba wrote:
> > On Mon, Mar 11, 2019 at 09:55:38AM +0200, Nikolay Borisov wrote:
> >> Currently extent_readpages (called from btrfs_readpages) will always call
> >> __extent_readpages which tries to create contiguous range of pages and
> >> call __do_contiguous_readpages when such contiguous range is created.
> >>
> >> It turns out this is unnecessary due to the fact that generic VFS code
> >> always calls filesystem's ->readpages callback (btrfs_readpages in
> >> this case) with already contiguous pages. Armed with this knowledge it's
> >> possible to simplify extent_readpages by eliminating the call to
> >> __extent_readpages and directly calling contiguous_readpages. The only
> >> edge case that needs to be handled is when add_to_page_cache_lru
> >> fails. This is easy as all that is needed is to submit whatever is the
> >> number of pages successfully added to the lru.
> > 
> > I'd like to have more details why this is correct. Submitting what we
> > have so far seems ok, the reasons why add_to_page_cache_lru are unclear
> > and go back to xarray.
> 
> I'm having a hard time parsing the sentence after the comma, could you
> rephrase that?

The errors that can be returned from add_to_page_cache_lru are deep
inside the MM code, I traced it to the xarray helpers that now manage
the page cache.

> > 
> > Possible error is EEXIST, here it's clear that we don't need to read the
> 
> Not only EEXIST, the code can also fail due to memory pressure from
> mem_cgroup_try_charge in __add_to_page_cache_locked. In fact this code
> was triggered by some of the xfstests I can't remember which one hence I
> added the if (nr) check.

My concerns were about errors that could affect logic of the readpages,
for the hard errors like ENOMEM we can't do much. 

> > pages (already there). A sequence of such pages will make it wildly hop
> > from 1st for cycle iteration, to the if (nr) check and back.
> > 
> > Previously this looped still in the for-cycle, which was easier to
> > follow but otherwise not due to __extent_readpages. So I think it's an
> > improvement in the end, though a few comments would be good there.
> > 
> > Reviewed-by: David Sterba <dsterba@suse.com>
> > 
> 
> <snip>

Patch
diff mbox series

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b20700ad8752..551dd21d7351 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3066,7 +3066,7 @@  static int __do_readpage(struct extent_io_tree *tree,
 	return ret;
 }
 
-static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
+static inline void contiguous_readpages(struct extent_io_tree *tree,
 					     struct page *pages[], int nr_pages,
 					     u64 start, u64 end,
 					     struct extent_map **em_cached,
@@ -3097,46 +3097,6 @@  static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
 	}
 }
 
-static void __extent_readpages(struct extent_io_tree *tree,
-			       struct page *pages[],
-			       int nr_pages,
-			       struct extent_map **em_cached,
-			       struct bio **bio, unsigned long *bio_flags,
-			       u64 *prev_em_start)
-{
-	u64 start = 0;
-	u64 end = 0;
-	u64 page_start;
-	int index;
-	int first_index = 0;
-
-	for (index = 0; index < nr_pages; index++) {
-		page_start = page_offset(pages[index]);
-		if (!end) {
-			start = page_start;
-			end = start + PAGE_SIZE - 1;
-			first_index = index;
-		} else if (end + 1 == page_start) {
-			end += PAGE_SIZE;
-		} else {
-			__do_contiguous_readpages(tree, &pages[first_index],
-						  index - first_index, start,
-						  end, em_cached,
-						  bio, bio_flags,
-						  prev_em_start);
-			start = page_start;
-			end = start + PAGE_SIZE - 1;
-			first_index = index;
-		}
-	}
-
-	if (end)
-		__do_contiguous_readpages(tree, &pages[first_index],
-					  index - first_index, start,
-					  end, em_cached, bio,
-					  bio_flags, prev_em_start);
-}
-
 static int __extent_read_full_page(struct extent_io_tree *tree,
 				   struct page *page,
 				   get_extent_t *get_extent,
@@ -4098,6 +4058,8 @@  int extent_readpages(struct address_space *mapping, struct list_head *pages,
 	u64 prev_em_start = (u64)-1;
 
 	while (!list_empty(pages)) {
+		u64 contig_end = 0;
+
 		for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
 			struct page *page = lru_to_page(pages);
 
@@ -4106,14 +4068,22 @@  int extent_readpages(struct address_space *mapping, struct list_head *pages,
 			if (add_to_page_cache_lru(page, mapping, page->index,
 						readahead_gfp_mask(mapping))) {
 				put_page(page);
-				continue;
+				break;
 			}
 
 			pagepool[nr++] = page;
+			contig_end = page_offset(page) + PAGE_SIZE - 1;
 		}
 
-		__extent_readpages(tree, pagepool, nr, &em_cached, &bio,
-				   &bio_flags, &prev_em_start);
+		if (nr) {
+			u64 contig_start = page_offset(pagepool[0]);
+
+			ASSERT(contig_start + (nr*PAGE_SIZE) - 1 == contig_end);
+
+			contiguous_readpages(tree, pagepool, nr, contig_start,
+				     contig_end, &em_cached, &bio, &bio_flags,
+				     &prev_em_start);
+		}
 	}
 
 	if (em_cached)