
btrfs: check if page is extent locked early during release

Message ID sw3jkfih2ztq4jsjwmkfu3mh7msqvbfripxael24krfp3ablgw@tqwogynwdix6 (mailing list archive)
State New, archived

Commit Message

Goldwyn Rodrigues June 9, 2023, 1:06 p.m. UTC
During a page release, the check for a locked extent range is the last
step performed, in try_release_extent_state(). It happens after finding
the extent maps (ems) and decoupling them from the page.
try_release_extent_mapping() also checks whether extents are locked.

Under memory pressure, it is better to return early if btrfs cannot
release a page, skipping the extent map lookup and decoupling entirely.
Check whether the page's range is locked in try_release_extent_mapping()
before starting extent map resolution. If it is locked, return zero
immediately (the page cannot be freed).

Move the extent-locked check from try_release_extent_state() to
try_release_extent_mapping().

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
 fs/btrfs/extent_io.c | 43 ++++++++++++++++++++-----------------------
 1 file changed, 20 insertions(+), 23 deletions(-)
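
The control-flow change is easier to see outside the diff. Below is a
minimal userspace C sketch of the patched ordering; range_is_locked()
and drop_extent_maps() are hypothetical stand-ins for
test_range_bit(..., EXTENT_LOCKED, ...) and the extent map
lookup/decoupling work, not real kernel APIs.

#include <stdbool.h>
#include <stdio.h>

/* Stub standing in for test_range_bit(tree, start, end, EXTENT_LOCKED, ...). */
static bool range_is_locked(unsigned long start, unsigned long end)
{
	(void)start;
	(void)end;
	return false;	/* pretend the range is unlocked */
}

/* Stub standing in for the extent map lookup and decoupling loop. */
static int drop_extent_maps(unsigned long start, unsigned long end)
{
	(void)start;
	(void)end;
	return 0;
}

/* Patched ordering: the lock check runs before any extent map work. */
static int try_release_sketch(unsigned long start, unsigned long end)
{
	if (range_is_locked(start, end))
		return 0;	/* cannot free the page, skip everything else */

	if (drop_extent_maps(start, end) < 0)
		return 0;	/* e.g. ENOMEM: do not allow the release */

	return 1;	/* page can be released */
}

int main(void)
{
	printf("release allowed: %d\n", try_release_sketch(0, 4095));
	return 0;
}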

Comments

Filipe Manana June 9, 2023, 1:46 p.m. UTC | #1
On Fri, Jun 9, 2023 at 2:36 PM Goldwyn Rodrigues <rgoldwyn@suse.de> wrote:
>
> During a page release, the check for a locked extent range is the last
> step performed, in try_release_extent_state(). It happens after finding
> the extent maps (ems) and decoupling them from the page.
> try_release_extent_mapping() also checks whether extents are locked.
>
> Under memory pressure, it is better to return early if btrfs cannot
> release a page, skipping the extent map lookup and decoupling entirely.
> Check whether the page's range is locked in try_release_extent_mapping()
> before starting extent map resolution. If it is locked, return zero
> immediately (the page cannot be freed).

That is better if the most common case is having the page's range
currently locked in the inode's io tree.
Otherwise it's not better: we are doing one more search in the io tree
than before, and the io trees can get pretty big sometimes.

Last time I checked, several years ago for a different optimization,
most of the time, by far, the page's range is not currently locked.
When it was locked, that accounted for something like 1% or 2% of the
cases, and it mostly corresponded to writeback having just finished,
with btrfs_finish_ordered_io() running and having already locked the
range.

Have you done any analysis and found the case where the page's range
is already locked to be the most common one?
I doubt things have changed radically enough to make it the most common case.

Do you have details of a workload that gains anything from this
change, or benchmark results?
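
For concreteness, here is a back-of-the-envelope model of this tradeoff
(illustrative unit costs, not measurements; S and W are assumed values,
and p matches the 1-2% lock hit rate quoted above). With probability p
the early check stops after one search (cost S); otherwise the path does
essentially the same work W as before plus that extra search, so the
expected cost is S + (1 - p) * W versus W for the old ordering. The
early check only pays off when S < p * W.

#include <stdio.h>

int main(void)
{
	double S = 1.0;		/* cost of one io-tree search (arbitrary unit) */
	double W = 10.0;	/* assumed cost of the rest of the release path */
	double p = 0.02;	/* fraction of releases that see a locked range */

	double late_check = W;			/* old ordering */
	double early_check = S + (1.0 - p) * W;	/* patched ordering */

	printf("late=%.2f early=%.2f (early wins iff S < p * W)\n",
	       late_check, early_check);
	return 0;
}

With these assumed numbers the early check comes out more expensive
(10.80 vs 10.00), which is the review's point: at a low hit rate the
extra up-front search costs more than it saves.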

>
> Move the extent-locked check from try_release_extent_state() to
> try_release_extent_mapping().
>
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>  fs/btrfs/extent_io.c | 43 ++++++++++++++++++++-----------------------
>  1 file changed, 20 insertions(+), 23 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index e5bec73b5991..5b49ef95c653 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -2352,31 +2352,24 @@ static int try_release_extent_state(struct extent_io_tree *tree,
>  {
>         u64 start = page_offset(page);
>         u64 end = start + PAGE_SIZE - 1;
> -       int ret = 1;
> -
> -       if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
> -               ret = 0;
> -       } else {
> -               u32 clear_bits = ~(EXTENT_NODATASUM |
> -                                  EXTENT_DELALLOC_NEW | EXTENT_CTLBITS);

So this depends on the previous patch you submitted shortly before
this one.
When there's such a dependency, it's best to make them part of the same patchset.

Thanks.

> +       int ret;
> +       u32 clear_bits = ~(EXTENT_NODATASUM |
> +                       EXTENT_DELALLOC_NEW | EXTENT_CTLBITS);
>
> -               /*
> -                * At this point we can safely clear everything except the
> -                * locked bit, the nodatasum bit and the delalloc new bit.
> -                * The delalloc new bit will be cleared by ordered extent
> -                * completion.
> -                */
> -               ret = __clear_extent_bit(tree, start, end, clear_bits, NULL, NULL);
> +       /*
> +        * At this point we can safely clear everything except the
> +        * locked bit, the nodatasum bit and the delalloc new bit.
> +        * The delalloc new bit will be cleared by ordered extent
> +        * completion.
> +        */
> +       ret = __clear_extent_bit(tree, start, end, clear_bits, NULL, NULL);
>
> -               /* if clear_extent_bit failed for enomem reasons,
> -                * we can't allow the release to continue.
> -                */
> -               if (ret < 0)
> -                       ret = 0;
> -               else
> -                       ret = 1;
> -       }
> -       return ret;
> +       /* if clear_extent_bit failed for enomem reasons,
> +        * we can't allow the release to continue.
> +        */
> +       if (ret < 0)
> +               return 0;
> +       return 1;
>  }
>
>  /*
> @@ -2393,6 +2386,9 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
>         struct extent_io_tree *tree = &btrfs_inode->io_tree;
>         struct extent_map_tree *map = &btrfs_inode->extent_tree;
>
> +       if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
> +               return 0;
> +
>         if (gfpflags_allow_blocking(mask) &&
>             page->mapping->host->i_size > SZ_16M) {
>                 u64 len;
> @@ -2413,6 +2409,7 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
>                                 free_extent_map(em);
>                                 break;
>                         }
> +                       /* Check that no part of the em's range is locked */
>                         if (test_range_bit(tree, em->start,
>                                            extent_map_end(em) - 1,
>                                            EXTENT_LOCKED, 0, NULL))
> --
> 2.40.1
>
>
> --
> Goldwyn
Goldwyn Rodrigues June 12, 2023, 11:59 a.m. UTC | #2
On 14:46 09/06, Filipe Manana wrote:
> On Fri, Jun 9, 2023 at 2:36 PM Goldwyn Rodrigues <rgoldwyn@suse.de> wrote:
> >
> > During a page release, the check for a locked extent range is the last
> > step performed, in try_release_extent_state(). It happens after finding
> > the extent maps (ems) and decoupling them from the page.
> > try_release_extent_mapping() also checks whether extents are locked.
> >
> > Under memory pressure, it is better to return early if btrfs cannot
> > release a page, skipping the extent map lookup and decoupling entirely.
> > Check whether the page's range is locked in try_release_extent_mapping()
> > before starting extent map resolution. If it is locked, return zero
> > immediately (the page cannot be freed).
> 
> That is better if the most common case is having the page's range
> currently locked in the inode's io tree.
> Otherwise it's not better: we are doing one more search in the io tree
> than before, and the io trees can get pretty big sometimes.
> 
> Last time I checked, several years ago for a different optimization,
> most of the time, by far, the page's range is not currently locked.
> When it was locked, that accounted for something like 1% or 2% of the
> cases, and it mostly corresponded to writeback having just finished,
> with btrfs_finish_ordered_io() running and having already locked the
> range.
> 
> Have you done any analysis and found the case where the page's range
> is already locked to be the most common one?
> I doubt things have changed radically enough to make it the most common case.
> 
> Do you have details of a workload that gains anything from this
> change, or benchmark results?
> 

So, this happens with lock inversion + the iomap code. Memory pressure
keeps increasing until OOM, and further investigation points to pages
not being freed.

If required, I will include the patch in the series. So, this can be
ignored for now.

Patch

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e5bec73b5991..5b49ef95c653 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2352,31 +2352,24 @@  static int try_release_extent_state(struct extent_io_tree *tree,
 {
 	u64 start = page_offset(page);
 	u64 end = start + PAGE_SIZE - 1;
-	int ret = 1;
-
-	if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
-		ret = 0;
-	} else {
-		u32 clear_bits = ~(EXTENT_NODATASUM |
-				   EXTENT_DELALLOC_NEW | EXTENT_CTLBITS);
+	int ret;
+	u32 clear_bits = ~(EXTENT_NODATASUM |
+			EXTENT_DELALLOC_NEW | EXTENT_CTLBITS);
 
-		/*
-		 * At this point we can safely clear everything except the
-		 * locked bit, the nodatasum bit and the delalloc new bit.
-		 * The delalloc new bit will be cleared by ordered extent
-		 * completion.
-		 */
-		ret = __clear_extent_bit(tree, start, end, clear_bits, NULL, NULL);
+	/*
+	 * At this point we can safely clear everything except the
+	 * locked bit, the nodatasum bit and the delalloc new bit.
+	 * The delalloc new bit will be cleared by ordered extent
+	 * completion.
+	 */
+	ret = __clear_extent_bit(tree, start, end, clear_bits, NULL, NULL);
 
-		/* if clear_extent_bit failed for enomem reasons,
-		 * we can't allow the release to continue.
-		 */
-		if (ret < 0)
-			ret = 0;
-		else
-			ret = 1;
-	}
-	return ret;
+	/* if clear_extent_bit failed for enomem reasons,
+	 * we can't allow the release to continue.
+	 */
+	if (ret < 0)
+		return 0;
+	return 1;
 }
 
 /*
@@ -2393,6 +2386,9 @@  int try_release_extent_mapping(struct page *page, gfp_t mask)
 	struct extent_io_tree *tree = &btrfs_inode->io_tree;
 	struct extent_map_tree *map = &btrfs_inode->extent_tree;
 
+	if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
+		return 0;
+
 	if (gfpflags_allow_blocking(mask) &&
 	    page->mapping->host->i_size > SZ_16M) {
 		u64 len;
@@ -2413,6 +2409,7 @@  int try_release_extent_mapping(struct page *page, gfp_t mask)
 				free_extent_map(em);
 				break;
 			}
+			/* Check that no part of the em's range is locked */
 			if (test_range_bit(tree, em->start,
 					   extent_map_end(em) - 1,
 					   EXTENT_LOCKED, 0, NULL))
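
One detail worth noting in try_release_extent_state() above is the
keep-mask idiom: clear_bits is built by complementing the set of bits
that must survive, so clearing clear_bits removes everything else. A
tiny standalone demonstration follows; the X_* flags are hypothetical
stand-ins for the real EXTENT_* bits, and the &= ~clear_bits step models
what __clear_extent_bit() ultimately does to the range's state.

#include <stdio.h>

#define X_NODATASUM	(1U << 1)	/* hypothetical stand-in flags */
#define X_DELALLOC_NEW	(1U << 2)
#define X_OTHER		(1U << 5)

int main(void)
{
	unsigned int state = X_NODATASUM | X_DELALLOC_NEW | X_OTHER;
	/* Complement the keep set: everything except these gets cleared. */
	unsigned int clear_bits = ~(X_NODATASUM | X_DELALLOC_NEW);

	state &= ~clear_bits;	/* drop the bits named in clear_bits */

	printf("remaining state: 0x%x\n", state);	/* prints 0x6 */
	return 0;
}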