
btrfs: don't BUG_ON with ENOMEM in __clear_extent_bit

Message ID 20180413202855.10453-1-josef@toxicpanda.com (mailing list archive)
State New, archived

Commit Message

Josef Bacik April 13, 2018, 8:28 p.m. UTC
From: Josef Bacik <jbacik@fb.com>

Since we're allocating under atomic context we can very easily fail with
ENOMEM, so if that's the case and we can block, loop around and try to
allocate the prealloc while not under the lock.

We also saw this happen during try_to_release_page in production, in
which case it's completely valid to return ENOMEM so we can tell
try_to_release_page that we can't release this page.

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 fs/btrfs/extent_io.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)
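
For context, the fix follows the classic drop-the-lock-and-retry
preallocation pattern. The following is a minimal standalone sketch of
that pattern, not the actual extent_io code; the names (my_tree,
my_state, alloc_state, alloc_state_atomic) are made up for illustration:

#include <linux/gfp.h>
#include <linux/spinlock.h>

struct my_state;
struct my_tree { spinlock_t lock; };

/* Hypothetical helpers: the first may sleep if the mask allows it, the
 * second falls back to GFP_ATOMIC when no prealloc exists yet. */
static struct my_state *alloc_state(gfp_t mask);
static struct my_state *alloc_state_atomic(struct my_state *prealloc);

static int clear_bits_sketch(struct my_tree *tree, gfp_t mask)
{
	struct my_state *prealloc = NULL;
	bool need_prealloc = false;

again:
	if (gfpflags_allow_blocking(mask)) {
		prealloc = alloc_state(mask);	/* may sleep */
		/* A previous pass proved we need one; give up for real. */
		if (!prealloc && need_prealloc)
			return -ENOMEM;
		need_prealloc = false;
	}

	spin_lock(&tree->lock);
	/* ... walk the tree; discover we must split a state ... */
	prealloc = alloc_state_atomic(prealloc);
	if (!prealloc) {
		if (gfpflags_allow_blocking(mask)) {
			/* Retry the allocation where we can block. */
			need_prealloc = true;
			spin_unlock(&tree->lock);
			goto again;
		}
		spin_unlock(&tree->lock);
		return -ENOMEM;	/* non-blocking caller sees the failure */
	}
	/* ... split/insert using prealloc ... */
	spin_unlock(&tree->lock);
	return 0;
}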

Comments

Liu Bo April 13, 2018, 11:52 p.m. UTC | #1
On Fri, Apr 13, 2018 at 1:28 PM, Josef Bacik <josef@toxicpanda.com> wrote:
> From: Josef Bacik <jbacik@fb.com>
>
> Since we're allocating under atomic context we can very easily fail with
> ENOMEM, so if that's the case and we can block, loop around and try to
> allocate the prealloc while not under the lock.
>
> We also saw this happen during try_to_release_page in production, in
> which case it's completely valid to return ENOMEM so we can tell
> try_to_release_page that we can't release this page.
>
> Signed-off-by: Josef Bacik <jbacik@fb.com>
> ---
>  fs/btrfs/extent_io.c | 28 ++++++++++++++++++++++++----
>  1 file changed, 24 insertions(+), 4 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index fb32394fd830..1054dc0158b5 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -593,8 +593,9 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
>         struct extent_state *prealloc = NULL;
>         struct rb_node *node;
>         u64 last_end;
> -       int err;
> +       int err = 0;
>         int clear = 0;
> +       bool need_prealloc = false;
>
>         btrfs_debug_check_extent_io_range(tree, start, end);
>
> @@ -617,6 +618,9 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
>                  * If we end up needing a new extent state we allocate it later.
>                  */
>                 prealloc = alloc_extent_state(mask);
> +               if (!prealloc && need_prealloc)
> +                       return -ENOMEM;
> +               need_prealloc = false;
>         }
>
>         spin_lock(&tree->lock);
> @@ -676,7 +680,15 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
>
>         if (state->start < start) {
>                 prealloc = alloc_extent_state_atomic(prealloc);
> -               BUG_ON(!prealloc);
> +               if (!prealloc) {
> +                       if (gfpflags_allow_blocking(mask)) {
> +                               need_prealloc = true;
> +                               spin_unlock(&tree->lock);
> +                               goto again;

Could we simply 'goto search_again;' ?

thanks,
liubo

> +                       }
> +                       err = -ENOMEM;
> +                       goto out;
> +               }
>                 err = split_state(tree, state, prealloc, start);
>                 if (err)
>                         extent_io_tree_panic(tree, err);
> @@ -699,7 +711,15 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
>          */
>         if (state->start <= end && state->end > end) {
>                 prealloc = alloc_extent_state_atomic(prealloc);
> -               BUG_ON(!prealloc);
> +               if (!prealloc) {
> +                       if (gfpflags_allow_blocking(mask)) {
> +                               need_prealloc = true;
> +                               spin_unlock(&tree->lock);
> +                               goto again;
> +                       }
> +                       err = -ENOMEM;
> +                       goto out;
> +               }
>                 err = split_state(tree, state, prealloc, end + 1);
>                 if (err)
>                         extent_io_tree_panic(tree, err);
> @@ -734,7 +754,7 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
>         if (prealloc)
>                 free_extent_state(prealloc);
>
> -       return 0;
> +       return err;
>
>  }
>
> --
> 2.14.3
>
Josef Bacik April 14, 2018, midnight UTC | #2
On Fri, Apr 13, 2018 at 04:52:25PM -0700, Liu Bo wrote:
> On Fri, Apr 13, 2018 at 1:28 PM, Josef Bacik <josef@toxicpanda.com> wrote:
> > From: Josef Bacik <jbacik@fb.com>
> >
> > Since we're allocating under atomic context we can very easily fail with
> > ENOMEM, so if that's the case and we can block, loop around and try to
> > allocate the prealloc while not under the lock.
> >
> > We also saw this happen during try_to_release_page in production, in
> > which case it's completely valid to return ENOMEM so we can tell
> > try_to_release_page that we can't release this page.
> >
> > Signed-off-by: Josef Bacik <jbacik@fb.com>
> > ---
> >  fs/btrfs/extent_io.c | 28 ++++++++++++++++++++++++----
> >  1 file changed, 24 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> > index fb32394fd830..1054dc0158b5 100644
> > --- a/fs/btrfs/extent_io.c
> > +++ b/fs/btrfs/extent_io.c
> > @@ -593,8 +593,9 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
> >         struct extent_state *prealloc = NULL;
> >         struct rb_node *node;
> >         u64 last_end;
> > -       int err;
> > +       int err = 0;
> >         int clear = 0;
> > +       bool need_prealloc = false;
> >
> >         btrfs_debug_check_extent_io_range(tree, start, end);
> >
> > @@ -617,6 +618,9 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
> >                  * If we end up needing a new extent state we allocate it later.
> >                  */
> >                 prealloc = alloc_extent_state(mask);
> > +               if (!prealloc && need_prealloc)
> > +                       return -ENOMEM;
> > +               need_prealloc = false;
> >         }
> >
> >         spin_lock(&tree->lock);
> > @@ -676,7 +680,15 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
> >
> >         if (state->start < start) {
> >                 prealloc = alloc_extent_state_atomic(prealloc);
> > -               BUG_ON(!prealloc);
> > +               if (!prealloc) {
> > +                       if (gfpflags_allow_blocking(mask)) {
> > +                               need_prealloc = true;
> > +                               spin_unlock(&tree->lock);
> > +                               goto again;
> 
> Could we simply 'goto search_again;' ?
>

We could, but I have another, more involved patch that reworks this logic
and kills search_again, so I'm leaving it this way for now as an isolated
fix.  Thanks,

Josef 
David Sterba April 14, 2018, 12:49 a.m. UTC | #3
On Fri, Apr 13, 2018 at 04:28:55PM -0400, Josef Bacik wrote:
> From: Josef Bacik <jbacik@fb.com>
> 
> Since we're allocating under atomic context we can very easily fail with
> ENOMEM, so if that's the case and we can block, loop around and try to
> allocate the prealloc while not under the lock.
> 
> We also saw this happen during try_to_release_page in production, in
> which case it's completely valid to return ENOMEM so we can tell
> try_to_release_page that we can't release this page.
> 
> Signed-off-by: Josef Bacik <jbacik@fb.com>

Exactly same patch as

https://patchwork.kernel.org/patch/10053319/

so the same comment applies.
Josef Bacik April 16, 2018, 3:12 p.m. UTC | #4
On Sat, Apr 14, 2018 at 02:49:52AM +0200, David Sterba wrote:
> On Fri, Apr 13, 2018 at 04:28:55PM -0400, Josef Bacik wrote:
> > From: Josef Bacik <jbacik@fb.com>
> > 
> > Since we're allocating under atomic context we can very easily fail with
> > ENOMEM, so if that's the case and we can block, loop around and try to
> > allocate the prealloc while not under the lock.
> > 
> > We also saw this happen during try_to_release_page in production, in
> > which case it's completely valid to return ENOMEM so we can tell
> > try_to_release_page that we can't release this page.
> > 
> > Signed-off-by: Josef Bacik <jbacik@fb.com>
> 
> Exactly same patch as
> 
> https://patchwork.kernel.org/patch/10053319/
> 
> so the same comment applies.

Moving the BUG_ON just makes the same problem happen again.
try_to_release_page() will call with whatever arbitrary gfp mask it has, so if
it's GFP_ATOMIC we _want_ it to return -ENOMEM.  Every other caller that
doesn't check the return value uses a gfp mask that will allow retrying the
allocation.  If those allocations fail it's because the box is so far out of
memory that we'll have larger problems than not handling this case properly.
Thanks,

Josef
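
To make the gfp-mask point above concrete, here is a hedged sketch of a
releasepage-style hook; this is not the actual btrfs code, and
clear_page_state_sketch is a hypothetical stand-in for the extent-bit
clearing path.  The releasepage convention is to return 1 if the page was
released and 0 if not:

#include <linux/gfp.h>
#include <linux/mm_types.h>

/* Hypothetical helper standing in for the extent-state clearing path;
 * with the patch it can return -ENOMEM for non-blocking masks. */
static int clear_page_state_sketch(struct page *page, gfp_t mask);

static int example_releasepage(struct page *page, gfp_t mask)
{
	int ret = clear_page_state_sketch(page, mask);

	if (ret == -ENOMEM) {
		/*
		 * A non-blocking mask (e.g. GFP_ATOMIC) can't retry the
		 * allocation, so report "not released" instead of
		 * BUG()ing.
		 */
		return 0;
	}
	return 1;
}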

Patch

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fb32394fd830..1054dc0158b5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -593,8 +593,9 @@  int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	struct extent_state *prealloc = NULL;
 	struct rb_node *node;
 	u64 last_end;
-	int err;
+	int err = 0;
 	int clear = 0;
+	bool need_prealloc = false;
 
 	btrfs_debug_check_extent_io_range(tree, start, end);
 
@@ -617,6 +618,9 @@  int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 		 * If we end up needing a new extent state we allocate it later.
 		 */
 		prealloc = alloc_extent_state(mask);
+		if (!prealloc && need_prealloc)
+			return -ENOMEM;
+		need_prealloc = false;
 	}
 
 	spin_lock(&tree->lock);
@@ -676,7 +680,15 @@  int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 
 	if (state->start < start) {
 		prealloc = alloc_extent_state_atomic(prealloc);
-		BUG_ON(!prealloc);
+		if (!prealloc) {
+			if (gfpflags_allow_blocking(mask)) {
+				need_prealloc = true;
+				spin_unlock(&tree->lock);
+				goto again;
+			}
+			err = -ENOMEM;
+			goto out;
+		}
 		err = split_state(tree, state, prealloc, start);
 		if (err)
 			extent_io_tree_panic(tree, err);
@@ -699,7 +711,15 @@  int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	 */
 	if (state->start <= end && state->end > end) {
 		prealloc = alloc_extent_state_atomic(prealloc);
-		BUG_ON(!prealloc);
+		if (!prealloc) {
+			if (gfpflags_allow_blocking(mask)) {
+				need_prealloc = true;
+				spin_unlock(&tree->lock);
+				goto again;
+			}
+			err = -ENOMEM;
+			goto out;
+		}
 		err = split_state(tree, state, prealloc, end + 1);
 		if (err)
 			extent_io_tree_panic(tree, err);
@@ -734,7 +754,7 @@  int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	if (prealloc)
 		free_extent_state(prealloc);
 
-	return 0;
+	return err;
 
 }
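
The retry in the patch is gated on gfpflags_allow_blocking(), which tests
whether __GFP_DIRECT_RECLAIM is set in the mask.  A small sketch of the
two cases the patch distinguishes:

#include <linux/gfp.h>

static bool can_retry_sketch(gfp_t mask)
{
	if (gfpflags_allow_blocking(mask)) {
		/* GFP_KERNEL, GFP_NOFS, ...: safe to drop the spinlock,
		 * sleep in the allocator, and loop back to 'again'. */
		return true;
	}
	/* GFP_ATOMIC, GFP_NOWAIT: cannot sleep, so with this patch the
	 * -ENOMEM propagates to the caller instead of hitting BUG_ON(). */
	return false;
}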