diff mbox series

[1/6] xfs: remove more ondisk directory corruption asserts

Message ID 156158199994.495944.4584531696054696463.stgit@magnolia (mailing list archive)
State Superseded
Headers show
Series xfs: scrub-related fixes | expand

Commit Message

Darrick J. Wong June 26, 2019, 8:46 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Continue our game of replacing ASSERTs for corrupt ondisk metadata with
EFSCORRUPTED returns.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_da_btree.c  |   19 ++++++++++++-------
 fs/xfs/libxfs/xfs_dir2_node.c |    3 ++-
 2 files changed, 14 insertions(+), 8 deletions(-)

Comments

Brian Foster July 5, 2019, 2:49 p.m. UTC | #1
On Wed, Jun 26, 2019 at 01:46:40PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Continue our game of replacing ASSERTs for corrupt ondisk metadata with
> EFSCORRUPTED returns.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_da_btree.c  |   19 ++++++++++++-------
>  fs/xfs/libxfs/xfs_dir2_node.c |    3 ++-
>  2 files changed, 14 insertions(+), 8 deletions(-)
> 
> 
...
> diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
> index 16731d2d684b..f7f3fb458019 100644
> --- a/fs/xfs/libxfs/xfs_dir2_node.c
> +++ b/fs/xfs/libxfs/xfs_dir2_node.c
> @@ -743,7 +743,8 @@ xfs_dir2_leafn_lookup_for_entry(
>  	ents = dp->d_ops->leaf_ents_p(leaf);
>  
>  	xfs_dir3_leaf_check(dp, bp);
> -	ASSERT(leafhdr.count > 0);
> +	if (leafhdr.count <= 0)
> +		return -EFSCORRUPTED;

This error return bubbles up to xfs_dir2_leafn_lookup_int() and
xfs_da3_node_lookup_int(). The latter has a direct return value as well
as a *result return parameter, which unconditionally carries the return
value from xfs_dir2_leafn_lookup_int(). xfs_da3_node_lookup_int() has
multiple callers, but a quick look at one (xfs_attr_node_addname())
suggests we might not handle corruption errors properly via the *result
parameter. Perhaps we also need to fix up xfs_da3_node_lookup_int() to
return particular errors directly?

Brian

>  
>  	/*
>  	 * Look up the hash value in the leaf entries.
>
Darrick J. Wong July 5, 2019, 5:03 p.m. UTC | #2
On Fri, Jul 05, 2019 at 10:49:05AM -0400, Brian Foster wrote:
> On Wed, Jun 26, 2019 at 01:46:40PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Continue our game of replacing ASSERTs for corrupt ondisk metadata with
> > EFSCORRUPTED returns.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/xfs/libxfs/xfs_da_btree.c  |   19 ++++++++++++-------
> >  fs/xfs/libxfs/xfs_dir2_node.c |    3 ++-
> >  2 files changed, 14 insertions(+), 8 deletions(-)
> > 
> > 
> ...
> > diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
> > index 16731d2d684b..f7f3fb458019 100644
> > --- a/fs/xfs/libxfs/xfs_dir2_node.c
> > +++ b/fs/xfs/libxfs/xfs_dir2_node.c
> > @@ -743,7 +743,8 @@ xfs_dir2_leafn_lookup_for_entry(
> >  	ents = dp->d_ops->leaf_ents_p(leaf);
> >  
> >  	xfs_dir3_leaf_check(dp, bp);
> > -	ASSERT(leafhdr.count > 0);
> > +	if (leafhdr.count <= 0)
> > +		return -EFSCORRUPTED;
> 
> This error return bubbles up to xfs_dir2_leafn_lookup_int() and
> xfs_da3_node_lookup_int(). The latter has a direct return value as well
> as a *result return parameter, which unconditionally carries the return
> value from xfs_dir2_leafn_lookup_int(). xfs_da3_node_lookup_int() has
> multiple callers, but a quick look at one (xfs_attr_node_addname())
> suggests we might not handle corruption errors properly via the *result
> parameter. Perhaps we also need to fix up xfs_da3_node_lookup_int() to
> return particular errors directly?

It would be a good idea to clean up the whole return value/*retval mess
in that function (and xfs_da3_path_shift where *retval came from) but
that quickly turned into a bigger cleanup of magic values and dual
returns, particularly since the dabtree shrinking code turns the
_path_shift *retval into yet another series of magic int numbers...

...so in the meantime this at least fixes the asserts I see when running
fuzz testing.  I'll look at the broader cleanup for 5.4.

--D

> 
> Brian
> 
> >  
> >  	/*
> >  	 * Look up the hash value in the leaf entries.
> >
Brian Foster July 5, 2019, 5:27 p.m. UTC | #3
On Fri, Jul 05, 2019 at 10:03:45AM -0700, Darrick J. Wong wrote:
> On Fri, Jul 05, 2019 at 10:49:05AM -0400, Brian Foster wrote:
> > On Wed, Jun 26, 2019 at 01:46:40PM -0700, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > 
> > > Continue our game of replacing ASSERTs for corrupt ondisk metadata with
> > > EFSCORRUPTED returns.
> > > 
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > >  fs/xfs/libxfs/xfs_da_btree.c  |   19 ++++++++++++-------
> > >  fs/xfs/libxfs/xfs_dir2_node.c |    3 ++-
> > >  2 files changed, 14 insertions(+), 8 deletions(-)
> > > 
> > > 
> > ...
> > > diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
> > > index 16731d2d684b..f7f3fb458019 100644
> > > --- a/fs/xfs/libxfs/xfs_dir2_node.c
> > > +++ b/fs/xfs/libxfs/xfs_dir2_node.c
> > > @@ -743,7 +743,8 @@ xfs_dir2_leafn_lookup_for_entry(
> > >  	ents = dp->d_ops->leaf_ents_p(leaf);
> > >  
> > >  	xfs_dir3_leaf_check(dp, bp);
> > > -	ASSERT(leafhdr.count > 0);
> > > +	if (leafhdr.count <= 0)
> > > +		return -EFSCORRUPTED;
> > 
> > This error return bubbles up to xfs_dir2_leafn_lookup_int() and
> > xfs_da3_node_lookup_int(). The latter has a direct return value as well
> > as a *result return parameter, which unconditionally carries the return
> > value from xfs_dir2_leafn_lookup_int(). xfs_da3_node_lookup_int() has
> > multiple callers, but a quick look at one (xfs_attr_node_addname())
> > suggests we might not handle corruption errors properly via the *result
> > parameter. Perhaps we also need to fix up xfs_da3_node_lookup_int() to
> > return particular errors directly?
> 
> It would be a good idea to clean up the whole return value/*retval mess
> in that function (and xfs_da3_path_shift where *retval came from) but
> that quickly turned into a bigger cleanup of magic values and dual
> returns, particularly since the dabtree shrinking code turns the
> _path_shift *retval into yet another series of magic int numbers...
> 

Hm.. sure, but in the meantime couldn't this patch just insert a check
for the obvious -EFSCORRUPTED error in xfs_da3_node_lookup_int() and
return that directly as opposed to via *result? That function already
returns -EFSCORRUPTED directly in other places.

As it is, this hunk is kind of strange because it inserts an error
return in a place where it seems more likely than not to be either
ignored or misinterpreted if it ever occurs. I'm fine with putting this
off as a broader cleanup, but in that case I'd prefer to just drop this
hunk for now and change the assert over when we know the return will be
handled correctly.

Either option seems reasonable (and FWIW the other bits in this patch
look fine to me)...

Brian

> ...so in the meantime this at least fixes the asserts I see when running
> fuzz testing.  I'll look at the broader cleanup for 5.4.
> 
> --D
> 
> > 
> > Brian
> > 
> > >  
> > >  	/*
> > >  	 * Look up the hash value in the leaf entries.
> > >
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index e2737e2ac2ae..c914ae763113 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -493,10 +493,8 @@  xfs_da3_split(
 	ASSERT(state->path.active == 0);
 	oldblk = &state->path.blk[0];
 	error = xfs_da3_root_split(state, oldblk, addblk);
-	if (error) {
-		addblk->bp = NULL;
-		return error;	/* GROT: dir is inconsistent */
-	}
+	if (error)
+		goto out;
 
 	/*
 	 * Update pointers to the node which used to be block 0 and just got
@@ -511,7 +509,10 @@  xfs_da3_split(
 	 */
 	node = oldblk->bp->b_addr;
 	if (node->hdr.info.forw) {
-		ASSERT(be32_to_cpu(node->hdr.info.forw) == addblk->blkno);
+		if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) {
+			error = -EFSCORRUPTED;
+			goto out;
+		}
 		node = addblk->bp->b_addr;
 		node->hdr.info.back = cpu_to_be32(oldblk->blkno);
 		xfs_trans_log_buf(state->args->trans, addblk->bp,
@@ -520,15 +521,19 @@  xfs_da3_split(
 	}
 	node = oldblk->bp->b_addr;
 	if (node->hdr.info.back) {
-		ASSERT(be32_to_cpu(node->hdr.info.back) == addblk->blkno);
+		if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) {
+			error = -EFSCORRUPTED;
+			goto out;
+		}
 		node = addblk->bp->b_addr;
 		node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
 		xfs_trans_log_buf(state->args->trans, addblk->bp,
 				  XFS_DA_LOGRANGE(node, &node->hdr.info,
 				  sizeof(node->hdr.info)));
 	}
+out:
 	addblk->bp = NULL;
-	return 0;
+	return error;
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 16731d2d684b..f7f3fb458019 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -743,7 +743,8 @@  xfs_dir2_leafn_lookup_for_entry(
 	ents = dp->d_ops->leaf_ents_p(leaf);
 
 	xfs_dir3_leaf_check(dp, bp);
-	ASSERT(leafhdr.count > 0);
+	if (leafhdr.count <= 0)
+		return -EFSCORRUPTED;
 
 	/*
 	 * Look up the hash value in the leaf entries.