diff mbox series

[01/11] xfs_scrub: separate media error reporting for attribute forks

Message ID 156944737397.300131.4607692740306012565.stgit@magnolia (mailing list archive)
State Superseded
Headers show
Series xfs_scrub: fix IO error reporting | expand

Commit Message

Darrick J. Wong Sept. 25, 2019, 9:36 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Use different functions to warn about media errors that were detected
underlying xattr data because logical offsets for attribute fork extents
have no meaning to users.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/phase6.c |   45 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 7 deletions(-)

Comments

Eric Sandeen Oct. 21, 2019, 4:18 p.m. UTC | #1
On 9/25/19 4:36 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Use different functions to warn about media errors that were detected
> underlying xattr data because logical offsets for attribute fork extents
> have no meaning to users.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  scrub/phase6.c |   45 ++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 38 insertions(+), 7 deletions(-)
> 
> 
> diff --git a/scrub/phase6.c b/scrub/phase6.c
> index 4554af9a..1edd98af 100644
> --- a/scrub/phase6.c
> +++ b/scrub/phase6.c
> @@ -113,7 +113,7 @@ xfs_decode_special_owner(
>  
>  /* Report if this extent overlaps a bad region. */
>  static bool
> -xfs_report_verify_inode_bmap(
> +report_data_loss(
>  	struct scrub_ctx		*ctx,
>  	const char			*descr,
>  	int				fd,
> @@ -142,6 +142,40 @@ _("offset %llu failed read verification."), bmap->bm_offset);
>  	return true;
>  }
>  
> +/* Report if the extended attribute data overlaps a bad region. */

I'd like to see a comment above the typedef for this function
(eventually scrub_bmap_iter_fn), or above the function which uses it
(scrub_iterate_filemaps) in order to explain what the return
values mean and the implication for scanning.

Looking at this w/o a lot of context, 

"Report if the extended attribute data overlaps a bad region."

and nothing but "return true" seems ... odd.  I think what it means
is "print something if found ... and set an error for some problems,
but always continue scanning?"

> +static bool
> +report_attr_loss(
> +	struct scrub_ctx		*ctx,
> +	const char			*descr,
> +	int				fd,
> +	int				whichfork,
> +	struct fsxattr			*fsx,
> +	struct xfs_bmap			*bmap,
> +	void				*arg)
> +{
> +	struct media_verify_state	*vs = arg;
> +	struct bitmap			*bmp = vs->d_bad;
> +
> +	/* Complain about attr fork extents that don't look right. */
> +	if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) {
> +		str_info(ctx, descr,
> +_("found unexpected unwritten/delalloc attr fork extent."));
> +		return true;
> +	}
> +
> +	if (fsx->fsx_xflags & FS_XFLAG_REALTIME) {
> +		str_info(ctx, descr,
> +_("found unexpected realtime attr fork extent."));
> +		return true;
> +	}

so these don't flag any error, and moveon stays true, but

> +
> +	if (bitmap_test(bmp, bmap->bm_physical, bmap->bm_length))
> +		str_error(ctx, descr,
> +_("media error in extended attribute data."));

this actually counts as an error?  OTOH report_data_loss() seems to return
false if it finds something like this, so I'm a little confused about the
difference and the behavior.  Help?

> +
> +	return true;
> +}
> +
>  /* Iterate the extent mappings of a file to report errors. */
>  static bool
>  xfs_report_verify_fd(
> @@ -155,16 +189,13 @@ xfs_report_verify_fd(
>  
>  	/* data fork */
>  	moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_DATA_FORK, &key,
> -			xfs_report_verify_inode_bmap, arg);
> +			report_data_loss, arg);
>  	if (!moveon)
>  		return false;
>  
>  	/* attr fork */
> -	moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_ATTR_FORK, &key,
> -			xfs_report_verify_inode_bmap, arg);
> -	if (!moveon)
> -		return false;
> -	return true;
> +	return xfs_iterate_filemaps(ctx, descr, fd, XFS_ATTR_FORK, &key,
> +			report_attr_loss, arg);
>  }
>  
>  /* Report read verify errors in unlinked (but still open) files. */
>
Darrick J. Wong Oct. 21, 2019, 5:32 p.m. UTC | #2
On Mon, Oct 21, 2019 at 11:18:09AM -0500, Eric Sandeen wrote:
> On 9/25/19 4:36 PM, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Use different functions to warn about media errors that were detected
> > underlying xattr data because logical offsets for attribute fork extents
> > have no meaning to users.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  scrub/phase6.c |   45 ++++++++++++++++++++++++++++++++++++++-------
> >  1 file changed, 38 insertions(+), 7 deletions(-)
> > 
> > 
> > diff --git a/scrub/phase6.c b/scrub/phase6.c
> > index 4554af9a..1edd98af 100644
> > --- a/scrub/phase6.c
> > +++ b/scrub/phase6.c
> > @@ -113,7 +113,7 @@ xfs_decode_special_owner(
> >  
> >  /* Report if this extent overlaps a bad region. */
> >  static bool
> > -xfs_report_verify_inode_bmap(
> > +report_data_loss(
> >  	struct scrub_ctx		*ctx,
> >  	const char			*descr,
> >  	int				fd,
> > @@ -142,6 +142,40 @@ _("offset %llu failed read verification."), bmap->bm_offset);
> >  	return true;
> >  }
> >  
> > +/* Report if the extended attribute data overlaps a bad region. */
> 
> I'd like to see a comment above the typedef for this function
> (eventually scrub_bmap_iter_fn), or above the function which uses it
> (scrub_iterate_filemaps) in order to explain what the return
> values mean and the implication for scanning.

Ok, I'll add some comments for what the return values are.  FWIW I'm
trying to push all the iterator ->fn() things to "0 to keep going; or
nonzero to end the loop and return immediately".

> Looking at this w/o a lot of context, 
> 
> "Report if the extended attribute data overlaps a bad region."
> 
> and nothing but "return true" seems ... odd.  I think what it means
> is "print something if found ... and set an error for some problems,
> but always continue scanning?"

Correct -- the return value here determines whether or not the iteration
loop continues iterating.

> > +static bool
> > +report_attr_loss(
> > +	struct scrub_ctx		*ctx,
> > +	const char			*descr,
> > +	int				fd,
> > +	int				whichfork,
> > +	struct fsxattr			*fsx,
> > +	struct xfs_bmap			*bmap,
> > +	void				*arg)
> > +{
> > +	struct media_verify_state	*vs = arg;
> > +	struct bitmap			*bmp = vs->d_bad;
> > +
> > +	/* Complain about attr fork extents that don't look right. */
> > +	if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) {
> > +		str_info(ctx, descr,
> > +_("found unexpected unwritten/delalloc attr fork extent."));
> > +		return true;
> > +	}
> > +
> > +	if (fsx->fsx_xflags & FS_XFLAG_REALTIME) {
> > +		str_info(ctx, descr,
> > +_("found unexpected realtime attr fork extent."));
> > +		return true;

..so this hunk complains about seeing things in the metadata that
shouldn't be there.  That isn't a runtime error, so we want to continue
iterating.

The "remove moveon aliens" series later on will clean all this up.

Hmm, why /is/ that a str_info()?  I think my reasoning is that the
attr fork checker in phase 3 should already have complained about this,
so we don't need to str_error() it again.

> > +	}
> 
> so these don't flag any error, and moveon stays true, but
> 
> > +
> > +	if (bitmap_test(bmp, bmap->bm_physical, bmap->bm_length))
> > +		str_error(ctx, descr,
> > +_("media error in extended attribute data."));
> 
> this actually counts as an error?  OTOH report_data_loss() seems to return
> false if it finds something like this, so I'm a little confused about the
> difference and the behavior.  Help?

<nod> For now, it's marked as a filesystem corruption, since we've lost
data.  A(nother) subsequent series changes this str_error call to
str_unfixable so that we can call this what it is -- we lost user data
and there's nothing we can do about it.

Either way, the data's gone but we /can/ keep iterating the bad blocks
list so we return true here.

--D

> 
> > +
> > +	return true;
> > +}
> > +
> >  /* Iterate the extent mappings of a file to report errors. */
> >  static bool
> >  xfs_report_verify_fd(
> > @@ -155,16 +189,13 @@ xfs_report_verify_fd(
> >  
> >  	/* data fork */
> >  	moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_DATA_FORK, &key,
> > -			xfs_report_verify_inode_bmap, arg);
> > +			report_data_loss, arg);
> >  	if (!moveon)
> >  		return false;
> >  
> >  	/* attr fork */
> > -	moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_ATTR_FORK, &key,
> > -			xfs_report_verify_inode_bmap, arg);
> > -	if (!moveon)
> > -		return false;
> > -	return true;
> > +	return xfs_iterate_filemaps(ctx, descr, fd, XFS_ATTR_FORK, &key,
> > +			report_attr_loss, arg);
> >  }
> >  
> >  /* Report read verify errors in unlinked (but still open) files. */
> >
diff mbox series

Patch

diff --git a/scrub/phase6.c b/scrub/phase6.c
index 4554af9a..1edd98af 100644
--- a/scrub/phase6.c
+++ b/scrub/phase6.c
@@ -113,7 +113,7 @@  xfs_decode_special_owner(
 
 /* Report if this extent overlaps a bad region. */
 static bool
-xfs_report_verify_inode_bmap(
+report_data_loss(
 	struct scrub_ctx		*ctx,
 	const char			*descr,
 	int				fd,
@@ -142,6 +142,40 @@  _("offset %llu failed read verification."), bmap->bm_offset);
 	return true;
 }
 
+/* Report if the extended attribute data overlaps a bad region. */
+static bool
+report_attr_loss(
+	struct scrub_ctx		*ctx,
+	const char			*descr,
+	int				fd,
+	int				whichfork,
+	struct fsxattr			*fsx,
+	struct xfs_bmap			*bmap,
+	void				*arg)
+{
+	struct media_verify_state	*vs = arg;
+	struct bitmap			*bmp = vs->d_bad;
+
+	/* Complain about attr fork extents that don't look right. */
+	if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) {
+		str_info(ctx, descr,
+_("found unexpected unwritten/delalloc attr fork extent."));
+		return true;
+	}
+
+	if (fsx->fsx_xflags & FS_XFLAG_REALTIME) {
+		str_info(ctx, descr,
+_("found unexpected realtime attr fork extent."));
+		return true;
+	}
+
+	if (bitmap_test(bmp, bmap->bm_physical, bmap->bm_length))
+		str_error(ctx, descr,
+_("media error in extended attribute data."));
+
+	return true;
+}
+
 /* Iterate the extent mappings of a file to report errors. */
 static bool
 xfs_report_verify_fd(
@@ -155,16 +189,13 @@  xfs_report_verify_fd(
 
 	/* data fork */
 	moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_DATA_FORK, &key,
-			xfs_report_verify_inode_bmap, arg);
+			report_data_loss, arg);
 	if (!moveon)
 		return false;
 
 	/* attr fork */
-	moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_ATTR_FORK, &key,
-			xfs_report_verify_inode_bmap, arg);
-	if (!moveon)
-		return false;
-	return true;
+	return xfs_iterate_filemaps(ctx, descr, fd, XFS_ATTR_FORK, &key,
+			report_attr_loss, arg);
 }
 
 /* Report read verify errors in unlinked (but still open) files. */