diff mbox series

[2/2] xfs_scrub: check summary counters

Message ID 156685446969.2839983.12626550627146659080.stgit@magnolia (mailing list archive)
State Superseded
Headers show
Series xfsprogs: scrub filesystem summary counters | expand

Commit Message

Darrick J. Wong Aug. 26, 2019, 9:21 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Teach scrub to ask the kernel to check and repair summary counters
during phase 7.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/phase4.c |   12 ++++++++++++
 scrub/phase7.c |   14 ++++++++++++++
 scrub/repair.c |    3 +++
 scrub/scrub.c  |   13 +++++++++++++
 scrub/scrub.h  |    2 ++
 5 files changed, 44 insertions(+)

Comments

Dave Chinner Aug. 27, 2019, 5:27 a.m. UTC | #1
On Mon, Aug 26, 2019 at 02:21:09PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Teach scrub to ask the kernel to check and repair summary counters
> during phase 7.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  scrub/phase4.c |   12 ++++++++++++
>  scrub/phase7.c |   14 ++++++++++++++
>  scrub/repair.c |    3 +++
>  scrub/scrub.c  |   13 +++++++++++++
>  scrub/scrub.h  |    2 ++
>  5 files changed, 44 insertions(+)
> 
> 
> diff --git a/scrub/phase4.c b/scrub/phase4.c
> index 49f00723..c4da4852 100644
> --- a/scrub/phase4.c
> +++ b/scrub/phase4.c
> @@ -107,6 +107,18 @@ bool
>  xfs_repair_fs(
>  	struct scrub_ctx		*ctx)
>  {
> +	bool				moveon;
> +
> +	/*
> +	 * Check the summary counters early.  Normally we do this during phase
> +	 * seven, but some of the cross-referencing requires fairly-accurate
> +	 * counters, so counter repairs have to be put on the list now so that
> +	 * they get fixed before we stop retrying unfixed metadata repairs.
> +	 */
> +	moveon = xfs_scrub_fs_summary(ctx, &ctx->action_lists[0]);
> +	if (!moveon)
> +		return false;

"moveon" doesn't really make sense to me here. i.e. I can't tell if
"moveon = true" meant it failed or not, so I hav eno idea what the
intent of the code here is, and the comment doesn't explain it at
all, either.

> +
>  	return xfs_process_action_items(ctx);
>  }
>  
> diff --git a/scrub/phase7.c b/scrub/phase7.c
> index 1c459dfc..b3156fdf 100644
> --- a/scrub/phase7.c
> +++ b/scrub/phase7.c
> @@ -7,12 +7,15 @@
>  #include <stdint.h>
>  #include <stdlib.h>
>  #include <sys/statvfs.h>
> +#include "list.h"
>  #include "path.h"
>  #include "ptvar.h"
>  #include "xfs_scrub.h"
>  #include "common.h"
> +#include "scrub.h"
>  #include "fscounters.h"
>  #include "spacemap.h"
> +#include "repair.h"
>  
>  /* Phase 7: Check summary counters. */
>  
> @@ -91,6 +94,7 @@ xfs_scan_summary(
>  	struct scrub_ctx	*ctx)
>  {
>  	struct summary_counts	totalcount = {0};
> +	struct xfs_action_list	alist;
>  	struct ptvar		*ptvar;
>  	unsigned long long	used_data;
>  	unsigned long long	used_rt;
> @@ -110,6 +114,16 @@ xfs_scan_summary(
>  	int			ip;
>  	int			error;
>  
> +	/* Check and fix the fs summary counters. */
> +	xfs_action_list_init(&alist);
> +	moveon = xfs_scrub_fs_summary(ctx, &alist);
> +	if (!moveon)
> +		return false;
> +	moveon = xfs_action_list_process(ctx, ctx->mnt.fd, &alist,
> +			ALP_COMPLAIN_IF_UNFIXED | ALP_NOPROGRESS);
> +	if (!moveon)
> +		return moveon;

same here - "moveon" doesn't tell me if we're returning because the
scrub failed or passed....

> +
>  	/* Flush everything out to disk before we start counting. */
>  	error = syncfs(ctx->mnt.fd);
>  	if (error) {
> diff --git a/scrub/repair.c b/scrub/repair.c
> index 45450d8c..54639752 100644
> --- a/scrub/repair.c
> +++ b/scrub/repair.c
> @@ -84,6 +84,9 @@ xfs_action_item_priority(
>  	case XFS_SCRUB_TYPE_GQUOTA:
>  	case XFS_SCRUB_TYPE_PQUOTA:
>  		return PRIO(aitem, XFS_SCRUB_TYPE_UQUOTA);
> +	case XFS_SCRUB_TYPE_FSCOUNTERS:
> +		/* This should always go after AG headers no matter what. */
> +		return PRIO(aitem, INT_MAX);
>  	}
>  	abort();
>  }
> diff --git a/scrub/scrub.c b/scrub/scrub.c
> index 136ed529..a428b524 100644
> --- a/scrub/scrub.c
> +++ b/scrub/scrub.c
> @@ -28,6 +28,7 @@ enum scrub_type {
>  	ST_PERAG,	/* per-AG metadata */
>  	ST_FS,		/* per-FS metadata */
>  	ST_INODE,	/* per-inode metadata */
> +	ST_SUMMARY,	/* summary counters (phase 7) */
>  };

Hmmm - the previous patch used ST_FS for the summary counters.

Oh, wait, io/scrub.c has a duplicate scrub_type enum defined, and
the table looks largely the same, too. Except now the summary type
is different.

/me looks a bit closer...

Oh, the enum scrub_type definitions shadow the kernel enum
xchk_type, but have different values for the same names. I'm
just confused now...

Cheers,

Dave.
Darrick J. Wong Aug. 29, 2019, 3:15 a.m. UTC | #2
On Tue, Aug 27, 2019 at 03:27:26PM +1000, Dave Chinner wrote:
> On Mon, Aug 26, 2019 at 02:21:09PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Teach scrub to ask the kernel to check and repair summary counters
> > during phase 7.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  scrub/phase4.c |   12 ++++++++++++
> >  scrub/phase7.c |   14 ++++++++++++++
> >  scrub/repair.c |    3 +++
> >  scrub/scrub.c  |   13 +++++++++++++
> >  scrub/scrub.h  |    2 ++
> >  5 files changed, 44 insertions(+)
> > 
> > 
> > diff --git a/scrub/phase4.c b/scrub/phase4.c
> > index 49f00723..c4da4852 100644
> > --- a/scrub/phase4.c
> > +++ b/scrub/phase4.c
> > @@ -107,6 +107,18 @@ bool
> >  xfs_repair_fs(
> >  	struct scrub_ctx		*ctx)
> >  {
> > +	bool				moveon;
> > +
> > +	/*
> > +	 * Check the summary counters early.  Normally we do this during phase
> > +	 * seven, but some of the cross-referencing requires fairly-accurate
> > +	 * counters, so counter repairs have to be put on the list now so that
> > +	 * they get fixed before we stop retrying unfixed metadata repairs.
> > +	 */
> > +	moveon = xfs_scrub_fs_summary(ctx, &ctx->action_lists[0]);
> > +	if (!moveon)
> > +		return false;
> 
> "moveon" doesn't really make sense to me here. i.e. I can't tell if
> "moveon = true" meant it failed or not, so I hav eno idea what the
> intent of the code here is, and the comment doesn't explain it at
> all, either.

FWIW I created Yet Another Cleanup Series that replaces all the moveon
things with regular old "returns 0 for success, nonzero for error GTFO"
semantics.  I'll tack that on the end of all the stuff I've sent so far.

--D

> > +
> >  	return xfs_process_action_items(ctx);
> >  }
> >  
> > diff --git a/scrub/phase7.c b/scrub/phase7.c
> > index 1c459dfc..b3156fdf 100644
> > --- a/scrub/phase7.c
> > +++ b/scrub/phase7.c
> > @@ -7,12 +7,15 @@
> >  #include <stdint.h>
> >  #include <stdlib.h>
> >  #include <sys/statvfs.h>
> > +#include "list.h"
> >  #include "path.h"
> >  #include "ptvar.h"
> >  #include "xfs_scrub.h"
> >  #include "common.h"
> > +#include "scrub.h"
> >  #include "fscounters.h"
> >  #include "spacemap.h"
> > +#include "repair.h"
> >  
> >  /* Phase 7: Check summary counters. */
> >  
> > @@ -91,6 +94,7 @@ xfs_scan_summary(
> >  	struct scrub_ctx	*ctx)
> >  {
> >  	struct summary_counts	totalcount = {0};
> > +	struct xfs_action_list	alist;
> >  	struct ptvar		*ptvar;
> >  	unsigned long long	used_data;
> >  	unsigned long long	used_rt;
> > @@ -110,6 +114,16 @@ xfs_scan_summary(
> >  	int			ip;
> >  	int			error;
> >  
> > +	/* Check and fix the fs summary counters. */
> > +	xfs_action_list_init(&alist);
> > +	moveon = xfs_scrub_fs_summary(ctx, &alist);
> > +	if (!moveon)
> > +		return false;
> > +	moveon = xfs_action_list_process(ctx, ctx->mnt.fd, &alist,
> > +			ALP_COMPLAIN_IF_UNFIXED | ALP_NOPROGRESS);
> > +	if (!moveon)
> > +		return moveon;
> 
> same here - "moveon" doesn't tell me if we're returning because the
> scrub failed or passed....
> 
> > +
> >  	/* Flush everything out to disk before we start counting. */
> >  	error = syncfs(ctx->mnt.fd);
> >  	if (error) {
> > diff --git a/scrub/repair.c b/scrub/repair.c
> > index 45450d8c..54639752 100644
> > --- a/scrub/repair.c
> > +++ b/scrub/repair.c
> > @@ -84,6 +84,9 @@ xfs_action_item_priority(
> >  	case XFS_SCRUB_TYPE_GQUOTA:
> >  	case XFS_SCRUB_TYPE_PQUOTA:
> >  		return PRIO(aitem, XFS_SCRUB_TYPE_UQUOTA);
> > +	case XFS_SCRUB_TYPE_FSCOUNTERS:
> > +		/* This should always go after AG headers no matter what. */
> > +		return PRIO(aitem, INT_MAX);
> >  	}
> >  	abort();
> >  }
> > diff --git a/scrub/scrub.c b/scrub/scrub.c
> > index 136ed529..a428b524 100644
> > --- a/scrub/scrub.c
> > +++ b/scrub/scrub.c
> > @@ -28,6 +28,7 @@ enum scrub_type {
> >  	ST_PERAG,	/* per-AG metadata */
> >  	ST_FS,		/* per-FS metadata */
> >  	ST_INODE,	/* per-inode metadata */
> > +	ST_SUMMARY,	/* summary counters (phase 7) */
> >  };
> 
> Hmmm - the previous patch used ST_FS for the summary counters.
> 
> Oh, wait, io/scrub.c has a duplicate scrub_type enum defined, and
> the table looks largely the same, too. Except now the summary type
> is different.
> 
> /me looks a bit closer...
> 
> Oh, the enum scrub_type definitions shadow the kernel enum
> xchk_type, but have different values for the same names. I'm
> just confused now...
> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
diff mbox series

Patch

diff --git a/scrub/phase4.c b/scrub/phase4.c
index 49f00723..c4da4852 100644
--- a/scrub/phase4.c
+++ b/scrub/phase4.c
@@ -107,6 +107,18 @@  bool
 xfs_repair_fs(
 	struct scrub_ctx		*ctx)
 {
+	bool				moveon;
+
+	/*
+	 * Check the summary counters early.  Normally we do this during phase
+	 * seven, but some of the cross-referencing requires fairly-accurate
+	 * counters, so counter repairs have to be put on the list now so that
+	 * they get fixed before we stop retrying unfixed metadata repairs.
+	 */
+	moveon = xfs_scrub_fs_summary(ctx, &ctx->action_lists[0]);
+	if (!moveon)
+		return false;
+
 	return xfs_process_action_items(ctx);
 }
 
diff --git a/scrub/phase7.c b/scrub/phase7.c
index 1c459dfc..b3156fdf 100644
--- a/scrub/phase7.c
+++ b/scrub/phase7.c
@@ -7,12 +7,15 @@ 
 #include <stdint.h>
 #include <stdlib.h>
 #include <sys/statvfs.h>
+#include "list.h"
 #include "path.h"
 #include "ptvar.h"
 #include "xfs_scrub.h"
 #include "common.h"
+#include "scrub.h"
 #include "fscounters.h"
 #include "spacemap.h"
+#include "repair.h"
 
 /* Phase 7: Check summary counters. */
 
@@ -91,6 +94,7 @@  xfs_scan_summary(
 	struct scrub_ctx	*ctx)
 {
 	struct summary_counts	totalcount = {0};
+	struct xfs_action_list	alist;
 	struct ptvar		*ptvar;
 	unsigned long long	used_data;
 	unsigned long long	used_rt;
@@ -110,6 +114,16 @@  xfs_scan_summary(
 	int			ip;
 	int			error;
 
+	/* Check and fix the fs summary counters. */
+	xfs_action_list_init(&alist);
+	moveon = xfs_scrub_fs_summary(ctx, &alist);
+	if (!moveon)
+		return false;
+	moveon = xfs_action_list_process(ctx, ctx->mnt.fd, &alist,
+			ALP_COMPLAIN_IF_UNFIXED | ALP_NOPROGRESS);
+	if (!moveon)
+		return moveon;
+
 	/* Flush everything out to disk before we start counting. */
 	error = syncfs(ctx->mnt.fd);
 	if (error) {
diff --git a/scrub/repair.c b/scrub/repair.c
index 45450d8c..54639752 100644
--- a/scrub/repair.c
+++ b/scrub/repair.c
@@ -84,6 +84,9 @@  xfs_action_item_priority(
 	case XFS_SCRUB_TYPE_GQUOTA:
 	case XFS_SCRUB_TYPE_PQUOTA:
 		return PRIO(aitem, XFS_SCRUB_TYPE_UQUOTA);
+	case XFS_SCRUB_TYPE_FSCOUNTERS:
+		/* This should always go after AG headers no matter what. */
+		return PRIO(aitem, INT_MAX);
 	}
 	abort();
 }
diff --git a/scrub/scrub.c b/scrub/scrub.c
index 136ed529..a428b524 100644
--- a/scrub/scrub.c
+++ b/scrub/scrub.c
@@ -28,6 +28,7 @@  enum scrub_type {
 	ST_PERAG,	/* per-AG metadata */
 	ST_FS,		/* per-FS metadata */
 	ST_INODE,	/* per-inode metadata */
+	ST_SUMMARY,	/* summary counters (phase 7) */
 };
 struct scrub_descr {
 	const char	*name;
@@ -84,6 +85,8 @@  static const struct scrub_descr scrubbers[XFS_SCRUB_TYPE_NR] = {
 		{"group quotas",			ST_FS},
 	[XFS_SCRUB_TYPE_PQUOTA] =
 		{"project quotas",			ST_FS},
+	[XFS_SCRUB_TYPE_FSCOUNTERS] =
+		{"filesystem summary counters",		ST_SUMMARY},
 };
 
 /* Format a scrub description. */
@@ -105,6 +108,7 @@  format_scrub_descr(
 				(uint64_t)meta->sm_ino, _(sc->name));
 		break;
 	case ST_FS:
+	case ST_SUMMARY:
 		snprintf(buf, buflen, _("%s"), _(sc->name));
 		break;
 	case ST_NONE:
@@ -446,6 +450,15 @@  xfs_scrub_fs_metadata(
 	return xfs_scrub_metadata(ctx, ST_FS, 0, alist);
 }
 
+/* Scrub FS summary metadata. */
+bool
+xfs_scrub_fs_summary(
+	struct scrub_ctx		*ctx,
+	struct xfs_action_list		*alist)
+{
+	return xfs_scrub_metadata(ctx, ST_SUMMARY, 0, alist);
+}
+
 /* How many items do we have to check? */
 unsigned int
 xfs_scrub_estimate_ag_work(
diff --git a/scrub/scrub.h b/scrub/scrub.h
index e6e3f16f..449c43de 100644
--- a/scrub/scrub.h
+++ b/scrub/scrub.h
@@ -25,6 +25,8 @@  bool xfs_scrub_ag_metadata(struct scrub_ctx *ctx, xfs_agnumber_t agno,
 		struct xfs_action_list *alist);
 bool xfs_scrub_fs_metadata(struct scrub_ctx *ctx,
 		struct xfs_action_list *alist);
+bool xfs_scrub_fs_summary(struct scrub_ctx *ctx,
+		struct xfs_action_list *alist);
 
 bool xfs_can_scrub_fs_metadata(struct scrub_ctx *ctx);
 bool xfs_can_scrub_inode(struct scrub_ctx *ctx);