Message ID | 156685446969.2839983.12626550627146659080.stgit@magnolia (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | xfsprogs: scrub filesystem summary counters | expand |
On Mon, Aug 26, 2019 at 02:21:09PM -0700, Darrick J. Wong wrote: > From: Darrick J. Wong <darrick.wong@oracle.com> > > Teach scrub to ask the kernel to check and repair summary counters > during phase 7. > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > --- > scrub/phase4.c | 12 ++++++++++++ > scrub/phase7.c | 14 ++++++++++++++ > scrub/repair.c | 3 +++ > scrub/scrub.c | 13 +++++++++++++ > scrub/scrub.h | 2 ++ > 5 files changed, 44 insertions(+) > > > diff --git a/scrub/phase4.c b/scrub/phase4.c > index 49f00723..c4da4852 100644 > --- a/scrub/phase4.c > +++ b/scrub/phase4.c > @@ -107,6 +107,18 @@ bool > xfs_repair_fs( > struct scrub_ctx *ctx) > { > + bool moveon; > + > + /* > + * Check the summary counters early. Normally we do this during phase > + * seven, but some of the cross-referencing requires fairly-accurate > + * counters, so counter repairs have to be put on the list now so that > + * they get fixed before we stop retrying unfixed metadata repairs. > + */ > + moveon = xfs_scrub_fs_summary(ctx, &ctx->action_lists[0]); > + if (!moveon) > + return false; "moveon" doesn't really make sense to me here. i.e. I can't tell if "moveon = true" meant it failed or not, so I have no idea what the intent of the code here is, and the comment doesn't explain it at all, either. > + > return xfs_process_action_items(ctx); > } > > diff --git a/scrub/phase7.c b/scrub/phase7.c > index 1c459dfc..b3156fdf 100644 > --- a/scrub/phase7.c > +++ b/scrub/phase7.c > @@ -7,12 +7,15 @@ > #include <stdint.h> > #include <stdlib.h> > #include <sys/statvfs.h> > +#include "list.h" > #include "path.h" > #include "ptvar.h" > #include "xfs_scrub.h" > #include "common.h" > +#include "scrub.h" > #include "fscounters.h" > #include "spacemap.h" > +#include "repair.h" > > /* Phase 7: Check summary counters. 
*/ > > @@ -91,6 +94,7 @@ xfs_scan_summary( > struct scrub_ctx *ctx) > { > struct summary_counts totalcount = {0}; > + struct xfs_action_list alist; > struct ptvar *ptvar; > unsigned long long used_data; > unsigned long long used_rt; > @@ -110,6 +114,16 @@ xfs_scan_summary( > int ip; > int error; > > + /* Check and fix the fs summary counters. */ > + xfs_action_list_init(&alist); > + moveon = xfs_scrub_fs_summary(ctx, &alist); > + if (!moveon) > + return false; > + moveon = xfs_action_list_process(ctx, ctx->mnt.fd, &alist, > + ALP_COMPLAIN_IF_UNFIXED | ALP_NOPROGRESS); > + if (!moveon) > + return moveon; same here - "moveon" doesn't tell me if we're returning because the scrub failed or passed.... > + > /* Flush everything out to disk before we start counting. */ > error = syncfs(ctx->mnt.fd); > if (error) { > diff --git a/scrub/repair.c b/scrub/repair.c > index 45450d8c..54639752 100644 > --- a/scrub/repair.c > +++ b/scrub/repair.c > @@ -84,6 +84,9 @@ xfs_action_item_priority( > case XFS_SCRUB_TYPE_GQUOTA: > case XFS_SCRUB_TYPE_PQUOTA: > return PRIO(aitem, XFS_SCRUB_TYPE_UQUOTA); > + case XFS_SCRUB_TYPE_FSCOUNTERS: > + /* This should always go after AG headers no matter what. */ > + return PRIO(aitem, INT_MAX); > } > abort(); > } > diff --git a/scrub/scrub.c b/scrub/scrub.c > index 136ed529..a428b524 100644 > --- a/scrub/scrub.c > +++ b/scrub/scrub.c > @@ -28,6 +28,7 @@ enum scrub_type { > ST_PERAG, /* per-AG metadata */ > ST_FS, /* per-FS metadata */ > ST_INODE, /* per-inode metadata */ > + ST_SUMMARY, /* summary counters (phase 7) */ > }; Hmmm - the previous patch used ST_FS for the summary counters. Oh, wait, io/scrub.c has a duplicate scrub_type enum defined, and the table looks largely the same, too. Except now the summary type is different. /me looks a bit closer... Oh, the enum scrub_type definitions shadow the kernel enum xchk_type, but have different values for the same names. I'm just confused now... Cheers, Dave.
On Tue, Aug 27, 2019 at 03:27:26PM +1000, Dave Chinner wrote: > On Mon, Aug 26, 2019 at 02:21:09PM -0700, Darrick J. Wong wrote: > > From: Darrick J. Wong <darrick.wong@oracle.com> > > > > Teach scrub to ask the kernel to check and repair summary counters > > during phase 7. > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > > --- > > scrub/phase4.c | 12 ++++++++++++ > > scrub/phase7.c | 14 ++++++++++++++ > > scrub/repair.c | 3 +++ > > scrub/scrub.c | 13 +++++++++++++ > > scrub/scrub.h | 2 ++ > > 5 files changed, 44 insertions(+) > > > > > > diff --git a/scrub/phase4.c b/scrub/phase4.c > > index 49f00723..c4da4852 100644 > > --- a/scrub/phase4.c > > +++ b/scrub/phase4.c > > @@ -107,6 +107,18 @@ bool > > xfs_repair_fs( > > struct scrub_ctx *ctx) > > { > > + bool moveon; > > + > > + /* > > + * Check the summary counters early. Normally we do this during phase > > + * seven, but some of the cross-referencing requires fairly-accurate > > + * counters, so counter repairs have to be put on the list now so that > > + * they get fixed before we stop retrying unfixed metadata repairs. > > + */ > > + moveon = xfs_scrub_fs_summary(ctx, &ctx->action_lists[0]); > > + if (!moveon) > > + return false; > > "moveon" doesn't really make sense to me here. i.e. I can't tell if > "moveon = true" meant it failed or not, so I have no idea what the > intent of the code here is, and the comment doesn't explain it at > all, either. FWIW I created Yet Another Cleanup Series that replaces all the moveon things with regular old "returns 0 for success, nonzero for error GTFO" semantics. I'll tack that on the end of all the stuff I've sent so far. 
--D > > + > > return xfs_process_action_items(ctx); > > } > > > > diff --git a/scrub/phase7.c b/scrub/phase7.c > > index 1c459dfc..b3156fdf 100644 > > --- a/scrub/phase7.c > > +++ b/scrub/phase7.c > > @@ -7,12 +7,15 @@ > > #include <stdint.h> > > #include <stdlib.h> > > #include <sys/statvfs.h> > > +#include "list.h" > > #include "path.h" > > #include "ptvar.h" > > #include "xfs_scrub.h" > > #include "common.h" > > +#include "scrub.h" > > #include "fscounters.h" > > #include "spacemap.h" > > +#include "repair.h" > > > > /* Phase 7: Check summary counters. */ > > > > @@ -91,6 +94,7 @@ xfs_scan_summary( > > struct scrub_ctx *ctx) > > { > > struct summary_counts totalcount = {0}; > > + struct xfs_action_list alist; > > struct ptvar *ptvar; > > unsigned long long used_data; > > unsigned long long used_rt; > > @@ -110,6 +114,16 @@ xfs_scan_summary( > > int ip; > > int error; > > > > + /* Check and fix the fs summary counters. */ > > + xfs_action_list_init(&alist); > > + moveon = xfs_scrub_fs_summary(ctx, &alist); > > + if (!moveon) > > + return false; > > + moveon = xfs_action_list_process(ctx, ctx->mnt.fd, &alist, > > + ALP_COMPLAIN_IF_UNFIXED | ALP_NOPROGRESS); > > + if (!moveon) > > + return moveon; > > same here - "moveon" doesn't tell me if we're returning because the > scrub failed or passed.... > > > + > > /* Flush everything out to disk before we start counting. */ > > error = syncfs(ctx->mnt.fd); > > if (error) { > > diff --git a/scrub/repair.c b/scrub/repair.c > > index 45450d8c..54639752 100644 > > --- a/scrub/repair.c > > +++ b/scrub/repair.c > > @@ -84,6 +84,9 @@ xfs_action_item_priority( > > case XFS_SCRUB_TYPE_GQUOTA: > > case XFS_SCRUB_TYPE_PQUOTA: > > return PRIO(aitem, XFS_SCRUB_TYPE_UQUOTA); > > + case XFS_SCRUB_TYPE_FSCOUNTERS: > > + /* This should always go after AG headers no matter what. 
*/ > > + return PRIO(aitem, INT_MAX); > > } > > abort(); > > } > > diff --git a/scrub/scrub.c b/scrub/scrub.c > > index 136ed529..a428b524 100644 > > --- a/scrub/scrub.c > > +++ b/scrub/scrub.c > > @@ -28,6 +28,7 @@ enum scrub_type { > > ST_PERAG, /* per-AG metadata */ > > ST_FS, /* per-FS metadata */ > > ST_INODE, /* per-inode metadata */ > > + ST_SUMMARY, /* summary counters (phase 7) */ > > }; > > Hmmm - the previous patch used ST_FS for the summary counters. > > Oh, wait, io/scrub.c has a duplicate scrub_type enum defined, and > the table looks largely the same, too. Except now the summary type > is different. > > /me looks a bit closer... > > Oh, the enum scrub_type definitions shadow the kernel enum > xchk_type, but have different values for the same names. I'm > just confused now... > > Cheers, > > Dave. > -- > Dave Chinner > david@fromorbit.com
diff --git a/scrub/phase4.c b/scrub/phase4.c index 49f00723..c4da4852 100644 --- a/scrub/phase4.c +++ b/scrub/phase4.c @@ -107,6 +107,18 @@ bool xfs_repair_fs( struct scrub_ctx *ctx) { + bool moveon; + + /* + * Check the summary counters early. Normally we do this during phase + * seven, but some of the cross-referencing requires fairly-accurate + * counters, so counter repairs have to be put on the list now so that + * they get fixed before we stop retrying unfixed metadata repairs. + */ + moveon = xfs_scrub_fs_summary(ctx, &ctx->action_lists[0]); + if (!moveon) + return false; + return xfs_process_action_items(ctx); } diff --git a/scrub/phase7.c b/scrub/phase7.c index 1c459dfc..b3156fdf 100644 --- a/scrub/phase7.c +++ b/scrub/phase7.c @@ -7,12 +7,15 @@ #include <stdint.h> #include <stdlib.h> #include <sys/statvfs.h> +#include "list.h" #include "path.h" #include "ptvar.h" #include "xfs_scrub.h" #include "common.h" +#include "scrub.h" #include "fscounters.h" #include "spacemap.h" +#include "repair.h" /* Phase 7: Check summary counters. */ @@ -91,6 +94,7 @@ xfs_scan_summary( struct scrub_ctx *ctx) { struct summary_counts totalcount = {0}; + struct xfs_action_list alist; struct ptvar *ptvar; unsigned long long used_data; unsigned long long used_rt; @@ -110,6 +114,16 @@ xfs_scan_summary( int ip; int error; + /* Check and fix the fs summary counters. */ + xfs_action_list_init(&alist); + moveon = xfs_scrub_fs_summary(ctx, &alist); + if (!moveon) + return false; + moveon = xfs_action_list_process(ctx, ctx->mnt.fd, &alist, + ALP_COMPLAIN_IF_UNFIXED | ALP_NOPROGRESS); + if (!moveon) + return moveon; + /* Flush everything out to disk before we start counting. 
*/ error = syncfs(ctx->mnt.fd); if (error) { diff --git a/scrub/repair.c b/scrub/repair.c index 45450d8c..54639752 100644 --- a/scrub/repair.c +++ b/scrub/repair.c @@ -84,6 +84,9 @@ xfs_action_item_priority( case XFS_SCRUB_TYPE_GQUOTA: case XFS_SCRUB_TYPE_PQUOTA: return PRIO(aitem, XFS_SCRUB_TYPE_UQUOTA); + case XFS_SCRUB_TYPE_FSCOUNTERS: + /* This should always go after AG headers no matter what. */ + return PRIO(aitem, INT_MAX); } abort(); } diff --git a/scrub/scrub.c b/scrub/scrub.c index 136ed529..a428b524 100644 --- a/scrub/scrub.c +++ b/scrub/scrub.c @@ -28,6 +28,7 @@ enum scrub_type { ST_PERAG, /* per-AG metadata */ ST_FS, /* per-FS metadata */ ST_INODE, /* per-inode metadata */ + ST_SUMMARY, /* summary counters (phase 7) */ }; struct scrub_descr { const char *name; @@ -84,6 +85,8 @@ static const struct scrub_descr scrubbers[XFS_SCRUB_TYPE_NR] = { {"group quotas", ST_FS}, [XFS_SCRUB_TYPE_PQUOTA] = {"project quotas", ST_FS}, + [XFS_SCRUB_TYPE_FSCOUNTERS] = + {"filesystem summary counters", ST_SUMMARY}, }; /* Format a scrub description. */ @@ -105,6 +108,7 @@ format_scrub_descr( (uint64_t)meta->sm_ino, _(sc->name)); break; case ST_FS: + case ST_SUMMARY: snprintf(buf, buflen, _("%s"), _(sc->name)); break; case ST_NONE: @@ -446,6 +450,15 @@ xfs_scrub_fs_metadata( return xfs_scrub_metadata(ctx, ST_FS, 0, alist); } +/* Scrub FS summary metadata. */ +bool +xfs_scrub_fs_summary( + struct scrub_ctx *ctx, + struct xfs_action_list *alist) +{ + return xfs_scrub_metadata(ctx, ST_SUMMARY, 0, alist); +} + /* How many items do we have to check? 
*/ unsigned int xfs_scrub_estimate_ag_work( diff --git a/scrub/scrub.h b/scrub/scrub.h index e6e3f16f..449c43de 100644 --- a/scrub/scrub.h +++ b/scrub/scrub.h @@ -25,6 +25,8 @@ bool xfs_scrub_ag_metadata(struct scrub_ctx *ctx, xfs_agnumber_t agno, struct xfs_action_list *alist); bool xfs_scrub_fs_metadata(struct scrub_ctx *ctx, struct xfs_action_list *alist); +bool xfs_scrub_fs_summary(struct scrub_ctx *ctx, + struct xfs_action_list *alist); bool xfs_can_scrub_fs_metadata(struct scrub_ctx *ctx); bool xfs_can_scrub_inode(struct scrub_ctx *ctx);