diff mbox series

[8/9] xfs_scrub: retry incomplete repairs

Message ID 170404999552.1797790.13157454061191356913.stgit@frogsfrogsfrogs (mailing list archive)
State New
Headers show
Series [1/9] xfs_scrub: track repair items by principal, not by individual repairs | expand

Commit Message

Darrick J. Wong Dec. 31, 2023, 10:42 p.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

If a repair reports that it did nothing because it could not complete a
scan of the metadata, retry the repair a few times; if even that doesn't
work, defer the repair to phase 4.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 scrub/repair.c        |   15 ++++++++++++++-
 scrub/scrub.c         |    3 +--
 scrub/scrub_private.h |   10 ++++++++++
 3 files changed, 25 insertions(+), 3 deletions(-)

Comments

Christoph Hellwig Jan. 5, 2024, 5:03 a.m. UTC | #1
Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
diff mbox series

Patch

diff --git a/scrub/repair.c b/scrub/repair.c
index 9b4b5d01626..2b863bb4195 100644
--- a/scrub/repair.c
+++ b/scrub/repair.c
@@ -58,6 +58,7 @@  xfs_repair_metadata(
 	struct xfs_scrub_metadata	oldm;
 	DEFINE_DESCR(dsc, ctx, format_scrub_descr);
 	bool				repair_only;
+	unsigned int			tries = 0;
 	int				error;
 
 	/*
@@ -99,6 +100,7 @@  xfs_repair_metadata(
 		str_info(ctx, descr_render(&dsc),
 				_("Attempting optimization."));
 
+retry:
 	error = -xfrog_scrub_metadata(xfdp, &meta);
 	switch (error) {
 	case 0:
@@ -179,9 +181,20 @@  _("Read-only filesystem; cannot make changes."));
 		return CHECK_DONE;
 	}
 
+	/*
+	 * If the kernel says the repair was incomplete or that there was a
+	 * cross-referencing discrepancy but no obvious corruption, we'll try
+	 * the repair again, just in case the fs was busy.  Only retry so many
+	 * times.
+	 */
+	if (want_retry(&meta) && tries < 10) {
+		tries++;
+		goto retry;
+	}
+
 	if (repair_flags & XRM_FINAL_WARNING)
 		scrub_warn_incomplete_scrub(ctx, &dsc, &meta);
-	if (needs_repair(&meta)) {
+	if (needs_repair(&meta) || is_incomplete(&meta)) {
 		/*
 		 * Still broken; if we've been told not to complain then we
 		 * just requeue this and try again later.  Otherwise we
diff --git a/scrub/scrub.c b/scrub/scrub.c
index 5c14ed2092e..5fc549f9728 100644
--- a/scrub/scrub.c
+++ b/scrub/scrub.c
@@ -137,8 +137,7 @@  _("Filesystem is shut down, aborting."));
 	 * we'll try the scan again, just in case the fs was busy.
 	 * Only retry so many times.
 	 */
-	if (tries < 10 && (is_incomplete(meta) ||
-			   (xref_disagrees(meta) && !is_corrupt(meta)))) {
+	if (want_retry(meta) && tries < 10) {
 		tries++;
 		goto retry;
 	}
diff --git a/scrub/scrub_private.h b/scrub/scrub_private.h
index 08b9130cbc9..53372e1f322 100644
--- a/scrub/scrub_private.h
+++ b/scrub/scrub_private.h
@@ -49,6 +49,16 @@  static inline bool needs_repair(struct xfs_scrub_metadata *sm)
 	return is_corrupt(sm) || xref_disagrees(sm);
 }
 
+/*
+ * Decide whether a scrub or repair call is worth retrying: true if the kernel
+ * marked the operation incomplete, or if cross-referencing disagreed while the
+ * object was not flagged corrupt.  Callers bound the number of retries.
+ */
+static inline bool want_retry(struct xfs_scrub_metadata *sm)
+{
+	return is_incomplete(sm) || (xref_disagrees(sm) && !is_corrupt(sm));
+}
+
 void scrub_warn_incomplete_scrub(struct scrub_ctx *ctx, struct descr *dsc,
 		struct xfs_scrub_metadata *meta);