diff mbox series

[4/6] xfs_scrub: don't try any file repairs during phase 3 if AG metadata bad

Message ID 165176688430.252160.7374263325275359962.stgit@magnolia (mailing list archive)
State Accepted
Headers show
Series xfs_scrub: small performance tweaks | expand

Commit Message

Darrick J. Wong May 5, 2022, 4:08 p.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Currently, phase 3 tries to repair file metadata even after phase 2
tells us that there are problems with the AG metadata.  While this
generally won't cause too many problems since the repair code will bail
out on any obvious corruptions it finds, this isn't totally foolproof.
If the filesystem space metadata are not in good shape, we want to queue
the file repairs to run /after/ the space metadata repairs in phase 4.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 scrub/phase3.c |   50 +++++++++++++++++++++++++++++++++++++++++++++-----
 scrub/repair.h |    6 ++++++
 2 files changed, 51 insertions(+), 5 deletions(-)

Comments

Eric Sandeen May 12, 2022, 9:02 p.m. UTC | #1
On 5/5/22 11:08 AM, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Currently, phase 3 tries to repair file metadata even after phase 2
> tells us that there are problems with the AG metadata.  While this
> generally won't cause too many problems since the repair code will bail
> out on any obvious corruptions it finds, this isn't totally foolproof.
> If the filesystem space metadata are not in good shape, we want to queue
> the file repairs to run /after/ the space metadata repairs in phase 4.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>

Reviewed-by: Eric Sandeen <sandeen@redhat.com>
diff mbox series

Patch

diff --git a/scrub/phase3.c b/scrub/phase3.c
index d22758c1..fd8e5419 100644
--- a/scrub/phase3.c
+++ b/scrub/phase3.c
@@ -21,8 +21,16 @@ 
 /* Phase 3: Scan all inodes. */
 
 struct scrub_inode_ctx {
+	struct scrub_ctx	*ctx;
+
+	/* Number of inodes scanned. */
 	struct ptcounter	*icount;
+
+	/* Set to true to abort all threads. */
 	bool			aborted;
+
+	/* Set to true if we want to defer file repairs to phase 4. */
+	bool			always_defer_repairs;
 };
 
 /* Report a filesystem error that the vfs fed us on close. */
@@ -40,6 +48,24 @@  report_close_error(
 	str_errno(ctx, descr);
 }
 
+/* Run repair actions now and defer unfinished items for later. */
+static int
+try_inode_repair(
+	struct scrub_inode_ctx	*ictx,
+	xfs_agnumber_t		agno,
+	struct action_list	*alist)
+{
+	/*
+	 * If at the start of phase 3 we already had ag/rt metadata repairs
+	 * queued up for phase 4, leave the action list untouched so that file
+	 * metadata repairs will be deferred in scan order until phase 4.
+	 */
+	if (ictx->always_defer_repairs)
+		return 0;
+
+	return action_list_process_or_defer(ictx->ctx, agno, alist);
+}
+
 /* Verify the contents, xattrs, and extent maps of an inode. */
 static int
 scrub_inode(
@@ -91,7 +117,7 @@  scrub_inode(
 	if (error)
 		goto out;
 
-	error = action_list_process_or_defer(ctx, agno, &alist);
+	error = try_inode_repair(ictx, agno, &alist);
 	if (error)
 		goto out;
 
@@ -106,7 +132,7 @@  scrub_inode(
 	if (error)
 		goto out;
 
-	error = action_list_process_or_defer(ctx, agno, &alist);
+	error = try_inode_repair(ictx, agno, &alist);
 	if (error)
 		goto out;
 
@@ -132,7 +158,7 @@  scrub_inode(
 		goto out;
 
 	/* Try to repair the file while it's open. */
-	error = action_list_process_or_defer(ctx, agno, &alist);
+	error = try_inode_repair(ictx, agno, &alist);
 	if (error)
 		goto out;
 
@@ -147,7 +173,10 @@  scrub_inode(
 		ictx->aborted = true;
 	}
 	progress_add(1);
-	action_list_defer(ctx, agno, &alist);
+
+	if (!error && !ictx->aborted)
+		action_list_defer(ctx, agno, &alist);
+
 	if (fd >= 0) {
 		int	err2;
 
@@ -168,8 +197,9 @@  int
 phase3_func(
 	struct scrub_ctx	*ctx)
 {
-	struct scrub_inode_ctx	ictx = { NULL };
+	struct scrub_inode_ctx	ictx = { .ctx = ctx };
 	uint64_t		val;
+	xfs_agnumber_t		agno;
 	int			err;
 
 	err = ptcounter_alloc(scrub_nproc(ctx), &ictx.icount);
@@ -178,6 +208,16 @@  phase3_func(
 		return err;
 	}
 
+	/*
+	 * If we already have ag/fs metadata to repair from previous phases,
+	 * we would rather not try to repair file metadata until we've tried
+	 * to repair the space metadata.
+	 */
+	for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++) {
+		if (!action_list_empty(&ctx->action_lists[agno]))
+			ictx.always_defer_repairs = true;
+	}
+
 	err = scrub_scan_all_inodes(ctx, scrub_inode, &ictx);
 	if (!err && ictx.aborted)
 		err = ECANCELED;
diff --git a/scrub/repair.h b/scrub/repair.h
index 1994c50a..4261be49 100644
--- a/scrub/repair.h
+++ b/scrub/repair.h
@@ -16,6 +16,12 @@  int action_lists_alloc(size_t nr, struct action_list **listsp);
 void action_lists_free(struct action_list **listsp);
 
 void action_list_init(struct action_list *alist);
+
+static inline bool action_list_empty(const struct action_list *alist)
+{
+	return list_empty(&alist->list);
+}
+
 size_t action_list_length(struct action_list *alist);
 void action_list_add(struct action_list *dest, struct action_item *item);
 void action_list_splice(struct action_list *dest, struct action_list *src);