
[44/51] xfs_scrub: scrub realtime allocation group metadata

Message ID 173498944477.2297565.5337342089404023845.stgit@frogsfrogsfrogs
Series [01/51] libxfs: remove XFS_ILOCK_RT*

Commit Message

Darrick J. Wong Dec. 23, 2024, 10:23 p.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Scan realtime group metadata as part of phase 2, just like we do for AG
metadata.  For pre-rtgroup filesystems, pretend that this is a "rtgroup
0" scrub request because the kernel expects that.  Replace the old
cond_wait code with a scrub barrier because they're equivalent for two
items that cannot be scrubbed in parallel.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 libfrog/scrub.c |    4 +-
 scrub/phase2.c  |  124 ++++++++++++++++++++++++++++++++++++++-----------------
 scrub/scrub.c   |    1 +
 scrub/scrub.h   |    9 ++++
 4 files changed, 98 insertions(+), 40 deletions(-)
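
Aside, for readers of the patch: the ordering that the old cond_wait provided
now comes from the RTSUM entry added to scrub_deps in scrub/scrub.c, so the
rt summary check is simply not scheduled until the rt bitmap check has
completed.  A minimal sketch of how a dependency bitmask can gate scheduling
follows; the DEP() macro mirrors the patch, but the type numbers, the
checked_mask, and the can_scrub_now() helper are illustrative only, not
xfs_scrub code.

	#include <stdbool.h>

	#define DEP(x)		(1U << (x))

	/* Hypothetical type numbers, for this sketch only. */
	enum { TYPE_RTBITMAP = 4, TYPE_RTSUM = 5, TYPE_NR = 32 };

	/* The rt summary depends on the rt bitmap, mirroring scrub_deps. */
	static const unsigned int deps[TYPE_NR] = {
		[TYPE_RTSUM]	= DEP(TYPE_RTBITMAP),
	};

	/*
	 * A type may be scheduled once every type it depends on has been
	 * checked, so the bitmap and summary checks never run in parallel.
	 */
	static bool
	can_scrub_now(unsigned int type, unsigned int checked_mask)
	{
		return (deps[type] & ~checked_mask) == 0;
	}

For example, can_scrub_now(TYPE_RTSUM, 0) is false, while
can_scrub_now(TYPE_RTSUM, DEP(TYPE_RTBITMAP)) becomes true once the bitmap
check has finished.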

Patch

diff --git a/libfrog/scrub.c b/libfrog/scrub.c
index 66000f1ed66be4..d40364d35ce0b4 100644
--- a/libfrog/scrub.c
+++ b/libfrog/scrub.c
@@ -107,12 +107,12 @@  const struct xfrog_scrub_descr xfrog_scrubbers[XFS_SCRUB_TYPE_NR] = {
 	[XFS_SCRUB_TYPE_RTBITMAP] = {
 		.name	= "rtbitmap",
 		.descr	= "realtime bitmap",
-		.group	= XFROG_SCRUB_GROUP_FS,
+		.group	= XFROG_SCRUB_GROUP_RTGROUP,
 	},
 	[XFS_SCRUB_TYPE_RTSUM] = {
 		.name	= "rtsummary",
 		.descr	= "realtime summary",
-		.group	= XFROG_SCRUB_GROUP_FS,
+		.group	= XFROG_SCRUB_GROUP_RTGROUP,
 	},
 	[XFS_SCRUB_TYPE_UQUOTA] = {
 		.name	= "usrquota",
diff --git a/scrub/phase2.c b/scrub/phase2.c
index c24d137358c74d..c7828c332e7c3a 100644
--- a/scrub/phase2.c
+++ b/scrub/phase2.c
@@ -21,12 +21,10 @@ 
 
 struct scan_ctl {
 	/*
-	 * Control mechanism to signal that the rt bitmap file scan is done and
-	 * wake up any waiters.
+	 * Control mechanism to signal that each group's scan of the rt bitmap
+	 * file is done and wake up any waiters.
 	 */
-	pthread_cond_t		rbm_wait;
-	pthread_mutex_t		rbm_waitlock;
-	bool			rbm_done;
+	unsigned int		rbm_group_count;
 
 	bool			aborted;
 };
@@ -202,7 +200,7 @@  scan_fs_metadata(
 	int			ret;
 
 	if (sctl->aborted)
-		goto out;
+		return;
 
 	/*
 	 * Try to check all of the metadata files that we just scheduled.  If
@@ -215,14 +213,14 @@  scan_fs_metadata(
 	ret = scrub_item_check(ctx, &sri);
 	if (ret) {
 		sctl->aborted = true;
-		goto out;
+		return;
 	}
 
 	ret = repair_and_scrub_loop(ctx, &sri, xfrog_scrubbers[type].descr,
 			&defer_repairs);
 	if (ret) {
 		sctl->aborted = true;
-		goto out;
+		return;
 	}
 	if (defer_repairs)
 		goto defer;
@@ -235,15 +233,60 @@  scan_fs_metadata(
 	ret = defer_fs_repair(ctx, &sri);
 	if (ret) {
 		sctl->aborted = true;
-		goto out;
+		return;
 	}
+}
 
-out:
-	if (type == XFS_SCRUB_TYPE_RTBITMAP) {
-		pthread_mutex_lock(&sctl->rbm_waitlock);
-		sctl->rbm_done = true;
-		pthread_cond_broadcast(&sctl->rbm_wait);
-		pthread_mutex_unlock(&sctl->rbm_waitlock);
+/*
+ * Scrub each rt group's metadata.  For pre-rtgroup filesystems, we ask to
+ * scrub "rtgroup 0" because that's how the kernel ioctl works.
+ */
+static void
+scan_rtgroup_metadata(
+	struct workqueue	*wq,
+	xfs_agnumber_t		rgno,
+	void			*arg)
+{
+	struct scrub_item	sri;
+	struct scrub_ctx	*ctx = (struct scrub_ctx *)wq->wq_ctx;
+	struct scan_ctl		*sctl = arg;
+	char			descr[DESCR_BUFSZ];
+	bool			defer_repairs;
+	int			ret;
+
+	if (sctl->aborted)
+		return;
+
+	scrub_item_init_rtgroup(&sri, rgno);
+	if (ctx->mnt.fsgeom.rgcount == 0)
+		snprintf(descr, DESCR_BUFSZ, _("realtime"));
+	else
+		snprintf(descr, DESCR_BUFSZ, _("rtgroup %u"), rgno);
+
+	/*
+	 * Try to check all of the rtgroup metadata items that we just
+	 * scheduled.  If we return with some types still needing a check, try
+	 * repairing any damaged metadata that we've found so far, and try
+	 * again.  Abort if we stop making forward progress.
+	 */
+	scrub_item_schedule_group(&sri, XFROG_SCRUB_GROUP_RTGROUP);
+	ret = scrub_item_check(ctx, &sri);
+	if (ret) {
+		sctl->aborted = true;
+		return;
+	}
+
+	ret = repair_and_scrub_loop(ctx, &sri, descr, &defer_repairs);
+	if (ret) {
+		sctl->aborted = true;
+		return;
+	}
+
+	/* Everything else gets fixed during phase 4. */
+	ret = defer_fs_repair(ctx, &sri);
+	if (ret) {
+		sctl->aborted = true;
+		return;
 	}
 }
 
@@ -255,17 +298,14 @@  phase2_func(
 	struct workqueue	wq;
 	struct scan_ctl		sctl = {
 		.aborted	= false,
-		.rbm_done	= false,
 	};
 	struct scrub_item	sri;
 	const struct xfrog_scrub_descr *sc = xfrog_scrubbers;
 	xfs_agnumber_t		agno;
+	xfs_rgnumber_t		rgno;
 	unsigned int		type;
 	int			ret, ret2;
 
-	pthread_mutex_init(&sctl.rbm_waitlock, NULL);
-	pthread_cond_init(&sctl.rbm_wait, NULL);
-
 	ret = -workqueue_create(&wq, (struct xfs_mount *)ctx,
 			scrub_nproc_workqueue(ctx));
 	if (ret) {
@@ -311,8 +351,6 @@  phase2_func(
 	for (type = 0; type < XFS_SCRUB_TYPE_NR; type++, sc++) {
 		if (sc->group != XFROG_SCRUB_GROUP_FS)
 			continue;
-		if (type == XFS_SCRUB_TYPE_RTSUM)
-			continue;
 
 		ret = -workqueue_add(&wq, scan_fs_metadata, type, &sctl);
 		if (ret) {
@@ -325,24 +363,37 @@  phase2_func(
 	if (sctl.aborted)
 		goto out_wq;
 
-	/*
-	 * Wait for the rt bitmap to finish scanning, then scan the rt summary
-	 * since the summary can be regenerated completely from the bitmap.
-	 */
-	pthread_mutex_lock(&sctl.rbm_waitlock);
-	while (!sctl.rbm_done)
-		pthread_cond_wait(&sctl.rbm_wait, &sctl.rbm_waitlock);
-	pthread_mutex_unlock(&sctl.rbm_waitlock);
+	if (ctx->mnt.fsgeom.rgcount == 0) {
+		/*
+		 * When rtgroups were added, the bitmap and summary files
+		 * became per-rtgroup metadata so the scrub interface for the
+		 * two started to accept sm_agno.  For pre-rtgroups
+		 * filesystems, we still accept sm_agno==0, so invoke scrub in
+		 * this manner.
+		 */
+		ret = -workqueue_add(&wq, scan_rtgroup_metadata, 0, &sctl);
+		if (ret) {
+			str_liberror(ctx, ret,
+					_("queueing realtime scrub work"));
+			goto out_wq;
+		}
+	}
+
+	/* Scan each rtgroup in parallel. */
+	for (rgno = 0;
+	     rgno < ctx->mnt.fsgeom.rgcount && !sctl.aborted;
+	     rgno++) {
+		ret = -workqueue_add(&wq, scan_rtgroup_metadata, rgno, &sctl);
+		if (ret) {
+			str_liberror(ctx, ret,
+					_("queueing rtgroup scrub work"));
+			goto out_wq;
+		}
+	}
 
 	if (sctl.aborted)
 		goto out_wq;
 
-	ret = -workqueue_add(&wq, scan_fs_metadata, XFS_SCRUB_TYPE_RTSUM, &sctl);
-	if (ret) {
-		str_liberror(ctx, ret, _("queueing rtsummary scrub work"));
-		goto out_wq;
-	}
-
 out_wq:
 	ret2 = -workqueue_terminate(&wq);
 	if (ret2) {
@@ -352,9 +403,6 @@  phase2_func(
 	}
 	workqueue_destroy(&wq);
 out_wait:
-	pthread_cond_destroy(&sctl.rbm_wait);
-	pthread_mutex_destroy(&sctl.rbm_waitlock);
-
 	if (!ret && sctl.aborted)
 		ret = ECANCELED;
 	return ret;
diff --git a/scrub/scrub.c b/scrub/scrub.c
index a2fd8d77d82be0..de687af687d32d 100644
--- a/scrub/scrub.c
+++ b/scrub/scrub.c
@@ -50,6 +50,7 @@  static const unsigned int scrub_deps[XFS_SCRUB_TYPE_NR] = {
 	[XFS_SCRUB_TYPE_QUOTACHECK]	= DEP(XFS_SCRUB_TYPE_UQUOTA) |
 					  DEP(XFS_SCRUB_TYPE_GQUOTA) |
 					  DEP(XFS_SCRUB_TYPE_PQUOTA),
+	[XFS_SCRUB_TYPE_RTSUM]		= DEP(XFS_SCRUB_TYPE_RTBITMAP),
 };
 #undef DEP
 
diff --git a/scrub/scrub.h b/scrub/scrub.h
index 3bb3ea1d07bf40..bb94a11dcfce71 100644
--- a/scrub/scrub.h
+++ b/scrub/scrub.h
@@ -90,6 +90,15 @@  scrub_item_init_ag(struct scrub_item *sri, xfs_agnumber_t agno)
 	sri->sri_gen = -1U;
 }
 
+static inline void
+scrub_item_init_rtgroup(struct scrub_item *sri, xfs_rgnumber_t rgno)
+{
+	memset(sri, 0, sizeof(*sri));
+	sri->sri_agno = rgno;
+	sri->sri_ino = -1ULL;
+	sri->sri_gen = -1U;
+}
+
 static inline void
 scrub_item_init_fs(struct scrub_item *sri)
 {