diff mbox series

[3/4] xfs: disable reaping in fscounters scrub

Message ID 168263577739.1719564.16150152466509865245.stgit@frogsfrogsfrogs (mailing list archive)
State Superseded, archived
Headers show
Series xfs: inodegc fixes for 6.4-rc1 | expand

Commit Message

Darrick J. Wong April 27, 2023, 10:49 p.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

The fscounters scrub code doesn't work properly because it cannot
quiesce updates to the percpu counters in the filesystem, hence it
returns false corruption reports.  This has been fixed properly in
one of the online repair patchsets that are under review by replacing
the xchk_disable_reaping calls with an exclusive filesystem freeze.
Disabling background gc isn't sufficient to fix the problem.

In other words, scrub doesn't need to call xfs_inodegc_stop, which is
just as well since it wasn't correct to allow scrub to call
xfs_inodegc_start when something else could be calling xfs_inodegc_stop
(e.g. trying to freeze the filesystem).

Neuter the scrubber for now, and remove the xchk_*_reaping functions.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/scrub/common.c     |   26 --------------------------
 fs/xfs/scrub/common.h     |    2 --
 fs/xfs/scrub/fscounters.c |   10 +++-------
 fs/xfs/scrub/scrub.c      |    2 --
 fs/xfs/scrub/scrub.h      |    1 -
 fs/xfs/scrub/trace.h      |    1 -
 6 files changed, 3 insertions(+), 39 deletions(-)

Comments

Dave Chinner April 28, 2023, 2:20 a.m. UTC | #1
On Thu, Apr 27, 2023 at 03:49:37PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> The fscounters scrub code doesn't work properly because it cannot
> quiesce updates to the percpu counters in the filesystem, hence it
> returns false corruption reports.  This has been fixed properly in
> one of the online repair patchsets that are under review by replacing
> the xchk_disable_reaping calls with an exclusive filesystem freeze.
> Disabling background gc isn't sufficient to fix the problem.
> 
> In other words, scrub doesn't need to call xfs_inodegc_stop, which is
> just as well since it wasn't correct to allow scrub to call
> xfs_inodegc_start when something else could be calling xfs_inodegc_stop
> (e.g. trying to freeze the filesystem).
> 
> Neuter the scrubber for now, and remove the xchk_*_reaping functions.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>

Looks ok, minor nit below.

> @@ -453,6 +446,9 @@ xchk_fscounters(
>  	if (frextents > mp->m_sb.sb_rextents)
>  		xchk_set_corrupt(sc);
>  
> +	/* XXX: We can't quiesce percpu counter updates, so exit early. */
> +	return 0;

Can you just add to this that we can re-enable this functionality
when we have the exclusive freeze functionality in the kernel?

With that,

Reviewed-by: Dave Chinner <dchinner@redhat.com>
Darrick J. Wong April 28, 2023, 4:28 a.m. UTC | #2
On Fri, Apr 28, 2023 at 12:20:41PM +1000, Dave Chinner wrote:
> On Thu, Apr 27, 2023 at 03:49:37PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > The fscounters scrub code doesn't work properly because it cannot
> > quiesce updates to the percpu counters in the filesystem, hence it
> > returns false corruption reports.  This has been fixed properly in
> > one of the online repair patchsets that are under review by replacing
> > the xchk_disable_reaping calls with an exclusive filesystem freeze.
> > Disabling background gc isn't sufficient to fix the problem.
> > 
> > In other words, scrub doesn't need to call xfs_inodegc_stop, which is
> > just as well since it wasn't correct to allow scrub to call
> > xfs_inodegc_start when something else could be calling xfs_inodegc_stop
> > (e.g. trying to freeze the filesystem).
> > 
> > Neuter the scrubber for now, and remove the xchk_*_reaping functions.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> 
> Looks ok, minor nit below.
> 
> > @@ -453,6 +446,9 @@ xchk_fscounters(
> >  	if (frextents > mp->m_sb.sb_rextents)
> >  		xchk_set_corrupt(sc);
> >  
> > +	/* XXX: We can't quiesce percpu counter updates, so exit early. */
> > +	return 0;
> 
> Can you just add to this that we can re-enable this functionality
> when we have the exclusive freeze functionality in the kernel?

Will do.

--D

> With that,
> 
> Reviewed-by: Dave Chinner <dchinner@redhat.com>
> 
> -- 
> Dave Chinner
> david@fromorbit.com
diff mbox series

Patch

diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 9aa79665c608..7a20256be969 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -1164,32 +1164,6 @@  xchk_metadata_inode_forks(
 	return 0;
 }
 
-/* Pause background reaping of resources. */
-void
-xchk_stop_reaping(
-	struct xfs_scrub	*sc)
-{
-	sc->flags |= XCHK_REAPING_DISABLED;
-	xfs_blockgc_stop(sc->mp);
-	xfs_inodegc_stop(sc->mp);
-}
-
-/* Restart background reaping of resources. */
-void
-xchk_start_reaping(
-	struct xfs_scrub	*sc)
-{
-	/*
-	 * Readonly filesystems do not perform inactivation or speculative
-	 * preallocation, so there's no need to restart the workers.
-	 */
-	if (!xfs_is_readonly(sc->mp)) {
-		xfs_inodegc_start(sc->mp);
-		xfs_blockgc_start(sc->mp);
-	}
-	sc->flags &= ~XCHK_REAPING_DISABLED;
-}
-
 /*
  * Enable filesystem hooks (i.e. runtime code patching) before starting a scrub
  * operation.  Callers must not hold any locks that intersect with the CPU
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 18b5f2b62f13..791235cd9b00 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -156,8 +156,6 @@  static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
 }
 
 int xchk_metadata_inode_forks(struct xfs_scrub *sc);
-void xchk_stop_reaping(struct xfs_scrub *sc);
-void xchk_start_reaping(struct xfs_scrub *sc);
 
 /*
  * Setting up a hook to wait for intents to drain is costly -- we have to take
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index faa315be7978..0d90d00de770 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -150,13 +150,6 @@  xchk_setup_fscounters(
 	if (error)
 		return error;
 
-	/*
-	 * Pause background reclaim while we're scrubbing to reduce the
-	 * likelihood of background perturbations to the counters throwing off
-	 * our calculations.
-	 */
-	xchk_stop_reaping(sc);
-
 	return xchk_trans_alloc(sc, 0);
 }
 
@@ -453,6 +446,9 @@  xchk_fscounters(
 	if (frextents > mp->m_sb.sb_rextents)
 		xchk_set_corrupt(sc);
 
+	/* XXX: We can't quiesce percpu counter updates, so exit early. */
+	return 0;
+
 	/*
 	 * If ifree exceeds icount by more than the minimum variance then
 	 * something's probably wrong with the counters.
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 02819bedc5b1..3d98f604765e 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -186,8 +186,6 @@  xchk_teardown(
 	}
 	if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
 		mnt_drop_write_file(sc->file);
-	if (sc->flags & XCHK_REAPING_DISABLED)
-		xchk_start_reaping(sc);
 	if (sc->buf) {
 		if (sc->buf_cleanup)
 			sc->buf_cleanup(sc->buf);
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index e71903474cd7..b38e93830dde 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -106,7 +106,6 @@  struct xfs_scrub {
 
 /* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */
 #define XCHK_TRY_HARDER		(1 << 0)  /* can't get resources, try again */
-#define XCHK_REAPING_DISABLED	(1 << 1)  /* background block reaping paused */
 #define XCHK_FSGATES_DRAIN	(1 << 2)  /* defer ops draining enabled */
 #define XCHK_NEED_DRAIN		(1 << 3)  /* scrub needs to drain defer ops */
 #define XREP_ALREADY_FIXED	(1 << 31) /* checking our repair work */
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 68efd6fda61c..b3894daeb86a 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -98,7 +98,6 @@  TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
 
 #define XFS_SCRUB_STATE_STRINGS \
 	{ XCHK_TRY_HARDER,			"try_harder" }, \
-	{ XCHK_REAPING_DISABLED,		"reaping_disabled" }, \
 	{ XCHK_FSGATES_DRAIN,			"fsgates_drain" }, \
 	{ XCHK_NEED_DRAIN,			"need_drain" }, \
 	{ XREP_ALREADY_FIXED,			"already_fixed" }