Message ID | 168263577739.1719564.16150152466509865245.stgit@frogsfrogsfrogs (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | xfs: inodegc fixes for 6.4-rc1 | expand |
On Thu, Apr 27, 2023 at 03:49:37PM -0700, Darrick J. Wong wrote: > From: Darrick J. Wong <djwong@kernel.org> > > The fscounters scrub code doesn't work properly because it cannot > quiesce updates to the percpu counters in the filesystem, hence it > returns false corruption reports. This has been fixed properly in > one of the online repair patchsets that are under review by replacing > the xchk_disable_reaping calls with an exclusive filesystem freeze. > Disabling background gc isn't sufficient to fix the problem. > > In other words, scrub doesn't need to call xfs_inodegc_stop, which is > just as well since it wasn't correct to allow scrub to call > xfs_inodegc_start when something else could be calling xfs_inodegc_stop > (e.g. trying to freeze the filesystem). > > Neuter the scrubber for now, and remove the xchk_*_reaping functions. > > Signed-off-by: Darrick J. Wong <djwong@kernel.org> Looks ok, minor nit below. > @@ -453,6 +446,9 @@ xchk_fscounters( > if (frextents > mp->m_sb.sb_rextents) > xchk_set_corrupt(sc); > > + /* XXX: We can't quiesce percpu counter updates, so exit early. */ > + return 0; Can you just add to this that we can re-enable this functionality when we have the exclusive freeze functionality in the kernel? With that, Reviewed-by: Dave Chinner <dchinner@redhat.com>
On Fri, Apr 28, 2023 at 12:20:41PM +1000, Dave Chinner wrote: > On Thu, Apr 27, 2023 at 03:49:37PM -0700, Darrick J. Wong wrote: > > From: Darrick J. Wong <djwong@kernel.org> > > > > The fscounters scrub code doesn't work properly because it cannot > > quiesce updates to the percpu counters in the filesystem, hence it > > returns false corruption reports. This has been fixed properly in > > one of the online repair patchsets that are under review by replacing > > the xchk_disable_reaping calls with an exclusive filesystem freeze. > > Disabling background gc isn't sufficient to fix the problem. > > > > In other words, scrub doesn't need to call xfs_inodegc_stop, which is > > just as well since it wasn't correct to allow scrub to call > > xfs_inodegc_start when something else could be calling xfs_inodegc_stop > > (e.g. trying to freeze the filesystem). > > > > Neuter the scrubber for now, and remove the xchk_*_reaping functions. > > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org> > > Looks ok, minor nit below. > > > @@ -453,6 +446,9 @@ xchk_fscounters( > > if (frextents > mp->m_sb.sb_rextents) > > xchk_set_corrupt(sc); > > > > + /* XXX: We can't quiesce percpu counter updates, so exit early. */ > > + return 0; > > Can you just add to this that we can re-enable this functionality > when we have the exclusive freeze functionality in the kernel? Will do. --D > With that, > > Reviewed-by: Dave Chinner <dchinner@redhat.com> > > -- > Dave Chinner > david@fromorbit.com
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 9aa79665c608..7a20256be969 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -1164,32 +1164,6 @@ xchk_metadata_inode_forks( return 0; } -/* Pause background reaping of resources. */ -void -xchk_stop_reaping( - struct xfs_scrub *sc) -{ - sc->flags |= XCHK_REAPING_DISABLED; - xfs_blockgc_stop(sc->mp); - xfs_inodegc_stop(sc->mp); -} - -/* Restart background reaping of resources. */ -void -xchk_start_reaping( - struct xfs_scrub *sc) -{ - /* - * Readonly filesystems do not perform inactivation or speculative - * preallocation, so there's no need to restart the workers. - */ - if (!xfs_is_readonly(sc->mp)) { - xfs_inodegc_start(sc->mp); - xfs_blockgc_start(sc->mp); - } - sc->flags &= ~XCHK_REAPING_DISABLED; -} - /* * Enable filesystem hooks (i.e. runtime code patching) before starting a scrub * operation. Callers must not hold any locks that intersect with the CPU diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 18b5f2b62f13..791235cd9b00 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -156,8 +156,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm) } int xchk_metadata_inode_forks(struct xfs_scrub *sc); -void xchk_stop_reaping(struct xfs_scrub *sc); -void xchk_start_reaping(struct xfs_scrub *sc); /* * Setting up a hook to wait for intents to drain is costly -- we have to take diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index faa315be7978..0d90d00de770 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -150,13 +150,6 @@ xchk_setup_fscounters( if (error) return error; - /* - * Pause background reclaim while we're scrubbing to reduce the - * likelihood of background perturbations to the counters throwing off - * our calculations. - */ - xchk_stop_reaping(sc); - return xchk_trans_alloc(sc, 0); } @@ -453,6 +446,9 @@ xchk_fscounters( if (frextents > mp->m_sb.sb_rextents) xchk_set_corrupt(sc); + /* XXX: We can't quiesce percpu counter updates, so exit early. */ + return 0; + /* * If ifree exceeds icount by more than the minimum variance then * something's probably wrong with the counters. diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 02819bedc5b1..3d98f604765e 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -186,8 +186,6 @@ xchk_teardown( } if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) mnt_drop_write_file(sc->file); - if (sc->flags & XCHK_REAPING_DISABLED) - xchk_start_reaping(sc); if (sc->buf) { if (sc->buf_cleanup) sc->buf_cleanup(sc->buf); diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index e71903474cd7..b38e93830dde 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -106,7 +106,6 @@ struct xfs_scrub { /* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */ #define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */ -#define XCHK_REAPING_DISABLED (1 << 1) /* background block reaping paused */ #define XCHK_FSGATES_DRAIN (1 << 2) /* defer ops draining enabled */ #define XCHK_NEED_DRAIN (1 << 3) /* scrub needs to drain defer ops */ #define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */ diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 68efd6fda61c..b3894daeb86a 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -98,7 +98,6 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS); #define XFS_SCRUB_STATE_STRINGS \ { XCHK_TRY_HARDER, "try_harder" }, \ - { XCHK_REAPING_DISABLED, "reaping_disabled" }, \ { XCHK_FSGATES_DRAIN, "fsgates_drain" }, \ { XCHK_NEED_DRAIN, "need_drain" }, \ { XREP_ALREADY_FIXED, "already_fixed" }