diff mbox

[09/25] xfs: scrub the backup superblocks

Message ID 150706330624.19351.11752175372525617518.stgit@magnolia (mailing list archive)
State New, archived
Headers show

Commit Message

Darrick J. Wong Oct. 3, 2017, 8:41 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Ensure that the geometry presented in the backup superblocks matches
the primary superblock so that repair can recover the filesystem if
that primary gets corrupted.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/Makefile         |    1 
 fs/xfs/libxfs/xfs_fs.h  |    3 
 fs/xfs/scrub/agheader.c |  317 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/scrub/common.h   |    2 
 fs/xfs/scrub/scrub.c    |    4 +
 fs/xfs/scrub/scrub.h    |    1 
 6 files changed, 327 insertions(+), 1 deletion(-)
 create mode 100644 fs/xfs/scrub/agheader.c



--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Dave Chinner Oct. 4, 2017, 12:57 a.m. UTC | #1
On Tue, Oct 03, 2017 at 01:41:46PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Ensure that the geometry presented in the backup superblocks matches
> the primary superblock so that repair can recover the filesystem if
> that primary gets corrupted.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/Makefile         |    1 
>  fs/xfs/libxfs/xfs_fs.h  |    3 
>  fs/xfs/scrub/agheader.c |  317 +++++++++++++++++++++++++++++++++++++++++++++++
>  fs/xfs/scrub/common.h   |    2 
>  fs/xfs/scrub/scrub.c    |    4 +
>  fs/xfs/scrub/scrub.h    |    1 
>  6 files changed, 327 insertions(+), 1 deletion(-)
>  create mode 100644 fs/xfs/scrub/agheader.c
> 
> 
> diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> index 5888b9f..e92d04d 100644
> --- a/fs/xfs/Makefile
> +++ b/fs/xfs/Makefile
> @@ -146,6 +146,7 @@ ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
>  
>  xfs-y				+= $(addprefix scrub/, \
>  				   trace.o \
> +				   agheader.o \
>  				   btree.o \
>  				   common.o \
>  				   scrub.o \
> diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
> index 765f91e..8543cbb 100644
> --- a/fs/xfs/libxfs/xfs_fs.h
> +++ b/fs/xfs/libxfs/xfs_fs.h
> @@ -484,9 +484,10 @@ struct xfs_scrub_metadata {
>  
>  /* Scrub subcommands. */
>  #define XFS_SCRUB_TYPE_PROBE	0	/* presence test ioctl */
> +#define XFS_SCRUB_TYPE_SB	1	/* superblock */
>  
>  /* Number of scrub subcommands. */
> -#define XFS_SCRUB_TYPE_NR	1
> +#define XFS_SCRUB_TYPE_NR	2
>  
>  /* i: Repair this metadata. */
>  #define XFS_SCRUB_IFLAG_REPAIR		(1 << 0)
> diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
> new file mode 100644
> index 0000000..487c4f4
> --- /dev/null
> +++ b/fs/xfs/scrub/agheader.c
> @@ -0,0 +1,317 @@
> +/*
> + * Copyright (C) 2017 Oracle.  All Rights Reserved.
> + *
> + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it would be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write the Free Software Foundation,
> + * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
> + */
> +#include "xfs.h"
> +#include "xfs_fs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_mount.h"
> +#include "xfs_defer.h"
> +#include "xfs_btree.h"
> +#include "xfs_bit.h"
> +#include "xfs_log_format.h"
> +#include "xfs_trans.h"
> +#include "xfs_sb.h"
> +#include "xfs_inode.h"
> +#include "scrub/xfs_scrub.h"
> +#include "scrub/scrub.h"
> +#include "scrub/common.h"
> +#include "scrub/trace.h"
> +
> +/* Set us up to check an AG header. */
> +int
> +xfs_scrub_setup_ag_header(
> +	struct xfs_scrub_context	*sc,
> +	struct xfs_inode		*ip)
> +{

Not immediately clear what "AG header" is being set up here?

> +	struct xfs_mount		*mp = sc->mp;
> +
> +	if (sc->sm->sm_agno >= mp->m_sb.sb_agcount ||
> +	    sc->sm->sm_ino || sc->sm->sm_gen)
> +		return -EINVAL;
> +	return xfs_scrub_setup_fs(sc, ip);
> +}
> +
> +/* Superblock */
> +
> +/* Scrub the filesystem superblock. */
> +int
> +xfs_scrub_superblock(
> +	struct xfs_scrub_context	*sc)
> +{
> +	struct xfs_mount		*mp = sc->mp;
> +	struct xfs_buf			*bp;
> +	struct xfs_dsb			*sb;
> +	xfs_agnumber_t			agno;
> +	uint32_t			v2_ok;
> +	__be32				features_mask;
> +	int				error;
> +	__be16				vernum_mask;
> +
> +	agno = sc->sm->sm_agno;
> +	if (agno == 0)
> +		return 0;
> +
> +	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
> +		  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
> +		  XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops);
> +	if (!xfs_scrub_op_ok(sc, agno, XFS_SB_BLOCK(mp), &error))
> +		return error;

Might be worth a comment to say the verifier is doing validity/range
checks of the on-disk fields so they aren't duplicated here. I took
a little while to work out why range checks weren't being done
here...

> +
> +	sb = XFS_BUF_TO_SBP(bp);
> +
> +	/*
> +	 * Verify the geometries match.  Fields that are permanently
> +	 * set by mkfs are checked; fields that can be updated later
> +	 * (and are not propagated to backup superblocks) are preen
> +	 * checked.
> +	 */
> +	if (sb->sb_blocksize != cpu_to_be32(mp->m_sb.sb_blocksize))
> +		xfs_scrub_block_set_corrupt(sc, bp);
> +
> +	if (sb->sb_dblocks != cpu_to_be64(mp->m_sb.sb_dblocks))
> +		xfs_scrub_block_set_corrupt(sc, bp);

Just wondering - once we've set the corrupt flag, do we need to
bother checking any of the other fields? It makes no difference to
what is reported to userspace or the action it is going to take,
so couldn't we just do something like:

	if (sb->sb_dblocks != cpu_to_be64(mp->m_sb.sb_dblocks))
		goto out_corrupt;

.....
out_corrupt:
	xfs_scrub_block_set_corrupt(sc, bp);
	return 0;

Cheers,

Dave.
Darrick J. Wong Oct. 4, 2017, 4:06 a.m. UTC | #2
On Wed, Oct 04, 2017 at 11:57:09AM +1100, Dave Chinner wrote:
> On Tue, Oct 03, 2017 at 01:41:46PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Ensure that the geometry presented in the backup superblocks matches
> > the primary superblock so that repair can recover the filesystem if
> > that primary gets corrupted.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/xfs/Makefile         |    1 
> >  fs/xfs/libxfs/xfs_fs.h  |    3 
> >  fs/xfs/scrub/agheader.c |  317 +++++++++++++++++++++++++++++++++++++++++++++++
> >  fs/xfs/scrub/common.h   |    2 
> >  fs/xfs/scrub/scrub.c    |    4 +
> >  fs/xfs/scrub/scrub.h    |    1 
> >  6 files changed, 327 insertions(+), 1 deletion(-)
> >  create mode 100644 fs/xfs/scrub/agheader.c
> > 
> > 
> > diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> > index 5888b9f..e92d04d 100644
> > --- a/fs/xfs/Makefile
> > +++ b/fs/xfs/Makefile
> > @@ -146,6 +146,7 @@ ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
> >  
> >  xfs-y				+= $(addprefix scrub/, \
> >  				   trace.o \
> > +				   agheader.o \
> >  				   btree.o \
> >  				   common.o \
> >  				   scrub.o \
> > diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
> > index 765f91e..8543cbb 100644
> > --- a/fs/xfs/libxfs/xfs_fs.h
> > +++ b/fs/xfs/libxfs/xfs_fs.h
> > @@ -484,9 +484,10 @@ struct xfs_scrub_metadata {
> >  
> >  /* Scrub subcommands. */
> >  #define XFS_SCRUB_TYPE_PROBE	0	/* presence test ioctl */
> > +#define XFS_SCRUB_TYPE_SB	1	/* superblock */
> >  
> >  /* Number of scrub subcommands. */
> > -#define XFS_SCRUB_TYPE_NR	1
> > +#define XFS_SCRUB_TYPE_NR	2
> >  
> >  /* i: Repair this metadata. */
> >  #define XFS_SCRUB_IFLAG_REPAIR		(1 << 0)
> > diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
> > new file mode 100644
> > index 0000000..487c4f4
> > --- /dev/null
> > +++ b/fs/xfs/scrub/agheader.c
> > @@ -0,0 +1,317 @@
> > +/*
> > + * Copyright (C) 2017 Oracle.  All Rights Reserved.
> > + *
> > + * Author: Darrick J. Wong <darrick.wong@oracle.com>
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License
> > + * as published by the Free Software Foundation; either version 2
> > + * of the License, or (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it would be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write the Free Software Foundation,
> > + * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
> > + */
> > +#include "xfs.h"
> > +#include "xfs_fs.h"
> > +#include "xfs_shared.h"
> > +#include "xfs_format.h"
> > +#include "xfs_trans_resv.h"
> > +#include "xfs_mount.h"
> > +#include "xfs_defer.h"
> > +#include "xfs_btree.h"
> > +#include "xfs_bit.h"
> > +#include "xfs_log_format.h"
> > +#include "xfs_trans.h"
> > +#include "xfs_sb.h"
> > +#include "xfs_inode.h"
> > +#include "scrub/xfs_scrub.h"
> > +#include "scrub/scrub.h"
> > +#include "scrub/common.h"
> > +#include "scrub/trace.h"
> > +
> > +/* Set us up to check an AG header. */
> > +int
> > +xfs_scrub_setup_ag_header(
> > +	struct xfs_scrub_context	*sc,
> > +	struct xfs_inode		*ip)
> > +{
> 
> Not immediately clear what "AG header" is being set up here?

AGF/AGFL/AGI.  All three of them.  Maybe I ought to split them into
three separate files...?

> 
> > +	struct xfs_mount		*mp = sc->mp;
> > +
> > +	if (sc->sm->sm_agno >= mp->m_sb.sb_agcount ||
> > +	    sc->sm->sm_ino || sc->sm->sm_gen)
> > +		return -EINVAL;
> > +	return xfs_scrub_setup_fs(sc, ip);
> > +}
> > +
> > +/* Superblock */
> > +
> > +/* Scrub the filesystem superblock. */
> > +int
> > +xfs_scrub_superblock(
> > +	struct xfs_scrub_context	*sc)
> > +{
> > +	struct xfs_mount		*mp = sc->mp;
> > +	struct xfs_buf			*bp;
> > +	struct xfs_dsb			*sb;
> > +	xfs_agnumber_t			agno;
> > +	uint32_t			v2_ok;
> > +	__be32				features_mask;
> > +	int				error;
> > +	__be16				vernum_mask;
> > +
> > +	agno = sc->sm->sm_agno;
> > +	if (agno == 0)
> > +		return 0;
> > +
> > +	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
> > +		  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
> > +		  XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops);
> > +	if (!xfs_scrub_op_ok(sc, agno, XFS_SB_BLOCK(mp), &error))
> > +		return error;
> 
> Might be worth a comment to say the verifier is doing validity/range
> checks of the on-disk fields so they aren't duplicated here. I took
> a little while to work out why range checks weren't being done
> here...

Ok.

> > +
> > +	sb = XFS_BUF_TO_SBP(bp);
> > +
> > +	/*
> > +	 * Verify the geometries match.  Fields that are permanently
> > +	 * set by mkfs are checked; fields that can be updated later
> > +	 * (and are not propagated to backup superblocks) are preen
> > +	 * checked.
> > +	 */
> > +	if (sb->sb_blocksize != cpu_to_be32(mp->m_sb.sb_blocksize))
> > +		xfs_scrub_block_set_corrupt(sc, bp);
> > +
> > +	if (sb->sb_dblocks != cpu_to_be64(mp->m_sb.sb_dblocks))
> > +		xfs_scrub_block_set_corrupt(sc, bp);
> 
> Just wondering - once we've set the corrupt flag, do we need to
> bother checking any of the other fields? It makes no difference to
> what is reported to userspace or the action it is going to take,
> so couldn't we just do something like:

This is something I've also struggled with for quite a while.  The most
pragmatic reaction is to set the corrupt flag and jump out immediately
on any failure since we really only care about whether or not we have to
react to bad metadata either by fixing it or shutting down.

On the other hand, continuing with the checks gives us the ability to
report /everything/ that's broken in the data structure, which could be
useful for online forensics (cough) to correlate scrub's report against
anything else that has popped up in dmesg.

A downside of having everything jump to a single call to
xfs_scrub_block_set_corrupt at the end of the function is that the
return address that we record in the tracepoint will be the end of the
function instead of right after the failing check.

(Turning on the ludicrous speed optimizer might do that anyway...)

--D

> 	if (sb->sb_dblocks != cpu_to_be64(mp->m_sb.sb_dblocks))
> 		goto out_corrupt;
> 
> .....
> out_corrupt:
> 	xfs_scrub_block_set_corrupt(sc, bp);
> 	return 0;
> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Chinner Oct. 4, 2017, 6:13 a.m. UTC | #3
On Tue, Oct 03, 2017 at 09:06:46PM -0700, Darrick J. Wong wrote:
> On Wed, Oct 04, 2017 at 11:57:09AM +1100, Dave Chinner wrote:
> > On Tue, Oct 03, 2017 at 01:41:46PM -0700, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > 
> > > Ensure that the geometry presented in the backup superblocks matches
> > > the primary superblock so that repair can recover the filesystem if
> > > that primary gets corrupted.
.....

> > > +
> > > +/* Set us up to check an AG header. */
> > > +int
> > > +xfs_scrub_setup_ag_header(
> > > +	struct xfs_scrub_context	*sc,
> > > +	struct xfs_inode		*ip)
> > > +{
> > 
> > Not immediately clear what "AG header" is being set up here?
> 
> AGF/AGFL/AGI.  All three of them.  Maybe I ought to split them into
> three separate files...?

No, just clarify the comment.

/*
 * Set up scrub to check all the static metadata in each AG. These
 * are the SB, AGF, AGI and AGFL header structures.
 */

> > > +	sb = XFS_BUF_TO_SBP(bp);
> > > +
> > > +	/*
> > > +	 * Verify the geometries match.  Fields that are permanently
> > > +	 * set by mkfs are checked; fields that can be updated later
> > > +	 * (and are not propagated to backup superblocks) are preen
> > > +	 * checked.
> > > +	 */
> > > +	if (sb->sb_blocksize != cpu_to_be32(mp->m_sb.sb_blocksize))
> > > +		xfs_scrub_block_set_corrupt(sc, bp);
> > > +
> > > +	if (sb->sb_dblocks != cpu_to_be64(mp->m_sb.sb_dblocks))
> > > +		xfs_scrub_block_set_corrupt(sc, bp);
> > 
> > Just wondering - once we've set the corrupt flag, do we need to
> > bother checking any of the other fields? It makes no difference to
> > what is reported to userspace or the action it is going to take,
> > so couldn't we just do something like:
> 
> This is something I've also struggled with for quite a while.  The most
> pragmatic reaction is to set the corrupt flag and jump out immediately
> on any failure since we really only care about whether or not we have to
> react to bad metadata either by fixing it or shutting down.

*nod*

> On the other hand, continuing with the checks gives us the ability to
> report /everything/ that's broken in the data structure, which could be
> useful for online forensics (cough) to correlate scrub's report against
> anything else that has popped up in dmesg.

Report where, exactly? The only detailed report we get out of this
is tracepoint information, isn't it? And we'll have to convert the
return address in the tracepoint to a line number to work out what
actually was reported as corrupt. I really can't see myself spending
the time to do that for every corruption in a single structure. Once
I know the structure is corrupt, I don't care about other
corruptions I just want to move on to repair.

IMO, scrub is for detecting errors so they can be repaired or
analysed, not for doing fault analysis. For actual forensics work
we'll still be using xfs_db - analysis processes that require manual
decoding of tracepoints, structures and/or error reports is just not
going to be efficient or usable by the average developer....

> A downside of having everything jump to a single call to
> xfs_scrub_block_set_corrupt at the end of the function is that the
> return address that we record in the tracepoint will be the end of the
> function instead of right after the failing check.

That's the same optimisation issue we solved for the verifiers
tracing, right?

Cheers,

Dave.
Darrick J. Wong Oct. 4, 2017, 5:56 p.m. UTC | #4
On Wed, Oct 04, 2017 at 05:13:00PM +1100, Dave Chinner wrote:
> On Tue, Oct 03, 2017 at 09:06:46PM -0700, Darrick J. Wong wrote:
> > On Wed, Oct 04, 2017 at 11:57:09AM +1100, Dave Chinner wrote:
> > > On Tue, Oct 03, 2017 at 01:41:46PM -0700, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > 
> > > > Ensure that the geometry presented in the backup superblocks matches
> > > > the primary superblock so that repair can recover the filesystem if
> > > > that primary gets corrupted.
> .....
> 
> > > > +
> > > > +/* Set us up to check an AG header. */
> > > > +int
> > > > +xfs_scrub_setup_ag_header(
> > > > +	struct xfs_scrub_context	*sc,
> > > > +	struct xfs_inode		*ip)
> > > > +{
> > > 
> > > Not immediately clear what "AG header" is being set up here?
> > 
> > AGF/AGFL/AGI.  All three of them.  Maybe I ought to split them into
> > three separate files...?
> 
> No, just clarify the comment.
> 
> /*
>  * Set up scrub to check all the static metadata in each AG. These
>  * are the SB, AGF, AGI and AGFL header structures.
>  */
> 
> > > > +	sb = XFS_BUF_TO_SBP(bp);
> > > > +
> > > > +	/*
> > > > +	 * Verify the geometries match.  Fields that are permanently
> > > > +	 * set by mkfs are checked; fields that can be updated later
> > > > +	 * (and are not propagated to backup superblocks) are preen
> > > > +	 * checked.
> > > > +	 */
> > > > +	if (sb->sb_blocksize != cpu_to_be32(mp->m_sb.sb_blocksize))
> > > > +		xfs_scrub_block_set_corrupt(sc, bp);
> > > > +
> > > > +	if (sb->sb_dblocks != cpu_to_be64(mp->m_sb.sb_dblocks))
> > > > +		xfs_scrub_block_set_corrupt(sc, bp);
> > > 
> > > Just wondering - once we've set the corrupt flag, do we need to
> > > bother checking any of the other fields? It makes no difference to
> > > what is reported to userspace or the action it is going to take,
> > > so couldn't we just do something like:
> > 
> > This is something I've also struggled with for quite a while.  The most
> > pragmatic reaction is to set the corrupt flag and jump out immediately
> > on any failure since we really only care about whether or not we have to
> > react to bad metadata either by fixing it or shutting down.
> 
> *nod*
> 
> > On the other hand, continuing with the checks gives us the ability to
> > report /everything/ that's broken in the data structure, which could be
> > useful for online forensics (cough) to correlate scrub's report against
> > anything else that has popped up in dmesg.
> 
> Report where, exactly? The only detailed report we get out of this
> is tracepoint information, isn't it? And we'll have to convert the
> return address in the tracepoint to a line number to work out what
> actually was reported as corrupt. I really can't see myself spending
> the time to do that for every corruption in a single structure. Once
> I know the structure is corrupt, I don't care about other
> corruptions I just want to move on to repair.
> 
> IMO, scrub is for detecting errors so they can be repaired or
> analysed, not for doing fault analysis. For actual forensics work
> we'll still be using xfs_db - analysis processes that require manual
> decoding of tracepoints, structures and/or error reports is just not
> going to be efficient or usable by the average developer....
> 
> > A downside of having everything jump to a single call to
> > xfs_scrub_block_set_corrupt at the end of the function is that the
> > return address that we record in the tracepoint will be the end of the
> > function instead of right after the failing check.
> 
> That's the same optimisation issue we solved for the verifiers
> tracing, right?

Not quite.  For the verifiers we adopted:

#define __this_address   ({ __label__ __here; __here: asm volatile(""); &&__here; })

(The asm volatile("") piece will (so far as I can tell) prevent the
optimizer from moving the label around within the verifier functions.)

Whereas for scrub we just use __return_address, which is a gcc-ism which
doesn't disable reorganization optimizations.

Granted I guess I could rework all those little helpers to take (void *)
and then stuff in __this_address...

--D

> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 5888b9f..e92d04d 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -146,6 +146,7 @@  ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
 
 xfs-y				+= $(addprefix scrub/, \
 				   trace.o \
+				   agheader.o \
 				   btree.o \
 				   common.o \
 				   scrub.o \
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 765f91e..8543cbb 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -484,9 +484,10 @@  struct xfs_scrub_metadata {
 
 /* Scrub subcommands. */
 #define XFS_SCRUB_TYPE_PROBE	0	/* presence test ioctl */
+#define XFS_SCRUB_TYPE_SB	1	/* superblock */
 
 /* Number of scrub subcommands. */
-#define XFS_SCRUB_TYPE_NR	1
+#define XFS_SCRUB_TYPE_NR	2
 
 /* i: Repair this metadata. */
 #define XFS_SCRUB_IFLAG_REPAIR		(1 << 0)
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
new file mode 100644
index 0000000..487c4f4
--- /dev/null
+++ b/fs/xfs/scrub/agheader.c
@@ -0,0 +1,317 @@ 
+/*
+ * Copyright (C) 2017 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+
+/* Set us up to check static AG metadata: the SB, AGF, AGFL, or AGI. */
+int
+xfs_scrub_setup_ag_header(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	struct xfs_mount		*mp = sc->mp;
+
+	if (sc->sm->sm_agno >= mp->m_sb.sb_agcount ||
+	    sc->sm->sm_ino || sc->sm->sm_gen)
+		return -EINVAL;
+	return xfs_scrub_setup_fs(sc, ip);
+}
+
+/* Superblock */
+
+/* Compare a backup superblock to the primary; AG 0 is not checked. */
+int
+xfs_scrub_superblock(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_buf			*bp;
+	struct xfs_dsb			*sb;
+	xfs_agnumber_t			agno;
+	uint32_t			v2_ok;
+	__be32				features_mask;
+	int				error;
+	__be16				vernum_mask;
+
+	agno = sc->sm->sm_agno;
+	if (agno == 0)
+		return 0;
+
+	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+		  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
+		  XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops);
+	if (!xfs_scrub_op_ok(sc, agno, XFS_SB_BLOCK(mp), &error))
+		return error;
+
+	sb = XFS_BUF_TO_SBP(bp);
+
+	/*
+	 * The sb verifier already range-checked the fields, so only
+	 * compare geometry with the primary.  Fields fixed by mkfs
+	 * flag corruption on mismatch; fields that change later (and
+	 * are not propagated to backup supers) only flag for preening.
+	 */
+	if (sb->sb_blocksize != cpu_to_be32(mp->m_sb.sb_blocksize))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_dblocks != cpu_to_be64(mp->m_sb.sb_dblocks))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_rblocks != cpu_to_be64(mp->m_sb.sb_rblocks))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_rextents != cpu_to_be64(mp->m_sb.sb_rextents))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (!uuid_equal(&sb->sb_uuid, &mp->m_sb.sb_uuid))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_logstart != cpu_to_be64(mp->m_sb.sb_logstart))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_rootino != cpu_to_be64(mp->m_sb.sb_rootino))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_rbmino != cpu_to_be64(mp->m_sb.sb_rbmino))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_rsumino != cpu_to_be64(mp->m_sb.sb_rsumino))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_rextsize != cpu_to_be32(mp->m_sb.sb_rextsize))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_agblocks != cpu_to_be32(mp->m_sb.sb_agblocks))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_agcount != cpu_to_be32(mp->m_sb.sb_agcount))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_rbmblocks != cpu_to_be32(mp->m_sb.sb_rbmblocks))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_logblocks != cpu_to_be32(mp->m_sb.sb_logblocks))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	/* Check sb_versionnum bits that are set at mkfs time. */
+	vernum_mask = cpu_to_be16(~XFS_SB_VERSION_OKBITS |
+				  XFS_SB_VERSION_NUMBITS |
+				  XFS_SB_VERSION_ALIGNBIT |
+				  XFS_SB_VERSION_DALIGNBIT |
+				  XFS_SB_VERSION_SHAREDBIT |
+				  XFS_SB_VERSION_LOGV2BIT |
+				  XFS_SB_VERSION_SECTORBIT |
+				  XFS_SB_VERSION_EXTFLGBIT |
+				  XFS_SB_VERSION_DIRV2BIT);
+	if ((sb->sb_versionnum & vernum_mask) !=
+	    (cpu_to_be16(mp->m_sb.sb_versionnum) & vernum_mask))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	/* Check sb_versionnum bits that can be set after mkfs time. */
+	vernum_mask = cpu_to_be16(XFS_SB_VERSION_ATTRBIT |
+				  XFS_SB_VERSION_NLINKBIT |
+				  XFS_SB_VERSION_QUOTABIT);
+	if ((sb->sb_versionnum & vernum_mask) !=
+	    (cpu_to_be16(mp->m_sb.sb_versionnum) & vernum_mask))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_sectsize != cpu_to_be16(mp->m_sb.sb_sectsize))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_inodesize != cpu_to_be16(mp->m_sb.sb_inodesize))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_inopblock != cpu_to_be16(mp->m_sb.sb_inopblock))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (memcmp(sb->sb_fname, mp->m_sb.sb_fname, sizeof(sb->sb_fname)))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_blocklog != mp->m_sb.sb_blocklog)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_sectlog != mp->m_sb.sb_sectlog)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_inodelog != mp->m_sb.sb_inodelog)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_inopblog != mp->m_sb.sb_inopblog)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_agblklog != mp->m_sb.sb_agblklog)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_rextslog != mp->m_sb.sb_rextslog)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_imax_pct != mp->m_sb.sb_imax_pct)
+		xfs_scrub_block_set_preen(sc, bp);
+
+	/*
+	 * Skip the summary counters since we track them in memory anyway.
+	 * sb_icount, sb_ifree, sb_fdblocks, sb_frexents
+	 */
+
+	if (sb->sb_uquotino != cpu_to_be64(mp->m_sb.sb_uquotino))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_gquotino != cpu_to_be64(mp->m_sb.sb_gquotino))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	/*
+	 * Skip the quota flags since repair will force quotacheck.
+	 * sb_qflags
+	 */
+
+	if (sb->sb_flags != mp->m_sb.sb_flags)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_shared_vn != mp->m_sb.sb_shared_vn)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_inoalignmt != cpu_to_be32(mp->m_sb.sb_inoalignmt))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_unit != cpu_to_be32(mp->m_sb.sb_unit))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_width != cpu_to_be32(mp->m_sb.sb_width))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (sb->sb_dirblklog != mp->m_sb.sb_dirblklog)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_logsectlog != mp->m_sb.sb_logsectlog)
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_logsectsize != cpu_to_be16(mp->m_sb.sb_logsectsize))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	if (sb->sb_logsunit != cpu_to_be32(mp->m_sb.sb_logsunit))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	/* Do we see any invalid bits in sb_features2? */
+	if (!xfs_sb_version_hasmorebits(&mp->m_sb)) {
+		if (sb->sb_features2 != 0)
+			xfs_scrub_block_set_corrupt(sc, bp);
+	} else {
+		v2_ok = XFS_SB_VERSION2_OKBITS;
+		if (XFS_SB_VERSION_NUM(&mp->m_sb) >= XFS_SB_VERSION_5)
+			v2_ok |= XFS_SB_VERSION2_CRCBIT;
+
+		if (sb->sb_features2 & cpu_to_be32(~v2_ok))
+			xfs_scrub_block_set_corrupt(sc, bp);
+
+		if (sb->sb_features2 != sb->sb_bad_features2)
+			xfs_scrub_block_set_preen(sc, bp);
+	}
+
+	/* Check sb_features2 flags that are set at mkfs time. */
+	features_mask = cpu_to_be32(XFS_SB_VERSION2_LAZYSBCOUNTBIT |
+				    XFS_SB_VERSION2_PROJID32BIT |
+				    XFS_SB_VERSION2_CRCBIT |
+				    XFS_SB_VERSION2_FTYPE);
+	if ((sb->sb_features2 & features_mask) !=
+	    (cpu_to_be32(mp->m_sb.sb_features2) & features_mask))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	/* sb_features2 flags settable after mkfs only need preening. */
+	features_mask = cpu_to_be32(XFS_SB_VERSION2_ATTR2BIT);
+	if ((sb->sb_features2 & features_mask) !=
+	    (cpu_to_be32(mp->m_sb.sb_features2) & features_mask))
+		xfs_scrub_block_set_preen(sc, bp);
+
+	if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+		/* all v5 fields must be zero */
+		if (memchr_inv(&sb->sb_features_compat, 0,
+				sizeof(struct xfs_dsb) -
+				offsetof(struct xfs_dsb, sb_features_compat)))
+			xfs_scrub_block_set_corrupt(sc, bp);
+	} else {
+		/* Check compat flags; all are set at mkfs time. */
+		features_mask = cpu_to_be32(XFS_SB_FEAT_COMPAT_UNKNOWN);
+		if ((sb->sb_features_compat & features_mask) !=
+		    (cpu_to_be32(mp->m_sb.sb_features_compat) & features_mask))
+			xfs_scrub_block_set_corrupt(sc, bp);
+
+		/* Check ro compat flags; all are set at mkfs time. */
+		features_mask = cpu_to_be32(XFS_SB_FEAT_RO_COMPAT_UNKNOWN |
+					    XFS_SB_FEAT_RO_COMPAT_FINOBT |
+					    XFS_SB_FEAT_RO_COMPAT_RMAPBT |
+					    XFS_SB_FEAT_RO_COMPAT_REFLINK);
+		if ((sb->sb_features_ro_compat & features_mask) !=
+		    (cpu_to_be32(mp->m_sb.sb_features_ro_compat) & features_mask))
+			xfs_scrub_block_set_corrupt(sc, bp);
+
+		/* Check incompat flags; all are set at mkfs time. */
+		features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_UNKNOWN |
+					    XFS_SB_FEAT_INCOMPAT_FTYPE |
+					    XFS_SB_FEAT_INCOMPAT_SPINODES |
+					    XFS_SB_FEAT_INCOMPAT_META_UUID);
+		if ((sb->sb_features_incompat & features_mask) !=
+		    (cpu_to_be32(mp->m_sb.sb_features_incompat) & features_mask))
+			xfs_scrub_block_set_corrupt(sc, bp);
+
+		/* Check log incompat flags; all are set at mkfs time. */
+		features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN);
+		if ((sb->sb_features_log_incompat & features_mask) !=
+		    (cpu_to_be32(mp->m_sb.sb_features_log_incompat) & features_mask))
+			xfs_scrub_block_set_corrupt(sc, bp);
+
+		/* Don't care about sb_crc */
+
+		if (sb->sb_spino_align != cpu_to_be32(mp->m_sb.sb_spino_align))
+			xfs_scrub_block_set_corrupt(sc, bp);
+
+		if (sb->sb_pquotino != cpu_to_be64(mp->m_sb.sb_pquotino))
+			xfs_scrub_block_set_preen(sc, bp);
+
+		/* Don't care about sb_lsn */
+	}
+
+	if (xfs_sb_version_hasmetauuid(&mp->m_sb)) {
+		/* The metadata UUID must be the same for all supers */
+		if (!uuid_equal(&sb->sb_meta_uuid, &mp->m_sb.sb_meta_uuid))
+			xfs_scrub_block_set_corrupt(sc, bp);
+	}
+
+	/* Everything else must be zero. */
+	if (memchr_inv(sb + 1, 0,
+			BBTOB(bp->b_length) - sizeof(struct xfs_dsb)))
+		xfs_scrub_block_set_corrupt(sc, bp);
+
+	return error;
+}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 979ad89..390f772 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -76,6 +76,8 @@  void xfs_scrub_set_incomplete(struct xfs_scrub_context *sc);
 
 /* Setup functions */
 int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip);
+int xfs_scrub_setup_ag_header(struct xfs_scrub_context *sc,
+			      struct xfs_inode *ip);
 
 void xfs_scrub_ag_free(struct xfs_scrub_context *sc, struct xfs_scrub_ag *sa);
 int xfs_scrub_ag_init(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index ae577d7..7f6f997 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -153,6 +153,10 @@  static const struct xfs_scrub_meta_ops meta_scrub_ops[] = {
 		.setup	= xfs_scrub_setup_fs,
 		.scrub	= xfs_scrub_probe,
 	},
+	{ /* superblock */
+		.setup	= xfs_scrub_setup_ag_header,
+		.scrub	= xfs_scrub_superblock,
+	},
 };
 
 /* This isn't a stable feature, warn once per day. */
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 1385295..13e3f9b 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -67,5 +67,6 @@  struct xfs_scrub_context {
 
 /* Metadata scrubbers */
 int xfs_scrub_tester(struct xfs_scrub_context *sc);
+int xfs_scrub_superblock(struct xfs_scrub_context *sc);
 
 #endif	/* __XFS_SCRUB_SCRUB_H__ */