diff mbox

[11/25] xfs: scrub the AGI

Message ID 150706331918.19351.1010060377239825093.stgit@magnolia (mailing list archive)
State Superseded
Headers show

Commit Message

Darrick J. Wong Oct. 3, 2017, 8:41 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Add a forgotten check to the AGI verifier, then wire up the scrub
infrastructure to check the AGI contents.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_fs.h  |    3 +-
 fs/xfs/scrub/agheader.c |   88 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/scrub/common.c   |    6 ++-
 fs/xfs/scrub/scrub.c    |    4 ++
 fs/xfs/scrub/scrub.h    |    1 +
 5 files changed, 99 insertions(+), 3 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Dave Chinner Oct. 4, 2017, 1:43 a.m. UTC | #1
On Tue, Oct 03, 2017 at 01:41:59PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Add a forgotten check to the AGI verifier, then wire up the scrub
> infrastructure to check the AGI contents.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_fs.h  |    3 +-
>  fs/xfs/scrub/agheader.c |   88 +++++++++++++++++++++++++++++++++++++++++++++++
>  fs/xfs/scrub/common.c   |    6 ++-
>  fs/xfs/scrub/scrub.c    |    4 ++
>  fs/xfs/scrub/scrub.h    |    1 +
>  5 files changed, 99 insertions(+), 3 deletions(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
> index aeb2a66..1e326dd 100644
> --- a/fs/xfs/libxfs/xfs_fs.h
> +++ b/fs/xfs/libxfs/xfs_fs.h
> @@ -487,9 +487,10 @@ struct xfs_scrub_metadata {
>  #define XFS_SCRUB_TYPE_SB	1	/* superblock */
>  #define XFS_SCRUB_TYPE_AGF	2	/* AG free header */
>  #define XFS_SCRUB_TYPE_AGFL	3	/* AG free list */
> +#define XFS_SCRUB_TYPE_AGI	4	/* AG inode header */
>  
>  /* Number of scrub subcommands. */
> -#define XFS_SCRUB_TYPE_NR	4
> +#define XFS_SCRUB_TYPE_NR	5
>  
>  /* i: Repair this metadata. */
>  #define XFS_SCRUB_IFLAG_REPAIR		(1 << 0)
> diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
> index 7fe6630..3d269c2 100644
> --- a/fs/xfs/scrub/agheader.c
> +++ b/fs/xfs/scrub/agheader.c
> @@ -535,3 +535,91 @@ xfs_scrub_agfl(
>  out:
>  	return error;
>  }
> +
> +/* AGI */
> +
> +/* Scrub the AGI. */
> +int
> +xfs_scrub_agi(
> +	struct xfs_scrub_context	*sc)
> +{
> +	struct xfs_mount		*mp = sc->mp;
> +	struct xfs_agi			*agi;
> +	xfs_daddr_t			daddr;
> +	xfs_daddr_t			eofs;
> +	xfs_agnumber_t			agno;
> +	xfs_agblock_t			agbno;
> +	xfs_agblock_t			eoag;
> +	xfs_agino_t			agino;
> +	xfs_agino_t			first_agino;
> +	xfs_agino_t			last_agino;
> +	int				i;
> +	int				level;
> +	int				error = 0;
> +
> +	agno = sc->sm->sm_agno;
> +	error = xfs_scrub_load_ag_headers(sc, agno, XFS_SCRUB_TYPE_AGI);
> +	if (!xfs_scrub_op_ok(sc, agno, XFS_AGI_BLOCK(sc->mp), &error))
> +		goto out;
> +
> +	agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
> +	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
> +
> +	/* Check the AG length */
> +	eoag = be32_to_cpu(agi->agi_length);
> +	if (eoag != xfs_scrub_ag_blocks(mp, agno))
> +		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);

Should we be cross checking that the AGI and AGF both have
the same length here?

> +
> +	/* Check btree roots and levels */
> +	agbno = be32_to_cpu(agi->agi_root);
> +	daddr = XFS_AGB_TO_DADDR(mp, agno, agbno);
> +	if (agbno <= XFS_AGI_BLOCK(mp) || agbno >= mp->m_sb.sb_agblocks ||
> +	    agbno >= eoag || daddr >= eofs)
> +		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);

xfs_verify_agbno(), again :P

> +
> +	level = be32_to_cpu(agi->agi_level);
> +	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
> +		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
> +
> +	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
> +		agbno = be32_to_cpu(agi->agi_free_root);
> +		daddr = XFS_AGB_TO_DADDR(mp, agno, agbno);
> +		if (agbno <= XFS_AGI_BLOCK(mp) ||
> +		    agbno >= mp->m_sb.sb_agblocks ||
> +		    agbno >= eoag ||
> +		    daddr >= eofs)

Broken records are us....

> +			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
> +
> +		level = be32_to_cpu(agi->agi_free_level);
> +		if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
> +			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
> +	}
> +
> +	/* Check inode counters */
> +	first_agino = XFS_OFFBNO_TO_AGINO(mp, XFS_AGI_BLOCK(mp) + 1, 0);

Don't think this is right. AGFL, not AGI....

> +	last_agino = XFS_OFFBNO_TO_AGINO(mp, eoag + 1, 0) - 1;

Not sure this is right, either, because inode chunks won't be
allocated over the end of the AG. hence if the eoag is not chunk
aligned, there will be up to (chunk size - 1) blocks inodes won't be
allocated in...

> +	agino = be32_to_cpu(agi->agi_count);
> +	if (agino > last_agino - first_agino + 1 ||
> +	    agino < be32_to_cpu(agi->agi_freecount))
> +		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);

Please don't use agino as a count of inodes - this confused me
very much because I was wondering how these checks were in any way
realted to valid AG inode numbers....

> +
> +	/* Check inode pointers */
> +	agino = be32_to_cpu(agi->agi_newino);
> +	if (agino != NULLAGINO && (agino < first_agino || agino > last_agino))
> +		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
> +	agino = be32_to_cpu(agi->agi_dirino);
> +	if (agino != NULLAGINO && (agino < first_agino || agino > last_agino))
> +		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);

Perhaps we also need a xfs_verify_agino() helper here.

> +	/* Check unlinked inode buckets */
> +	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
> +		agino = be32_to_cpu(agi->agi_unlinked[i]);
> +		if (agino == NULLAGINO)
> +			continue;
> +		if (agino < first_agino || agino > last_agino)
> +			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);

This is effectively the same check as above:

	if (agino != NULLAGINO && (agino < first_agino || agino > last_agino))

so all these checks could use the same helper to make it easier
to read.

Cheers,

Dave.
Darrick J. Wong Oct. 4, 2017, 4:25 a.m. UTC | #2
On Wed, Oct 04, 2017 at 12:43:47PM +1100, Dave Chinner wrote:
> On Tue, Oct 03, 2017 at 01:41:59PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Add a forgotten check to the AGI verifier, then wire up the scrub
> > infrastructure to check the AGI contents.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/xfs/libxfs/xfs_fs.h  |    3 +-
> >  fs/xfs/scrub/agheader.c |   88 +++++++++++++++++++++++++++++++++++++++++++++++
> >  fs/xfs/scrub/common.c   |    6 ++-
> >  fs/xfs/scrub/scrub.c    |    4 ++
> >  fs/xfs/scrub/scrub.h    |    1 +
> >  5 files changed, 99 insertions(+), 3 deletions(-)
> > 
> > 
> > diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
> > index aeb2a66..1e326dd 100644
> > --- a/fs/xfs/libxfs/xfs_fs.h
> > +++ b/fs/xfs/libxfs/xfs_fs.h
> > @@ -487,9 +487,10 @@ struct xfs_scrub_metadata {
> >  #define XFS_SCRUB_TYPE_SB	1	/* superblock */
> >  #define XFS_SCRUB_TYPE_AGF	2	/* AG free header */
> >  #define XFS_SCRUB_TYPE_AGFL	3	/* AG free list */
> > +#define XFS_SCRUB_TYPE_AGI	4	/* AG inode header */
> >  
> >  /* Number of scrub subcommands. */
> > -#define XFS_SCRUB_TYPE_NR	4
> > +#define XFS_SCRUB_TYPE_NR	5
> >  
> >  /* i: Repair this metadata. */
> >  #define XFS_SCRUB_IFLAG_REPAIR		(1 << 0)
> > diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
> > index 7fe6630..3d269c2 100644
> > --- a/fs/xfs/scrub/agheader.c
> > +++ b/fs/xfs/scrub/agheader.c
> > @@ -535,3 +535,91 @@ xfs_scrub_agfl(
> >  out:
> >  	return error;
> >  }
> > +
> > +/* AGI */
> > +
> > +/* Scrub the AGI. */
> > +int
> > +xfs_scrub_agi(
> > +	struct xfs_scrub_context	*sc)
> > +{
> > +	struct xfs_mount		*mp = sc->mp;
> > +	struct xfs_agi			*agi;
> > +	xfs_daddr_t			daddr;
> > +	xfs_daddr_t			eofs;
> > +	xfs_agnumber_t			agno;
> > +	xfs_agblock_t			agbno;
> > +	xfs_agblock_t			eoag;
> > +	xfs_agino_t			agino;
> > +	xfs_agino_t			first_agino;
> > +	xfs_agino_t			last_agino;
> > +	int				i;
> > +	int				level;
> > +	int				error = 0;
> > +
> > +	agno = sc->sm->sm_agno;
> > +	error = xfs_scrub_load_ag_headers(sc, agno, XFS_SCRUB_TYPE_AGI);
> > +	if (!xfs_scrub_op_ok(sc, agno, XFS_AGI_BLOCK(sc->mp), &error))
> > +		goto out;
> > +
> > +	agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
> > +	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
> > +
> > +	/* Check the AG length */
> > +	eoag = be32_to_cpu(agi->agi_length);
> > +	if (eoag != xfs_scrub_ag_blocks(mp, agno))
> > +		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
> 
> Should we be cross checking that the AGI and AGF both have
> the same length here?

Isn't that what this does?  Albeit indirectly?

xfs_scrub_ag_blocks returns sb_agcount for every AG except the last one.
For the last AG it returns (sb_dblocks - (all blocks in the other AGs))
which should be the same as agf->agf_length, right?

> > +
> > +	/* Check btree roots and levels */
> > +	agbno = be32_to_cpu(agi->agi_root);
> > +	daddr = XFS_AGB_TO_DADDR(mp, agno, agbno);
> > +	if (agbno <= XFS_AGI_BLOCK(mp) || agbno >= mp->m_sb.sb_agblocks ||
> > +	    agbno >= eoag || daddr >= eofs)
> > +		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
> 
> xfs_verify_agbno(), again :P
> 
> > +
> > +	level = be32_to_cpu(agi->agi_level);
> > +	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
> > +		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
> > +
> > +	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
> > +		agbno = be32_to_cpu(agi->agi_free_root);
> > +		daddr = XFS_AGB_TO_DADDR(mp, agno, agbno);
> > +		if (agbno <= XFS_AGI_BLOCK(mp) ||
> > +		    agbno >= mp->m_sb.sb_agblocks ||
> > +		    agbno >= eoag ||
> > +		    daddr >= eofs)
> 
> Broken records are us....
> 
> > +			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
> > +
> > +		level = be32_to_cpu(agi->agi_free_level);
> > +		if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
> > +			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
> > +	}
> > +
> > +	/* Check inode counters */
> > +	first_agino = XFS_OFFBNO_TO_AGINO(mp, XFS_AGI_BLOCK(mp) + 1, 0);
> 
> Don't think this is right. AGFL, not AGI....

Yes.

> > +	last_agino = XFS_OFFBNO_TO_AGINO(mp, eoag + 1, 0) - 1;
> 
> Not sure this is right, either, because inode chunks won't be
> allocated over the end of the AG. hence if the eoag is not chunk
> aligned, there will be up to (chunk size - 1) blocks inodes won't be
> allocated in...

Yes.

> > +	agino = be32_to_cpu(agi->agi_count);
> > +	if (agino > last_agino - first_agino + 1 ||
> > +	    agino < be32_to_cpu(agi->agi_freecount))
> > +		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
> 
> Please don't use agino as a count of inodes - this confused me
> very much because I was wondering how these checks were in any way
> realted to valid AG inode numbers....

<nod> Sorry about that, will fix.

> > +
> > +	/* Check inode pointers */
> > +	agino = be32_to_cpu(agi->agi_newino);
> > +	if (agino != NULLAGINO && (agino < first_agino || agino > last_agino))
> > +		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
> > +	agino = be32_to_cpu(agi->agi_dirino);
> > +	if (agino != NULLAGINO && (agino < first_agino || agino > last_agino))
> > +		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
> 
> Perhaps we also need a xfs_verify_agino() helper here.
> 
> > +	/* Check unlinked inode buckets */
> > +	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
> > +		agino = be32_to_cpu(agi->agi_unlinked[i]);
> > +		if (agino == NULLAGINO)
> > +			continue;
> > +		if (agino < first_agino || agino > last_agino)
> > +			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
> 
> This is effectively the same check as above:
> 
> 	if (agino != NULLAGINO && (agino < first_agino || agino > last_agino))
> 
> so all these checks could use the same helper to make it easier
> to read.

<nod> Will do.  Thank you for the review so far!

--D

> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Chinner Oct. 4, 2017, 6:43 a.m. UTC | #3
On Tue, Oct 03, 2017 at 09:25:01PM -0700, Darrick J. Wong wrote:
> On Wed, Oct 04, 2017 at 12:43:47PM +1100, Dave Chinner wrote:
> > On Tue, Oct 03, 2017 at 01:41:59PM -0700, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > 
> > > Add a forgotten check to the AGI verifier, then wire up the scrub
> > > infrastructure to check the AGI contents.
> > > 
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > >  fs/xfs/libxfs/xfs_fs.h  |    3 +-
> > >  fs/xfs/scrub/agheader.c |   88 +++++++++++++++++++++++++++++++++++++++++++++++
> > >  fs/xfs/scrub/common.c   |    6 ++-
> > >  fs/xfs/scrub/scrub.c    |    4 ++
> > >  fs/xfs/scrub/scrub.h    |    1 +
> > >  5 files changed, 99 insertions(+), 3 deletions(-)
> > > 
> > > 
> > > diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
> > > index aeb2a66..1e326dd 100644
> > > --- a/fs/xfs/libxfs/xfs_fs.h
> > > +++ b/fs/xfs/libxfs/xfs_fs.h
> > > @@ -487,9 +487,10 @@ struct xfs_scrub_metadata {
> > >  #define XFS_SCRUB_TYPE_SB	1	/* superblock */
> > >  #define XFS_SCRUB_TYPE_AGF	2	/* AG free header */
> > >  #define XFS_SCRUB_TYPE_AGFL	3	/* AG free list */
> > > +#define XFS_SCRUB_TYPE_AGI	4	/* AG inode header */
> > >  
> > >  /* Number of scrub subcommands. */
> > > -#define XFS_SCRUB_TYPE_NR	4
> > > +#define XFS_SCRUB_TYPE_NR	5
> > >  
> > >  /* i: Repair this metadata. */
> > >  #define XFS_SCRUB_IFLAG_REPAIR		(1 << 0)
> > > diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
> > > index 7fe6630..3d269c2 100644
> > > --- a/fs/xfs/scrub/agheader.c
> > > +++ b/fs/xfs/scrub/agheader.c
> > > @@ -535,3 +535,91 @@ xfs_scrub_agfl(
> > >  out:
> > >  	return error;
> > >  }
> > > +
> > > +/* AGI */
> > > +
> > > +/* Scrub the AGI. */
> > > +int
> > > +xfs_scrub_agi(
> > > +	struct xfs_scrub_context	*sc)
> > > +{
> > > +	struct xfs_mount		*mp = sc->mp;
> > > +	struct xfs_agi			*agi;
> > > +	xfs_daddr_t			daddr;
> > > +	xfs_daddr_t			eofs;
> > > +	xfs_agnumber_t			agno;
> > > +	xfs_agblock_t			agbno;
> > > +	xfs_agblock_t			eoag;
> > > +	xfs_agino_t			agino;
> > > +	xfs_agino_t			first_agino;
> > > +	xfs_agino_t			last_agino;
> > > +	int				i;
> > > +	int				level;
> > > +	int				error = 0;
> > > +
> > > +	agno = sc->sm->sm_agno;
> > > +	error = xfs_scrub_load_ag_headers(sc, agno, XFS_SCRUB_TYPE_AGI);
> > > +	if (!xfs_scrub_op_ok(sc, agno, XFS_AGI_BLOCK(sc->mp), &error))
> > > +		goto out;
> > > +
> > > +	agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
> > > +	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
> > > +
> > > +	/* Check the AG length */
> > > +	eoag = be32_to_cpu(agi->agi_length);
> > > +	if (eoag != xfs_scrub_ag_blocks(mp, agno))
> > > +		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
> > 
> > Should we be cross checking that the AGI and AGF both have
> > the same length here?
> 
> Isn't that what this does?  Albeit indirectly?

I was kinda thinking of explicit checks, but you are right, it's
indirectly verified....

> xfs_scrub_ag_blocks returns sb_agcount for every AG except the last one.
> For the last AG it returns (sb_dblocks - (all blocks in the other AGs))
> which should be the same as agf->agf_length, right?

... which assumes we've validated sb_agblocks and sb_dblocks in some
way, which we haven't really done in the superblock scrubber.

It seems to me that we're using the superblock 0 values as the
golden master because it's a mounted filesystem, and then comparing
everything else against it. Maybe we should at least check a couple
of secondary superblocks to see that they match the primary
superblock - that way we'll have some confidence that at least
things like agcount, agblocks, dblocks, etc are valid before we go
any further...

BUt maybe all we need is comment in the overall scrub description -
that we're pretty much assuming that sb 0 is intact because we write
what is in memory back to it and so we can simply validate
everything else against the primary superblock contents...

Cheers,

Dave.
Darrick J. Wong Oct. 4, 2017, 6:02 p.m. UTC | #4
On Wed, Oct 04, 2017 at 05:43:33PM +1100, Dave Chinner wrote:
> On Tue, Oct 03, 2017 at 09:25:01PM -0700, Darrick J. Wong wrote:
> > On Wed, Oct 04, 2017 at 12:43:47PM +1100, Dave Chinner wrote:
> > > On Tue, Oct 03, 2017 at 01:41:59PM -0700, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > 
> > > > Add a forgotten check to the AGI verifier, then wire up the scrub
> > > > infrastructure to check the AGI contents.
> > > > 
> > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > ---
> > > >  fs/xfs/libxfs/xfs_fs.h  |    3 +-
> > > >  fs/xfs/scrub/agheader.c |   88 +++++++++++++++++++++++++++++++++++++++++++++++
> > > >  fs/xfs/scrub/common.c   |    6 ++-
> > > >  fs/xfs/scrub/scrub.c    |    4 ++
> > > >  fs/xfs/scrub/scrub.h    |    1 +
> > > >  5 files changed, 99 insertions(+), 3 deletions(-)
> > > > 
> > > > 
> > > > diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
> > > > index aeb2a66..1e326dd 100644
> > > > --- a/fs/xfs/libxfs/xfs_fs.h
> > > > +++ b/fs/xfs/libxfs/xfs_fs.h
> > > > @@ -487,9 +487,10 @@ struct xfs_scrub_metadata {
> > > >  #define XFS_SCRUB_TYPE_SB	1	/* superblock */
> > > >  #define XFS_SCRUB_TYPE_AGF	2	/* AG free header */
> > > >  #define XFS_SCRUB_TYPE_AGFL	3	/* AG free list */
> > > > +#define XFS_SCRUB_TYPE_AGI	4	/* AG inode header */
> > > >  
> > > >  /* Number of scrub subcommands. */
> > > > -#define XFS_SCRUB_TYPE_NR	4
> > > > +#define XFS_SCRUB_TYPE_NR	5
> > > >  
> > > >  /* i: Repair this metadata. */
> > > >  #define XFS_SCRUB_IFLAG_REPAIR		(1 << 0)
> > > > diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
> > > > index 7fe6630..3d269c2 100644
> > > > --- a/fs/xfs/scrub/agheader.c
> > > > +++ b/fs/xfs/scrub/agheader.c
> > > > @@ -535,3 +535,91 @@ xfs_scrub_agfl(
> > > >  out:
> > > >  	return error;
> > > >  }
> > > > +
> > > > +/* AGI */
> > > > +
> > > > +/* Scrub the AGI. */
> > > > +int
> > > > +xfs_scrub_agi(
> > > > +	struct xfs_scrub_context	*sc)
> > > > +{
> > > > +	struct xfs_mount		*mp = sc->mp;
> > > > +	struct xfs_agi			*agi;
> > > > +	xfs_daddr_t			daddr;
> > > > +	xfs_daddr_t			eofs;
> > > > +	xfs_agnumber_t			agno;
> > > > +	xfs_agblock_t			agbno;
> > > > +	xfs_agblock_t			eoag;
> > > > +	xfs_agino_t			agino;
> > > > +	xfs_agino_t			first_agino;
> > > > +	xfs_agino_t			last_agino;
> > > > +	int				i;
> > > > +	int				level;
> > > > +	int				error = 0;
> > > > +
> > > > +	agno = sc->sm->sm_agno;
> > > > +	error = xfs_scrub_load_ag_headers(sc, agno, XFS_SCRUB_TYPE_AGI);
> > > > +	if (!xfs_scrub_op_ok(sc, agno, XFS_AGI_BLOCK(sc->mp), &error))
> > > > +		goto out;
> > > > +
> > > > +	agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
> > > > +	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
> > > > +
> > > > +	/* Check the AG length */
> > > > +	eoag = be32_to_cpu(agi->agi_length);
> > > > +	if (eoag != xfs_scrub_ag_blocks(mp, agno))
> > > > +		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
> > > 
> > > Should we be cross checking that the AGI and AGF both have
> > > the same length here?
> > 
> > Isn't that what this does?  Albeit indirectly?
> 
> I was kinda thinking of explicit checks, but you are right, it's
> indirectly verified....
> 
> > xfs_scrub_ag_blocks returns sb_agcount for every AG except the last one.
> > For the last AG it returns (sb_dblocks - (all blocks in the other AGs))
> > which should be the same as agf->agf_length, right?
> 
> ... which assumes we've validated sb_agblocks and sb_dblocks in some
> way, which we haven't really done in the superblock scrubber.

Yes.

> It seems to me that we're using the superblock 0 values as the
> golden master because it's a mounted filesystem, and then comparing
> everything else against it. Maybe we should at least check a couple
> of secondary superblocks to see that they match the primary
> superblock - that way we'll have some confidence that at least
> things like agcount, agblocks, dblocks, etc are valid before we go
> any further...

xfs_scrub_superblock does check the secondary superblock geometry
against whatever's in mp->m_sb, which came from sb 0.

> BUt maybe all we need is comment in the overall scrub description -
> that we're pretty much assuming that sb 0 is intact because we write
> what is in memory back to it and so we can simply validate
> everything else against the primary superblock contents...

Correct.  Since scrub is run against a mounted live filesystem we assume
that the mount code fully validated sb 0 and therefore we can rely on it
not being wrong.

If OTOH sb 0 *is* wrong then the admin is better off running xfs_repair
because there's too much whirring machinery to go changing fundamental
geometry.

Ok more comments are coming.

--D

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Chinner Oct. 4, 2017, 10:16 p.m. UTC | #5
On Wed, Oct 04, 2017 at 11:02:04AM -0700, Darrick J. Wong wrote:
> On Wed, Oct 04, 2017 at 05:43:33PM +1100, Dave Chinner wrote:
> > It seems to me that we're using the superblock 0 values as the
> > golden master because it's a mounted filesystem, and then comparing
> > everything else against it. Maybe we should at least check a couple
> > of secondary superblocks to see that they match the primary
> > superblock - that way we'll have some confidence that at least
> > things like agcount, agblocks, dblocks, etc are valid before we go
> > any further...
> 
> xfs_scrub_superblock does check the secondary superblock geometry
> against whatever's in mp->m_sb, which came from sb 0.

/me smacks forehead

The patch is even named "scrub the backup superblocks".

Perhaps it didn't sink in because they are normally called
"secondary superblocks". My bad....

> > BUt maybe all we need is comment in the overall scrub description -
> > that we're pretty much assuming that sb 0 is intact because we write
> > what is in memory back to it and so we can simply validate
> > everything else against the primary superblock contents...
> 
> Correct.  Since scrub is run against a mounted live filesystem we assume
> that the mount code fully validated sb 0 and therefore we can rely on it
> not being wrong.
> 
> If OTOH sb 0 *is* wrong then the admin is better off running xfs_repair
> because there's too much whirring machinery to go changing fundamental
> geometry.

Yup, that makes sense.

Cheers,

Dave.
Darrick J. Wong Oct. 4, 2017, 11:12 p.m. UTC | #6
On Thu, Oct 05, 2017 at 09:16:34AM +1100, Dave Chinner wrote:
> On Wed, Oct 04, 2017 at 11:02:04AM -0700, Darrick J. Wong wrote:
> > On Wed, Oct 04, 2017 at 05:43:33PM +1100, Dave Chinner wrote:
> > > It seems to me that we're using the superblock 0 values as the
> > > golden master because it's a mounted filesystem, and then comparing
> > > everything else against it. Maybe we should at least check a couple
> > > of secondary superblocks to see that they match the primary
> > > superblock - that way we'll have some confidence that at least
> > > things like agcount, agblocks, dblocks, etc are valid before we go
> > > any further...
> > 
> > xfs_scrub_superblock does check the secondary superblock geometry
> > against whatever's in mp->m_sb, which came from sb 0.
> 
> /me smacks forehead
> 
> The patch is even named "scrub the backup superblocks".
> 
> Perhaps it didn't sink in because they are normally called
> "secondary superblocks". My bad....

Sometimes I think I still have ext4 on the brain. :(

--D

> > > BUt maybe all we need is comment in the overall scrub description -
> > > that we're pretty much assuming that sb 0 is intact because we write
> > > what is in memory back to it and so we can simply validate
> > > everything else against the primary superblock contents...
> > 
> > Correct.  Since scrub is run against a mounted live filesystem we assume
> > that the mount code fully validated sb 0 and therefore we can rely on it
> > not being wrong.
> > 
> > If OTOH sb 0 *is* wrong then the admin is better off running xfs_repair
> > because there's too much whirring machinery to go changing fundamental
> > geometry.
> 
> Yup, that makes sense.
> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index aeb2a66..1e326dd 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -487,9 +487,10 @@  struct xfs_scrub_metadata {
 #define XFS_SCRUB_TYPE_SB	1	/* superblock */
 #define XFS_SCRUB_TYPE_AGF	2	/* AG free header */
 #define XFS_SCRUB_TYPE_AGFL	3	/* AG free list */
+#define XFS_SCRUB_TYPE_AGI	4	/* AG inode header */
 
 /* Number of scrub subcommands. */
-#define XFS_SCRUB_TYPE_NR	4
+#define XFS_SCRUB_TYPE_NR	5
 
 /* i: Repair this metadata. */
 #define XFS_SCRUB_IFLAG_REPAIR		(1 << 0)
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index 7fe6630..3d269c2 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -535,3 +535,91 @@  xfs_scrub_agfl(
 out:
 	return error;
 }
+
+/* AGI */
+
+/* Scrub the AGI. */
+int
+xfs_scrub_agi(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_agi			*agi;
+	xfs_daddr_t			daddr;
+	xfs_daddr_t			eofs;
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			agbno;
+	xfs_agblock_t			eoag;
+	xfs_agino_t			agino;
+	xfs_agino_t			first_agino;
+	xfs_agino_t			last_agino;
+	int				i;
+	int				level;
+	int				error = 0;
+
+	agno = sc->sm->sm_agno;
+	error = xfs_scrub_load_ag_headers(sc, agno, XFS_SCRUB_TYPE_AGI);
+	if (!xfs_scrub_op_ok(sc, agno, XFS_AGI_BLOCK(sc->mp), &error))
+		goto out;
+
+	agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
+	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+
+	/* Check the AG length */
+	eoag = be32_to_cpu(agi->agi_length);
+	if (eoag != xfs_scrub_ag_blocks(mp, agno))
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+	/* Check btree roots and levels */
+	agbno = be32_to_cpu(agi->agi_root);
+	daddr = XFS_AGB_TO_DADDR(mp, agno, agbno);
+	if (agbno <= XFS_AGI_BLOCK(mp) || agbno >= mp->m_sb.sb_agblocks ||
+	    agbno >= eoag || daddr >= eofs)
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+	level = be32_to_cpu(agi->agi_level);
+	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+		agbno = be32_to_cpu(agi->agi_free_root);
+		daddr = XFS_AGB_TO_DADDR(mp, agno, agbno);
+		if (agbno <= XFS_AGI_BLOCK(mp) ||
+		    agbno >= mp->m_sb.sb_agblocks ||
+		    agbno >= eoag ||
+		    daddr >= eofs)
+			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+		level = be32_to_cpu(agi->agi_free_level);
+		if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
+			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+	}
+
+	/* Check inode counters */
+	first_agino = XFS_OFFBNO_TO_AGINO(mp, XFS_AGI_BLOCK(mp) + 1, 0);
+	last_agino = XFS_OFFBNO_TO_AGINO(mp, eoag + 1, 0) - 1;
+	agino = be32_to_cpu(agi->agi_count);
+	if (agino > last_agino - first_agino + 1 ||
+	    agino < be32_to_cpu(agi->agi_freecount))
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+	/* Check inode pointers */
+	agino = be32_to_cpu(agi->agi_newino);
+	if (agino != NULLAGINO && (agino < first_agino || agino > last_agino))
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+	agino = be32_to_cpu(agi->agi_dirino);
+	if (agino != NULLAGINO && (agino < first_agino || agino > last_agino))
+		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+
+	/* Check unlinked inode buckets */
+	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
+		agino = be32_to_cpu(agi->agi_unlinked[i]);
+		if (agino == NULLAGINO)
+			continue;
+		if (agino < first_agino || agino > last_agino)
+			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+	}
+
+out:
+	return error;
+}
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index ee8e7be..4f42401 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -507,12 +507,14 @@  xfs_scrub_load_ag_headers(
 	struct xfs_mount		*mp = sc->mp;
 	int				error;
 
-	ASSERT(type == XFS_SCRUB_TYPE_AGF || type == XFS_SCRUB_TYPE_AGFL);
+	ASSERT(type == XFS_SCRUB_TYPE_AGF || type == XFS_SCRUB_TYPE_AGFL ||
+	       type == XFS_SCRUB_TYPE_AGI);
 	memset(&sc->sa, 0, sizeof(sc->sa));
 	sc->sa.agno = agno;
 
 	error = xfs_scrub_load_ag_header(sc, XFS_AGI_DADDR(mp),
-			&sc->sa.agi_bp, &xfs_agi_buf_ops, false);
+			&sc->sa.agi_bp, &xfs_agi_buf_ops,
+			type == XFS_SCRUB_TYPE_AGI);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 6f3c4f0..a62f53b 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -165,6 +165,10 @@  static const struct xfs_scrub_meta_ops meta_scrub_ops[] = {
 		.setup	= xfs_scrub_setup_ag_header,
 		.scrub	= xfs_scrub_agfl,
 	},
+	{ /* agi */
+		.setup	= xfs_scrub_setup_ag_header,
+		.scrub	= xfs_scrub_agi,
+	},
 };
 
 /* This isn't a stable feature, warn once per day. */
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 50f8641..09952c2 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -70,5 +70,6 @@  int xfs_scrub_tester(struct xfs_scrub_context *sc);
 int xfs_scrub_superblock(struct xfs_scrub_context *sc);
 int xfs_scrub_agf(struct xfs_scrub_context *sc);
 int xfs_scrub_agfl(struct xfs_scrub_context *sc);
+int xfs_scrub_agi(struct xfs_scrub_context *sc);
 
 #endif	/* __XFS_SCRUB_SCRUB_H__ */