diff mbox series

[02/14] xfs: repair the AGF

Message ID 153292968232.24509.16936110804102265045.stgit@magnolia (mailing list archive)
State Superseded, archived
Headers show
Series xfs-4.19: online repair support | expand

Commit Message

Darrick J. Wong July 30, 2018, 5:48 a.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Regenerate the AGF from the rmap data.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/scrub/agheader_repair.c |  370 ++++++++++++++++++++++++++++++++++++++++
 fs/xfs/scrub/repair.c          |   27 ++-
 fs/xfs/scrub/repair.h          |    4 
 fs/xfs/scrub/scrub.c           |    2 
 4 files changed, 393 insertions(+), 10 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Brian Foster July 30, 2018, 4:22 p.m. UTC | #1
On Sun, Jul 29, 2018 at 10:48:02PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Regenerate the AGF from the rmap data.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/scrub/agheader_repair.c |  370 ++++++++++++++++++++++++++++++++++++++++
>  fs/xfs/scrub/repair.c          |   27 ++-
>  fs/xfs/scrub/repair.h          |    4 
>  fs/xfs/scrub/scrub.c           |    2 
>  4 files changed, 393 insertions(+), 10 deletions(-)
> 
> 
> diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
> index 1e96621ece3a..4842fc598c9b 100644
> --- a/fs/xfs/scrub/agheader_repair.c
> +++ b/fs/xfs/scrub/agheader_repair.c
...
> @@ -54,3 +61,366 @@ xrep_superblock(
...
> +/* Repair the AGF. v5 filesystems only. */
> +int
> +xrep_agf(
> +	struct xfs_scrub		*sc)
> +{
...
> +	/* Start rewriting the header and implant the btrees we found. */
> +	xrep_agf_init_header(sc, agf_bp, &old_agf);
> +	xrep_agf_set_roots(sc, agf, fab);
> +	error = xrep_agf_calc_from_btrees(sc, agf_bp);
> +	if (error)
> +		goto out_revert;
> +
> +	/* Commit the changes and reinitialize incore state. */
> +	return xrep_agf_commit_new(sc, agf_bp);
> +
> +out_revert:
> +	/* Mark the incore AGF state stale and revert the AGF. */
> +	sc->sa.pag->pagf_init = 0;

Hmm, looking at this again I'm not sure it's safe to reset ->pagf_init
like this. The contexts where we hold agf might be Ok because I think
that might prevent some other thread from actually coming in and
resetting it, but look at xfs_alloc_read_agf() does in this case if the
agf becomes available with !pagf_init. Specifically, are we at risk of
corrupting a populated ->pagb_tree or causing other problems by
reinitializing the spinlock? Perhaps we need another patch to separate
out some of those fields that should only ever be initialized once.

With something like that, it might subsequently make sense to factor the
reinit from disk bits into a helper to be shared between
xrep_agf_commit_new() and xfs_allo_read_agf(). I also wonder if it's
sufficient to just update the agf on disk and leave pagf_init == 0.

Otherwise the rest of this patch seems Ok to me.

Brian

> +	memcpy(agf, &old_agf, sizeof(old_agf));
> +	return error;
> +}
> diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
> index 85b048b341a0..17cf48564390 100644
> --- a/fs/xfs/scrub/repair.c
> +++ b/fs/xfs/scrub/repair.c
> @@ -128,9 +128,12 @@ xrep_roll_ag_trans(
>  	int			error;
>  
>  	/* Keep the AG header buffers locked so we can keep going. */
> -	xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
> -	xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
> -	xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
> +	if (sc->sa.agi_bp)
> +		xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
> +	if (sc->sa.agf_bp)
> +		xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
> +	if (sc->sa.agfl_bp)
> +		xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
>  
>  	/* Roll the transaction. */
>  	error = xfs_trans_roll(&sc->tp);
> @@ -138,9 +141,12 @@ xrep_roll_ag_trans(
>  		goto out_release;
>  
>  	/* Join AG headers to the new transaction. */
> -	xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
> -	xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
> -	xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
> +	if (sc->sa.agi_bp)
> +		xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
> +	if (sc->sa.agf_bp)
> +		xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
> +	if (sc->sa.agfl_bp)
> +		xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
>  
>  	return 0;
>  
> @@ -150,9 +156,12 @@ xrep_roll_ag_trans(
>  	 * buffers will be released during teardown on our way out
>  	 * of the kernel.
>  	 */
> -	xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
> -	xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
> -	xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
> +	if (sc->sa.agi_bp)
> +		xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
> +	if (sc->sa.agf_bp)
> +		xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
> +	if (sc->sa.agfl_bp)
> +		xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
>  
>  	return error;
>  }
> diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
> index 5a4e92221916..1d283360b5ab 100644
> --- a/fs/xfs/scrub/repair.h
> +++ b/fs/xfs/scrub/repair.h
> @@ -58,6 +58,8 @@ int xrep_ino_dqattach(struct xfs_scrub *sc);
>  
>  int xrep_probe(struct xfs_scrub *sc);
>  int xrep_superblock(struct xfs_scrub *sc);
> +int xrep_agf(struct xfs_scrub *sc);
> +int xrep_agfl(struct xfs_scrub *sc);
>  
>  #else
>  
> @@ -81,6 +83,8 @@ xrep_calc_ag_resblks(
>  
>  #define xrep_probe			xrep_notsupported
>  #define xrep_superblock			xrep_notsupported
> +#define xrep_agf			xrep_notsupported
> +#define xrep_agfl			xrep_notsupported
>  
>  #endif /* CONFIG_XFS_ONLINE_REPAIR */
>  
> diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
> index 6efb926f3cf8..1e8a17c8e2b9 100644
> --- a/fs/xfs/scrub/scrub.c
> +++ b/fs/xfs/scrub/scrub.c
> @@ -214,7 +214,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
>  		.type	= ST_PERAG,
>  		.setup	= xchk_setup_fs,
>  		.scrub	= xchk_agf,
> -		.repair	= xrep_notsupported,
> +		.repair	= xrep_agf,
>  	},
>  	[XFS_SCRUB_TYPE_AGFL]= {	/* agfl */
>  		.type	= ST_PERAG,
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong July 30, 2018, 5:31 p.m. UTC | #2
On Mon, Jul 30, 2018 at 12:22:24PM -0400, Brian Foster wrote:
> On Sun, Jul 29, 2018 at 10:48:02PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Regenerate the AGF from the rmap data.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/xfs/scrub/agheader_repair.c |  370 ++++++++++++++++++++++++++++++++++++++++
> >  fs/xfs/scrub/repair.c          |   27 ++-
> >  fs/xfs/scrub/repair.h          |    4 
> >  fs/xfs/scrub/scrub.c           |    2 
> >  4 files changed, 393 insertions(+), 10 deletions(-)
> > 
> > 
> > diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
> > index 1e96621ece3a..4842fc598c9b 100644
> > --- a/fs/xfs/scrub/agheader_repair.c
> > +++ b/fs/xfs/scrub/agheader_repair.c
> ...
> > @@ -54,3 +61,366 @@ xrep_superblock(
> ...
> > +/* Repair the AGF. v5 filesystems only. */
> > +int
> > +xrep_agf(
> > +	struct xfs_scrub		*sc)
> > +{
> ...
> > +	/* Start rewriting the header and implant the btrees we found. */
> > +	xrep_agf_init_header(sc, agf_bp, &old_agf);
> > +	xrep_agf_set_roots(sc, agf, fab);
> > +	error = xrep_agf_calc_from_btrees(sc, agf_bp);
> > +	if (error)
> > +		goto out_revert;
> > +
> > +	/* Commit the changes and reinitialize incore state. */
> > +	return xrep_agf_commit_new(sc, agf_bp);
> > +
> > +out_revert:
> > +	/* Mark the incore AGF state stale and revert the AGF. */
> > +	sc->sa.pag->pagf_init = 0;
> 
> Hmm, looking at this again I'm not sure it's safe to reset ->pagf_init
> like this. The contexts where we hold agf might be Ok because I think
> that might prevent some other thread from actually coming in and
> resetting it, but look at xfs_alloc_read_agf() does in this case if the
> agf becomes available with !pagf_init. Specifically, are we at risk of
> corrupting a populated ->pagb_tree or causing other problems by
> reinitializing the spinlock? Perhaps we need another patch to separate
> out some of those fields that should only ever be initialized once.

Yikes, the pagb_tree & spinlock should not get reinitialized.  I don't
see where we ever tear them down except for unmount, so I *think* we can
move it to xfs_initialize_perag.  It's a little mystifying why we don't
initialze those things there like we do for the incore inode radix tree.

Also it would finally fix the discrepancy with xfsprogs libxfs where
they comment out the RB_ROOT initialization.

> With something like that, it might subsequently make sense to factor the
> reinit from disk bits into a helper to be shared between
> xrep_agf_commit_new() and xfs_allo_read_agf(). I also wonder if it's
> sufficient to just update the agf on disk and leave pagf_init == 0.

Hmm, wasn't there some verifier that used pag*_init (can't remember
which one) to decide if we were in log recovery?

--D

> Otherwise the rest of this patch seems Ok to me.
> 
> Brian
> 
> > +	memcpy(agf, &old_agf, sizeof(old_agf));
> > +	return error;
> > +}
> > diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
> > index 85b048b341a0..17cf48564390 100644
> > --- a/fs/xfs/scrub/repair.c
> > +++ b/fs/xfs/scrub/repair.c
> > @@ -128,9 +128,12 @@ xrep_roll_ag_trans(
> >  	int			error;
> >  
> >  	/* Keep the AG header buffers locked so we can keep going. */
> > -	xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
> > -	xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
> > -	xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
> > +	if (sc->sa.agi_bp)
> > +		xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
> > +	if (sc->sa.agf_bp)
> > +		xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
> > +	if (sc->sa.agfl_bp)
> > +		xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
> >  
> >  	/* Roll the transaction. */
> >  	error = xfs_trans_roll(&sc->tp);
> > @@ -138,9 +141,12 @@ xrep_roll_ag_trans(
> >  		goto out_release;
> >  
> >  	/* Join AG headers to the new transaction. */
> > -	xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
> > -	xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
> > -	xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
> > +	if (sc->sa.agi_bp)
> > +		xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
> > +	if (sc->sa.agf_bp)
> > +		xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
> > +	if (sc->sa.agfl_bp)
> > +		xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
> >  
> >  	return 0;
> >  
> > @@ -150,9 +156,12 @@ xrep_roll_ag_trans(
> >  	 * buffers will be released during teardown on our way out
> >  	 * of the kernel.
> >  	 */
> > -	xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
> > -	xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
> > -	xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
> > +	if (sc->sa.agi_bp)
> > +		xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
> > +	if (sc->sa.agf_bp)
> > +		xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
> > +	if (sc->sa.agfl_bp)
> > +		xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
> >  
> >  	return error;
> >  }
> > diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
> > index 5a4e92221916..1d283360b5ab 100644
> > --- a/fs/xfs/scrub/repair.h
> > +++ b/fs/xfs/scrub/repair.h
> > @@ -58,6 +58,8 @@ int xrep_ino_dqattach(struct xfs_scrub *sc);
> >  
> >  int xrep_probe(struct xfs_scrub *sc);
> >  int xrep_superblock(struct xfs_scrub *sc);
> > +int xrep_agf(struct xfs_scrub *sc);
> > +int xrep_agfl(struct xfs_scrub *sc);
> >  
> >  #else
> >  
> > @@ -81,6 +83,8 @@ xrep_calc_ag_resblks(
> >  
> >  #define xrep_probe			xrep_notsupported
> >  #define xrep_superblock			xrep_notsupported
> > +#define xrep_agf			xrep_notsupported
> > +#define xrep_agfl			xrep_notsupported
> >  
> >  #endif /* CONFIG_XFS_ONLINE_REPAIR */
> >  
> > diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
> > index 6efb926f3cf8..1e8a17c8e2b9 100644
> > --- a/fs/xfs/scrub/scrub.c
> > +++ b/fs/xfs/scrub/scrub.c
> > @@ -214,7 +214,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
> >  		.type	= ST_PERAG,
> >  		.setup	= xchk_setup_fs,
> >  		.scrub	= xchk_agf,
> > -		.repair	= xrep_notsupported,
> > +		.repair	= xrep_agf,
> >  	},
> >  	[XFS_SCRUB_TYPE_AGFL]= {	/* agfl */
> >  		.type	= ST_PERAG,
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Brian Foster July 30, 2018, 6:19 p.m. UTC | #3
On Mon, Jul 30, 2018 at 10:31:11AM -0700, Darrick J. Wong wrote:
> On Mon, Jul 30, 2018 at 12:22:24PM -0400, Brian Foster wrote:
> > On Sun, Jul 29, 2018 at 10:48:02PM -0700, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > 
> > > Regenerate the AGF from the rmap data.
> > > 
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > >  fs/xfs/scrub/agheader_repair.c |  370 ++++++++++++++++++++++++++++++++++++++++
> > >  fs/xfs/scrub/repair.c          |   27 ++-
> > >  fs/xfs/scrub/repair.h          |    4 
> > >  fs/xfs/scrub/scrub.c           |    2 
> > >  4 files changed, 393 insertions(+), 10 deletions(-)
> > > 
> > > 
> > > diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
> > > index 1e96621ece3a..4842fc598c9b 100644
> > > --- a/fs/xfs/scrub/agheader_repair.c
> > > +++ b/fs/xfs/scrub/agheader_repair.c
> > ...
> > > @@ -54,3 +61,366 @@ xrep_superblock(
> > ...
> > > +/* Repair the AGF. v5 filesystems only. */
> > > +int
> > > +xrep_agf(
> > > +	struct xfs_scrub		*sc)
> > > +{
> > ...
> > > +	/* Start rewriting the header and implant the btrees we found. */
> > > +	xrep_agf_init_header(sc, agf_bp, &old_agf);
> > > +	xrep_agf_set_roots(sc, agf, fab);
> > > +	error = xrep_agf_calc_from_btrees(sc, agf_bp);
> > > +	if (error)
> > > +		goto out_revert;
> > > +
> > > +	/* Commit the changes and reinitialize incore state. */
> > > +	return xrep_agf_commit_new(sc, agf_bp);
> > > +
> > > +out_revert:
> > > +	/* Mark the incore AGF state stale and revert the AGF. */
> > > +	sc->sa.pag->pagf_init = 0;
> > 
> > Hmm, looking at this again I'm not sure it's safe to reset ->pagf_init
> > like this. The contexts where we hold agf might be Ok because I think
> > that might prevent some other thread from actually coming in and
> > resetting it, but look at xfs_alloc_read_agf() does in this case if the
> > agf becomes available with !pagf_init. Specifically, are we at risk of
> > corrupting a populated ->pagb_tree or causing other problems by
> > reinitializing the spinlock? Perhaps we need another patch to separate
> > out some of those fields that should only ever be initialized once.
> 
> Yikes, the pagb_tree & spinlock should not get reinitialized.  I don't
> see where we ever tear them down except for unmount, so I *think* we can
> move it to xfs_initialize_perag.  It's a little mystifying why we don't
> initialze those things there like we do for the incore inode radix tree.
> 
> Also it would finally fix the discrepancy with xfsprogs libxfs where
> they comment out the RB_ROOT initialization.
> 
> > With something like that, it might subsequently make sense to factor the
> > reinit from disk bits into a helper to be shared between
> > xrep_agf_commit_new() and xfs_allo_read_agf(). I also wonder if it's
> > sufficient to just update the agf on disk and leave pagf_init == 0.
> 
> Hmm, wasn't there some verifier that used pag*_init (can't remember
> which one) to decide if we were in log recovery?
> 

It looks like xfs_attr3_leaf_verify() might do something like that. But
don't we have to handle that either way if the error path leaves
pagf_init == 0 on return? Actually we might have to address it
regardless if we want to use pagf_init if that path isn't holding the
agf.

Brian

> --D
> 
> > Otherwise the rest of this patch seems Ok to me.
> > 
> > Brian
> > 
> > > +	memcpy(agf, &old_agf, sizeof(old_agf));
> > > +	return error;
> > > +}
> > > diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
> > > index 85b048b341a0..17cf48564390 100644
> > > --- a/fs/xfs/scrub/repair.c
> > > +++ b/fs/xfs/scrub/repair.c
> > > @@ -128,9 +128,12 @@ xrep_roll_ag_trans(
> > >  	int			error;
> > >  
> > >  	/* Keep the AG header buffers locked so we can keep going. */
> > > -	xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
> > > -	xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
> > > -	xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
> > > +	if (sc->sa.agi_bp)
> > > +		xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
> > > +	if (sc->sa.agf_bp)
> > > +		xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
> > > +	if (sc->sa.agfl_bp)
> > > +		xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
> > >  
> > >  	/* Roll the transaction. */
> > >  	error = xfs_trans_roll(&sc->tp);
> > > @@ -138,9 +141,12 @@ xrep_roll_ag_trans(
> > >  		goto out_release;
> > >  
> > >  	/* Join AG headers to the new transaction. */
> > > -	xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
> > > -	xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
> > > -	xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
> > > +	if (sc->sa.agi_bp)
> > > +		xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
> > > +	if (sc->sa.agf_bp)
> > > +		xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
> > > +	if (sc->sa.agfl_bp)
> > > +		xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
> > >  
> > >  	return 0;
> > >  
> > > @@ -150,9 +156,12 @@ xrep_roll_ag_trans(
> > >  	 * buffers will be released during teardown on our way out
> > >  	 * of the kernel.
> > >  	 */
> > > -	xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
> > > -	xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
> > > -	xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
> > > +	if (sc->sa.agi_bp)
> > > +		xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
> > > +	if (sc->sa.agf_bp)
> > > +		xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
> > > +	if (sc->sa.agfl_bp)
> > > +		xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
> > >  
> > >  	return error;
> > >  }
> > > diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
> > > index 5a4e92221916..1d283360b5ab 100644
> > > --- a/fs/xfs/scrub/repair.h
> > > +++ b/fs/xfs/scrub/repair.h
> > > @@ -58,6 +58,8 @@ int xrep_ino_dqattach(struct xfs_scrub *sc);
> > >  
> > >  int xrep_probe(struct xfs_scrub *sc);
> > >  int xrep_superblock(struct xfs_scrub *sc);
> > > +int xrep_agf(struct xfs_scrub *sc);
> > > +int xrep_agfl(struct xfs_scrub *sc);
> > >  
> > >  #else
> > >  
> > > @@ -81,6 +83,8 @@ xrep_calc_ag_resblks(
> > >  
> > >  #define xrep_probe			xrep_notsupported
> > >  #define xrep_superblock			xrep_notsupported
> > > +#define xrep_agf			xrep_notsupported
> > > +#define xrep_agfl			xrep_notsupported
> > >  
> > >  #endif /* CONFIG_XFS_ONLINE_REPAIR */
> > >  
> > > diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
> > > index 6efb926f3cf8..1e8a17c8e2b9 100644
> > > --- a/fs/xfs/scrub/scrub.c
> > > +++ b/fs/xfs/scrub/scrub.c
> > > @@ -214,7 +214,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
> > >  		.type	= ST_PERAG,
> > >  		.setup	= xchk_setup_fs,
> > >  		.scrub	= xchk_agf,
> > > -		.repair	= xrep_notsupported,
> > > +		.repair	= xrep_agf,
> > >  	},
> > >  	[XFS_SCRUB_TYPE_AGFL]= {	/* agfl */
> > >  		.type	= ST_PERAG,
> > > 
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong July 30, 2018, 6:22 p.m. UTC | #4
On Mon, Jul 30, 2018 at 02:19:15PM -0400, Brian Foster wrote:
> On Mon, Jul 30, 2018 at 10:31:11AM -0700, Darrick J. Wong wrote:
> > On Mon, Jul 30, 2018 at 12:22:24PM -0400, Brian Foster wrote:
> > > On Sun, Jul 29, 2018 at 10:48:02PM -0700, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > 
> > > > Regenerate the AGF from the rmap data.
> > > > 
> > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > ---
> > > >  fs/xfs/scrub/agheader_repair.c |  370 ++++++++++++++++++++++++++++++++++++++++
> > > >  fs/xfs/scrub/repair.c          |   27 ++-
> > > >  fs/xfs/scrub/repair.h          |    4 
> > > >  fs/xfs/scrub/scrub.c           |    2 
> > > >  4 files changed, 393 insertions(+), 10 deletions(-)
> > > > 
> > > > 
> > > > diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
> > > > index 1e96621ece3a..4842fc598c9b 100644
> > > > --- a/fs/xfs/scrub/agheader_repair.c
> > > > +++ b/fs/xfs/scrub/agheader_repair.c
> > > ...
> > > > @@ -54,3 +61,366 @@ xrep_superblock(
> > > ...
> > > > +/* Repair the AGF. v5 filesystems only. */
> > > > +int
> > > > +xrep_agf(
> > > > +	struct xfs_scrub		*sc)
> > > > +{
> > > ...
> > > > +	/* Start rewriting the header and implant the btrees we found. */
> > > > +	xrep_agf_init_header(sc, agf_bp, &old_agf);
> > > > +	xrep_agf_set_roots(sc, agf, fab);
> > > > +	error = xrep_agf_calc_from_btrees(sc, agf_bp);
> > > > +	if (error)
> > > > +		goto out_revert;
> > > > +
> > > > +	/* Commit the changes and reinitialize incore state. */
> > > > +	return xrep_agf_commit_new(sc, agf_bp);
> > > > +
> > > > +out_revert:
> > > > +	/* Mark the incore AGF state stale and revert the AGF. */
> > > > +	sc->sa.pag->pagf_init = 0;
> > > 
> > > Hmm, looking at this again I'm not sure it's safe to reset ->pagf_init
> > > like this. The contexts where we hold agf might be Ok because I think
> > > that might prevent some other thread from actually coming in and
> > > resetting it, but look at xfs_alloc_read_agf() does in this case if the
> > > agf becomes available with !pagf_init. Specifically, are we at risk of
> > > corrupting a populated ->pagb_tree or causing other problems by
> > > reinitializing the spinlock? Perhaps we need another patch to separate
> > > out some of those fields that should only ever be initialized once.
> > 
> > Yikes, the pagb_tree & spinlock should not get reinitialized.  I don't
> > see where we ever tear them down except for unmount, so I *think* we can
> > move it to xfs_initialize_perag.  It's a little mystifying why we don't
> > initialze those things there like we do for the incore inode radix tree.
> > 
> > Also it would finally fix the discrepancy with xfsprogs libxfs where
> > they comment out the RB_ROOT initialization.
> > 
> > > With something like that, it might subsequently make sense to factor the
> > > reinit from disk bits into a helper to be shared between
> > > xrep_agf_commit_new() and xfs_allo_read_agf(). I also wonder if it's
> > > sufficient to just update the agf on disk and leave pagf_init == 0.
> > 
> > Hmm, wasn't there some verifier that used pag*_init (can't remember
> > which one) to decide if we were in log recovery?
> > 
> 
> It looks like xfs_attr3_leaf_verify() might do something like that. But
> don't we have to handle that either way if the error path leaves
> pagf_init == 0 on return? Actually we might have to address it
> regardless if we want to use pagf_init if that path isn't holding the
> agf.

<nod> I think we should simply add a xlog helper that decides if the log
is in recovery and call it from xfs_attr3_leaf_verify rather than having
an open-coded check on some other data structure.

--D

> Brian
> 
> > --D
> > 
> > > Otherwise the rest of this patch seems Ok to me.
> > > 
> > > Brian
> > > 
> > > > +	memcpy(agf, &old_agf, sizeof(old_agf));
> > > > +	return error;
> > > > +}
> > > > diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
> > > > index 85b048b341a0..17cf48564390 100644
> > > > --- a/fs/xfs/scrub/repair.c
> > > > +++ b/fs/xfs/scrub/repair.c
> > > > @@ -128,9 +128,12 @@ xrep_roll_ag_trans(
> > > >  	int			error;
> > > >  
> > > >  	/* Keep the AG header buffers locked so we can keep going. */
> > > > -	xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
> > > > -	xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
> > > > -	xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
> > > > +	if (sc->sa.agi_bp)
> > > > +		xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
> > > > +	if (sc->sa.agf_bp)
> > > > +		xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
> > > > +	if (sc->sa.agfl_bp)
> > > > +		xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
> > > >  
> > > >  	/* Roll the transaction. */
> > > >  	error = xfs_trans_roll(&sc->tp);
> > > > @@ -138,9 +141,12 @@ xrep_roll_ag_trans(
> > > >  		goto out_release;
> > > >  
> > > >  	/* Join AG headers to the new transaction. */
> > > > -	xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
> > > > -	xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
> > > > -	xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
> > > > +	if (sc->sa.agi_bp)
> > > > +		xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
> > > > +	if (sc->sa.agf_bp)
> > > > +		xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
> > > > +	if (sc->sa.agfl_bp)
> > > > +		xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
> > > >  
> > > >  	return 0;
> > > >  
> > > > @@ -150,9 +156,12 @@ xrep_roll_ag_trans(
> > > >  	 * buffers will be released during teardown on our way out
> > > >  	 * of the kernel.
> > > >  	 */
> > > > -	xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
> > > > -	xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
> > > > -	xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
> > > > +	if (sc->sa.agi_bp)
> > > > +		xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
> > > > +	if (sc->sa.agf_bp)
> > > > +		xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
> > > > +	if (sc->sa.agfl_bp)
> > > > +		xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
> > > >  
> > > >  	return error;
> > > >  }
> > > > diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
> > > > index 5a4e92221916..1d283360b5ab 100644
> > > > --- a/fs/xfs/scrub/repair.h
> > > > +++ b/fs/xfs/scrub/repair.h
> > > > @@ -58,6 +58,8 @@ int xrep_ino_dqattach(struct xfs_scrub *sc);
> > > >  
> > > >  int xrep_probe(struct xfs_scrub *sc);
> > > >  int xrep_superblock(struct xfs_scrub *sc);
> > > > +int xrep_agf(struct xfs_scrub *sc);
> > > > +int xrep_agfl(struct xfs_scrub *sc);
> > > >  
> > > >  #else
> > > >  
> > > > @@ -81,6 +83,8 @@ xrep_calc_ag_resblks(
> > > >  
> > > >  #define xrep_probe			xrep_notsupported
> > > >  #define xrep_superblock			xrep_notsupported
> > > > +#define xrep_agf			xrep_notsupported
> > > > +#define xrep_agfl			xrep_notsupported
> > > >  
> > > >  #endif /* CONFIG_XFS_ONLINE_REPAIR */
> > > >  
> > > > diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
> > > > index 6efb926f3cf8..1e8a17c8e2b9 100644
> > > > --- a/fs/xfs/scrub/scrub.c
> > > > +++ b/fs/xfs/scrub/scrub.c
> > > > @@ -214,7 +214,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
> > > >  		.type	= ST_PERAG,
> > > >  		.setup	= xchk_setup_fs,
> > > >  		.scrub	= xchk_agf,
> > > > -		.repair	= xrep_notsupported,
> > > > +		.repair	= xrep_agf,
> > > >  	},
> > > >  	[XFS_SCRUB_TYPE_AGFL]= {	/* agfl */
> > > >  		.type	= ST_PERAG,
> > > > 
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > the body of a message to majordomo@vger.kernel.org
> > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Brian Foster July 30, 2018, 6:33 p.m. UTC | #5
On Mon, Jul 30, 2018 at 11:22:31AM -0700, Darrick J. Wong wrote:
> On Mon, Jul 30, 2018 at 02:19:15PM -0400, Brian Foster wrote:
> > On Mon, Jul 30, 2018 at 10:31:11AM -0700, Darrick J. Wong wrote:
> > > On Mon, Jul 30, 2018 at 12:22:24PM -0400, Brian Foster wrote:
> > > > On Sun, Jul 29, 2018 at 10:48:02PM -0700, Darrick J. Wong wrote:
> > > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > 
> > > > > Regenerate the AGF from the rmap data.
> > > > > 
> > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > ---
> > > > >  fs/xfs/scrub/agheader_repair.c |  370 ++++++++++++++++++++++++++++++++++++++++
> > > > >  fs/xfs/scrub/repair.c          |   27 ++-
> > > > >  fs/xfs/scrub/repair.h          |    4 
> > > > >  fs/xfs/scrub/scrub.c           |    2 
> > > > >  4 files changed, 393 insertions(+), 10 deletions(-)
> > > > > 
> > > > > 
> > > > > diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
> > > > > index 1e96621ece3a..4842fc598c9b 100644
> > > > > --- a/fs/xfs/scrub/agheader_repair.c
> > > > > +++ b/fs/xfs/scrub/agheader_repair.c
> > > > ...
> > > > > @@ -54,3 +61,366 @@ xrep_superblock(
> > > > ...
> > > > > +/* Repair the AGF. v5 filesystems only. */
> > > > > +int
> > > > > +xrep_agf(
> > > > > +	struct xfs_scrub		*sc)
> > > > > +{
> > > > ...
> > > > > +	/* Start rewriting the header and implant the btrees we found. */
> > > > > +	xrep_agf_init_header(sc, agf_bp, &old_agf);
> > > > > +	xrep_agf_set_roots(sc, agf, fab);
> > > > > +	error = xrep_agf_calc_from_btrees(sc, agf_bp);
> > > > > +	if (error)
> > > > > +		goto out_revert;
> > > > > +
> > > > > +	/* Commit the changes and reinitialize incore state. */
> > > > > +	return xrep_agf_commit_new(sc, agf_bp);
> > > > > +
> > > > > +out_revert:
> > > > > +	/* Mark the incore AGF state stale and revert the AGF. */
> > > > > +	sc->sa.pag->pagf_init = 0;
> > > > 
> > > > Hmm, looking at this again I'm not sure it's safe to reset ->pagf_init
> > > > like this. The contexts where we hold agf might be Ok because I think
> > > > that might prevent some other thread from actually coming in and
> > > > resetting it, but look at xfs_alloc_read_agf() does in this case if the
> > > > agf becomes available with !pagf_init. Specifically, are we at risk of
> > > > corrupting a populated ->pagb_tree or causing other problems by
> > > > reinitializing the spinlock? Perhaps we need another patch to separate
> > > > out some of those fields that should only ever be initialized once.
> > > 
> > > Yikes, the pagb_tree & spinlock should not get reinitialized.  I don't
> > > see where we ever tear them down except for unmount, so I *think* we can
> > > move it to xfs_initialize_perag.  It's a little mystifying why we don't
> > > initialze those things there like we do for the incore inode radix tree.
> > > 
> > > Also it would finally fix the discrepancy with xfsprogs libxfs where
> > > they comment out the RB_ROOT initialization.
> > > 
> > > > With something like that, it might subsequently make sense to factor the
> > > > reinit from disk bits into a helper to be shared between
> > > > xrep_agf_commit_new() and xfs_allo_read_agf(). I also wonder if it's
> > > > sufficient to just update the agf on disk and leave pagf_init == 0.
> > > 
> > > Hmm, wasn't there some verifier that used pag*_init (can't remember
> > > which one) to decide if we were in log recovery?
> > > 
> > 
> > It looks like xfs_attr3_leaf_verify() might do something like that. But
> > don't we have to handle that either way if the error path leaves
> > pagf_init == 0 on return? Actually we might have to address it
> > regardless if we want to use pagf_init if that path isn't holding the
> > agf.
> 
> <nod> I think we should simply add a xlog helper that decides if the log
> is in recovery and call it from xfs_attr3_leaf_verify rather than having
> an open-coded check on some other data structure.
> 

Agreed.

Brian

> --D
> 
> > Brian
> > 
> > > --D
> > > 
> > > > Otherwise the rest of this patch seems Ok to me.
> > > > 
> > > > Brian
> > > > 
> > > > > +	memcpy(agf, &old_agf, sizeof(old_agf));
> > > > > +	return error;
> > > > > +}
> > > > > diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
> > > > > index 85b048b341a0..17cf48564390 100644
> > > > > --- a/fs/xfs/scrub/repair.c
> > > > > +++ b/fs/xfs/scrub/repair.c
> > > > > @@ -128,9 +128,12 @@ xrep_roll_ag_trans(
> > > > >  	int			error;
> > > > >  
> > > > >  	/* Keep the AG header buffers locked so we can keep going. */
> > > > > -	xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
> > > > > -	xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
> > > > > -	xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
> > > > > +	if (sc->sa.agi_bp)
> > > > > +		xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
> > > > > +	if (sc->sa.agf_bp)
> > > > > +		xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
> > > > > +	if (sc->sa.agfl_bp)
> > > > > +		xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
> > > > >  
> > > > >  	/* Roll the transaction. */
> > > > >  	error = xfs_trans_roll(&sc->tp);
> > > > > @@ -138,9 +141,12 @@ xrep_roll_ag_trans(
> > > > >  		goto out_release;
> > > > >  
> > > > >  	/* Join AG headers to the new transaction. */
> > > > > -	xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
> > > > > -	xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
> > > > > -	xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
> > > > > +	if (sc->sa.agi_bp)
> > > > > +		xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
> > > > > +	if (sc->sa.agf_bp)
> > > > > +		xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
> > > > > +	if (sc->sa.agfl_bp)
> > > > > +		xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
> > > > >  
> > > > >  	return 0;
> > > > >  
> > > > > @@ -150,9 +156,12 @@ xrep_roll_ag_trans(
> > > > >  	 * buffers will be released during teardown on our way out
> > > > >  	 * of the kernel.
> > > > >  	 */
> > > > > -	xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
> > > > > -	xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
> > > > > -	xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
> > > > > +	if (sc->sa.agi_bp)
> > > > > +		xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
> > > > > +	if (sc->sa.agf_bp)
> > > > > +		xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
> > > > > +	if (sc->sa.agfl_bp)
> > > > > +		xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
> > > > >  
> > > > >  	return error;
> > > > >  }
> > > > > diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
> > > > > index 5a4e92221916..1d283360b5ab 100644
> > > > > --- a/fs/xfs/scrub/repair.h
> > > > > +++ b/fs/xfs/scrub/repair.h
> > > > > @@ -58,6 +58,8 @@ int xrep_ino_dqattach(struct xfs_scrub *sc);
> > > > >  
> > > > >  int xrep_probe(struct xfs_scrub *sc);
> > > > >  int xrep_superblock(struct xfs_scrub *sc);
> > > > > +int xrep_agf(struct xfs_scrub *sc);
> > > > > +int xrep_agfl(struct xfs_scrub *sc);
> > > > >  
> > > > >  #else
> > > > >  
> > > > > @@ -81,6 +83,8 @@ xrep_calc_ag_resblks(
> > > > >  
> > > > >  #define xrep_probe			xrep_notsupported
> > > > >  #define xrep_superblock			xrep_notsupported
> > > > > +#define xrep_agf			xrep_notsupported
> > > > > +#define xrep_agfl			xrep_notsupported
> > > > >  
> > > > >  #endif /* CONFIG_XFS_ONLINE_REPAIR */
> > > > >  
> > > > > diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
> > > > > index 6efb926f3cf8..1e8a17c8e2b9 100644
> > > > > --- a/fs/xfs/scrub/scrub.c
> > > > > +++ b/fs/xfs/scrub/scrub.c
> > > > > @@ -214,7 +214,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
> > > > >  		.type	= ST_PERAG,
> > > > >  		.setup	= xchk_setup_fs,
> > > > >  		.scrub	= xchk_agf,
> > > > > -		.repair	= xrep_notsupported,
> > > > > +		.repair	= xrep_agf,
> > > > >  	},
> > > > >  	[XFS_SCRUB_TYPE_AGFL]= {	/* agfl */
> > > > >  		.type	= ST_PERAG,
> > > > > 
> > > > > --
> > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > the body of a message to majordomo@vger.kernel.org
> > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > the body of a message to majordomo@vger.kernel.org
> > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox series

Patch

diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 1e96621ece3a..4842fc598c9b 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -17,12 +17,19 @@ 
 #include "xfs_sb.h"
 #include "xfs_inode.h"
 #include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
 #include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
 #include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/bitmap.h"
 
 /* Superblock */
 
@@ -54,3 +61,366 @@  xrep_superblock(
 	xfs_trans_log_buf(sc->tp, bp, 0, BBTOB(bp->b_length) - 1);
 	return error;
 }
+
+/* AGF */
+
+struct xrep_agf_allocbt {
+	struct xfs_scrub	*sc;
+	xfs_agblock_t		freeblks;
+	xfs_agblock_t		longest;
+};
+
+/* Record free space shape information. */
+STATIC int
+xrep_agf_walk_allocbt(
+	struct xfs_btree_cur		*cur,
+	struct xfs_alloc_rec_incore	*rec,
+	void				*priv)
+{
+	struct xrep_agf_allocbt		*raa = priv;
+	int				error = 0;
+
+	if (xchk_should_terminate(raa->sc, &error))
+		return error;
+
+	raa->freeblks += rec->ar_blockcount;
+	if (rec->ar_blockcount > raa->longest)
+		raa->longest = rec->ar_blockcount;
+	return error;
+}
+
+/* Does this AGFL block look sane? */
+STATIC int
+xrep_agf_check_agfl_block(
+	struct xfs_mount	*mp,
+	xfs_agblock_t		agbno,
+	void			*priv)
+{
+	struct xfs_scrub	*sc = priv;
+
+	if (!xfs_verify_agbno(mp, sc->sa.agno, agbno))
+		return -EFSCORRUPTED;
+	return 0;
+}
+
+/*
+ * Offset within the xrep_find_ag_btree array for each btree type.  Avoid the
+ * XFS_BTNUM_ names here to avoid creating a sparse array.
+ */
+enum {
+	XREP_AGF_BNOBT = 0,
+	XREP_AGF_CNTBT,
+	XREP_AGF_RMAPBT,
+	XREP_AGF_REFCOUNTBT,
+	XREP_AGF_END,
+	XREP_AGF_MAX
+};
+
+/*
+ * Given the btree roots described by *fab, find the roots, check them for
+ * sanity, and pass the root data back out via *fab.
+ *
+ * This is /also/ a chicken and egg problem because we have to use the rmapbt
+ * (rooted in the AGF) to find the btrees rooted in the AGF.  We also have no
+ * idea if the btrees make any sense.  If we hit obvious corruptions in those
+ * btrees we'll bail out.
+ */
+STATIC int
+xrep_agf_find_btrees(
+	struct xfs_scrub		*sc,
+	struct xfs_buf			*agf_bp,
+	struct xrep_find_ag_btree	*fab,
+	struct xfs_buf			*agfl_bp)
+{
+	struct xfs_agf			*old_agf = XFS_BUF_TO_AGF(agf_bp);
+	int				error;
+
+	/* Go find the root data. */
+	error = xrep_find_ag_btree_roots(sc, agf_bp, fab, agfl_bp);
+	if (error)
+		return error;
+
+	/* We must find the bnobt, cntbt, and rmapbt roots. */
+	if (fab[XREP_AGF_BNOBT].root == NULLAGBLOCK ||
+	    fab[XREP_AGF_BNOBT].height > XFS_BTREE_MAXLEVELS ||
+	    fab[XREP_AGF_CNTBT].root == NULLAGBLOCK ||
+	    fab[XREP_AGF_CNTBT].height > XFS_BTREE_MAXLEVELS ||
+	    fab[XREP_AGF_RMAPBT].root == NULLAGBLOCK ||
+	    fab[XREP_AGF_RMAPBT].height > XFS_BTREE_MAXLEVELS)
+		return -EFSCORRUPTED;
+
+	/*
+	 * We relied on the rmapbt to reconstruct the AGF.  If we get a
+	 * different root then something's seriously wrong.
+	 */
+	if (fab[XREP_AGF_RMAPBT].root !=
+	    be32_to_cpu(old_agf->agf_roots[XFS_BTNUM_RMAPi]))
+		return -EFSCORRUPTED;
+
+	/* We must find the refcountbt root if that feature is enabled. */
+	if (xfs_sb_version_hasreflink(&sc->mp->m_sb) &&
+	    (fab[XREP_AGF_REFCOUNTBT].root == NULLAGBLOCK ||
+	     fab[XREP_AGF_REFCOUNTBT].height > XFS_BTREE_MAXLEVELS))
+		return -EFSCORRUPTED;
+
+	return 0;
+}
+
+/*
+ * Reinitialize the AGF header, making an in-core copy of the old contents so
+ * that we know which in-core state needs to be reinitialized.
+ */
+STATIC void
+xrep_agf_init_header(
+	struct xfs_scrub	*sc,
+	struct xfs_buf		*agf_bp,
+	struct xfs_agf		*old_agf)
+{
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agf_bp);
+
+	memcpy(old_agf, agf, sizeof(*old_agf));
+	memset(agf, 0, BBTOB(agf_bp->b_length));
+	agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
+	agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
+	agf->agf_seqno = cpu_to_be32(sc->sa.agno);
+	agf->agf_length = cpu_to_be32(xfs_ag_block_count(mp, sc->sa.agno));
+	agf->agf_flfirst = old_agf->agf_flfirst;
+	agf->agf_fllast = old_agf->agf_fllast;
+	agf->agf_flcount = old_agf->agf_flcount;
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
+
+	/* Mark the incore AGF data stale until we're done fixing things. */
+	ASSERT(sc->sa.pag->pagf_init);
+	sc->sa.pag->pagf_init = 0;
+}
+
+/* Set btree root information in an AGF. */
+STATIC void
+xrep_agf_set_roots(
+	struct xfs_scrub		*sc,
+	struct xfs_agf			*agf,
+	struct xrep_find_ag_btree	*fab)
+{
+	agf->agf_roots[XFS_BTNUM_BNOi] =
+			cpu_to_be32(fab[XREP_AGF_BNOBT].root);
+	agf->agf_levels[XFS_BTNUM_BNOi] =
+			cpu_to_be32(fab[XREP_AGF_BNOBT].height);
+
+	agf->agf_roots[XFS_BTNUM_CNTi] =
+			cpu_to_be32(fab[XREP_AGF_CNTBT].root);
+	agf->agf_levels[XFS_BTNUM_CNTi] =
+			cpu_to_be32(fab[XREP_AGF_CNTBT].height);
+
+	agf->agf_roots[XFS_BTNUM_RMAPi] =
+			cpu_to_be32(fab[XREP_AGF_RMAPBT].root);
+	agf->agf_levels[XFS_BTNUM_RMAPi] =
+			cpu_to_be32(fab[XREP_AGF_RMAPBT].height);
+
+	if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) {
+		agf->agf_refcount_root =
+				cpu_to_be32(fab[XREP_AGF_REFCOUNTBT].root);
+		agf->agf_refcount_level =
+				cpu_to_be32(fab[XREP_AGF_REFCOUNTBT].height);
+	}
+}
+
+/* Update all AGF fields which derive from btree contents. */
+STATIC int
+xrep_agf_calc_from_btrees(
+	struct xfs_scrub	*sc,
+	struct xfs_buf		*agf_bp)
+{
+	struct xrep_agf_allocbt	raa = { .sc = sc };
+	struct xfs_btree_cur	*cur = NULL;
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agf_bp);
+	struct xfs_mount	*mp = sc->mp;
+	xfs_agblock_t		btreeblks;
+	xfs_agblock_t		blocks;
+	int			error;
+
+	/* Update the AGF counters from the bnobt. */
+	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno,
+			XFS_BTNUM_BNO);
+	error = xfs_alloc_query_all(cur, xrep_agf_walk_allocbt, &raa);
+	if (error)
+		goto err;
+	error = xfs_btree_count_blocks(cur, &blocks);
+	if (error)
+		goto err;
+	xfs_btree_del_cursor(cur, error);
+	btreeblks = blocks - 1;
+	agf->agf_freeblks = cpu_to_be32(raa.freeblks);
+	agf->agf_longest = cpu_to_be32(raa.longest);
+
+	/* Update the AGF counters from the cntbt. */
+	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno,
+			XFS_BTNUM_CNT);
+	error = xfs_btree_count_blocks(cur, &blocks);
+	if (error)
+		goto err;
+	xfs_btree_del_cursor(cur, error);
+	btreeblks += blocks - 1;
+
+	/* Update the AGF counters from the rmapbt. */
+	cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno);
+	error = xfs_btree_count_blocks(cur, &blocks);
+	if (error)
+		goto err;
+	xfs_btree_del_cursor(cur, error);
+	agf->agf_rmap_blocks = cpu_to_be32(blocks);
+	btreeblks += blocks - 1;
+
+	agf->agf_btreeblks = cpu_to_be32(btreeblks);
+
+	/* Update the AGF counters from the refcountbt. */
+	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+		cur = xfs_refcountbt_init_cursor(mp, sc->tp, agf_bp,
+				sc->sa.agno);
+		error = xfs_btree_count_blocks(cur, &blocks);
+		if (error)
+			goto err;
+		xfs_btree_del_cursor(cur, error);
+		agf->agf_refcount_blocks = cpu_to_be32(blocks);
+	}
+
+	return 0;
+err:
+	xfs_btree_del_cursor(cur, error);
+	return error;
+}
+
+/* Commit the new AGF and reinitialize the incore state. */
+STATIC int
+xrep_agf_commit_new(
+	struct xfs_scrub	*sc,
+	struct xfs_buf		*agf_bp)
+{
+	struct xfs_perag	*pag;
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agf_bp);
+
+	/* Trigger fdblocks recalculation */
+	xfs_force_summary_recalc(sc->mp);
+
+	/* Write this to disk. */
+	xfs_trans_buf_set_type(sc->tp, agf_bp, XFS_BLFT_AGF_BUF);
+	xfs_trans_log_buf(sc->tp, agf_bp, 0, BBTOB(agf_bp->b_length) - 1);
+
+	/* Now reinitialize the in-core counters we changed. */
+	pag = sc->sa.pag;
+	pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
+	pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
+	pag->pagf_longest = be32_to_cpu(agf->agf_longest);
+	pag->pagf_levels[XFS_BTNUM_BNOi] =
+			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
+	pag->pagf_levels[XFS_BTNUM_CNTi] =
+			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
+	pag->pagf_levels[XFS_BTNUM_RMAPi] =
+			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
+	pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
+	pag->pagf_init = 1;
+
+	return 0;
+}
+
+/* Repair the AGF. v5 filesystems only. */
+int
+xrep_agf(
+	struct xfs_scrub		*sc)
+{
+	struct xrep_find_ag_btree	fab[XREP_AGF_MAX] = {
+		[XREP_AGF_BNOBT] = {
+			.rmap_owner = XFS_RMAP_OWN_AG,
+			.buf_ops = &xfs_allocbt_buf_ops,
+			.magic = XFS_ABTB_CRC_MAGIC,
+		},
+		[XREP_AGF_CNTBT] = {
+			.rmap_owner = XFS_RMAP_OWN_AG,
+			.buf_ops = &xfs_allocbt_buf_ops,
+			.magic = XFS_ABTC_CRC_MAGIC,
+		},
+		[XREP_AGF_RMAPBT] = {
+			.rmap_owner = XFS_RMAP_OWN_AG,
+			.buf_ops = &xfs_rmapbt_buf_ops,
+			.magic = XFS_RMAP_CRC_MAGIC,
+		},
+		[XREP_AGF_REFCOUNTBT] = {
+			.rmap_owner = XFS_RMAP_OWN_REFC,
+			.buf_ops = &xfs_refcountbt_buf_ops,
+			.magic = XFS_REFC_CRC_MAGIC,
+		},
+		[XREP_AGF_END] = {
+			.buf_ops = NULL,
+		},
+	};
+	struct xfs_agf			old_agf;
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_buf			*agf_bp;
+	struct xfs_buf			*agfl_bp;
+	struct xfs_agf			*agf;
+	int				error;
+
+	/* We require the rmapbt to rebuild anything. */
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return -EOPNOTSUPP;
+
+	xchk_perag_get(sc->mp, &sc->sa);
+	/*
+	 * Make sure we have the AGF buffer, as scrub might have decided it
+	 * was corrupt after xfs_alloc_read_agf failed with -EFSCORRUPTED.
+	 */
+	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+			XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGF_DADDR(mp)),
+			XFS_FSS_TO_BB(mp, 1), 0, &agf_bp, NULL);
+	if (error)
+		return error;
+	agf_bp->b_ops = &xfs_agf_buf_ops;
+	agf = XFS_BUF_TO_AGF(agf_bp);
+
+	/*
+	 * Load the AGFL so that we can screen out OWN_AG blocks that are on
+	 * the AGFL now; these blocks might have once been part of the
+	 * bno/cnt/rmap btrees but are not now.  This is a chicken and egg
+	 * problem: the AGF is corrupt, so we have to trust the AGFL contents
+	 * because we can't do any serious cross-referencing with any of the
+	 * btrees rooted in the AGF.  If the AGFL contents are obviously bad
+	 * then we'll bail out.
+	 */
+	error = xfs_alloc_read_agfl(mp, sc->tp, sc->sa.agno, &agfl_bp);
+	if (error)
+		return error;
+
+	/*
+	 * Spot-check the AGFL blocks; if they're obviously corrupt then
+	 * there's nothing we can do but bail out.
+	 */
+	error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(agf_bp), agfl_bp,
+			xrep_agf_check_agfl_block, sc);
+	if (error)
+		return error;
+
+	/*
+	 * Find the AGF btree roots.  This is also a chicken-and-egg situation;
+	 * see the function for more details.
+	 */
+	error = xrep_agf_find_btrees(sc, agf_bp, fab, agfl_bp);
+	if (error)
+		return error;
+
+	/* Start rewriting the header and implant the btrees we found. */
+	xrep_agf_init_header(sc, agf_bp, &old_agf);
+	xrep_agf_set_roots(sc, agf, fab);
+	error = xrep_agf_calc_from_btrees(sc, agf_bp);
+	if (error)
+		goto out_revert;
+
+	/* Commit the changes and reinitialize incore state. */
+	return xrep_agf_commit_new(sc, agf_bp);
+
+out_revert:
+	/* Mark the incore AGF state stale and revert the AGF. */
+	sc->sa.pag->pagf_init = 0;
+	memcpy(agf, &old_agf, sizeof(old_agf));
+	return error;
+}
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 85b048b341a0..17cf48564390 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -128,9 +128,12 @@  xrep_roll_ag_trans(
 	int			error;
 
 	/* Keep the AG header buffers locked so we can keep going. */
-	xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
-	xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
-	xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
+	if (sc->sa.agi_bp)
+		xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
+	if (sc->sa.agf_bp)
+		xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
+	if (sc->sa.agfl_bp)
+		xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
 
 	/* Roll the transaction. */
 	error = xfs_trans_roll(&sc->tp);
@@ -138,9 +141,12 @@  xrep_roll_ag_trans(
 		goto out_release;
 
 	/* Join AG headers to the new transaction. */
-	xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
-	xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
-	xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
+	if (sc->sa.agi_bp)
+		xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
+	if (sc->sa.agf_bp)
+		xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
+	if (sc->sa.agfl_bp)
+		xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
 
 	return 0;
 
@@ -150,9 +156,12 @@  xrep_roll_ag_trans(
 	 * buffers will be released during teardown on our way out
 	 * of the kernel.
 	 */
-	xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
-	xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
-	xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
+	if (sc->sa.agi_bp)
+		xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
+	if (sc->sa.agf_bp)
+		xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
+	if (sc->sa.agfl_bp)
+		xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
 
 	return error;
 }
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 5a4e92221916..1d283360b5ab 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -58,6 +58,8 @@  int xrep_ino_dqattach(struct xfs_scrub *sc);
 
 int xrep_probe(struct xfs_scrub *sc);
 int xrep_superblock(struct xfs_scrub *sc);
+int xrep_agf(struct xfs_scrub *sc);
+int xrep_agfl(struct xfs_scrub *sc);
 
 #else
 
@@ -81,6 +83,8 @@  xrep_calc_ag_resblks(
 
 #define xrep_probe			xrep_notsupported
 #define xrep_superblock			xrep_notsupported
+#define xrep_agf			xrep_notsupported
+#define xrep_agfl			xrep_notsupported
 
 #endif /* CONFIG_XFS_ONLINE_REPAIR */
 
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 6efb926f3cf8..1e8a17c8e2b9 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -214,7 +214,7 @@  static const struct xchk_meta_ops meta_scrub_ops[] = {
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_fs,
 		.scrub	= xchk_agf,
-		.repair	= xrep_notsupported,
+		.repair	= xrep_agf,
 	},
 	[XFS_SCRUB_TYPE_AGFL]= {	/* agfl */
 		.type	= ST_PERAG,