diff mbox series

[3/3] xfs: use deferred frees to reap old btree blocks

Message ID 157063973378.2913192.158267929318422892.stgit@magnolia (mailing list archive)
State Superseded
Headers show
Series xfs: fix online repair block reaping | expand

Commit Message

Darrick J. Wong Oct. 9, 2019, 4:48 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Use deferred frees (EFIs) to reap the blocks of a btree that we just
replaced.  This shrinks the window in which those old blocks could be
lost due to a system crash.  We still roll the transaction to flush the
EFIs every hundred or so blocks so that we don't overflow the
transaction reservation during and after we commit the new btree.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/scrub/repair.c |   29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

Comments

Brian Foster Oct. 17, 2019, 12:55 p.m. UTC | #1
On Wed, Oct 09, 2019 at 09:48:53AM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Use deferred frees (EFIs) to reap the blocks of a btree that we just
> replaced.  This helps us to shrink the window in which those old blocks
> could be lost due to a system crash, though we try to flush the EFIs
> every few hundred blocks so that we don't also overflow the transaction
> reservations during and after we commit the new btree.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/scrub/repair.c |   29 ++++++++++++++++++++++++-----
>  1 file changed, 24 insertions(+), 5 deletions(-)
> 
> 
> diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
> index e21faef6db5a..8349694f985d 100644
> --- a/fs/xfs/scrub/repair.c
> +++ b/fs/xfs/scrub/repair.c
...
> @@ -565,14 +568,24 @@ xrep_reap_block(
>  		xrep_reap_invalidate_block(sc, fsbno);
>  		error = xrep_put_freelist(sc, agbno);
>  	} else {
> +		/*
> +		 * Use deferred frees to get rid of the old btree blocks to try
> +		 * to minimize the window in which we could crash and lose the
> +		 * old blocks.  However, we still need to roll the transaction
> +		 * every 100 or so EFIs so that we don't exceed the log
> +		 * reservation.
> +		 */
>  		xrep_reap_invalidate_block(sc, fsbno);
> -		error = xfs_free_extent(sc->tp, fsbno, 1, oinfo, resv);
> +		__xfs_bmap_add_free(sc->tp, fsbno, 1, oinfo, true);

xfs_free_extent() sets skip_discard to false and this changes it to
true. Intentional?

Otherwise the rest looks straightforward.

Brian

> +		(*deferred)++;
> +		need_roll = *deferred > 100;
>  	}
>  	if (agf_bp != sc->sa.agf_bp)
>  		xfs_trans_brelse(sc->tp, agf_bp);
> -	if (error)
> +	if (error || !need_roll)
>  		return error;
>  
> +	*deferred = 0;
>  	if (sc->ip)
>  		return xfs_trans_roll_inode(&sc->tp, sc->ip);
>  	return xrep_roll_ag_trans(sc);
> @@ -594,6 +607,7 @@ xrep_reap_extents(
>  	struct xfs_bitmap_range		*bmr;
>  	struct xfs_bitmap_range		*n;
>  	xfs_fsblock_t			fsbno;
> +	unsigned int			deferred = 0;
>  	int				error = 0;
>  
>  	ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb));
> @@ -605,12 +619,17 @@ xrep_reap_extents(
>  				XFS_FSB_TO_AGNO(sc->mp, fsbno),
>  				XFS_FSB_TO_AGBNO(sc->mp, fsbno), 1);
>  
> -		error = xrep_reap_block(sc, fsbno, oinfo, type);
> +		error = xrep_reap_block(sc, fsbno, oinfo, type, &deferred);
>  		if (error)
>  			break;
>  	}
>  
> -	return error;
> +	if (error || deferred == 0)
> +		return error;
> +
> +	if (sc->ip)
> +		return xfs_trans_roll_inode(&sc->tp, sc->ip);
> +	return xrep_roll_ag_trans(sc);
>  }
>  
>  /*
>
Darrick J. Wong Oct. 17, 2019, 3:06 p.m. UTC | #2
On Thu, Oct 17, 2019 at 08:55:37AM -0400, Brian Foster wrote:
> On Wed, Oct 09, 2019 at 09:48:53AM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Use deferred frees (EFIs) to reap the blocks of a btree that we just
> > replaced.  This helps us to shrink the window in which those old blocks
> > could be lost due to a system crash, though we try to flush the EFIs
> > every few hundred blocks so that we don't also overflow the transaction
> > reservations during and after we commit the new btree.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/xfs/scrub/repair.c |   29 ++++++++++++++++++++++++-----
> >  1 file changed, 24 insertions(+), 5 deletions(-)
> > 
> > 
> > diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
> > index e21faef6db5a..8349694f985d 100644
> > --- a/fs/xfs/scrub/repair.c
> > +++ b/fs/xfs/scrub/repair.c
> ...
> > @@ -565,14 +568,24 @@ xrep_reap_block(
> >  		xrep_reap_invalidate_block(sc, fsbno);
> >  		error = xrep_put_freelist(sc, agbno);
> >  	} else {
> > +		/*
> > +		 * Use deferred frees to get rid of the old btree blocks to try
> > +		 * to minimize the window in which we could crash and lose the
> > +		 * old blocks.  However, we still need to roll the transaction
> > +		 * every 100 or so EFIs so that we don't exceed the log
> > +		 * reservation.
> > +		 */
> >  		xrep_reap_invalidate_block(sc, fsbno);
> > -		error = xfs_free_extent(sc->tp, fsbno, 1, oinfo, resv);
> > +		__xfs_bmap_add_free(sc->tp, fsbno, 1, oinfo, true);
> 
> xfs_free_extent() sets skip_discard to false and this changes it to
> true. Intentional?

Nope.  Will fix this (and the comment in the previous patch).

--D

> Otherwise the rest looks straightforward.
> 
> Brian
> 
> > +		(*deferred)++;
> > +		need_roll = *deferred > 100;
> >  	}
> >  	if (agf_bp != sc->sa.agf_bp)
> >  		xfs_trans_brelse(sc->tp, agf_bp);
> > -	if (error)
> > +	if (error || !need_roll)
> >  		return error;
> >  
> > +	*deferred = 0;
> >  	if (sc->ip)
> >  		return xfs_trans_roll_inode(&sc->tp, sc->ip);
> >  	return xrep_roll_ag_trans(sc);
> > @@ -594,6 +607,7 @@ xrep_reap_extents(
> >  	struct xfs_bitmap_range		*bmr;
> >  	struct xfs_bitmap_range		*n;
> >  	xfs_fsblock_t			fsbno;
> > +	unsigned int			deferred = 0;
> >  	int				error = 0;
> >  
> >  	ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb));
> > @@ -605,12 +619,17 @@ xrep_reap_extents(
> >  				XFS_FSB_TO_AGNO(sc->mp, fsbno),
> >  				XFS_FSB_TO_AGBNO(sc->mp, fsbno), 1);
> >  
> > -		error = xrep_reap_block(sc, fsbno, oinfo, type);
> > +		error = xrep_reap_block(sc, fsbno, oinfo, type, &deferred);
> >  		if (error)
> >  			break;
> >  	}
> >  
> > -	return error;
> > +	if (error || deferred == 0)
> > +		return error;
> > +
> > +	if (sc->ip)
> > +		return xfs_trans_roll_inode(&sc->tp, sc->ip);
> > +	return xrep_roll_ag_trans(sc);
> >  }
> >  
> >  /*
> >
diff mbox series

Patch

diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index e21faef6db5a..8349694f985d 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -24,6 +24,7 @@ 
 #include "xfs_extent_busy.h"
 #include "xfs_ag_resv.h"
 #include "xfs_quota.h"
+#include "xfs_bmap.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
@@ -512,13 +513,15 @@  xrep_reap_block(
 	struct xfs_scrub		*sc,
 	xfs_fsblock_t			fsbno,
 	const struct xfs_owner_info	*oinfo,
-	enum xfs_ag_resv_type		resv)
+	enum xfs_ag_resv_type		resv,
+	unsigned int			*deferred)
 {
 	struct xfs_btree_cur		*cur;
 	struct xfs_buf			*agf_bp = NULL;
 	xfs_agnumber_t			agno;
 	xfs_agblock_t			agbno;
 	bool				has_other_rmap;
+	bool				need_roll = true;
 	int				error;
 
 	agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
@@ -565,14 +568,24 @@  xrep_reap_block(
 		xrep_reap_invalidate_block(sc, fsbno);
 		error = xrep_put_freelist(sc, agbno);
 	} else {
+		/*
+		 * Use deferred frees to get rid of the old btree blocks to try
+		 * to minimize the window in which we could crash and lose the
+		 * old blocks.  However, we still need to roll the transaction
+		 * every 100 or so EFIs so that we don't exceed the log
+		 * reservation.
+		 */
 		xrep_reap_invalidate_block(sc, fsbno);
-		error = xfs_free_extent(sc->tp, fsbno, 1, oinfo, resv);
+		__xfs_bmap_add_free(sc->tp, fsbno, 1, oinfo, true);
+		(*deferred)++;
+		need_roll = *deferred > 100;
 	}
 	if (agf_bp != sc->sa.agf_bp)
 		xfs_trans_brelse(sc->tp, agf_bp);
-	if (error)
+	if (error || !need_roll)
 		return error;
 
+	*deferred = 0;
 	if (sc->ip)
 		return xfs_trans_roll_inode(&sc->tp, sc->ip);
 	return xrep_roll_ag_trans(sc);
@@ -594,6 +607,7 @@  xrep_reap_extents(
 	struct xfs_bitmap_range		*bmr;
 	struct xfs_bitmap_range		*n;
 	xfs_fsblock_t			fsbno;
+	unsigned int			deferred = 0;
 	int				error = 0;
 
 	ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb));
@@ -605,12 +619,17 @@  xrep_reap_extents(
 				XFS_FSB_TO_AGNO(sc->mp, fsbno),
 				XFS_FSB_TO_AGBNO(sc->mp, fsbno), 1);
 
-		error = xrep_reap_block(sc, fsbno, oinfo, type);
+		error = xrep_reap_block(sc, fsbno, oinfo, type, &deferred);
 		if (error)
 			break;
 	}
 
-	return error;
+	if (error || deferred == 0)
+		return error;
+
+	if (sc->ip)
+		return xfs_trans_roll_inode(&sc->tp, sc->ip);
+	return xrep_roll_ag_trans(sc);
 }
 
 /*