diff mbox series

[29/42] xfs: convert trim to use for_each_perag_range

Message ID 20230118224505.1964941-30-david@fromorbit.com (mailing list archive)
State Superseded
Headers show
Series xfs: per-ag centric allocation algorithms | expand

Commit Message

Dave Chinner Jan. 18, 2023, 10:44 p.m. UTC
From: Dave Chinner <dchinner@redhat.com>

Convert the trim code to iterate AGs with for_each_perag_range() so that
it uses active perag references and is hence shrink safe.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_discard.c | 50 ++++++++++++++++++++------------------------
 1 file changed, 23 insertions(+), 27 deletions(-)

Comments

Darrick J. Wong Feb. 1, 2023, 11:15 p.m. UTC | #1
On Thu, Jan 19, 2023 at 09:44:52AM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> To convert it to using active perag references and hence make it
> shrink safe.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/xfs_discard.c | 50 ++++++++++++++++++++------------------------
>  1 file changed, 23 insertions(+), 27 deletions(-)
> 
> diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
> index bfc829c07f03..afc4c78b9eed 100644
> --- a/fs/xfs/xfs_discard.c
> +++ b/fs/xfs/xfs_discard.c
> @@ -21,23 +21,20 @@
>  
>  STATIC int
>  xfs_trim_extents(
> -	struct xfs_mount	*mp,
> -	xfs_agnumber_t		agno,
> +	struct xfs_perag	*pag,
>  	xfs_daddr_t		start,
>  	xfs_daddr_t		end,
>  	xfs_daddr_t		minlen,
>  	uint64_t		*blocks_trimmed)
>  {
> +	struct xfs_mount	*mp = pag->pag_mount;
>  	struct block_device	*bdev = mp->m_ddev_targp->bt_bdev;
>  	struct xfs_btree_cur	*cur;
>  	struct xfs_buf		*agbp;
>  	struct xfs_agf		*agf;
> -	struct xfs_perag	*pag;
>  	int			error;
>  	int			i;
>  
> -	pag = xfs_perag_get(mp, agno);
> -
>  	/*
>  	 * Force out the log.  This means any transactions that might have freed

This is a tangent, but one thing I've wondered is if it's really
necessary to force the log for *every* AG that we want to trim?  Even if
we've just come from trimming the previous AG?

Looks good otherwise,
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D


>  	 * space before we take the AGF buffer lock are now on disk, and the
> @@ -47,7 +44,7 @@ xfs_trim_extents(
>  
>  	error = xfs_alloc_read_agf(pag, NULL, 0, &agbp);
>  	if (error)
> -		goto out_put_perag;
> +		return error;
>  	agf = agbp->b_addr;
>  
>  	cur = xfs_allocbt_init_cursor(mp, NULL, agbp, pag, XFS_BTNUM_CNT);
> @@ -71,10 +68,10 @@ xfs_trim_extents(
>  
>  		error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
>  		if (error)
> -			goto out_del_cursor;
> +			break;
>  		if (XFS_IS_CORRUPT(mp, i != 1)) {
>  			error = -EFSCORRUPTED;
> -			goto out_del_cursor;
> +			break;
>  		}
>  		ASSERT(flen <= be32_to_cpu(agf->agf_longest));
>  
> @@ -83,15 +80,15 @@ xfs_trim_extents(
>  		 * the format the range/len variables are supplied in by
>  		 * userspace.
>  		 */
> -		dbno = XFS_AGB_TO_DADDR(mp, agno, fbno);
> +		dbno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, fbno);
>  		dlen = XFS_FSB_TO_BB(mp, flen);
>  
>  		/*
>  		 * Too small?  Give up.
>  		 */
>  		if (dlen < minlen) {
> -			trace_xfs_discard_toosmall(mp, agno, fbno, flen);
> -			goto out_del_cursor;
> +			trace_xfs_discard_toosmall(mp, pag->pag_agno, fbno, flen);
> +			break;
>  		}
>  
>  		/*
> @@ -100,7 +97,7 @@ xfs_trim_extents(
>  		 * down partially overlapping ranges for now.
>  		 */
>  		if (dbno + dlen < start || dbno > end) {
> -			trace_xfs_discard_exclude(mp, agno, fbno, flen);
> +			trace_xfs_discard_exclude(mp, pag->pag_agno, fbno, flen);
>  			goto next_extent;
>  		}
>  
> @@ -109,32 +106,30 @@ xfs_trim_extents(
>  		 * discard and try again the next time.
>  		 */
>  		if (xfs_extent_busy_search(mp, pag, fbno, flen)) {
> -			trace_xfs_discard_busy(mp, agno, fbno, flen);
> +			trace_xfs_discard_busy(mp, pag->pag_agno, fbno, flen);
>  			goto next_extent;
>  		}
>  
> -		trace_xfs_discard_extent(mp, agno, fbno, flen);
> +		trace_xfs_discard_extent(mp, pag->pag_agno, fbno, flen);
>  		error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS);
>  		if (error)
> -			goto out_del_cursor;
> +			break;
>  		*blocks_trimmed += flen;
>  
>  next_extent:
>  		error = xfs_btree_decrement(cur, 0, &i);
>  		if (error)
> -			goto out_del_cursor;
> +			break;
>  
>  		if (fatal_signal_pending(current)) {
>  			error = -ERESTARTSYS;
> -			goto out_del_cursor;
> +			break;
>  		}
>  	}
>  
>  out_del_cursor:
>  	xfs_btree_del_cursor(cur, error);
>  	xfs_buf_relse(agbp);
> -out_put_perag:
> -	xfs_perag_put(pag);
>  	return error;
>  }
>  
> @@ -152,11 +147,12 @@ xfs_ioc_trim(
>  	struct xfs_mount		*mp,
>  	struct fstrim_range __user	*urange)
>  {
> +	struct xfs_perag	*pag;
>  	unsigned int		granularity =
>  		bdev_discard_granularity(mp->m_ddev_targp->bt_bdev);
>  	struct fstrim_range	range;
>  	xfs_daddr_t		start, end, minlen;
> -	xfs_agnumber_t		start_agno, end_agno, agno;
> +	xfs_agnumber_t		agno;
>  	uint64_t		blocks_trimmed = 0;
>  	int			error, last_error = 0;
>  
> @@ -193,18 +189,18 @@ xfs_ioc_trim(
>  	end = start + BTOBBT(range.len) - 1;
>  
>  	if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
> -		end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1;
> -
> -	start_agno = xfs_daddr_to_agno(mp, start);
> -	end_agno = xfs_daddr_to_agno(mp, end);
> +		end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1;
>  
> -	for (agno = start_agno; agno <= end_agno; agno++) {
> -		error = xfs_trim_extents(mp, agno, start, end, minlen,
> +	agno = xfs_daddr_to_agno(mp, start);
> +	for_each_perag_range(mp, agno, xfs_daddr_to_agno(mp, end), pag) {
> +		error = xfs_trim_extents(pag, start, end, minlen,
>  					  &blocks_trimmed);
>  		if (error) {
>  			last_error = error;
> -			if (error == -ERESTARTSYS)
> +			if (error == -ERESTARTSYS) {
> +				xfs_perag_rele(pag);
>  				break;
> +			}
>  		}
>  	}
>  
> -- 
> 2.39.0
>
Dave Chinner Feb. 6, 2023, 11:19 p.m. UTC | #2
On Wed, Feb 01, 2023 at 03:15:15PM -0800, Darrick J. Wong wrote:
> On Thu, Jan 19, 2023 at 09:44:52AM +1100, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > To convert it to using active perag references and hence make it
> > shrink safe.
> > 
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > ---
> >  fs/xfs/xfs_discard.c | 50 ++++++++++++++++++++------------------------
> >  1 file changed, 23 insertions(+), 27 deletions(-)
> > 
> > diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
> > index bfc829c07f03..afc4c78b9eed 100644
> > --- a/fs/xfs/xfs_discard.c
> > +++ b/fs/xfs/xfs_discard.c
> > @@ -21,23 +21,20 @@
> >  
> >  STATIC int
> >  xfs_trim_extents(
> > -	struct xfs_mount	*mp,
> > -	xfs_agnumber_t		agno,
> > +	struct xfs_perag	*pag,
> >  	xfs_daddr_t		start,
> >  	xfs_daddr_t		end,
> >  	xfs_daddr_t		minlen,
> >  	uint64_t		*blocks_trimmed)
> >  {
> > +	struct xfs_mount	*mp = pag->pag_mount;
> >  	struct block_device	*bdev = mp->m_ddev_targp->bt_bdev;
> >  	struct xfs_btree_cur	*cur;
> >  	struct xfs_buf		*agbp;
> >  	struct xfs_agf		*agf;
> > -	struct xfs_perag	*pag;
> >  	int			error;
> >  	int			i;
> >  
> > -	pag = xfs_perag_get(mp, agno);
> > -
> >  	/*
> >  	 * Force out the log.  This means any transactions that might have freed
> 
> This is a tangent, but one thing I've wondered is if it's really
> necessary to force the log for *every* AG that we want to trim?  Even if
> we've just come from trimming the previous AG?

I suspect the thought behind this is that TRIM operations can be
really slow, so there can be a big build-up of new busy extents as a
large fragmented AG is trimmed.

I don't think it really matters at this point - if you are running a
multi-AG trim range, a few extra log forces is the least of your
performance worries. If someone reports it as a perf problem, let's
look at it then....

Cheers,

Dave.
diff mbox series

Patch

diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index bfc829c07f03..afc4c78b9eed 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -21,23 +21,20 @@ 
 
 STATIC int
 xfs_trim_extents(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
+	struct xfs_perag	*pag,
 	xfs_daddr_t		start,
 	xfs_daddr_t		end,
 	xfs_daddr_t		minlen,
 	uint64_t		*blocks_trimmed)
 {
+	struct xfs_mount	*mp = pag->pag_mount;
 	struct block_device	*bdev = mp->m_ddev_targp->bt_bdev;
 	struct xfs_btree_cur	*cur;
 	struct xfs_buf		*agbp;
 	struct xfs_agf		*agf;
-	struct xfs_perag	*pag;
 	int			error;
 	int			i;
 
-	pag = xfs_perag_get(mp, agno);
-
 	/*
 	 * Force out the log.  This means any transactions that might have freed
 	 * space before we take the AGF buffer lock are now on disk, and the
@@ -47,7 +44,7 @@  xfs_trim_extents(
 
 	error = xfs_alloc_read_agf(pag, NULL, 0, &agbp);
 	if (error)
-		goto out_put_perag;
+		return error;
 	agf = agbp->b_addr;
 
 	cur = xfs_allocbt_init_cursor(mp, NULL, agbp, pag, XFS_BTNUM_CNT);
@@ -71,10 +68,10 @@  xfs_trim_extents(
 
 		error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
 		if (error)
-			goto out_del_cursor;
+			break;
 		if (XFS_IS_CORRUPT(mp, i != 1)) {
 			error = -EFSCORRUPTED;
-			goto out_del_cursor;
+			break;
 		}
 		ASSERT(flen <= be32_to_cpu(agf->agf_longest));
 
@@ -83,15 +80,15 @@  xfs_trim_extents(
 		 * the format the range/len variables are supplied in by
 		 * userspace.
 		 */
-		dbno = XFS_AGB_TO_DADDR(mp, agno, fbno);
+		dbno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, fbno);
 		dlen = XFS_FSB_TO_BB(mp, flen);
 
 		/*
 		 * Too small?  Give up.
 		 */
 		if (dlen < minlen) {
-			trace_xfs_discard_toosmall(mp, agno, fbno, flen);
-			goto out_del_cursor;
+			trace_xfs_discard_toosmall(mp, pag->pag_agno, fbno, flen);
+			break;
 		}
 
 		/*
@@ -100,7 +97,7 @@  xfs_trim_extents(
 		 * down partially overlapping ranges for now.
 		 */
 		if (dbno + dlen < start || dbno > end) {
-			trace_xfs_discard_exclude(mp, agno, fbno, flen);
+			trace_xfs_discard_exclude(mp, pag->pag_agno, fbno, flen);
 			goto next_extent;
 		}
 
@@ -109,32 +106,30 @@  xfs_trim_extents(
 		 * discard and try again the next time.
 		 */
 		if (xfs_extent_busy_search(mp, pag, fbno, flen)) {
-			trace_xfs_discard_busy(mp, agno, fbno, flen);
+			trace_xfs_discard_busy(mp, pag->pag_agno, fbno, flen);
 			goto next_extent;
 		}
 
-		trace_xfs_discard_extent(mp, agno, fbno, flen);
+		trace_xfs_discard_extent(mp, pag->pag_agno, fbno, flen);
 		error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS);
 		if (error)
-			goto out_del_cursor;
+			break;
 		*blocks_trimmed += flen;
 
 next_extent:
 		error = xfs_btree_decrement(cur, 0, &i);
 		if (error)
-			goto out_del_cursor;
+			break;
 
 		if (fatal_signal_pending(current)) {
 			error = -ERESTARTSYS;
-			goto out_del_cursor;
+			break;
 		}
 	}
 
 out_del_cursor:
 	xfs_btree_del_cursor(cur, error);
 	xfs_buf_relse(agbp);
-out_put_perag:
-	xfs_perag_put(pag);
 	return error;
 }
 
@@ -152,11 +147,12 @@  xfs_ioc_trim(
 	struct xfs_mount		*mp,
 	struct fstrim_range __user	*urange)
 {
+	struct xfs_perag	*pag;
 	unsigned int		granularity =
 		bdev_discard_granularity(mp->m_ddev_targp->bt_bdev);
 	struct fstrim_range	range;
 	xfs_daddr_t		start, end, minlen;
-	xfs_agnumber_t		start_agno, end_agno, agno;
+	xfs_agnumber_t		agno;
 	uint64_t		blocks_trimmed = 0;
 	int			error, last_error = 0;
 
@@ -193,18 +189,18 @@  xfs_ioc_trim(
 	end = start + BTOBBT(range.len) - 1;
 
 	if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
-		end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1;
-
-	start_agno = xfs_daddr_to_agno(mp, start);
-	end_agno = xfs_daddr_to_agno(mp, end);
+		end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1;
 
-	for (agno = start_agno; agno <= end_agno; agno++) {
-		error = xfs_trim_extents(mp, agno, start, end, minlen,
+	agno = xfs_daddr_to_agno(mp, start);
+	for_each_perag_range(mp, agno, xfs_daddr_to_agno(mp, end), pag) {
+		error = xfs_trim_extents(pag, start, end, minlen,
 					  &blocks_trimmed);
 		if (error) {
 			last_error = error;
-			if (error == -ERESTARTSYS)
+			if (error == -ERESTARTSYS) {
+				xfs_perag_rele(pag);
 				break;
+			}
 		}
 	}