[15/24] xfs: allow multiple reclaimers per AG
diff mbox series

Message ID 20200522035029.3022405-16-david@fromorbit.com
State Superseded
Headers show
Series
  • xfs: rework inode flushing to make inode reclaim fully asynchronous
Related show

Commit Message

Dave Chinner May 22, 2020, 3:50 a.m. UTC
From: Dave Chinner <dchinner@redhat.com>

Inode reclaim will still throttle direct reclaim on the per-ag
reclaim locks. This is no longer necessary as reclaim can run
non-blocking now. Hence we can remove these locks so that we don't
arbitrarily block reclaimers just because there are more direct
reclaimers than there are AGs.

This can result in multiple reclaimers working on the same range of
an AG, but this doesn't cause any apparent issues. Optimising the
spread of concurrent reclaimers for best efficiency can be done in a
future patchset.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_icache.c | 28 +++++++---------------------
 fs/xfs/xfs_mount.c  |  4 ----
 fs/xfs/xfs_mount.h  |  1 -
 3 files changed, 7 insertions(+), 26 deletions(-)

Comments

Darrick J. Wong May 22, 2020, 11:10 p.m. UTC | #1
On Fri, May 22, 2020 at 01:50:20PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> Inode reclaim will still throttle direct reclaim on the per-ag
> reclaim locks. This is no longer necessary as reclaim can run
> non-blocking now. Hence we can remove these locks so that we don't
> arbitrarily block reclaimers just because there are more direct
> reclaimers than there are AGs.
> 
> This can result in multiple reclaimers working on the same range of
> an AG, but this doesn't cause any apparent issues. Optimising the
> spread of concurrent reclaimers for best efficiency can be done in a
> future patchset.

"Future patchset" as in "not in the 9 patches that I have left to read"?

> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/xfs_icache.c | 28 +++++++---------------------
>  fs/xfs/xfs_mount.c  |  4 ----
>  fs/xfs/xfs_mount.h  |  1 -
>  3 files changed, 7 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> index ee9bc82a0dfbe..f44493b2eae77 100644
> --- a/fs/xfs/xfs_icache.c
> +++ b/fs/xfs/xfs_icache.c
> @@ -1226,12 +1226,9 @@ xfs_reclaim_inodes_ag(
>  	int			*nr_to_scan)
>  {
>  	struct xfs_perag	*pag;
> -	xfs_agnumber_t		ag;
> -	int			trylock = flags & SYNC_TRYLOCK;
> -	int			skipped;
> +	xfs_agnumber_t		ag = 0;
> +	int			skipped = 0;
>  
> -	ag = 0;
> -	skipped = 0;
>  	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
>  		unsigned long	first_index = 0;
>  		int		done = 0;
> @@ -1239,16 +1236,7 @@ xfs_reclaim_inodes_ag(
>  
>  		ag = pag->pag_agno + 1;
>  
> -		if (trylock) {
> -			if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
> -				skipped++;
> -				xfs_perag_put(pag);
> -				continue;
> -			}
> -			first_index = pag->pag_ici_reclaim_cursor;
> -		} else
> -			mutex_lock(&pag->pag_ici_reclaim_lock);
> -
> +		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
>  		do {
>  			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
>  			int	i;
> @@ -1313,11 +1301,9 @@ xfs_reclaim_inodes_ag(
>  
>  		} while (nr_found && !done && *nr_to_scan > 0);
>  
> -		if (trylock && !done)
> -			pag->pag_ici_reclaim_cursor = first_index;
> -		else
> -			pag->pag_ici_reclaim_cursor = 0;
> -		mutex_unlock(&pag->pag_ici_reclaim_lock);
> +		if (done)
> +			first_index = 0;
> +		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
>  		xfs_perag_put(pag);
>  	}
>  	return skipped;
> @@ -1331,7 +1317,7 @@ xfs_reclaim_inodes(
>  	int		nr_to_scan = INT_MAX;
>  	int		skipped;
>  
> -	skipped = xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
> +	xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);

This could go in the previous patch, right?

The rest of this patch looks reasonable, so with that fixed,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

>  	if (!(mode & SYNC_WAIT))
>  		return 0;
>  
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index d5dcf98698600..03158b42a1943 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -148,7 +148,6 @@ xfs_free_perag(
>  		ASSERT(atomic_read(&pag->pag_ref) == 0);
>  		xfs_iunlink_destroy(pag);
>  		xfs_buf_hash_destroy(pag);
> -		mutex_destroy(&pag->pag_ici_reclaim_lock);
>  		call_rcu(&pag->rcu_head, __xfs_free_perag);
>  	}
>  }
> @@ -200,7 +199,6 @@ xfs_initialize_perag(
>  		pag->pag_agno = index;
>  		pag->pag_mount = mp;
>  		spin_lock_init(&pag->pag_ici_lock);
> -		mutex_init(&pag->pag_ici_reclaim_lock);
>  		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
>  		if (xfs_buf_hash_init(pag))
>  			goto out_free_pag;
> @@ -242,7 +240,6 @@ xfs_initialize_perag(
>  out_hash_destroy:
>  	xfs_buf_hash_destroy(pag);
>  out_free_pag:
> -	mutex_destroy(&pag->pag_ici_reclaim_lock);
>  	kmem_free(pag);
>  out_unwind_new_pags:
>  	/* unwind any prior newly initialized pags */
> @@ -252,7 +249,6 @@ xfs_initialize_perag(
>  			break;
>  		xfs_buf_hash_destroy(pag);
>  		xfs_iunlink_destroy(pag);
> -		mutex_destroy(&pag->pag_ici_reclaim_lock);
>  		kmem_free(pag);
>  	}
>  	return error;
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index 3725d25ad97e8..a72cfcaa4ad12 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -354,7 +354,6 @@ typedef struct xfs_perag {
>  	spinlock_t	pag_ici_lock;	/* incore inode cache lock */
>  	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
>  	int		pag_ici_reclaimable;	/* reclaimable inodes */
> -	struct mutex	pag_ici_reclaim_lock;	/* serialisation point */
>  	unsigned long	pag_ici_reclaim_cursor;	/* reclaim restart point */
>  
>  	/* buffer cache index */
> -- 
> 2.26.2.761.g0e0b3e54be
>
Dave Chinner May 23, 2020, 10:35 p.m. UTC | #2
On Fri, May 22, 2020 at 04:10:07PM -0700, Darrick J. Wong wrote:
> On Fri, May 22, 2020 at 01:50:20PM +1000, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > Inode reclaim will still throttle direct reclaim on the per-ag
> > reclaim locks. This is no longer necessary as reclaim can run
> > non-blocking now. Hence we can remove these locks so that we don't
> > arbitrarily block reclaimers just because there are more direct
> > reclaimers than there are AGs.
> > 
> > This can result in multiple reclaimers working on the same range of
> > an AG, but this doesn't cause any apparent issues. Optimising the
> > spread of concurrent reclaimers for best efficiency can be done in a
> > future patchset.
> 
> "Future patchset" as in "not in the 9 patches that I have left to read"?

Yes.

-Dave.

Patch
diff mbox series

diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index ee9bc82a0dfbe..f44493b2eae77 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1226,12 +1226,9 @@  xfs_reclaim_inodes_ag(
 	int			*nr_to_scan)
 {
 	struct xfs_perag	*pag;
-	xfs_agnumber_t		ag;
-	int			trylock = flags & SYNC_TRYLOCK;
-	int			skipped;
+	xfs_agnumber_t		ag = 0;
+	int			skipped = 0;
 
-	ag = 0;
-	skipped = 0;
 	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
 		unsigned long	first_index = 0;
 		int		done = 0;
@@ -1239,16 +1236,7 @@  xfs_reclaim_inodes_ag(
 
 		ag = pag->pag_agno + 1;
 
-		if (trylock) {
-			if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
-				skipped++;
-				xfs_perag_put(pag);
-				continue;
-			}
-			first_index = pag->pag_ici_reclaim_cursor;
-		} else
-			mutex_lock(&pag->pag_ici_reclaim_lock);
-
+		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
 		do {
 			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
 			int	i;
@@ -1313,11 +1301,9 @@  xfs_reclaim_inodes_ag(
 
 		} while (nr_found && !done && *nr_to_scan > 0);
 
-		if (trylock && !done)
-			pag->pag_ici_reclaim_cursor = first_index;
-		else
-			pag->pag_ici_reclaim_cursor = 0;
-		mutex_unlock(&pag->pag_ici_reclaim_lock);
+		if (done)
+			first_index = 0;
+		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
 		xfs_perag_put(pag);
 	}
 	return skipped;
@@ -1331,7 +1317,7 @@  xfs_reclaim_inodes(
 	int		nr_to_scan = INT_MAX;
 	int		skipped;
 
-	skipped = xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
+	xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
 	if (!(mode & SYNC_WAIT))
 		return 0;
 
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d5dcf98698600..03158b42a1943 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -148,7 +148,6 @@  xfs_free_perag(
 		ASSERT(atomic_read(&pag->pag_ref) == 0);
 		xfs_iunlink_destroy(pag);
 		xfs_buf_hash_destroy(pag);
-		mutex_destroy(&pag->pag_ici_reclaim_lock);
 		call_rcu(&pag->rcu_head, __xfs_free_perag);
 	}
 }
@@ -200,7 +199,6 @@  xfs_initialize_perag(
 		pag->pag_agno = index;
 		pag->pag_mount = mp;
 		spin_lock_init(&pag->pag_ici_lock);
-		mutex_init(&pag->pag_ici_reclaim_lock);
 		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
 		if (xfs_buf_hash_init(pag))
 			goto out_free_pag;
@@ -242,7 +240,6 @@  xfs_initialize_perag(
 out_hash_destroy:
 	xfs_buf_hash_destroy(pag);
 out_free_pag:
-	mutex_destroy(&pag->pag_ici_reclaim_lock);
 	kmem_free(pag);
 out_unwind_new_pags:
 	/* unwind any prior newly initialized pags */
@@ -252,7 +249,6 @@  xfs_initialize_perag(
 			break;
 		xfs_buf_hash_destroy(pag);
 		xfs_iunlink_destroy(pag);
-		mutex_destroy(&pag->pag_ici_reclaim_lock);
 		kmem_free(pag);
 	}
 	return error;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 3725d25ad97e8..a72cfcaa4ad12 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -354,7 +354,6 @@  typedef struct xfs_perag {
 	spinlock_t	pag_ici_lock;	/* incore inode cache lock */
 	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
 	int		pag_ici_reclaimable;	/* reclaimable inodes */
-	struct mutex	pag_ici_reclaim_lock;	/* serialisation point */
 	unsigned long	pag_ici_reclaim_cursor;	/* reclaim restart point */
 
 	/* buffer cache index */