diff mbox series

[13/14] xfs: merge xfs_reclaim_inodes_ag into xfs_inode_walk_ag

Message ID 162259522416.662681.8769645421908758261.stgit@locust (mailing list archive)
State Superseded
Headers show
Series xfs: clean up incore inode walk functions | expand

Commit Message

Darrick J. Wong June 2, 2021, 12:53 a.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Merge these two inode walk loops together, since they're pretty similar
now.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_icache.c |  151 +++++++++++++--------------------------------------
 fs/xfs/xfs_icache.h |    7 ++
 2 files changed, 45 insertions(+), 113 deletions(-)

Comments

Dave Chinner June 2, 2021, 2:10 a.m. UTC | #1
On Tue, Jun 01, 2021 at 05:53:44PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Merge these two inode walk loops together, since they're pretty similar
> now.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  fs/xfs/xfs_icache.c |  151 +++++++++++++--------------------------------------
>  fs/xfs/xfs_icache.h |    7 ++
>  2 files changed, 45 insertions(+), 113 deletions(-)

At last! Nice work.

Reviewed-by: Dave Chinner <dchinner@redhat.com>

> @@ -1678,6 +1577,14 @@ xfs_blockgc_free_quota(
>  
>  /* XFS Incore Inode Walking Code */

FWIW, if we are using "icwalk" as the namespace, I keep saying
"inode cache walk" in my head. Perhaps update this comment somewhere
to match the namespace?

-Dave.
Darrick J. Wong June 2, 2021, 6:16 a.m. UTC | #2
On Wed, Jun 02, 2021 at 12:10:55PM +1000, Dave Chinner wrote:
> On Tue, Jun 01, 2021 at 05:53:44PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Merge these two inode walk loops together, since they're pretty similar
> > now.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  fs/xfs/xfs_icache.c |  151 +++++++++++++--------------------------------------
> >  fs/xfs/xfs_icache.h |    7 ++
> >  2 files changed, 45 insertions(+), 113 deletions(-)
> 
> At last! Nice work.
> 
> Reviewed-by: Dave Chinner <dchinner@redhat.com>
> 
> > @@ -1678,6 +1577,14 @@ xfs_blockgc_free_quota(
> >  
> >  /* XFS Incore Inode Walking Code */
> 
> FWIW, if we are using "icwalk" as the namespace, I keep saying
> "inode cache walk" in my head. Perhaps update this comment somewhere
> to match the namespace?

Done.

--D

> 
> -Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
diff mbox series

Patch

diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index b17ac2f23909..f6e54e638cf4 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -40,6 +40,7 @@ 
 enum xfs_icwalk_goal {
 	XFS_ICWALK_DQRELE	= -1,
 	XFS_ICWALK_BLOCKGC	= XFS_ICI_BLOCKGC_TAG,
+	XFS_ICWALK_RECLAIM	= XFS_ICI_RECLAIM_TAG,
 };
 
 /* Is there a radix tree tag for this goal? */
@@ -743,17 +744,6 @@  xfs_icache_inode_is_allocated(
 	return 0;
 }
 
-/*
- * The inode lookup is done in batches to keep the amount of lock traffic and
- * radix tree lookups to a minimum. The batch size is a trade off between
- * lookup reduction and stack usage. This is in the reclaim path, so we can't
- * be too greedy.
- *
- * XXX: This will be moved closer to xfs_inode_walk* once we get rid of the
- * separate reclaim walk functions.
- */
-#define XFS_LOOKUP_BATCH	32
-
 #ifdef CONFIG_XFS_QUOTA
 /* Decide if we want to grab this inode to drop its dquots. */
 static bool
@@ -865,7 +855,7 @@  xfs_dqrele_all_inodes(
  * Return true if we grabbed it, false otherwise.
  */
 static bool
-xfs_reclaim_inode_grab(
+xfs_reclaim_igrab(
 	struct xfs_inode	*ip)
 {
 	ASSERT(rcu_read_lock_held());
@@ -975,108 +965,13 @@  xfs_reclaim_inode(
 	xfs_iflags_clear(ip, XFS_IRECLAIM);
 }
 
-/*
- * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
- * corrupted, we still want to try to reclaim all the inodes. If we don't,
- * then a shut down during filesystem unmount reclaim walk leak all the
- * unreclaimed inodes.
- *
- * Returns non-zero if any AGs or inodes were skipped in the reclaim pass
- * so that callers that want to block until all dirty inodes are written back
- * and reclaimed can sanely loop.
- */
-static void
-xfs_reclaim_inodes_ag(
-	struct xfs_mount	*mp,
-	int			*nr_to_scan)
-{
-	struct xfs_perag	*pag;
-	xfs_agnumber_t		ag = 0;
-
-	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
-		unsigned long	first_index = 0;
-		int		done = 0;
-		int		nr_found = 0;
-
-		ag = pag->pag_agno + 1;
-
-		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
-		do {
-			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
-			int	i;
-
-			rcu_read_lock();
-			nr_found = radix_tree_gang_lookup_tag(
-					&pag->pag_ici_root,
-					(void **)batch, first_index,
-					XFS_LOOKUP_BATCH,
-					XFS_ICI_RECLAIM_TAG);
-			if (!nr_found) {
-				done = 1;
-				rcu_read_unlock();
-				break;
-			}
-
-			/*
-			 * Grab the inodes before we drop the lock. if we found
-			 * nothing, nr == 0 and the loop will be skipped.
-			 */
-			for (i = 0; i < nr_found; i++) {
-				struct xfs_inode *ip = batch[i];
-
-				if (done || !xfs_reclaim_inode_grab(ip))
-					batch[i] = NULL;
-
-				/*
-				 * Update the index for the next lookup. Catch
-				 * overflows into the next AG range which can
-				 * occur if we have inodes in the last block of
-				 * the AG and we are currently pointing to the
-				 * last inode.
-				 *
-				 * Because we may see inodes that are from the
-				 * wrong AG due to RCU freeing and
-				 * reallocation, only update the index if it
-				 * lies in this AG. It was a race that lead us
-				 * to see this inode, so another lookup from
-				 * the same index will not find it again.
-				 */
-				if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
-								pag->pag_agno)
-					continue;
-				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-					done = 1;
-			}
-
-			/* unlock now we've grabbed the inodes. */
-			rcu_read_unlock();
-
-			for (i = 0; i < nr_found; i++) {
-				if (batch[i])
-					xfs_reclaim_inode(batch[i], pag);
-			}
-
-			*nr_to_scan -= XFS_LOOKUP_BATCH;
-			cond_resched();
-		} while (nr_found && !done && *nr_to_scan > 0);
-
-		if (done)
-			first_index = 0;
-		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
-		xfs_perag_put(pag);
-	}
-}
-
 void
 xfs_reclaim_inodes(
 	struct xfs_mount	*mp)
 {
-	int		nr_to_scan = INT_MAX;
-
 	while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
 		xfs_ail_push_all_sync(mp->m_ail);
-		xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+		xfs_inode_walk(mp, XFS_ICWALK_RECLAIM, NULL);
 	}
 }
 
@@ -1092,11 +987,16 @@  xfs_reclaim_inodes_nr(
 	struct xfs_mount	*mp,
 	int			nr_to_scan)
 {
+	struct xfs_eofblocks	eofb = {
+		.eof_flags	= XFS_EOFB_SCAN_LIMIT,
+		.nr_to_scan	= nr_to_scan,
+	};
+
 	/* kick background reclaimer and push the AIL */
 	xfs_reclaim_work_queue(mp);
 	xfs_ail_push_all(mp->m_ail);
 
-	xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+	xfs_inode_walk(mp, XFS_ICWALK_RECLAIM, &eofb);
 	return 0;
 }
 
@@ -1206,9 +1106,8 @@  xfs_reclaim_worker(
 {
 	struct xfs_mount *mp = container_of(to_delayed_work(work),
 					struct xfs_mount, m_reclaim_work);
-	int		nr_to_scan = INT_MAX;
 
-	xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+	xfs_inode_walk(mp, XFS_ICWALK_RECLAIM, NULL);
 	xfs_reclaim_work_queue(mp);
 }
 
@@ -1678,6 +1577,14 @@  xfs_blockgc_free_quota(
 
 /* XFS Incore Inode Walking Code */
 
+/*
+ * The inode lookup is done in batches to keep the amount of lock traffic and
+ * radix tree lookups to a minimum. The batch size is a trade off between
+ * lookup reduction and stack usage. This is in the reclaim path, so we can't
+ * be too greedy.
+ */
+#define XFS_LOOKUP_BATCH	32
+
 static inline bool
 xfs_grabbed_for_walk(
 	enum xfs_icwalk_goal	goal,
@@ -1688,6 +1595,8 @@  xfs_grabbed_for_walk(
 		return xfs_blockgc_igrab(ip);
 	case XFS_ICWALK_DQRELE:
 		return xfs_dqrele_igrab(ip);
+	case XFS_ICWALK_RECLAIM:
+		return xfs_reclaim_igrab(ip);
 	default:
 		return false;
 	}
@@ -1713,7 +1622,10 @@  xfs_inode_walk_ag(
 restart:
 	done = false;
 	skipped = 0;
-	first_index = 0;
+	if (goal == XFS_ICWALK_RECLAIM)
+		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
+	else
+		first_index = 0;
 	nr_found = 0;
 	do {
 		struct xfs_inode *batch[XFS_LOOKUP_BATCH];
@@ -1733,6 +1645,7 @@  xfs_inode_walk_ag(
 					XFS_LOOKUP_BATCH, goal);
 
 		if (!nr_found) {
+			done = true;
 			rcu_read_unlock();
 			break;
 		}
@@ -1779,6 +1692,9 @@  xfs_inode_walk_ag(
 			case XFS_ICWALK_BLOCKGC:
 				error = xfs_blockgc_scan_inode(batch[i], eofb);
 				break;
+			case XFS_ICWALK_RECLAIM:
+				xfs_reclaim_inode(batch[i], pag);
+				break;
 			}
 			if (error == -EAGAIN) {
 				skipped++;
@@ -1794,8 +1710,19 @@  xfs_inode_walk_ag(
 
 		cond_resched();
 
+		if (eofb && (eofb->eof_flags & XFS_EOFB_SCAN_LIMIT)) {
+			eofb->nr_to_scan -= XFS_LOOKUP_BATCH;
+			if (eofb->nr_to_scan <= 0)
+				break;
+		}
 	} while (nr_found && !done);
 
+	if (goal == XFS_ICWALK_RECLAIM) {
+		if (done)
+			first_index = 0;
+		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
+	}
+
 	if (skipped) {
 		delay(1);
 		goto restart;
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 6f6260c91ba0..63e116c339a8 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -15,6 +15,7 @@  struct xfs_eofblocks {
 	kgid_t		eof_gid;
 	prid_t		eof_prid;
 	__u64		eof_min_file_size;
+	int		nr_to_scan;
 };
 
 /* Special eof_flags for dropping dquots. */
@@ -22,9 +23,13 @@  struct xfs_eofblocks {
 #define XFS_EOFB_DROP_GDQUOT	(1U << 30)
 #define XFS_EOFB_DROP_PDQUOT	(1U << 29)
 
+/* Stop scanning after nr_to_scan inodes. */
+#define XFS_EOFB_SCAN_LIMIT	(1U << 28)
+
 #define XFS_EOFB_PRIVATE_FLAGS	(XFS_EOFB_DROP_UDQUOT | \
 				 XFS_EOFB_DROP_GDQUOT | \
-				 XFS_EOFB_DROP_PDQUOT)
+				 XFS_EOFB_DROP_PDQUOT | \
+				 XFS_EOFB_SCAN_LIMIT)
 
 /*
  * Flags for xfs_iget()