
[4/4] xfs: reactivate XFS_NEED_INACTIVE inodes from xfs_iget

Message ID 20240201005217.1011010-5-david@fromorbit.com (mailing list archive)
State Superseded, archived
Series xfs: reactivate inodes immediately in xfs_iget

Commit Message

Dave Chinner Feb. 1, 2024, 12:30 a.m. UTC
From: Dave Chinner <dchinner@redhat.com>

When xfs_iget() finds an inode that is queued for inactivation, it
issues an inodegc flush to trigger the inactivation work and then
retries the lookup.

However, when the filesystem is frozen, inodegc is turned off and
the flush does nothing and does not block. This results in lookups
spinning on NEED_INACTIVE inodes, unable to make progress until the
filesystem is thawed. This is less than ideal.

The only reason we can't immediately recycle the inode is that it is
queued on a lockless list we can't remove it from. However, those
lists now support lazy removal, so we can now modify the lookup code
to reactivate inodes queued for inactivation. The process is
identical to how we recycle reclaimable inodes from xfs_iget(), so
this ends up being a relatively simple change to make.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_icache.c | 98 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 76 insertions(+), 22 deletions(-)
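
As a rough summary of what the new xfs_iget_reactivate() below does with
inode state (a sketch only; the locking, the ILOCK serialisation against
racing lookups and the tracing are elided here and shown in full in the
patch):

	ip->i_flags &= ~XFS_NEED_INACTIVE;	/* claim the inode ...          */
	ip->i_flags |= XFS_INACTIVATING;	/* ... so other lookups skip it */

	error = xfs_reinit_inode(mp, inode);	/* rebuild the VFS inode state */
	if (error) {
		/* Reinit failed: put the inode back on the inodegc queue. */
		ip->i_flags |= XFS_NEED_INACTIVE;
		ip->i_flags &= ~XFS_INACTIVATING;
		xfs_inodegc_queue(ip);
	} else {
		/* Hand a "new" inode back to xfs_iget() to finish setup. */
		ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
		ip->i_flags |= XFS_INEW;
		VFS_I(ip)->i_state = I_NEW;
	}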

Comments

Darrick J. Wong Feb. 1, 2024, 7:36 p.m. UTC | #1
On Thu, Feb 01, 2024 at 11:30:16AM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> When xfs_iget() finds an inode that is queued for inactivation, it
> issues an inodegc flush to trigger the inactivation work and then
> retries the lookup.
> 
> However, when the filesystem is frozen, inodegc is turned off and
> the flush does nothing and does not block. This results in lookups
> spinning on NEED_INACTIVE inodes, unable to make progress until the
> filesystem is thawed. This is less than ideal.
> 
> The only reason we can't immediately recycle the inode is that it is
> queued on a lockless list we can't remove it from. However, those
> lists now support lazy removal, so we can now modify the lookup code
> to reactivate inodes queued for inactivation. The process is
> identical to how we recycle reclaimable inodes from xfs_iget(), so
> this ends up being a relatively simple change to make.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/xfs_icache.c | 98 +++++++++++++++++++++++++++++++++++----------
>  1 file changed, 76 insertions(+), 22 deletions(-)
> 
> diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> index 10588f78f679..1fc55ed0692c 100644
> --- a/fs/xfs/xfs_icache.c
> +++ b/fs/xfs/xfs_icache.c
> @@ -64,6 +64,8 @@ static int xfs_icwalk_ag(struct xfs_perag *pag,
>  					 XFS_ICWALK_FLAG_RECLAIM_SICK | \
>  					 XFS_ICWALK_FLAG_UNION)
>  
> +static void xfs_inodegc_queue(struct xfs_inode *ip);
> +
>  /*
>   * Allocate and initialise an xfs_inode.
>   */
> @@ -328,6 +330,7 @@ xfs_reinit_inode(
>  	return error;
>  }
>  
> +
>  /*
>   * Carefully nudge an inode whose VFS state has been torn down back into a
>   * usable state.  Drops the i_flags_lock and the rcu read lock.
> @@ -391,7 +394,71 @@ xfs_iget_recycle(
>  	inode->i_state = I_NEW;
>  	spin_unlock(&ip->i_flags_lock);
>  	spin_unlock(&pag->pag_ici_lock);
> +	XFS_STATS_INC(mp, xs_ig_frecycle);
> +	return 0;
> +}
>  
> +static int
> +xfs_iget_reactivate(
> +	struct xfs_perag	*pag,
> +	struct xfs_inode	*ip) __releases(&ip->i_flags_lock)
> +{
> +	struct xfs_mount	*mp = ip->i_mount;
> +	struct inode		*inode = VFS_I(ip);
> +	int			error;
> +
> +	trace_xfs_iget_recycle(ip);
> +
> +	/*
> +	 * Take the ILOCK here to serialise against lookup races with putting
> +	 * the inode back on the inodegc queue during error handling.
> +	 */
> +	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
> +		return -EAGAIN;
> +
> +	/*
> +	 * Move the state to inactivating so both inactivation and racing
> +	 * lookups will skip over this inode until we've finished reactivating
> +	 * it and can return it to the XFS_INEW state.
> +	 */
> +	ip->i_flags &= ~XFS_NEED_INACTIVE;
> +	ip->i_flags |= XFS_INACTIVATING;
> +	spin_unlock(&ip->i_flags_lock);
> +	rcu_read_unlock();
> +
> +	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
> +	error = xfs_reinit_inode(mp, inode);
> +	if (error) {
> +		/*
> +		 * Well, that sucks. Put the inode back on the inactive queue.
> +		 * Do this while still under the ILOCK so that we can set the
> +		 * NEED_INACTIVE flag and clear the INACTIVATING flag and not
> +		 * have another lookup race with us before we've finished
> +		 * putting the inode back on the inodegc queue.
> +		 */
> +		spin_lock(&ip->i_flags_lock);
> +		ip->i_flags |= XFS_NEED_INACTIVE;
> +		ip->i_flags &= ~XFS_INACTIVATING;
> +		spin_unlock(&ip->i_flags_lock);
> +
> +		xfs_inodegc_queue(ip);
> +		xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +
> +		return error;

Needs a trace_xfs_iget_recycle_fail here.

Do we want/need separate tracepoints for reactivation?  I'm guessing not
really since either way (reclaim/inactivation) we're recreating the vfs
state of an inode that hadn't yet been fully zapped.

The code changes here look good to me otherwise.

--D

> +	}
> +	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +
> +	/*
> +	 * Reset the inode state to new so that xfs_iget() will complete
> +	 * the required remaining inode initialisation before it returns the
> +	 * inode to the caller.
> +	 */
> +	spin_lock(&ip->i_flags_lock);
> +	ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
> +	ip->i_flags |= XFS_INEW;
> +	inode->i_state = I_NEW;
> +	spin_unlock(&ip->i_flags_lock);
> +	XFS_STATS_INC(mp, xs_ig_frecycle);
>  	return 0;
>  }
>  
> @@ -523,14 +590,6 @@ xfs_iget_cache_hit(
>  	if (ip->i_flags & (XFS_INEW | XFS_IRECLAIM | XFS_INACTIVATING))
>  		goto out_skip;
>  
> -	if (ip->i_flags & XFS_NEED_INACTIVE) {
> -		/* Unlinked inodes cannot be re-grabbed. */
> -		if (VFS_I(ip)->i_nlink == 0) {
> -			error = -ENOENT;
> -			goto out_error;
> -		}
> -		goto out_inodegc_flush;
> -	}
>  
>  	/*
>  	 * Check the inode free state is valid. This also detects lookup
> @@ -542,11 +601,18 @@ xfs_iget_cache_hit(
>  
>  	/* Skip inodes that have no vfs state. */
>  	if ((flags & XFS_IGET_INCORE) &&
> -	    (ip->i_flags & XFS_IRECLAIMABLE))
> +	    (ip->i_flags & (XFS_IRECLAIMABLE | XFS_NEED_INACTIVE)))
>  		goto out_skip;
>  
>  	/* The inode fits the selection criteria; process it. */
> -	if (ip->i_flags & XFS_IRECLAIMABLE) {
> +	if (ip->i_flags & XFS_NEED_INACTIVE) {
> +		/* Drops i_flags_lock and RCU read lock. */
> +		error = xfs_iget_reactivate(pag, ip);
> +		if (error == -EAGAIN)
> +			goto out_skip;
> +		if (error)
> +			return error;
> +	} else if (ip->i_flags & XFS_IRECLAIMABLE) {
>  		/* Drops i_flags_lock and RCU read lock. */
>  		error = xfs_iget_recycle(pag, ip);
>  		if (error == -EAGAIN)
> @@ -575,23 +641,11 @@ xfs_iget_cache_hit(
>  
>  out_skip:
>  	trace_xfs_iget_skip(ip);
> -	XFS_STATS_INC(mp, xs_ig_frecycle);
>  	error = -EAGAIN;
>  out_error:
>  	spin_unlock(&ip->i_flags_lock);
>  	rcu_read_unlock();
>  	return error;
> -
> -out_inodegc_flush:
> -	spin_unlock(&ip->i_flags_lock);
> -	rcu_read_unlock();
> -	/*
> -	 * Do not wait for the workers, because the caller could hold an AGI
> -	 * buffer lock.  We're just going to sleep in a loop anyway.
> -	 */
> -	if (xfs_is_inodegc_enabled(mp))
> -		xfs_inodegc_queue_all(mp);
> -	return -EAGAIN;
>  }
>  
>  static int
> -- 
> 2.43.0
> 
>
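
On the missing tracepoint noted above: xfs_iget_recycle() already emits
trace_xfs_iget_recycle_fail() when xfs_reinit_inode() fails, so a minimal
sketch of the equivalent addition to the xfs_iget_reactivate() error path
(assuming the existing recycle_fail tracepoint is reused rather than a new
reactivate-specific one) would be:

	error = xfs_reinit_inode(mp, inode);
	if (error) {
		/* Mirror the failure tracepoint used by xfs_iget_recycle(). */
		trace_xfs_iget_recycle_fail(ip);

		spin_lock(&ip->i_flags_lock);
		ip->i_flags |= XFS_NEED_INACTIVE;
		ip->i_flags &= ~XFS_INACTIVATING;
		spin_unlock(&ip->i_flags_lock);

		xfs_inodegc_queue(ip);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		return error;
	}
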
kernel test robot Feb. 14, 2024, 4 a.m. UTC | #2
Hello,

kernel test robot noticed "xfstests.xfs.183.fail" on:

commit: 98e62582b2cc1b05a1075ce816256e8f257a6881 ("[PATCH 4/4] xfs: reactivate XFS_NEED_INACTIVE inodes from xfs_iget")
url: https://github.com/intel-lab-lkp/linux/commits/Dave-Chinner/xfs-make-inode-inactivation-state-changes-atomic/20240201-085509
base: https://git.kernel.org/cgit/fs/xfs/xfs-linux.git for-next
patch link: https://lore.kernel.org/all/20240201005217.1011010-5-david@fromorbit.com/
patch subject: [PATCH 4/4] xfs: reactivate XFS_NEED_INACTIVE inodes from xfs_iget

in testcase: xfstests
version: xfstests-x86_64-c46ca4d1-1_20240205
with following parameters:

	disk: 4HDD
	fs: xfs
	test: xfs-183



compiler: gcc-12
test machine: 4 threads Intel(R) Xeon(R) CPU E3-1225 v5 @ 3.30GHz (Skylake) with 16G memory

(please refer to attached dmesg/kmsg for entire log/backtrace)




If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202402141107.4642a3e7-oliver.sang@intel.com

2024-02-08 20:30:36 export TEST_DIR=/fs/sda1
2024-02-08 20:30:36 export TEST_DEV=/dev/sda1
2024-02-08 20:30:36 export FSTYP=xfs
2024-02-08 20:30:36 export SCRATCH_MNT=/fs/scratch
2024-02-08 20:30:36 mkdir /fs/scratch -p
2024-02-08 20:30:36 export SCRATCH_DEV=/dev/sda4
2024-02-08 20:30:36 export SCRATCH_LOGDEV=/dev/sda2
2024-02-08 20:30:36 export SCRATCH_XFS_LIST_METADATA_FIELDS=u3.sfdir3.hdr.parent.i4
2024-02-08 20:30:36 export SCRATCH_XFS_LIST_FUZZ_VERBS=random
2024-02-08 20:30:36 echo xfs/183
2024-02-08 20:30:36 ./check xfs/183
FSTYP         -- xfs (debug)
PLATFORM      -- Linux/x86_64 lkp-skl-d06 6.8.0-rc2-00006-g98e62582b2cc #1 SMP PREEMPT_DYNAMIC Thu Feb  8 18:32:20 CST 2024
MKFS_OPTIONS  -- -f /dev/sda4
MOUNT_OPTIONS -- /dev/sda4 /fs/scratch

xfs/183       - output mismatch (see /lkp/benchmarks/xfstests/results//xfs/183.out.bad)
    --- tests/xfs/183.out	2024-02-05 17:37:40.000000000 +0000
    +++ /lkp/benchmarks/xfstests/results//xfs/183.out.bad	2024-02-08 20:32:06.555798923 +0000
    @@ -1,4 +1,4 @@
     QA output created by 183
     Start original bulkstat_unlink_test with -r switch
     Runing extended checks.
    -Iteration 0 ... (100 files)passed
    +Iteration 0 ... (100 files)ERROR, count(100) != scount(2).
    ...
    (Run 'diff -u /lkp/benchmarks/xfstests/tests/xfs/183.out /lkp/benchmarks/xfstests/results//xfs/183.out.bad'  to see the entire diff)
Ran: xfs/183
Failures: xfs/183
Failed 1 of 1 tests




The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20240214/202402141107.4642a3e7-oliver.sang@intel.com
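
Condensed, the robot's reproduction amounts to running the single test
against a scratch XFS device (the device paths below are the robot's;
adjust them for the local machine):

	export FSTYP=xfs
	export TEST_DEV=/dev/sda1 TEST_DIR=/fs/sda1
	export SCRATCH_DEV=/dev/sda4 SCRATCH_MNT=/fs/scratch
	./check xfs/183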

Patch

diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 10588f78f679..1fc55ed0692c 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -64,6 +64,8 @@  static int xfs_icwalk_ag(struct xfs_perag *pag,
 					 XFS_ICWALK_FLAG_RECLAIM_SICK | \
 					 XFS_ICWALK_FLAG_UNION)
 
+static void xfs_inodegc_queue(struct xfs_inode *ip);
+
 /*
  * Allocate and initialise an xfs_inode.
  */
@@ -328,6 +330,7 @@  xfs_reinit_inode(
 	return error;
 }
 
+
 /*
  * Carefully nudge an inode whose VFS state has been torn down back into a
  * usable state.  Drops the i_flags_lock and the rcu read lock.
@@ -391,7 +394,71 @@  xfs_iget_recycle(
 	inode->i_state = I_NEW;
 	spin_unlock(&ip->i_flags_lock);
 	spin_unlock(&pag->pag_ici_lock);
+	XFS_STATS_INC(mp, xs_ig_frecycle);
+	return 0;
+}
 
+static int
+xfs_iget_reactivate(
+	struct xfs_perag	*pag,
+	struct xfs_inode	*ip) __releases(&ip->i_flags_lock)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct inode		*inode = VFS_I(ip);
+	int			error;
+
+	trace_xfs_iget_recycle(ip);
+
+	/*
+	 * Take the ILOCK here to serialise against lookup races with putting
+	 * the inode back on the inodegc queue during error handling.
+	 */
+	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
+		return -EAGAIN;
+
+	/*
+	 * Move the state to inactivating so both inactivation and racing
+	 * lookups will skip over this inode until we've finished reactivating
+	 * it and can return it to the XFS_INEW state.
+	 */
+	ip->i_flags &= ~XFS_NEED_INACTIVE;
+	ip->i_flags |= XFS_INACTIVATING;
+	spin_unlock(&ip->i_flags_lock);
+	rcu_read_unlock();
+
+	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
+	error = xfs_reinit_inode(mp, inode);
+	if (error) {
+		/*
+		 * Well, that sucks. Put the inode back on the inactive queue.
+		 * Do this while still under the ILOCK so that we can set the
+		 * NEED_INACTIVE flag and clear the INACTIVATING flag and not
+		 * have another lookup race with us before we've finished
+		 * putting the inode back on the inodegc queue.
+		 */
+		spin_lock(&ip->i_flags_lock);
+		ip->i_flags |= XFS_NEED_INACTIVE;
+		ip->i_flags &= ~XFS_INACTIVATING;
+		spin_unlock(&ip->i_flags_lock);
+
+		xfs_inodegc_queue(ip);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+		return error;
+	}
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	/*
+	 * Reset the inode state to new so that xfs_iget() will complete
+	 * the required remaining inode initialisation before it returns the
+	 * inode to the caller.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
+	ip->i_flags |= XFS_INEW;
+	inode->i_state = I_NEW;
+	spin_unlock(&ip->i_flags_lock);
+	XFS_STATS_INC(mp, xs_ig_frecycle);
 	return 0;
 }
 
@@ -523,14 +590,6 @@  xfs_iget_cache_hit(
 	if (ip->i_flags & (XFS_INEW | XFS_IRECLAIM | XFS_INACTIVATING))
 		goto out_skip;
 
-	if (ip->i_flags & XFS_NEED_INACTIVE) {
-		/* Unlinked inodes cannot be re-grabbed. */
-		if (VFS_I(ip)->i_nlink == 0) {
-			error = -ENOENT;
-			goto out_error;
-		}
-		goto out_inodegc_flush;
-	}
 
 	/*
 	 * Check the inode free state is valid. This also detects lookup
@@ -542,11 +601,18 @@  xfs_iget_cache_hit(
 
 	/* Skip inodes that have no vfs state. */
 	if ((flags & XFS_IGET_INCORE) &&
-	    (ip->i_flags & XFS_IRECLAIMABLE))
+	    (ip->i_flags & (XFS_IRECLAIMABLE | XFS_NEED_INACTIVE)))
 		goto out_skip;
 
 	/* The inode fits the selection criteria; process it. */
-	if (ip->i_flags & XFS_IRECLAIMABLE) {
+	if (ip->i_flags & XFS_NEED_INACTIVE) {
+		/* Drops i_flags_lock and RCU read lock. */
+		error = xfs_iget_reactivate(pag, ip);
+		if (error == -EAGAIN)
+			goto out_skip;
+		if (error)
+			return error;
+	} else if (ip->i_flags & XFS_IRECLAIMABLE) {
 		/* Drops i_flags_lock and RCU read lock. */
 		error = xfs_iget_recycle(pag, ip);
 		if (error == -EAGAIN)
@@ -575,23 +641,11 @@  xfs_iget_cache_hit(
 
 out_skip:
 	trace_xfs_iget_skip(ip);
-	XFS_STATS_INC(mp, xs_ig_frecycle);
 	error = -EAGAIN;
 out_error:
 	spin_unlock(&ip->i_flags_lock);
 	rcu_read_unlock();
 	return error;
-
-out_inodegc_flush:
-	spin_unlock(&ip->i_flags_lock);
-	rcu_read_unlock();
-	/*
-	 * Do not wait for the workers, because the caller could hold an AGI
-	 * buffer lock.  We're just going to sleep in a loop anyway.
-	 */
-	if (xfs_is_inodegc_enabled(mp))
-		xfs_inodegc_queue_all(mp);
-	return -EAGAIN;
 }
 
 static int