Message ID | 20240201005217.1011010-5-david@fromorbit.com (mailing list archive)
---|---
State | Superseded, archived
Series | xfs: reactivate inodes immediately in xfs_iget
On Thu, Feb 01, 2024 at 11:30:16AM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> When xfs_iget() finds an inode that is queued for inactivation, it
> issues an inodegc flush to trigger the inactivation work and then
> retries the lookup.
>
> However, when the filesystem is frozen, inodegc is turned off and
> the flush does nothing and does not block. This results in lookup
> spinning on NEED_INACTIVE inodes and being unable to make progress
> until the filesystem is thawed. This is less than ideal.
>
> The only reason we can't immediately recycle the inode is that it
> is queued on a lockless list we can't remove it from. However, those
> lists now support lazy removal, and so we can now modify the lookup
> code to reactivate inodes queued for inactivation. The process is
> identical to how we recycle reclaimable inodes from xfs_iget(), so
> this ends up being a relatively simple change to make.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/xfs_icache.c | 98 +++++++++++++++++++++++++++++++++++----------
>  1 file changed, 76 insertions(+), 22 deletions(-)
>
> diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> index 10588f78f679..1fc55ed0692c 100644
> --- a/fs/xfs/xfs_icache.c
> +++ b/fs/xfs/xfs_icache.c
> @@ -64,6 +64,8 @@ static int xfs_icwalk_ag(struct xfs_perag *pag,
>  					 XFS_ICWALK_FLAG_RECLAIM_SICK | \
>  					 XFS_ICWALK_FLAG_UNION)
>  
> +static void xfs_inodegc_queue(struct xfs_inode *ip);
> +
>  /*
>   * Allocate and initialise an xfs_inode.
>   */
> @@ -328,6 +330,7 @@ xfs_reinit_inode(
>  	return error;
>  }
>  
> +
>  /*
>   * Carefully nudge an inode whose VFS state has been torn down back into a
>   * usable state. Drops the i_flags_lock and the rcu read lock.
> @@ -391,7 +394,71 @@ xfs_iget_recycle(
>  	inode->i_state = I_NEW;
>  	spin_unlock(&ip->i_flags_lock);
>  	spin_unlock(&pag->pag_ici_lock);
> +	XFS_STATS_INC(mp, xs_ig_frecycle);
> +	return 0;
> +}
>  
> +static int
> +xfs_iget_reactivate(
> +	struct xfs_perag	*pag,
> +	struct xfs_inode	*ip) __releases(&ip->i_flags_lock)
> +{
> +	struct xfs_mount	*mp = ip->i_mount;
> +	struct inode		*inode = VFS_I(ip);
> +	int			error;
> +
> +	trace_xfs_iget_recycle(ip);
> +
> +	/*
> +	 * Take the ILOCK here to serialise against lookup races with putting
> +	 * the inode back on the inodegc queue during error handling.
> +	 */
> +	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
> +		return -EAGAIN;
> +
> +	/*
> +	 * Move the state to inactivating so both inactivation and racing
> +	 * lookups will skip over this inode until we've finished reactivating
> +	 * it and can return it to the XFS_INEW state.
> +	 */
> +	ip->i_flags &= ~XFS_NEED_INACTIVE;
> +	ip->i_flags |= XFS_INACTIVATING;
> +	spin_unlock(&ip->i_flags_lock);
> +	rcu_read_unlock();
> +
> +	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
> +	error = xfs_reinit_inode(mp, inode);
> +	if (error) {
> +		/*
> +		 * Well, that sucks. Put the inode back on the inactive queue.
> +		 * Do this while still under the ILOCK so that we can set the
> +		 * NEED_INACTIVE flag and clear the INACTIVATING flag and not
> +		 * have another lookup race with us before we've finished
> +		 * putting the inode back on the inodegc queue.
> +		 */
> +		spin_lock(&ip->i_flags_lock);
> +		ip->i_flags |= XFS_NEED_INACTIVE;
> +		ip->i_flags &= ~XFS_INACTIVATING;
> +		spin_unlock(&ip->i_flags_lock);
> +
> +		xfs_inodegc_queue(ip);
> +		xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +
> +		return error;

Needs a trace_xfs_iget_recycle_fail here.  Do we want/need separate
tracepoints for reactivation?  I'm guessing not really since either way
(reclaim/inactivation) we're recreating the vfs state of an inode that
hadn't yet been fully zapped.

The code changes here look good to me otherwise.

--D

> +	}
> +	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +
> +	/*
> +	 * Reset the inode state to new so that xfs_iget() will complete
> +	 * the required remaining inode initialisation before it returns the
> +	 * inode to the caller.
> +	 */
> +	spin_lock(&ip->i_flags_lock);
> +	ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
> +	ip->i_flags |= XFS_INEW;
> +	inode->i_state = I_NEW;
> +	spin_unlock(&ip->i_flags_lock);
> +	XFS_STATS_INC(mp, xs_ig_frecycle);
>  	return 0;
>  }
>  
> @@ -523,14 +590,6 @@ xfs_iget_cache_hit(
>  	if (ip->i_flags & (XFS_INEW | XFS_IRECLAIM | XFS_INACTIVATING))
>  		goto out_skip;
>  
> -	if (ip->i_flags & XFS_NEED_INACTIVE) {
> -		/* Unlinked inodes cannot be re-grabbed. */
> -		if (VFS_I(ip)->i_nlink == 0) {
> -			error = -ENOENT;
> -			goto out_error;
> -		}
> -		goto out_inodegc_flush;
> -	}
>  
>  	/*
>  	 * Check the inode free state is valid. This also detects lookup
> @@ -542,11 +601,18 @@ xfs_iget_cache_hit(
>  
>  	/* Skip inodes that have no vfs state. */
>  	if ((flags & XFS_IGET_INCORE) &&
> -	    (ip->i_flags & XFS_IRECLAIMABLE))
> +	    (ip->i_flags & (XFS_IRECLAIMABLE | XFS_NEED_INACTIVE)))
>  		goto out_skip;
>  
>  	/* The inode fits the selection criteria; process it. */
> -	if (ip->i_flags & XFS_IRECLAIMABLE) {
> +	if (ip->i_flags & XFS_NEED_INACTIVE) {
> +		/* Drops i_flags_lock and RCU read lock. */
> +		error = xfs_iget_reactivate(pag, ip);
> +		if (error == -EAGAIN)
> +			goto out_skip;
> +		if (error)
> +			return error;
> +	} else if (ip->i_flags & XFS_IRECLAIMABLE) {
>  		/* Drops i_flags_lock and RCU read lock. */
>  		error = xfs_iget_recycle(pag, ip);
>  		if (error == -EAGAIN)
> @@ -575,23 +641,11 @@ xfs_iget_cache_hit(
>  
>  out_skip:
>  	trace_xfs_iget_skip(ip);
> -	XFS_STATS_INC(mp, xs_ig_frecycle);
>  	error = -EAGAIN;
>  out_error:
>  	spin_unlock(&ip->i_flags_lock);
>  	rcu_read_unlock();
>  	return error;
> -
> -out_inodegc_flush:
> -	spin_unlock(&ip->i_flags_lock);
> -	rcu_read_unlock();
> -	/*
> -	 * Do not wait for the workers, because the caller could hold an AGI
> -	 * buffer lock. We're just going to sleep in a loop anyway.
> -	 */
> -	if (xfs_is_inodegc_enabled(mp))
> -		xfs_inodegc_queue_all(mp);
> -	return -EAGAIN;
>  }
>  
>  static int
> --
> 2.43.0
>
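For reference, xfs_iget_recycle() already emits trace_xfs_iget_recycle_fail()
when xfs_reinit_inode() fails. A reactivation error path following that
existing pattern might look like the sketch below; this is an illustrative
guess at what the review asks for, not code from any posted version of the
patch:

	error = xfs_reinit_inode(mp, inode);
	if (error) {
		/*
		 * Sketch only: reuse the existing recycle-failure
		 * tracepoint, as the review suggests, before putting the
		 * inode back on the inodegc queue.
		 */
		trace_xfs_iget_recycle_fail(ip);

		spin_lock(&ip->i_flags_lock);
		ip->i_flags |= XFS_NEED_INACTIVE;
		ip->i_flags &= ~XFS_INACTIVATING;
		spin_unlock(&ip->i_flags_lock);

		xfs_inodegc_queue(ip);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		return error;
	}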
Hello,

kernel test robot noticed "xfstests.xfs.183.fail" on:

commit: 98e62582b2cc1b05a1075ce816256e8f257a6881 ("[PATCH 4/4] xfs: reactivate XFS_NEED_INACTIVE inodes from xfs_iget")
url: https://github.com/intel-lab-lkp/linux/commits/Dave-Chinner/xfs-make-inode-inactivation-state-changes-atomic/20240201-085509
base: https://git.kernel.org/cgit/fs/xfs/xfs-linux.git for-next
patch link: https://lore.kernel.org/all/20240201005217.1011010-5-david@fromorbit.com/
patch subject: [PATCH 4/4] xfs: reactivate XFS_NEED_INACTIVE inodes from xfs_iget

in testcase: xfstests
version: xfstests-x86_64-c46ca4d1-1_20240205
with following parameters:

	disk: 4HDD
	fs: xfs
	test: xfs-183

compiler: gcc-12
test machine: 4 threads Intel(R) Xeon(R) CPU E3-1225 v5 @ 3.30GHz (Skylake) with 16G memory

(please refer to attached dmesg/kmsg for entire log/backtrace)

If you fix the issue in a separate patch/commit (i.e. not just a new version
of the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202402141107.4642a3e7-oliver.sang@intel.com

2024-02-08 20:30:36 export TEST_DIR=/fs/sda1
2024-02-08 20:30:36 export TEST_DEV=/dev/sda1
2024-02-08 20:30:36 export FSTYP=xfs
2024-02-08 20:30:36 export SCRATCH_MNT=/fs/scratch
2024-02-08 20:30:36 mkdir /fs/scratch -p
2024-02-08 20:30:36 export SCRATCH_DEV=/dev/sda4
2024-02-08 20:30:36 export SCRATCH_LOGDEV=/dev/sda2
2024-02-08 20:30:36 export SCRATCH_XFS_LIST_METADATA_FIELDS=u3.sfdir3.hdr.parent.i4
2024-02-08 20:30:36 export SCRATCH_XFS_LIST_FUZZ_VERBS=random
2024-02-08 20:30:36 echo xfs/183
2024-02-08 20:30:36 ./check xfs/183

FSTYP         -- xfs (debug)
PLATFORM      -- Linux/x86_64 lkp-skl-d06 6.8.0-rc2-00006-g98e62582b2cc #1 SMP PREEMPT_DYNAMIC Thu Feb 8 18:32:20 CST 2024
MKFS_OPTIONS  -- -f /dev/sda4
MOUNT_OPTIONS -- /dev/sda4 /fs/scratch

xfs/183	- output mismatch (see /lkp/benchmarks/xfstests/results//xfs/183.out.bad)
    --- tests/xfs/183.out	2024-02-05 17:37:40.000000000 +0000
    +++ /lkp/benchmarks/xfstests/results//xfs/183.out.bad	2024-02-08 20:32:06.555798923 +0000
    @@ -1,4 +1,4 @@
     QA output created by 183
     Start original bulkstat_unlink_test with -r switch
     Runing extended checks.
    -Iteration 0 ... (100 files)passed
    +Iteration 0 ... (100 files)ERROR, count(100) != scount(2).
    ...
    (Run 'diff -u /lkp/benchmarks/xfstests/tests/xfs/183.out /lkp/benchmarks/xfstests/results//xfs/183.out.bad' to see the entire diff)

Ran: xfs/183
Failures: xfs/183
Failed 1 of 1 tests

The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20240214/202402141107.4642a3e7-oliver.sang@intel.com
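xfs/183 drives src/bulkstat_unlink_test from xfstests, which creates and
unlinks files and then cross-checks the bulkstat view of the filesystem
against what it expects. For readers unfamiliar with the interface under
test, here is a minimal, self-contained sketch of a bulkstat walk using the
legacy XFS_IOC_FSBULKSTAT ioctl ABI; it assumes the xfsprogs headers are
installed and is an illustration of the interface, not the xfstests program
itself:

/*
 * Hedged sketch: count every inode the filesystem reports via the legacy
 * XFS bulkstat ioctl. Build against xfsprogs headers; run on an XFS mount.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <xfs/xfs.h>

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <xfs-mountpoint>\n", argv[0]);
		return 1;
	}

	int fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	struct xfs_bstat buf[64];
	__u64 lastino = 0;
	__s32 ocount = 0;
	long total = 0;

	struct xfs_fsop_bulkreq req = {
		.lastip  = &lastino,	/* iteration cursor, kernel-updated */
		.icount  = 64,		/* max inodes per batch */
		.ubuffer = buf,
		.ocount  = &ocount,	/* inodes returned in this batch */
	};

	/* Each call returns a batch; ocount == 0 means the walk is done. */
	while (ioctl(fd, XFS_IOC_FSBULKSTAT, &req) == 0 && ocount > 0)
		total += ocount;

	printf("bulkstat reported %ld inodes\n", total);
	close(fd);
	return 0;
}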
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 10588f78f679..1fc55ed0692c 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -64,6 +64,8 @@ static int xfs_icwalk_ag(struct xfs_perag *pag,
 					 XFS_ICWALK_FLAG_RECLAIM_SICK | \
 					 XFS_ICWALK_FLAG_UNION)
 
+static void xfs_inodegc_queue(struct xfs_inode *ip);
+
 /*
  * Allocate and initialise an xfs_inode.
  */
@@ -328,6 +330,7 @@ xfs_reinit_inode(
 	return error;
 }
 
+
 /*
  * Carefully nudge an inode whose VFS state has been torn down back into a
  * usable state. Drops the i_flags_lock and the rcu read lock.
@@ -391,7 +394,71 @@ xfs_iget_recycle(
 	inode->i_state = I_NEW;
 	spin_unlock(&ip->i_flags_lock);
 	spin_unlock(&pag->pag_ici_lock);
+	XFS_STATS_INC(mp, xs_ig_frecycle);
+	return 0;
+}
 
+static int
+xfs_iget_reactivate(
+	struct xfs_perag	*pag,
+	struct xfs_inode	*ip) __releases(&ip->i_flags_lock)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct inode		*inode = VFS_I(ip);
+	int			error;
+
+	trace_xfs_iget_recycle(ip);
+
+	/*
+	 * Take the ILOCK here to serialise against lookup races with putting
+	 * the inode back on the inodegc queue during error handling.
+	 */
+	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
+		return -EAGAIN;
+
+	/*
+	 * Move the state to inactivating so both inactivation and racing
+	 * lookups will skip over this inode until we've finished reactivating
+	 * it and can return it to the XFS_INEW state.
+	 */
+	ip->i_flags &= ~XFS_NEED_INACTIVE;
+	ip->i_flags |= XFS_INACTIVATING;
+	spin_unlock(&ip->i_flags_lock);
+	rcu_read_unlock();
+
+	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
+	error = xfs_reinit_inode(mp, inode);
+	if (error) {
+		/*
+		 * Well, that sucks. Put the inode back on the inactive queue.
+		 * Do this while still under the ILOCK so that we can set the
+		 * NEED_INACTIVE flag and clear the INACTIVATING flag and not
+		 * have another lookup race with us before we've finished
+		 * putting the inode back on the inodegc queue.
+		 */
+		spin_lock(&ip->i_flags_lock);
+		ip->i_flags |= XFS_NEED_INACTIVE;
+		ip->i_flags &= ~XFS_INACTIVATING;
+		spin_unlock(&ip->i_flags_lock);
+
+		xfs_inodegc_queue(ip);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+		return error;
+	}
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	/*
+	 * Reset the inode state to new so that xfs_iget() will complete
+	 * the required remaining inode initialisation before it returns the
+	 * inode to the caller.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
+	ip->i_flags |= XFS_INEW;
+	inode->i_state = I_NEW;
+	spin_unlock(&ip->i_flags_lock);
+	XFS_STATS_INC(mp, xs_ig_frecycle);
 	return 0;
 }
 
@@ -523,14 +590,6 @@ xfs_iget_cache_hit(
 	if (ip->i_flags & (XFS_INEW | XFS_IRECLAIM | XFS_INACTIVATING))
 		goto out_skip;
 
-	if (ip->i_flags & XFS_NEED_INACTIVE) {
-		/* Unlinked inodes cannot be re-grabbed. */
-		if (VFS_I(ip)->i_nlink == 0) {
-			error = -ENOENT;
-			goto out_error;
-		}
-		goto out_inodegc_flush;
-	}
 
 	/*
 	 * Check the inode free state is valid. This also detects lookup
@@ -542,11 +601,18 @@ xfs_iget_cache_hit(
 
 	/* Skip inodes that have no vfs state. */
 	if ((flags & XFS_IGET_INCORE) &&
-	    (ip->i_flags & XFS_IRECLAIMABLE))
+	    (ip->i_flags & (XFS_IRECLAIMABLE | XFS_NEED_INACTIVE)))
 		goto out_skip;
 
 	/* The inode fits the selection criteria; process it. */
-	if (ip->i_flags & XFS_IRECLAIMABLE) {
+	if (ip->i_flags & XFS_NEED_INACTIVE) {
+		/* Drops i_flags_lock and RCU read lock. */
+		error = xfs_iget_reactivate(pag, ip);
+		if (error == -EAGAIN)
+			goto out_skip;
+		if (error)
+			return error;
+	} else if (ip->i_flags & XFS_IRECLAIMABLE) {
		/* Drops i_flags_lock and RCU read lock. */
 		error = xfs_iget_recycle(pag, ip);
 		if (error == -EAGAIN)
@@ -575,23 +641,11 @@ xfs_iget_cache_hit(
 
 out_skip:
 	trace_xfs_iget_skip(ip);
-	XFS_STATS_INC(mp, xs_ig_frecycle);
 	error = -EAGAIN;
out_error:
 	spin_unlock(&ip->i_flags_lock);
 	rcu_read_unlock();
 	return error;
-
-out_inodegc_flush:
-	spin_unlock(&ip->i_flags_lock);
-	rcu_read_unlock();
-	/*
-	 * Do not wait for the workers, because the caller could hold an AGI
-	 * buffer lock. We're just going to sleep in a loop anyway.
-	 */
-	if (xfs_is_inodegc_enabled(mp))
-		xfs_inodegc_queue_all(mp);
-	return -EAGAIN;
 }
 
 static int
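The core of the change is a small flag state machine: a cache hit on a
NEED_INACTIVE inode moves it to INACTIVATING (so inactivation workers and
racing lookups skip it), reinitialises the VFS inode, and then either
promotes it to INEW for the caller or, on failure, requeues it for
inactivation. The following self-contained userspace model illustrates those
transitions; the flag names are borrowed from XFS for readability and a
pthread mutex stands in for i_flags_lock, so this is a sketch of the logic,
not kernel code:

/*
 * Userspace model of the xfs_iget_reactivate() flag transitions.
 * Single-threaded demonstration; build with: cc model.c -lpthread
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NEED_INACTIVE	(1 << 0)	/* queued for inactivation */
#define INACTIVATING	(1 << 1)	/* hidden from other lookups */
#define INEW		(1 << 2)	/* being set up for the caller */

struct inode_model {
	pthread_mutex_t	flags_lock;	/* stands in for ip->i_flags_lock */
	unsigned int	flags;
};

/* Fake xfs_reinit_inode(): fail on demand to exercise the error path. */
static int reinit_inode(bool fail)
{
	return fail ? -12 /* -ENOMEM */ : 0;
}

/* Called with flags_lock held, mirroring the __releases() annotation. */
static int reactivate(struct inode_model *ip, bool fail_reinit)
{
	ip->flags &= ~NEED_INACTIVE;
	ip->flags |= INACTIVATING;	/* racing lookups now skip us */
	pthread_mutex_unlock(&ip->flags_lock);

	int error = reinit_inode(fail_reinit);
	if (error) {
		/* Requeue: retake the lock before flipping flags back. */
		pthread_mutex_lock(&ip->flags_lock);
		ip->flags |= NEED_INACTIVE;
		ip->flags &= ~INACTIVATING;
		pthread_mutex_unlock(&ip->flags_lock);
		return error;
	}

	/* Success: hand the inode back to the lookup as a new inode. */
	pthread_mutex_lock(&ip->flags_lock);
	ip->flags &= ~INACTIVATING;
	ip->flags |= INEW;
	pthread_mutex_unlock(&ip->flags_lock);
	return 0;
}

int main(void)
{
	struct inode_model ip = {
		.flags_lock = PTHREAD_MUTEX_INITIALIZER,
		.flags = NEED_INACTIVE,
	};

	pthread_mutex_lock(&ip.flags_lock);
	printf("reactivate (reinit fails): %d, flags 0x%x\n",
	       reactivate(&ip, true), ip.flags);	/* back to 0x1 */

	pthread_mutex_lock(&ip.flags_lock);
	printf("reactivate (reinit ok):    %d, flags 0x%x\n",
	       reactivate(&ip, false), ip.flags);	/* INEW, 0x4 */
	return 0;
}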