diff mbox series

[2/2] xfs: fix handling of RCU freed inodes from other AGs in xrep_iunlink_mark_incore

Message ID 20240812052352.3786445-3-hch@lst.de (mailing list archive)
State New
Headers show
Series [1/2] xfs: fix handling of RCU freed inodes from other AGs in xfs_icwalk_ag | expand

Commit Message

Christoph Hellwig Aug. 12, 2024, 5:23 a.m. UTC
When xrep_iunlink_mark_incore skips an inode because it was RCU freed
from another AG, the slot for the inode in the batch array needs to be
zeroed.  Also un-duplicate the check and remove the need for the
xrep_iunlink_igrab helper.

Fixes: ab97f4b1c030 ("xfs: repair AGI unlinked inode bucket lists")
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/scrub/agheader_repair.c | 28 +++++++---------------------
 1 file changed, 7 insertions(+), 21 deletions(-)

Comments

Darrick J. Wong Aug. 12, 2024, 5:42 p.m. UTC | #1
On Mon, Aug 12, 2024 at 07:23:01AM +0200, Christoph Hellwig wrote:
> When xrep_iunlink_mark_incore skips an inode because it was RCU freed
> from another AG, the slot for the inode in the batch array needs to be
> zeroed.  Also un-duplicate the check and remove the need for the
> xrep_iunlink_igrab helper.
> 
> Fixes: ab97f4b1c030 ("xfs: repair AGI unlinked inode bucket lists")
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks good to me,
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

> ---
>  fs/xfs/scrub/agheader_repair.c | 28 +++++++---------------------
>  1 file changed, 7 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
> index 2f98d90d7fd66d..558bc86b1b83c3 100644
> --- a/fs/xfs/scrub/agheader_repair.c
> +++ b/fs/xfs/scrub/agheader_repair.c
> @@ -1108,23 +1108,6 @@ xrep_iunlink_walk_ondisk_bucket(
>  	return 0;
>  }
>  
> -/* Decide if this is an unlinked inode in this AG. */
> -STATIC bool
> -xrep_iunlink_igrab(
> -	struct xfs_perag	*pag,
> -	struct xfs_inode	*ip)
> -{
> -	struct xfs_mount	*mp = pag->pag_mount;
> -
> -	if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
> -		return false;
> -
> -	if (!xfs_inode_on_unlinked_list(ip))
> -		return false;
> -
> -	return true;
> -}
> -
>  /*
>   * Mark the given inode in the lookup batch in our unlinked inode bitmap, and
>   * remember if this inode is the start of the unlinked chain.
> @@ -1196,9 +1179,6 @@ xrep_iunlink_mark_incore(
>  		for (i = 0; i < nr_found; i++) {
>  			struct xfs_inode *ip = ragi->lookup_batch[i];
>  
> -			if (done || !xrep_iunlink_igrab(pag, ip))
> -				ragi->lookup_batch[i] = NULL;
> -
>  			/*
>  			 * Update the index for the next lookup. Catch
>  			 * overflows into the next AG range which can occur if
> @@ -1211,8 +1191,14 @@ xrep_iunlink_mark_incore(
>  			 * us to see this inode, so another lookup from the
>  			 * same index will not find it again.
>  			 */
> -			if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
> +			if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) {
> +				ragi->lookup_batch[i] = NULL;
>  				continue;
> +			}
> +
> +			if (done || !xfs_inode_on_unlinked_list(ip))
> +				ragi->lookup_batch[i] = NULL;
> +
>  			first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
>  			if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
>  				done = true;
> -- 
> 2.43.0
> 
>
Dave Chinner Aug. 14, 2024, 10:04 p.m. UTC | #2
On Mon, Aug 12, 2024 at 07:23:01AM +0200, Christoph Hellwig wrote:
> When xrep_iunlink_mark_incore skips an inode because it was RCU freed
> from another AG, the slot for the inode in the batch array needs to be
> zeroed.  Also un-duplicate the check and remove the need for the
> xrep_iunlink_igrab helper.
> 
> Fixes: ab97f4b1c030 ("xfs: repair AGI unlinked inode bucket lists")
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/scrub/agheader_repair.c | 28 +++++++---------------------
>  1 file changed, 7 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
> index 2f98d90d7fd66d..558bc86b1b83c3 100644
> --- a/fs/xfs/scrub/agheader_repair.c
> +++ b/fs/xfs/scrub/agheader_repair.c
> @@ -1108,23 +1108,6 @@ xrep_iunlink_walk_ondisk_bucket(
>  	return 0;
>  }
>  
> -/* Decide if this is an unlinked inode in this AG. */
> -STATIC bool
> -xrep_iunlink_igrab(
> -	struct xfs_perag	*pag,
> -	struct xfs_inode	*ip)
> -{
> -	struct xfs_mount	*mp = pag->pag_mount;
> -
> -	if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
> -		return false;
> -
> -	if (!xfs_inode_on_unlinked_list(ip))
> -		return false;
> -
> -	return true;
> -}

This code is wrong. It does not explicitly check for RCU freed
inodes (i.e.  ip->i_ino = 0 or XFS_IRECLAIM being set) and so will
never detect stale RCU freed inodes in AG 0.

It is probably working by chance to avoid stale freed inodes
because ip->i_prev_unlinked will be 0 for such inodes.

*However*, this code does not have the necessary memory barriers to
guarantee it catches the ip->i_ino or ip->i_prev_unlinked writes
prior to freeing. The ip->i_ino check needs to be done under the
ip->i_flags_lock as it is the unlock->lock memory barrier that
the inode cache RCU lookup algorithms rely on for correct detection
for RCU freed inodes.

> -
>  /*
>   * Mark the given inode in the lookup batch in our unlinked inode bitmap, and
>   * remember if this inode is the start of the unlinked chain.
> @@ -1196,9 +1179,6 @@ xrep_iunlink_mark_incore(
>  		for (i = 0; i < nr_found; i++) {
>  			struct xfs_inode *ip = ragi->lookup_batch[i];
>  
> -			if (done || !xrep_iunlink_igrab(pag, ip))
> -				ragi->lookup_batch[i] = NULL;
> -
>  			/*
>  			 * Update the index for the next lookup. Catch
>  			 * overflows into the next AG range which can occur if
> @@ -1211,8 +1191,14 @@ xrep_iunlink_mark_incore(
>  			 * us to see this inode, so another lookup from the
>  			 * same index will not find it again.
>  			 */
> -			if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
> +			if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) {
> +				ragi->lookup_batch[i] = NULL;
>  				continue;
> +			}
> +
> +			if (done || !xfs_inode_on_unlinked_list(ip))
> +				ragi->lookup_batch[i] = NULL;

Same with this new code - it's not explicitly checking for RCU freed
inodes and doesn't have the correct memory barriers.

Hence I think the fixes for this code are:

1. change xrep_iunlink_igrab() to use the same RCU freed inode
checks as xfs_blockgc_igrab(); and
2. remove the (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
check altogether.

-Dave.
diff mbox series

Patch

diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 2f98d90d7fd66d..558bc86b1b83c3 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -1108,23 +1108,6 @@  xrep_iunlink_walk_ondisk_bucket(
 	return 0;
 }
 
-/* Decide if this is an unlinked inode in this AG. */
-STATIC bool
-xrep_iunlink_igrab(
-	struct xfs_perag	*pag,
-	struct xfs_inode	*ip)
-{
-	struct xfs_mount	*mp = pag->pag_mount;
-
-	if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
-		return false;
-
-	if (!xfs_inode_on_unlinked_list(ip))
-		return false;
-
-	return true;
-}
-
 /*
  * Mark the given inode in the lookup batch in our unlinked inode bitmap, and
  * remember if this inode is the start of the unlinked chain.
@@ -1196,9 +1179,6 @@  xrep_iunlink_mark_incore(
 		for (i = 0; i < nr_found; i++) {
 			struct xfs_inode *ip = ragi->lookup_batch[i];
 
-			if (done || !xrep_iunlink_igrab(pag, ip))
-				ragi->lookup_batch[i] = NULL;
-
 			/*
 			 * Update the index for the next lookup. Catch
 			 * overflows into the next AG range which can occur if
@@ -1211,8 +1191,14 @@  xrep_iunlink_mark_incore(
 			 * us to see this inode, so another lookup from the
 			 * same index will not find it again.
 			 */
-			if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
+			if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) {
+				ragi->lookup_batch[i] = NULL;
 				continue;
+			}
+
+			if (done || !xfs_inode_on_unlinked_list(ip))
+				ragi->lookup_batch[i] = NULL;
+
 			first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
 			if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
 				done = true;