[2/8] xfs: track unlinked inode counts in per-ag data
diff mbox series

Message ID 154897668290.26065.2216454639188570690.stgit@magnolia
State New
Headers show
Series
  • xfs: incore unlinked list
Related show

Commit Message

Darrick J. Wong Jan. 31, 2019, 11:18 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Track the number of unlinked inodes in each AG so that we can use these
decisions to throttle inactivations when the unlinked list gets long.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_inode.c       |   40 +++++++++++++++++++++++++++++-----------
 fs/xfs/xfs_log_recover.c |    8 ++++++++
 fs/xfs/xfs_mount.c       |    5 +++++
 fs/xfs/xfs_mount.h       |    4 ++++
 4 files changed, 46 insertions(+), 11 deletions(-)

Comments

Brian Foster Feb. 1, 2019, 6:59 p.m. UTC | #1
On Thu, Jan 31, 2019 at 03:18:03PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Track the number of unlinked inodes in each AG so that we can use these
> decisions to throttle inactivations when the unlinked list gets long.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_inode.c       |   40 +++++++++++++++++++++++++++++-----------
>  fs/xfs/xfs_log_recover.c |    8 ++++++++
>  fs/xfs/xfs_mount.c       |    5 +++++
>  fs/xfs/xfs_mount.h       |    4 ++++
>  4 files changed, 46 insertions(+), 11 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index d18354517320..98355f5f9253 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -1900,6 +1900,7 @@ xfs_iunlink(
>  	struct xfs_dinode	*dip;
>  	struct xfs_buf		*agibp;
>  	struct xfs_buf		*ibp;
> +	struct xfs_perag	*pag;
>  	xfs_agnumber_t		agno;
>  	xfs_agino_t		agino;
>  	short			bucket_index;
> @@ -1912,6 +1913,8 @@ xfs_iunlink(
>  	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
>  	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
>  	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
> +	pag = xfs_perag_get(mp, agno);
> +	mutex_lock(&pag->pagi_unlinked_lock);

Any particular reason for using a mutex over a spinlock or atomic_t?

Brian

>  
>  	/*
>  	 * Get the agi buffer first.  It ensures lock ordering
> @@ -1919,7 +1922,7 @@ xfs_iunlink(
>  	 */
>  	error = xfs_read_agi(mp, tp, agno, &agibp);
>  	if (error)
> -		return error;
> +		goto out_unlock;
>  	agi = XFS_BUF_TO_AGI(agibp);
>  
>  	/*
> @@ -1939,7 +1942,7 @@ xfs_iunlink(
>  		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
>  				       0, 0);
>  		if (error)
> -			return error;
> +			goto out_unlock;
>  
>  		ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
>  		dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
> @@ -1964,7 +1967,12 @@ xfs_iunlink(
>  		(sizeof(xfs_agino_t) * bucket_index);
>  	xfs_trans_log_buf(tp, agibp, offset,
>  			  (offset + sizeof(xfs_agino_t) - 1));
> -	return 0;
> +	pag->pagi_unlinked_count++;
> +
> +out_unlock:
> +	mutex_unlock(&pag->pagi_unlinked_lock);
> +	xfs_perag_put(pag);
> +	return error;
>  }
>  
>  /*
> @@ -1982,6 +1990,7 @@ xfs_iunlink_remove(
>  	struct xfs_buf		*ibp;
>  	struct xfs_buf		*last_ibp;
>  	struct xfs_dinode	*last_dip = NULL;
> +	struct xfs_perag	*pag;
>  	xfs_ino_t		next_ino;
>  	xfs_agnumber_t		agno;
>  	xfs_agino_t		agino;
> @@ -1997,6 +2006,8 @@ xfs_iunlink_remove(
>  	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
>  	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
>  	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
> +	pag = xfs_perag_get(mp, agno);
> +	mutex_lock(&pag->pagi_unlinked_lock);
>  
>  	/*
>  	 * Get the agi buffer first.  It ensures lock ordering
> @@ -2004,7 +2015,7 @@ xfs_iunlink_remove(
>  	 */
>  	error = xfs_read_agi(mp, tp, agno, &agibp);
>  	if (error)
> -		return error;
> +		goto out_unlock;
>  	agi = XFS_BUF_TO_AGI(agibp);
>  
>  	/*
> @@ -2015,7 +2026,8 @@ xfs_iunlink_remove(
>  			be32_to_cpu(agi->agi_unlinked[bucket_index]))) {
>  		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
>  				agi, sizeof(*agi));
> -		return -EFSCORRUPTED;
> +		error = -EFSCORRUPTED;
> +		goto out_unlock;
>  	}
>  
>  	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
> @@ -2031,7 +2043,7 @@ xfs_iunlink_remove(
>  		if (error) {
>  			xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
>  				__func__, error);
> -			return error;
> +			goto out_unlock;
>  		}
>  		next_agino = be32_to_cpu(dip->di_next_unlinked);
>  		ASSERT(next_agino != 0);
> @@ -2080,7 +2092,7 @@ xfs_iunlink_remove(
>  				xfs_warn(mp,
>  	"%s: xfs_imap returned error %d.",
>  					 __func__, error);
> -				return error;
> +				goto out_unlock;
>  			}
>  
>  			error = xfs_imap_to_bp(mp, tp, &imap, &last_dip,
> @@ -2089,7 +2101,7 @@ xfs_iunlink_remove(
>  				xfs_warn(mp,
>  	"%s: xfs_imap_to_bp returned error %d.",
>  					__func__, error);
> -				return error;
> +				goto out_unlock;
>  			}
>  
>  			last_offset = imap.im_boffset;
> @@ -2098,7 +2110,8 @@ xfs_iunlink_remove(
>  				XFS_CORRUPTION_ERROR(__func__,
>  						XFS_ERRLEVEL_LOW, mp,
>  						last_dip, sizeof(*last_dip));
> -				return -EFSCORRUPTED;
> +				error = -EFSCORRUPTED;
> +				goto out_unlock;
>  			}
>  		}
>  
> @@ -2111,7 +2124,7 @@ xfs_iunlink_remove(
>  		if (error) {
>  			xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.",
>  				__func__, error);
> -			return error;
> +			goto out_unlock;
>  		}
>  		next_agino = be32_to_cpu(dip->di_next_unlinked);
>  		ASSERT(next_agino != 0);
> @@ -2146,7 +2159,12 @@ xfs_iunlink_remove(
>  				  (offset + sizeof(xfs_agino_t) - 1));
>  		xfs_inobp_check(mp, last_ibp);
>  	}
> -	return 0;
> +	pag->pagi_unlinked_count--;
> +
> +out_unlock:
> +	mutex_unlock(&pag->pagi_unlinked_lock);
> +	xfs_perag_put(pag);
> +	return error;
>  }
>  
>  /*
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index ff9a27834c50..c920b8aeba01 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -5054,6 +5054,7 @@ xlog_recover_process_one_iunlink(
>  	struct xfs_buf			*ibp;
>  	struct xfs_dinode		*dip;
>  	struct xfs_inode		*ip;
> +	struct xfs_perag		*pag;
>  	xfs_ino_t			ino;
>  	int				error;
>  
> @@ -5077,6 +5078,13 @@ xlog_recover_process_one_iunlink(
>  	agino = be32_to_cpu(dip->di_next_unlinked);
>  	xfs_buf_relse(ibp);
>  
> +	/* Make sure the in-core data knows about this unlinked inode. */
> +	pag = xfs_perag_get(mp, agno);
> +	mutex_lock(&pag->pagi_unlinked_lock);
> +	pag->pagi_unlinked_count++;
> +	mutex_unlock(&pag->pagi_unlinked_lock);
> +	xfs_perag_put(pag);
> +
>  	/*
>  	 * Prevent any DMAPI event from being sent when the reference on
>  	 * the inode is dropped.
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index 10be706ec72e..6bfc985669e0 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -149,6 +149,9 @@ xfs_free_perag(
>  		spin_unlock(&mp->m_perag_lock);
>  		ASSERT(pag);
>  		ASSERT(atomic_read(&pag->pag_ref) == 0);
> +		ASSERT(pag->pagi_unlinked_count == 0 ||
> +		       XFS_FORCED_SHUTDOWN(mp));
> +		mutex_destroy(&pag->pagi_unlinked_lock);
>  		xfs_buf_hash_destroy(pag);
>  		mutex_destroy(&pag->pag_ici_reclaim_lock);
>  		call_rcu(&pag->rcu_head, __xfs_free_perag);
> @@ -227,6 +230,7 @@ xfs_initialize_perag(
>  		/* first new pag is fully initialized */
>  		if (first_initialised == NULLAGNUMBER)
>  			first_initialised = index;
> +		mutex_init(&pag->pagi_unlinked_lock);
>  	}
>  
>  	index = xfs_set_inode_alloc(mp, agcount);
> @@ -249,6 +253,7 @@ xfs_initialize_perag(
>  		if (!pag)
>  			break;
>  		xfs_buf_hash_destroy(pag);
> +		mutex_destroy(&pag->pagi_unlinked_lock);
>  		mutex_destroy(&pag->pag_ici_reclaim_lock);
>  		kmem_free(pag);
>  	}
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index e344b1dfde63..0fcc6b6a4f67 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -388,6 +388,10 @@ typedef struct xfs_perag {
>  
>  	/* reference count */
>  	uint8_t			pagf_refcount_level;
> +
> +	/* unlinked inodes */
> +	struct mutex		pagi_unlinked_lock;
> +	uint32_t		pagi_unlinked_count;
>  } xfs_perag_t;
>  
>  static inline struct xfs_ag_resv *
>
Darrick J. Wong Feb. 1, 2019, 7:33 p.m. UTC | #2
On Fri, Feb 01, 2019 at 01:59:24PM -0500, Brian Foster wrote:
> On Thu, Jan 31, 2019 at 03:18:03PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Track the number of unlinked inodes in each AG so that we can use these
> > decisions to throttle inactivations when the unlinked list gets long.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/xfs/xfs_inode.c       |   40 +++++++++++++++++++++++++++++-----------
> >  fs/xfs/xfs_log_recover.c |    8 ++++++++
> >  fs/xfs/xfs_mount.c       |    5 +++++
> >  fs/xfs/xfs_mount.h       |    4 ++++
> >  4 files changed, 46 insertions(+), 11 deletions(-)
> > 
> > 
> > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > index d18354517320..98355f5f9253 100644
> > --- a/fs/xfs/xfs_inode.c
> > +++ b/fs/xfs/xfs_inode.c
> > @@ -1900,6 +1900,7 @@ xfs_iunlink(
> >  	struct xfs_dinode	*dip;
> >  	struct xfs_buf		*agibp;
> >  	struct xfs_buf		*ibp;
> > +	struct xfs_perag	*pag;
> >  	xfs_agnumber_t		agno;
> >  	xfs_agino_t		agino;
> >  	short			bucket_index;
> > @@ -1912,6 +1913,8 @@ xfs_iunlink(
> >  	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
> >  	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
> >  	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
> > +	pag = xfs_perag_get(mp, agno);
> > +	mutex_lock(&pag->pagi_unlinked_lock);
> 
> Any particular reason for using a mutex over a spinlock or atomic_t?

Thinking this over, the pagi_unlinked_lock protects pagi_unlinked_count,
which is only used to assert that we don't have any unlinked inodes left
over during a clean unmount.  It /was/ used in the patch to make
->destroy_inode callers wait for inactivation, but since I dropped that
patch I don't need the counter anymore.

Maybe we can just drop this patch entirely?  If we leak any unlinked
inodes they'll get cleaned up at next mount... wait, we never did merge
Eric's patch to reap unlinked inodes at mount time, did we?

--D

> Brian
> 
> >  
> >  	/*
> >  	 * Get the agi buffer first.  It ensures lock ordering
> > @@ -1919,7 +1922,7 @@ xfs_iunlink(
> >  	 */
> >  	error = xfs_read_agi(mp, tp, agno, &agibp);
> >  	if (error)
> > -		return error;
> > +		goto out_unlock;
> >  	agi = XFS_BUF_TO_AGI(agibp);
> >  
> >  	/*
> > @@ -1939,7 +1942,7 @@ xfs_iunlink(
> >  		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
> >  				       0, 0);
> >  		if (error)
> > -			return error;
> > +			goto out_unlock;
> >  
> >  		ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
> >  		dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
> > @@ -1964,7 +1967,12 @@ xfs_iunlink(
> >  		(sizeof(xfs_agino_t) * bucket_index);
> >  	xfs_trans_log_buf(tp, agibp, offset,
> >  			  (offset + sizeof(xfs_agino_t) - 1));
> > -	return 0;
> > +	pag->pagi_unlinked_count++;
> > +
> > +out_unlock:
> > +	mutex_unlock(&pag->pagi_unlinked_lock);
> > +	xfs_perag_put(pag);
> > +	return error;
> >  }
> >  
> >  /*
> > @@ -1982,6 +1990,7 @@ xfs_iunlink_remove(
> >  	struct xfs_buf		*ibp;
> >  	struct xfs_buf		*last_ibp;
> >  	struct xfs_dinode	*last_dip = NULL;
> > +	struct xfs_perag	*pag;
> >  	xfs_ino_t		next_ino;
> >  	xfs_agnumber_t		agno;
> >  	xfs_agino_t		agino;
> > @@ -1997,6 +2006,8 @@ xfs_iunlink_remove(
> >  	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
> >  	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
> >  	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
> > +	pag = xfs_perag_get(mp, agno);
> > +	mutex_lock(&pag->pagi_unlinked_lock);
> >  
> >  	/*
> >  	 * Get the agi buffer first.  It ensures lock ordering
> > @@ -2004,7 +2015,7 @@ xfs_iunlink_remove(
> >  	 */
> >  	error = xfs_read_agi(mp, tp, agno, &agibp);
> >  	if (error)
> > -		return error;
> > +		goto out_unlock;
> >  	agi = XFS_BUF_TO_AGI(agibp);
> >  
> >  	/*
> > @@ -2015,7 +2026,8 @@ xfs_iunlink_remove(
> >  			be32_to_cpu(agi->agi_unlinked[bucket_index]))) {
> >  		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
> >  				agi, sizeof(*agi));
> > -		return -EFSCORRUPTED;
> > +		error = -EFSCORRUPTED;
> > +		goto out_unlock;
> >  	}
> >  
> >  	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
> > @@ -2031,7 +2043,7 @@ xfs_iunlink_remove(
> >  		if (error) {
> >  			xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
> >  				__func__, error);
> > -			return error;
> > +			goto out_unlock;
> >  		}
> >  		next_agino = be32_to_cpu(dip->di_next_unlinked);
> >  		ASSERT(next_agino != 0);
> > @@ -2080,7 +2092,7 @@ xfs_iunlink_remove(
> >  				xfs_warn(mp,
> >  	"%s: xfs_imap returned error %d.",
> >  					 __func__, error);
> > -				return error;
> > +				goto out_unlock;
> >  			}
> >  
> >  			error = xfs_imap_to_bp(mp, tp, &imap, &last_dip,
> > @@ -2089,7 +2101,7 @@ xfs_iunlink_remove(
> >  				xfs_warn(mp,
> >  	"%s: xfs_imap_to_bp returned error %d.",
> >  					__func__, error);
> > -				return error;
> > +				goto out_unlock;
> >  			}
> >  
> >  			last_offset = imap.im_boffset;
> > @@ -2098,7 +2110,8 @@ xfs_iunlink_remove(
> >  				XFS_CORRUPTION_ERROR(__func__,
> >  						XFS_ERRLEVEL_LOW, mp,
> >  						last_dip, sizeof(*last_dip));
> > -				return -EFSCORRUPTED;
> > +				error = -EFSCORRUPTED;
> > +				goto out_unlock;
> >  			}
> >  		}
> >  
> > @@ -2111,7 +2124,7 @@ xfs_iunlink_remove(
> >  		if (error) {
> >  			xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.",
> >  				__func__, error);
> > -			return error;
> > +			goto out_unlock;
> >  		}
> >  		next_agino = be32_to_cpu(dip->di_next_unlinked);
> >  		ASSERT(next_agino != 0);
> > @@ -2146,7 +2159,12 @@ xfs_iunlink_remove(
> >  				  (offset + sizeof(xfs_agino_t) - 1));
> >  		xfs_inobp_check(mp, last_ibp);
> >  	}
> > -	return 0;
> > +	pag->pagi_unlinked_count--;
> > +
> > +out_unlock:
> > +	mutex_unlock(&pag->pagi_unlinked_lock);
> > +	xfs_perag_put(pag);
> > +	return error;
> >  }
> >  
> >  /*
> > diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> > index ff9a27834c50..c920b8aeba01 100644
> > --- a/fs/xfs/xfs_log_recover.c
> > +++ b/fs/xfs/xfs_log_recover.c
> > @@ -5054,6 +5054,7 @@ xlog_recover_process_one_iunlink(
> >  	struct xfs_buf			*ibp;
> >  	struct xfs_dinode		*dip;
> >  	struct xfs_inode		*ip;
> > +	struct xfs_perag		*pag;
> >  	xfs_ino_t			ino;
> >  	int				error;
> >  
> > @@ -5077,6 +5078,13 @@ xlog_recover_process_one_iunlink(
> >  	agino = be32_to_cpu(dip->di_next_unlinked);
> >  	xfs_buf_relse(ibp);
> >  
> > +	/* Make sure the in-core data knows about this unlinked inode. */
> > +	pag = xfs_perag_get(mp, agno);
> > +	mutex_lock(&pag->pagi_unlinked_lock);
> > +	pag->pagi_unlinked_count++;
> > +	mutex_unlock(&pag->pagi_unlinked_lock);
> > +	xfs_perag_put(pag);
> > +
> >  	/*
> >  	 * Prevent any DMAPI event from being sent when the reference on
> >  	 * the inode is dropped.
> > diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> > index 10be706ec72e..6bfc985669e0 100644
> > --- a/fs/xfs/xfs_mount.c
> > +++ b/fs/xfs/xfs_mount.c
> > @@ -149,6 +149,9 @@ xfs_free_perag(
> >  		spin_unlock(&mp->m_perag_lock);
> >  		ASSERT(pag);
> >  		ASSERT(atomic_read(&pag->pag_ref) == 0);
> > +		ASSERT(pag->pagi_unlinked_count == 0 ||
> > +		       XFS_FORCED_SHUTDOWN(mp));
> > +		mutex_destroy(&pag->pagi_unlinked_lock);
> >  		xfs_buf_hash_destroy(pag);
> >  		mutex_destroy(&pag->pag_ici_reclaim_lock);
> >  		call_rcu(&pag->rcu_head, __xfs_free_perag);
> > @@ -227,6 +230,7 @@ xfs_initialize_perag(
> >  		/* first new pag is fully initialized */
> >  		if (first_initialised == NULLAGNUMBER)
> >  			first_initialised = index;
> > +		mutex_init(&pag->pagi_unlinked_lock);
> >  	}
> >  
> >  	index = xfs_set_inode_alloc(mp, agcount);
> > @@ -249,6 +253,7 @@ xfs_initialize_perag(
> >  		if (!pag)
> >  			break;
> >  		xfs_buf_hash_destroy(pag);
> > +		mutex_destroy(&pag->pagi_unlinked_lock);
> >  		mutex_destroy(&pag->pag_ici_reclaim_lock);
> >  		kmem_free(pag);
> >  	}
> > diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> > index e344b1dfde63..0fcc6b6a4f67 100644
> > --- a/fs/xfs/xfs_mount.h
> > +++ b/fs/xfs/xfs_mount.h
> > @@ -388,6 +388,10 @@ typedef struct xfs_perag {
> >  
> >  	/* reference count */
> >  	uint8_t			pagf_refcount_level;
> > +
> > +	/* unlinked inodes */
> > +	struct mutex		pagi_unlinked_lock;
> > +	uint32_t		pagi_unlinked_count;
> >  } xfs_perag_t;
> >  
> >  static inline struct xfs_ag_resv *
> >
Christoph Hellwig Feb. 2, 2019, 4:14 p.m. UTC | #3
On Fri, Feb 01, 2019 at 11:33:48AM -0800, Darrick J. Wong wrote:
> Maybe we can just drop this patch entirely?  If we leak any unlinked
> inodes they'll get cleaned up at next mount... wait, we never did merge
> Eric's patch to reap unlinked inodes at mount time, did we?

Which patch?  We must recovery unlinked inodes during each mount that
requires log recovery and have done that since the very beginning.

I don't see how we could ever see unlinked inodes during a clean
mount.
Darrick J. Wong Feb. 2, 2019, 7:28 p.m. UTC | #4
On Sat, Feb 02, 2019 at 08:14:04AM -0800, Christoph Hellwig wrote:
> On Fri, Feb 01, 2019 at 11:33:48AM -0800, Darrick J. Wong wrote:
> > Maybe we can just drop this patch entirely?  If we leak any unlinked
> > inodes they'll get cleaned up at next mount... wait, we never did merge
> > Eric's patch to reap unlinked inodes at mount time, did we?
> 
> Which patch?  We must recovery unlinked inodes during each mount that
> requires log recovery and have done that since the very beginning.

I /think/ the patch was "xfs: don't require a dirty log on snapshots".

> I don't see how we could ever see unlinked inodes during a clean
> mount.

/me had thought it was related to something about root filesystems that
are mounted ro and then remounted rw, but <shrug> I can't recall ATM.

Anyway, I'm still not sure we need all this mutex+counter infrastructure
for a debugging assert...

--D

Patch
diff mbox series

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d18354517320..98355f5f9253 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1900,6 +1900,7 @@  xfs_iunlink(
 	struct xfs_dinode	*dip;
 	struct xfs_buf		*agibp;
 	struct xfs_buf		*ibp;
+	struct xfs_perag	*pag;
 	xfs_agnumber_t		agno;
 	xfs_agino_t		agino;
 	short			bucket_index;
@@ -1912,6 +1913,8 @@  xfs_iunlink(
 	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
 	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
 	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
+	pag = xfs_perag_get(mp, agno);
+	mutex_lock(&pag->pagi_unlinked_lock);
 
 	/*
 	 * Get the agi buffer first.  It ensures lock ordering
@@ -1919,7 +1922,7 @@  xfs_iunlink(
 	 */
 	error = xfs_read_agi(mp, tp, agno, &agibp);
 	if (error)
-		return error;
+		goto out_unlock;
 	agi = XFS_BUF_TO_AGI(agibp);
 
 	/*
@@ -1939,7 +1942,7 @@  xfs_iunlink(
 		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
 				       0, 0);
 		if (error)
-			return error;
+			goto out_unlock;
 
 		ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
 		dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
@@ -1964,7 +1967,12 @@  xfs_iunlink(
 		(sizeof(xfs_agino_t) * bucket_index);
 	xfs_trans_log_buf(tp, agibp, offset,
 			  (offset + sizeof(xfs_agino_t) - 1));
-	return 0;
+	pag->pagi_unlinked_count++;
+
+out_unlock:
+	mutex_unlock(&pag->pagi_unlinked_lock);
+	xfs_perag_put(pag);
+	return error;
 }
 
 /*
@@ -1982,6 +1990,7 @@  xfs_iunlink_remove(
 	struct xfs_buf		*ibp;
 	struct xfs_buf		*last_ibp;
 	struct xfs_dinode	*last_dip = NULL;
+	struct xfs_perag	*pag;
 	xfs_ino_t		next_ino;
 	xfs_agnumber_t		agno;
 	xfs_agino_t		agino;
@@ -1997,6 +2006,8 @@  xfs_iunlink_remove(
 	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
 	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
 	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
+	pag = xfs_perag_get(mp, agno);
+	mutex_lock(&pag->pagi_unlinked_lock);
 
 	/*
 	 * Get the agi buffer first.  It ensures lock ordering
@@ -2004,7 +2015,7 @@  xfs_iunlink_remove(
 	 */
 	error = xfs_read_agi(mp, tp, agno, &agibp);
 	if (error)
-		return error;
+		goto out_unlock;
 	agi = XFS_BUF_TO_AGI(agibp);
 
 	/*
@@ -2015,7 +2026,8 @@  xfs_iunlink_remove(
 			be32_to_cpu(agi->agi_unlinked[bucket_index]))) {
 		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
 				agi, sizeof(*agi));
-		return -EFSCORRUPTED;
+		error = -EFSCORRUPTED;
+		goto out_unlock;
 	}
 
 	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
@@ -2031,7 +2043,7 @@  xfs_iunlink_remove(
 		if (error) {
 			xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
 				__func__, error);
-			return error;
+			goto out_unlock;
 		}
 		next_agino = be32_to_cpu(dip->di_next_unlinked);
 		ASSERT(next_agino != 0);
@@ -2080,7 +2092,7 @@  xfs_iunlink_remove(
 				xfs_warn(mp,
 	"%s: xfs_imap returned error %d.",
 					 __func__, error);
-				return error;
+				goto out_unlock;
 			}
 
 			error = xfs_imap_to_bp(mp, tp, &imap, &last_dip,
@@ -2089,7 +2101,7 @@  xfs_iunlink_remove(
 				xfs_warn(mp,
 	"%s: xfs_imap_to_bp returned error %d.",
 					__func__, error);
-				return error;
+				goto out_unlock;
 			}
 
 			last_offset = imap.im_boffset;
@@ -2098,7 +2110,8 @@  xfs_iunlink_remove(
 				XFS_CORRUPTION_ERROR(__func__,
 						XFS_ERRLEVEL_LOW, mp,
 						last_dip, sizeof(*last_dip));
-				return -EFSCORRUPTED;
+				error = -EFSCORRUPTED;
+				goto out_unlock;
 			}
 		}
 
@@ -2111,7 +2124,7 @@  xfs_iunlink_remove(
 		if (error) {
 			xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.",
 				__func__, error);
-			return error;
+			goto out_unlock;
 		}
 		next_agino = be32_to_cpu(dip->di_next_unlinked);
 		ASSERT(next_agino != 0);
@@ -2146,7 +2159,12 @@  xfs_iunlink_remove(
 				  (offset + sizeof(xfs_agino_t) - 1));
 		xfs_inobp_check(mp, last_ibp);
 	}
-	return 0;
+	pag->pagi_unlinked_count--;
+
+out_unlock:
+	mutex_unlock(&pag->pagi_unlinked_lock);
+	xfs_perag_put(pag);
+	return error;
 }
 
 /*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index ff9a27834c50..c920b8aeba01 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -5054,6 +5054,7 @@  xlog_recover_process_one_iunlink(
 	struct xfs_buf			*ibp;
 	struct xfs_dinode		*dip;
 	struct xfs_inode		*ip;
+	struct xfs_perag		*pag;
 	xfs_ino_t			ino;
 	int				error;
 
@@ -5077,6 +5078,13 @@  xlog_recover_process_one_iunlink(
 	agino = be32_to_cpu(dip->di_next_unlinked);
 	xfs_buf_relse(ibp);
 
+	/* Make sure the in-core data knows about this unlinked inode. */
+	pag = xfs_perag_get(mp, agno);
+	mutex_lock(&pag->pagi_unlinked_lock);
+	pag->pagi_unlinked_count++;
+	mutex_unlock(&pag->pagi_unlinked_lock);
+	xfs_perag_put(pag);
+
 	/*
 	 * Prevent any DMAPI event from being sent when the reference on
 	 * the inode is dropped.
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 10be706ec72e..6bfc985669e0 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -149,6 +149,9 @@  xfs_free_perag(
 		spin_unlock(&mp->m_perag_lock);
 		ASSERT(pag);
 		ASSERT(atomic_read(&pag->pag_ref) == 0);
+		ASSERT(pag->pagi_unlinked_count == 0 ||
+		       XFS_FORCED_SHUTDOWN(mp));
+		mutex_destroy(&pag->pagi_unlinked_lock);
 		xfs_buf_hash_destroy(pag);
 		mutex_destroy(&pag->pag_ici_reclaim_lock);
 		call_rcu(&pag->rcu_head, __xfs_free_perag);
@@ -227,6 +230,7 @@  xfs_initialize_perag(
 		/* first new pag is fully initialized */
 		if (first_initialised == NULLAGNUMBER)
 			first_initialised = index;
+		mutex_init(&pag->pagi_unlinked_lock);
 	}
 
 	index = xfs_set_inode_alloc(mp, agcount);
@@ -249,6 +253,7 @@  xfs_initialize_perag(
 		if (!pag)
 			break;
 		xfs_buf_hash_destroy(pag);
+		mutex_destroy(&pag->pagi_unlinked_lock);
 		mutex_destroy(&pag->pag_ici_reclaim_lock);
 		kmem_free(pag);
 	}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index e344b1dfde63..0fcc6b6a4f67 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -388,6 +388,10 @@  typedef struct xfs_perag {
 
 	/* reference count */
 	uint8_t			pagf_refcount_level;
+
+	/* unlinked inodes */
+	struct mutex		pagi_unlinked_lock;
+	uint32_t		pagi_unlinked_count;
 } xfs_perag_t;
 
 static inline struct xfs_ag_resv *