diff mbox series

[RFC,v4,3/3] xfs: insert unlinked inodes from tail

Message ID 20200818133015.25398-4-hsiangkao@redhat.com (mailing list archive)
State New
Headers show
Series xfs: more unlinked inode list optimization v4 | expand

Commit Message

Gao Xiang Aug. 18, 2020, 1:30 p.m. UTC
Currently, AGI buffer is always touched since xfs_iunlink()
adds unlinked inodes from head unconditionally, but since we
have the only one unlinked list now and if we insert unlinked
inodes from tail instead and there're more than 1 inode, AGI
buffer could be untouched.

With this change, it shows that only 938 of 10000 operations
modifies the head of unlinked list with the following workload:
 seq 1 10000 | xargs touch
 find . | xargs -n3 -P100 rm

Note that xfs_iunlink_insert_lock() is slightly different from
xfs_iunlink_remove_lock() due to whiteout path, refer to inlined
comments for more details.

Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
 fs/xfs/xfs_inode.c | 99 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 74 insertions(+), 25 deletions(-)
diff mbox series

Patch

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index f32a1172b5cd..0add263d21a8 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1975,30 +1975,88 @@  xfs_iunlink_update_bucket(
 
 static int
 xfs_iunlink_insert_inode(
+	struct xfs_perag	*pag,
 	struct xfs_trans	*tp,
 	struct xfs_buf		*agibp,
 	struct xfs_inode	*ip)
 {
 	struct xfs_mount	*mp = tp->t_mountp;
-	struct xfs_inode	*nip;
-	xfs_agino_t		next_agino = NULLAGINO;
 	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
 	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
 
-	nip = list_first_entry_or_null(&agibp->b_pag->pag_ici_unlink_list,
-					struct xfs_inode, i_unlink);
-	if (nip) {
+	if (!list_empty(&pag->pag_ici_unlink_list)) {
+		struct xfs_inode	*pip;
 
 		/*
-		 * There is already another inode in the bucket, so point this
-		 * inode to the current head of the list.
+		 * There is already another inode in the bucket, so point
+		 * the last inode to this inode.
 		 */
-		next_agino = XFS_INO_TO_AGINO(mp, nip->i_ino);
-		xfs_iunlink_log(tp, ip, NULLAGINO, next_agino);
+		pip = list_last_entry(&pag->pag_ici_unlink_list,
+				struct xfs_inode, i_unlink);
+		xfs_iunlink_log(tp, pip, NULLAGINO, agino);
+		return 0;
 	}
 
+	ASSERT(agibp);
 	/* Point the head of the list to point to this inode. */
-	return xfs_iunlink_update_bucket(tp, agno, agibp, next_agino, agino);
+	return xfs_iunlink_update_bucket(tp, agno, agibp, NULLAGINO, agino);
+}
+
+/*
+ * Lock the perag and take AGI lock if agi_unlinked is touched as well
+ * for xfs_iunlink_insert_inode(). As for the details of locking order,
+ * refer to the comments of xfs_iunlink_remove_lock().
+ */
+static struct xfs_perag *
+xfs_iunlink_insert_lock(
+	xfs_agino_t		agno,
+	struct xfs_trans        *tp,
+	struct xfs_inode	*ip,
+	struct xfs_buf		**agibpp)
+{
+	struct xfs_mount        *mp = tp->t_mountp;
+	bool			locked = true;
+	struct xfs_perag	*pag;
+	int			error;
+
+	pag = xfs_perag_get(mp, agno);
+	/* paired with smp_store_release() in xfs_iunlink_unlock() */
+	if (smp_load_acquire(&pag->pag_iunlink_trans) == tp) {
+		/*
+		 * if pag_iunlink_trans is the current trans, we're
+		 * in the current process context, so it's safe here.
+		 */
+		ASSERT(mutex_is_locked(&pag->pag_iunlink_mutex));
+		/*
+		 * slightly different from xfs_iunlink_remove_lock(),
+		 * the path of xfs_iunlink_remove() and then xfs_iunlink()
+		 * on the same AG needs to be considered (e.g. whiteout
+		 * rename), so lock AGI first in xfs_iunlink_remove(),
+		 * and recursively get AGI safely below.
+		 */
+		if (!list_empty(&pag->pag_ici_unlink_list))
+			goto out;
+	} else {
+		mutex_lock(&pag->pag_iunlink_mutex);
+		if (!list_empty(&pag->pag_ici_unlink_list))
+			goto out;
+		mutex_unlock(&pag->pag_iunlink_mutex);
+		locked = false;
+	}
+
+	/* and see comments in xfs_iunlink_remove_lock() on locking order */
+	error = xfs_read_agi(mp, tp, agno, agibpp);
+	if (error) {
+		xfs_perag_put(pag);
+		return ERR_PTR(error);
+	}
+
+	if (!locked)
+		mutex_lock(&pag->pag_iunlink_mutex);
+out:
+	WRITE_ONCE(pag->pag_iunlink_trans, tp);
+	++pag->pag_iunlink_refcnt;
+	return pag;
 }
 
 void
@@ -2026,7 +2084,7 @@  xfs_iunlink(
 	struct xfs_inode	*ip)
 {
 	struct xfs_mount	*mp = tp->t_mountp;
-	struct xfs_buf		*agibp;
+	struct xfs_buf		*agibp = NULL;
 	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
 	int			error;
 	struct xfs_perag	*pag;
@@ -2035,18 +2093,9 @@  xfs_iunlink(
 	ASSERT(VFS_I(ip)->i_mode != 0);
 	trace_xfs_iunlink(ip);
 
-	/* Get the agi buffer first.  It ensures lock ordering on the list. */
-	error = xfs_read_agi(mp, tp, agno, &agibp);
-	if (error)
-		return error;
-
-	/* XXX: will be shortly removed instead in the next commit. */
-	pag = xfs_perag_get(mp, agno);
-	/* paired with smp_store_release() in xfs_iunlink_unlock() */
-	if (smp_load_acquire(&pag->pag_iunlink_trans) != tp)
-		mutex_lock(&pag->pag_iunlink_mutex);
-	WRITE_ONCE(pag->pag_iunlink_trans, tp);
-	++pag->pag_iunlink_refcnt;
+	pag = xfs_iunlink_insert_lock(agno, tp, ip, &agibp);
+	if (IS_ERR(pag))
+		return PTR_ERR(pag);
 
 	/*
 	 * Insert the inode into the on disk unlinked list, and if that
@@ -2054,9 +2103,9 @@  xfs_iunlink(
 	 * order so that the modifications required to the on disk list are not
 	 * impacted by already having this inode in the list.
 	 */
-	error = xfs_iunlink_insert_inode(tp, agibp, ip);
+	error = xfs_iunlink_insert_inode(pag, tp, agibp, ip);
 	if (!error)
-		list_add(&ip->i_unlink, &agibp->b_pag->pag_ici_unlink_list);
+		list_add_tail(&ip->i_unlink, &pag->pag_ici_unlink_list);
 
 	xfs_iunlink_unlock(pag);
 	return error;