diff mbox series

[1/2] xfs: load rtbitmap and rtsummary extent mapping btrees at mount time

Message ID 166473480249.1083697.13081552727850377113.stgit@magnolia (mailing list archive)
State Accepted
Headers show
Series xfs: improve rt metadata use for scrub | expand

Commit Message

Darrick J. Wong Oct. 2, 2022, 6:20 p.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

It turns out that GETFSMAP and online fsck have had a bug for years due
to their use of ILOCK_SHARED to coordinate their linear scans of the
realtime bitmap.  If the bitmap file's data fork happens to be in BTREE
format and the scan occurs immediately after mounting, the incore bmbt
will not be populated, leading to ASSERTs tripping over the incorrect
inode state.  Because the bitmap scans always lock bitmap buffers in
increasing order of file offset, it is appropriate for these two callers
to take a shared ILOCK to improve scalability.

To fix this problem, load both data and attr fork state into memory when
mounting the realtime inodes.  Realtime metadata files aren't supposed
to have an attr fork so the second step is likely a nop.

On most filesystems this is unlikely since the rtbitmap data fork is
usually in extents format, but it's possible to craft a filesystem that
will by fragmenting the free space in the data section and growfsing the
rt section.

Fixes: 4c934c7dd60c ("xfs: report realtime space information via the rtbitmap")
Also-Fixes: 46d9bfb5e706 ("xfs: cross-reference the realtime bitmap")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_rtalloc.c |   56 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 52 insertions(+), 4 deletions(-)

Comments

Dave Chinner Oct. 14, 2022, 3:49 a.m. UTC | #1
On Sun, Oct 02, 2022 at 11:20:02AM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> It turns out that GETFSMAP and online fsck have had a bug for years due
> to their use of ILOCK_SHARED to coordinate their linear scans of the
> realtime bitmap.  If the bitmap file's data fork happens to be in BTREE
> format and the scan occurs immediately after mounting, the incore bmbt
> will not be populated, leading to ASSERTs tripping over the incorrect
> inode state.  Because the bitmap scans always lock bitmap buffers in
> increasing order of file offset, it is appropriate for these two callers
> to take a shared ILOCK to improve scalability.
> 
> To fix this problem, load both data and attr fork state into memory when
> mounting the realtime inodes.  Realtime metadata files aren't supposed
> to have an attr fork so the second step is likely a nop.
> 
> On most filesystems this is unlikely since the rtbitmap data fork is
> usually in extents format, but it's possible to craft a filesystem that
> will by fragmenting the free space in the data section and growfsing the
> rt section.
> 
> Fixes: 4c934c7dd60c ("xfs: report realtime space information via the rtbitmap")
> Also-Fixes: 46d9bfb5e706 ("xfs: cross-reference the realtime bitmap")
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  fs/xfs/xfs_rtalloc.c |   56 ++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 52 insertions(+), 4 deletions(-)

Looks fine.

Reviewed-by: Dave Chinner <dchinner@redhat.com>
diff mbox series

Patch

diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 292d5e54a92c..b0846204c436 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1325,6 +1325,41 @@  xfs_rtalloc_reinit_frextents(
 	return 0;
 }
 
+/*
+ * Read in the bmbt of an rt metadata inode so that we never have to load them
+ * at runtime.  This enables the use of shared ILOCKs for rtbitmap scans.  Use
+ * an empty transaction to avoid deadlocking on loops in the bmbt.
+ */
+static inline int
+xfs_rtmount_iread_extents(
+	struct xfs_inode	*ip,
+	unsigned int		lock_class)
+{
+	struct xfs_trans	*tp;
+	int			error;
+
+	error = xfs_trans_alloc_empty(ip->i_mount, &tp);
+	if (error)
+		return error;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL | lock_class);
+
+	error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
+	if (error)
+		goto out_unlock;
+
+	if (xfs_inode_has_attr_fork(ip)) {
+		error = xfs_iread_extents(tp, ip, XFS_ATTR_FORK);
+		if (error)
+			goto out_unlock;
+	}
+
+out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL | lock_class);
+	xfs_trans_cancel(tp);
+	return error;
+}
+
 /*
  * Get the bitmap and summary inodes and the summary cache into the mount
  * structure at mount time.
@@ -1342,14 +1377,27 @@  xfs_rtmount_inodes(
 		return error;
 	ASSERT(mp->m_rbmip != NULL);
 
+	error = xfs_rtmount_iread_extents(mp->m_rbmip, XFS_ILOCK_RTBITMAP);
+	if (error)
+		goto out_rele_bitmap;
+
 	error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
-	if (error) {
-		xfs_irele(mp->m_rbmip);
-		return error;
-	}
+	if (error)
+		goto out_rele_bitmap;
 	ASSERT(mp->m_rsumip != NULL);
+
+	error = xfs_rtmount_iread_extents(mp->m_rsumip, XFS_ILOCK_RTSUM);
+	if (error)
+		goto out_rele_summary;
+
 	xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks);
 	return 0;
+
+out_rele_summary:
+	xfs_irele(mp->m_rsumip);
+out_rele_bitmap:
+	xfs_irele(mp->m_rbmip);
+	return error;
 }
 
 void