diff mbox series

[2/4] xfs: set bnobt/cntbt numrecs correctly when formatting new AGs

Message ID 168263574554.1717721.6730628291355995988.stgit@frogsfrogsfrogs (mailing list archive)
State Superseded, archived
Headers show
Series xfs: bug fixes for 6.4-rc1 | expand

Commit Message

Darrick J. Wong April 27, 2023, 10:49 p.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Through generic/300, I discovered that mkfs.xfs creates corrupt
filesystems when given these parameters:

# mkfs.xfs -d size=512M /dev/sda -f -d su=128k,sw=4 --unsupported
Filesystems formatted with --unsupported are not supported!!
meta-data=/dev/sda               isize=512    agcount=8, agsize=16352 blks
         =                       sectsz=512   attr=2, projid32bit=1
         =                       crc=1        finobt=1, sparse=1, rmapbt=1
         =                       reflink=1    bigtime=1 inobtcount=1 nrext64=1
data     =                       bsize=4096   blocks=130816, imaxpct=25
         =                       sunit=32     swidth=128 blks
naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
log      =internal log           bsize=4096   blocks=8192, version=2
         =                       sectsz=512   sunit=32 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0
         =                       rgcount=0    rgsize=0 blks
Discarding blocks...Done.
# xfs_repair -n /dev/sda
Phase 1 - find and verify superblock...
        - reporting progress in intervals of 15 minutes
Phase 2 - using internal log
        - zero log...
        - 16:30:50: zeroing log - 16320 of 16320 blocks done
        - scan filesystem freespace and inode maps...
agf_freeblks 25, counted 0 in ag 4
sb_fdblocks 8823, counted 8798

The root cause of this problem is the numrecs handling in
xfs_freesp_init_recs, which is used to initialize a new AG.  Prior to
calling the function, we set up the new bnobt block with numrecs == 1
and rely on _freesp_init_recs to format that new record.  If the last
record created has a blockcount of zero, then it sets numrecs = 0.

That last bit isn't correct if the AG contains the log, the start of the
log is not immediately after the initial blocks due to stripe alignment,
and the end of the log is perfectly aligned with the end of the AG.  For
this case, we actually formatted a single bnobt record to handle the
free space before the start of the (stripe aligned) log, and incremented
arec to try to format a second record.  That second record turned out to
be unnecessary, so what we really want is to leave numrecs at 1.

The numrecs handling itself is overly complicated because a different
function sets numrecs == 1.  Change the bnobt creation code to start
with numrecs set to zero and only increment it after successfully
formatting a free space extent into the btree block.

Fixes: f327a00745ff ("xfs: account for log space when formatting new AGs")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/libxfs/xfs_ag.c |   10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

Comments

Dave Chinner April 28, 2023, 2:11 a.m. UTC | #1
On Thu, Apr 27, 2023 at 03:49:05PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Through generic/300, I discovered that mkfs.xfs creates corrupt
> filesystems when given these parameters:
> 
> # mkfs.xfs -d size=512M /dev/sda -f -d su=128k,sw=4 --unsupported
> Filesystems formatted with --unsupported are not supported!!
> meta-data=/dev/sda               isize=512    agcount=8, agsize=16352 blks
>          =                       sectsz=512   attr=2, projid32bit=1
>          =                       crc=1        finobt=1, sparse=1, rmapbt=1
>          =                       reflink=1    bigtime=1 inobtcount=1 nrext64=1
> data     =                       bsize=4096   blocks=130816, imaxpct=25
>          =                       sunit=32     swidth=128 blks
> naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
> log      =internal log           bsize=4096   blocks=8192, version=2
>          =                       sectsz=512   sunit=32 blks, lazy-count=1
> realtime =none                   extsz=4096   blocks=0, rtextents=0
>          =                       rgcount=0    rgsize=0 blks
> Discarding blocks...Done.
> # xfs_repair -n /dev/sda
> Phase 1 - find and verify superblock...
>         - reporting progress in intervals of 15 minutes
> Phase 2 - using internal log
>         - zero log...
>         - 16:30:50: zeroing log - 16320 of 16320 blocks done
>         - scan filesystem freespace and inode maps...
> agf_freeblks 25, counted 0 in ag 4
> sb_fdblocks 8823, counted 8798
> 
> The root cause of this problem is the numrecs handling in
> xfs_freesp_init_recs, which is used to initialize a new AG.  Prior to
> calling the function, we set up the new bnobt block with numrecs == 1
> and rely on _freesp_init_recs to format that new record.  If the last
> record created has a blockcount of zero, then it sets numrecs = 0.
> 
> That last bit isn't correct if the AG contains the log, the start of the
> log is not immediately after the initial blocks due to stripe alignment,
> and the end of the log is perfectly aligned with the end of the AG.  For
> this case, we actually formatted a single bnobt record to handle the
> free space before the start of the (stripe aligned) log, and incremented
> arec to try to format a second record.  That second record turned out to
> be unnecessary, so what we really want is to leave numrecs at 1.
> 
> The numrecs handling itself is overly complicated because a different
> function sets numrecs == 1.  Change the bnobt creation code to start
> with numrecs set to zero and only increment it after successfully
> formatting a free space extent into the btree block.
> 
> Fixes: f327a00745ff ("xfs: account for log space when formatting new AGs")
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  fs/xfs/libxfs/xfs_ag.c |   10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
> index 1b078bbbf225..4481ce8ead9d 100644
> --- a/fs/xfs/libxfs/xfs_ag.c
> +++ b/fs/xfs/libxfs/xfs_ag.c
> @@ -499,6 +499,7 @@ xfs_freesp_init_recs(
>  			 */
>  			arec->ar_blockcount = cpu_to_be32(start -
>  						mp->m_ag_prealloc_blocks);
> +			be16_add_cpu(&block->bb_numrecs, 1);
>  			nrec = arec + 1;
>  
>  			/*
> @@ -509,7 +510,6 @@ xfs_freesp_init_recs(
>  					be32_to_cpu(arec->ar_startblock) +
>  					be32_to_cpu(arec->ar_blockcount));
>  			arec = nrec;
> -			be16_add_cpu(&block->bb_numrecs, 1);
>  		}
>  		/*
>  		 * Change record start to after the internal log
> @@ -525,8 +525,8 @@ xfs_freesp_init_recs(
>  	 */
>  	arec->ar_blockcount = cpu_to_be32(id->agsize -
>  					  be32_to_cpu(arec->ar_startblock));
> -	if (!arec->ar_blockcount)
> -		block->bb_numrecs = 0;
> +	if (arec->ar_blockcount)
> +		be16_add_cpu(&block->bb_numrecs, 1);

Ok, but I think the comment above this about resetting the count
back to zero needs to be updated as we aren't resetting anything
back to zero anymore.

-Dave.
Darrick J. Wong April 28, 2023, 4:29 a.m. UTC | #2
On Fri, Apr 28, 2023 at 12:11:25PM +1000, Dave Chinner wrote:
> On Thu, Apr 27, 2023 at 03:49:05PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Through generic/300, I discovered that mkfs.xfs creates corrupt
> > filesystems when given these parameters:
> > 
> > # mkfs.xfs -d size=512M /dev/sda -f -d su=128k,sw=4 --unsupported
> > Filesystems formatted with --unsupported are not supported!!
> > meta-data=/dev/sda               isize=512    agcount=8, agsize=16352 blks
> >          =                       sectsz=512   attr=2, projid32bit=1
> >          =                       crc=1        finobt=1, sparse=1, rmapbt=1
> >          =                       reflink=1    bigtime=1 inobtcount=1 nrext64=1
> > data     =                       bsize=4096   blocks=130816, imaxpct=25
> >          =                       sunit=32     swidth=128 blks
> > naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
> > log      =internal log           bsize=4096   blocks=8192, version=2
> >          =                       sectsz=512   sunit=32 blks, lazy-count=1
> > realtime =none                   extsz=4096   blocks=0, rtextents=0
> >          =                       rgcount=0    rgsize=0 blks
> > Discarding blocks...Done.
> > # xfs_repair -n /dev/sda
> > Phase 1 - find and verify superblock...
> >         - reporting progress in intervals of 15 minutes
> > Phase 2 - using internal log
> >         - zero log...
> >         - 16:30:50: zeroing log - 16320 of 16320 blocks done
> >         - scan filesystem freespace and inode maps...
> > agf_freeblks 25, counted 0 in ag 4
> > sb_fdblocks 8823, counted 8798
> > 
> > The root cause of this problem is the numrecs handling in
> > xfs_freesp_init_recs, which is used to initialize a new AG.  Prior to
> > calling the function, we set up the new bnobt block with numrecs == 1
> > and rely on _freesp_init_recs to format that new record.  If the last
> > record created has a blockcount of zero, then it sets numrecs = 0.
> > 
> > That last bit isn't correct if the AG contains the log, the start of the
> > log is not immediately after the initial blocks due to stripe alignment,
> > and the end of the log is perfectly aligned with the end of the AG.  For
> > this case, we actually formatted a single bnobt record to handle the
> > free space before the start of the (stripe aligned) log, and incremented
> > arec to try to format a second record.  That second record turned out to
> > be unnecessary, so what we really want is to leave numrecs at 1.
> > 
> > The numrecs handling itself is overly complicated because a different
> > function sets numrecs == 1.  Change the bnobt creation code to start
> > with numrecs set to zero and only increment it after successfully
> > formatting a free space extent into the btree block.
> > 
> > Fixes: f327a00745ff ("xfs: account for log space when formatting new AGs")
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  fs/xfs/libxfs/xfs_ag.c |   10 +++++-----
> >  1 file changed, 5 insertions(+), 5 deletions(-)
> > 
> > 
> > diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
> > index 1b078bbbf225..4481ce8ead9d 100644
> > --- a/fs/xfs/libxfs/xfs_ag.c
> > +++ b/fs/xfs/libxfs/xfs_ag.c
> > @@ -499,6 +499,7 @@ xfs_freesp_init_recs(
> >  			 */
> >  			arec->ar_blockcount = cpu_to_be32(start -
> >  						mp->m_ag_prealloc_blocks);
> > +			be16_add_cpu(&block->bb_numrecs, 1);
> >  			nrec = arec + 1;
> >  
> >  			/*
> > @@ -509,7 +510,6 @@ xfs_freesp_init_recs(
> >  					be32_to_cpu(arec->ar_startblock) +
> >  					be32_to_cpu(arec->ar_blockcount));
> >  			arec = nrec;
> > -			be16_add_cpu(&block->bb_numrecs, 1);
> >  		}
> >  		/*
> >  		 * Change record start to after the internal log
> > @@ -525,8 +525,8 @@ xfs_freesp_init_recs(
> >  	 */
> >  	arec->ar_blockcount = cpu_to_be32(id->agsize -
> >  					  be32_to_cpu(arec->ar_startblock));
> > -	if (!arec->ar_blockcount)
> > -		block->bb_numrecs = 0;
> > +	if (arec->ar_blockcount)
> > +		be16_add_cpu(&block->bb_numrecs, 1);
> 
> Ok, but I think the comment above this about resetting the count
> back to zero needs to be updated as we aren't resetting anything
> back to zero anymore.

Done:

	/*
	 * Calculate the block count of this record; if it is nonzero,
	 * increment the record count.
	 */

--D

> -Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 1b078bbbf225..4481ce8ead9d 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -499,6 +499,7 @@  xfs_freesp_init_recs(
 			 */
 			arec->ar_blockcount = cpu_to_be32(start -
 						mp->m_ag_prealloc_blocks);
+			be16_add_cpu(&block->bb_numrecs, 1);
 			nrec = arec + 1;
 
 			/*
@@ -509,7 +510,6 @@  xfs_freesp_init_recs(
 					be32_to_cpu(arec->ar_startblock) +
 					be32_to_cpu(arec->ar_blockcount));
 			arec = nrec;
-			be16_add_cpu(&block->bb_numrecs, 1);
 		}
 		/*
 		 * Change record start to after the internal log
@@ -525,8 +525,8 @@  xfs_freesp_init_recs(
 	 */
 	arec->ar_blockcount = cpu_to_be32(id->agsize -
 					  be32_to_cpu(arec->ar_startblock));
-	if (!arec->ar_blockcount)
-		block->bb_numrecs = 0;
+	if (arec->ar_blockcount)
+		be16_add_cpu(&block->bb_numrecs, 1);
 }
 
 /*
@@ -538,7 +538,7 @@  xfs_bnoroot_init(
 	struct xfs_buf		*bp,
 	struct aghdr_init_data	*id)
 {
-	xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno);
+	xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 0, id->agno);
 	xfs_freesp_init_recs(mp, bp, id);
 }
 
@@ -548,7 +548,7 @@  xfs_cntroot_init(
 	struct xfs_buf		*bp,
 	struct aghdr_init_data	*id)
 {
-	xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno);
+	xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 0, id->agno);
 	xfs_freesp_init_recs(mp, bp, id);
 }