diff mbox series

[21/24] xfs: factor out a xfs_growfs_check_rtgeom helper

Message ID 172437087611.59588.7898768503459548119.stgit@frogsfrogsfrogs (mailing list archive)
State Accepted, archived
Headers show
Series [01/24] xfs: clean up the ISVALID macro in xfs_bmap_adjacent | expand

Commit Message

Darrick J. Wong Aug. 23, 2024, 12:20 a.m. UTC
From: Christoph Hellwig <hch@lst.de>

Split the check that the rtsummary fits into the log into a separate
helper, and use xfs_growfs_rt_alloc_fake_mount to calculate the new RT
geometry.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: avoid division for the 0-rtx growfs check]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_rtalloc.c |   43 +++++++++++++++++++++++++++++--------------
 1 file changed, 29 insertions(+), 14 deletions(-)

Comments

Dave Chinner Aug. 26, 2024, 2:06 a.m. UTC | #1
On Thu, Aug 22, 2024 at 05:20:07PM -0700, Darrick J. Wong wrote:
> From: Christoph Hellwig <hch@lst.de>
> 
> Split the check that the rtsummary fits into the log into a separate
> helper, and use xfs_growfs_rt_alloc_fake_mount to calculate the new RT
> geometry.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> [djwong: avoid division for the 0-rtx growfs check]
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  fs/xfs/xfs_rtalloc.c |   43 +++++++++++++++++++++++++++++--------------
>  1 file changed, 29 insertions(+), 14 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
> index 61231b1dc4b79..78a3879ad6193 100644
> --- a/fs/xfs/xfs_rtalloc.c
> +++ b/fs/xfs/xfs_rtalloc.c
> @@ -1023,6 +1023,31 @@ xfs_growfs_rtg(
>  	return error;
>  }
>  
> +static int
> +xfs_growfs_check_rtgeom(
> +	const struct xfs_mount	*mp,
> +	xfs_rfsblock_t		rblocks,
> +	xfs_extlen_t		rextsize)
> +{
> +	struct xfs_mount	*nmp;
> +	int			error = 0;
> +
> +	nmp = xfs_growfs_rt_alloc_fake_mount(mp, rblocks, rextsize);
> +	if (!nmp)
> +		return -ENOMEM;
> +
> +	/*
> +	 * New summary size can't be more than half the size of the log.  This
> +	 * prevents us from getting a log overflow, since we'll log basically
> +	 * the whole summary file at once.
> +	 */
> +	if (nmp->m_rsumblocks > (mp->m_sb.sb_logblocks >> 1))
> +		error = -EINVAL;

FWIW, the new size needs to be smaller than that, because the "half
the log size" must to include all the log metadata needed to
encapsulate that object. The grwofs transaction also logs inodes and
the superblock, so that also takes away from the maximum size of
the summary file....

-Dave.
Darrick J. Wong Aug. 26, 2024, 6:27 p.m. UTC | #2
On Mon, Aug 26, 2024 at 12:06:58PM +1000, Dave Chinner wrote:
> On Thu, Aug 22, 2024 at 05:20:07PM -0700, Darrick J. Wong wrote:
> > From: Christoph Hellwig <hch@lst.de>
> > 
> > Split the check that the rtsummary fits into the log into a separate
> > helper, and use xfs_growfs_rt_alloc_fake_mount to calculate the new RT
> > geometry.
> > 
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > [djwong: avoid division for the 0-rtx growfs check]
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  fs/xfs/xfs_rtalloc.c |   43 +++++++++++++++++++++++++++++--------------
> >  1 file changed, 29 insertions(+), 14 deletions(-)
> > 
> > 
> > diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
> > index 61231b1dc4b79..78a3879ad6193 100644
> > --- a/fs/xfs/xfs_rtalloc.c
> > +++ b/fs/xfs/xfs_rtalloc.c
> > @@ -1023,6 +1023,31 @@ xfs_growfs_rtg(
> >  	return error;
> >  }
> >  
> > +static int
> > +xfs_growfs_check_rtgeom(
> > +	const struct xfs_mount	*mp,
> > +	xfs_rfsblock_t		rblocks,
> > +	xfs_extlen_t		rextsize)
> > +{
> > +	struct xfs_mount	*nmp;
> > +	int			error = 0;
> > +
> > +	nmp = xfs_growfs_rt_alloc_fake_mount(mp, rblocks, rextsize);
> > +	if (!nmp)
> > +		return -ENOMEM;
> > +
> > +	/*
> > +	 * New summary size can't be more than half the size of the log.  This
> > +	 * prevents us from getting a log overflow, since we'll log basically
> > +	 * the whole summary file at once.
> > +	 */
> > +	if (nmp->m_rsumblocks > (mp->m_sb.sb_logblocks >> 1))
> > +		error = -EINVAL;
> 
> FWIW, the new size needs to be smaller than that, because the "half
> the log size" must to include all the log metadata needed to
> encapsulate that object. The grwofs transaction also logs inodes and
> the superblock, so that also takes away from the maximum size of
> the summary file....

<shrug> It's the same logic as what's there now, and there haven't been
any bug reports, have there?  Though I suppose that's just a reduction
of what?  One block for the rtbitmap, and (conservatively) two inodes
and a superblock?

n = nmp->m_rsumblocks + 1 + howmany(inodesize * 2, blocksize) + 1;
if (n > (logblocks / 2))
	return -EINVAL;

--D

> -Dave.
> 
> -- 
> Dave Chinner
> david@fromorbit.com
>
Dave Chinner Aug. 27, 2024, 1:29 a.m. UTC | #3
On Mon, Aug 26, 2024 at 11:27:34AM -0700, Darrick J. Wong wrote:
> On Mon, Aug 26, 2024 at 12:06:58PM +1000, Dave Chinner wrote:
> > On Thu, Aug 22, 2024 at 05:20:07PM -0700, Darrick J. Wong wrote:
> > > From: Christoph Hellwig <hch@lst.de>
> > > 
> > > Split the check that the rtsummary fits into the log into a separate
> > > helper, and use xfs_growfs_rt_alloc_fake_mount to calculate the new RT
> > > geometry.
> > > 
> > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > > [djwong: avoid division for the 0-rtx growfs check]
> > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > ---
> > >  fs/xfs/xfs_rtalloc.c |   43 +++++++++++++++++++++++++++++--------------
> > >  1 file changed, 29 insertions(+), 14 deletions(-)
> > > 
> > > 
> > > diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
> > > index 61231b1dc4b79..78a3879ad6193 100644
> > > --- a/fs/xfs/xfs_rtalloc.c
> > > +++ b/fs/xfs/xfs_rtalloc.c
> > > @@ -1023,6 +1023,31 @@ xfs_growfs_rtg(
> > >  	return error;
> > >  }
> > >  
> > > +static int
> > > +xfs_growfs_check_rtgeom(
> > > +	const struct xfs_mount	*mp,
> > > +	xfs_rfsblock_t		rblocks,
> > > +	xfs_extlen_t		rextsize)
> > > +{
> > > +	struct xfs_mount	*nmp;
> > > +	int			error = 0;
> > > +
> > > +	nmp = xfs_growfs_rt_alloc_fake_mount(mp, rblocks, rextsize);
> > > +	if (!nmp)
> > > +		return -ENOMEM;
> > > +
> > > +	/*
> > > +	 * New summary size can't be more than half the size of the log.  This
> > > +	 * prevents us from getting a log overflow, since we'll log basically
> > > +	 * the whole summary file at once.
> > > +	 */
> > > +	if (nmp->m_rsumblocks > (mp->m_sb.sb_logblocks >> 1))
> > > +		error = -EINVAL;
> > 
> > FWIW, the new size needs to be smaller than that, because the "half
> > the log size" must to include all the log metadata needed to
> > encapsulate that object. The grwofs transaction also logs inodes and
> > the superblock, so that also takes away from the maximum size of
> > the summary file....
> 
> <shrug> It's the same logic as what's there now, and there haven't been
> any bug reports, have there? 

No, none that I know of - it was just an observation that the code
doesn't actually guarantee what the comment says it should do.

> Though I suppose that's just a reduction
> of what?  One block for the rtbitmap, and (conservatively) two inodes
> and a superblock?

The rtbitmap update might touch a lot more than one block. The newly
allocated space in the rtbitmap inode is initialised to zeros, and
so the xfs_rtfree_range() call from the growfs code to mark the new
space free has to write all 1s to that range of the rtbitmap. This
is all done in a single transaction, so we might actually be logging
a *lot* of rtbitmap buffers here.

IIRC, there is a bit per rtextent, so in a 4kB buffer we can mark
32768 rtextents free. If they are 4kB each, then that's 128MB of
space tracked per rtbitmap block. This adds up to roughly 3.5MB of
log space for the rtbitmap updates per TB of grown rtdev space....

So, yeah, I think that calculation and comment is inaccurate, but we
don't have to fix this right now.

-Dave.
Darrick J. Wong Aug. 27, 2024, 4:27 a.m. UTC | #4
On Tue, Aug 27, 2024 at 11:29:40AM +1000, Dave Chinner wrote:
> On Mon, Aug 26, 2024 at 11:27:34AM -0700, Darrick J. Wong wrote:
> > On Mon, Aug 26, 2024 at 12:06:58PM +1000, Dave Chinner wrote:
> > > On Thu, Aug 22, 2024 at 05:20:07PM -0700, Darrick J. Wong wrote:
> > > > From: Christoph Hellwig <hch@lst.de>
> > > > 
> > > > Split the check that the rtsummary fits into the log into a separate
> > > > helper, and use xfs_growfs_rt_alloc_fake_mount to calculate the new RT
> > > > geometry.
> > > > 
> > > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > > > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > > > [djwong: avoid division for the 0-rtx growfs check]
> > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > > ---
> > > >  fs/xfs/xfs_rtalloc.c |   43 +++++++++++++++++++++++++++++--------------
> > > >  1 file changed, 29 insertions(+), 14 deletions(-)
> > > > 
> > > > 
> > > > diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
> > > > index 61231b1dc4b79..78a3879ad6193 100644
> > > > --- a/fs/xfs/xfs_rtalloc.c
> > > > +++ b/fs/xfs/xfs_rtalloc.c
> > > > @@ -1023,6 +1023,31 @@ xfs_growfs_rtg(
> > > >  	return error;
> > > >  }
> > > >  
> > > > +static int
> > > > +xfs_growfs_check_rtgeom(
> > > > +	const struct xfs_mount	*mp,
> > > > +	xfs_rfsblock_t		rblocks,
> > > > +	xfs_extlen_t		rextsize)
> > > > +{
> > > > +	struct xfs_mount	*nmp;
> > > > +	int			error = 0;
> > > > +
> > > > +	nmp = xfs_growfs_rt_alloc_fake_mount(mp, rblocks, rextsize);
> > > > +	if (!nmp)
> > > > +		return -ENOMEM;
> > > > +
> > > > +	/*
> > > > +	 * New summary size can't be more than half the size of the log.  This
> > > > +	 * prevents us from getting a log overflow, since we'll log basically
> > > > +	 * the whole summary file at once.
> > > > +	 */
> > > > +	if (nmp->m_rsumblocks > (mp->m_sb.sb_logblocks >> 1))
> > > > +		error = -EINVAL;
> > > 
> > > FWIW, the new size needs to be smaller than that, because the "half
> > > the log size" must to include all the log metadata needed to
> > > encapsulate that object. The grwofs transaction also logs inodes and
> > > the superblock, so that also takes away from the maximum size of
> > > the summary file....
> > 
> > <shrug> It's the same logic as what's there now, and there haven't been
> > any bug reports, have there? 
> 
> No, none that I know of - it was just an observation that the code
> doesn't actually guarantee what the comment says it should do.
> 
> > Though I suppose that's just a reduction
> > of what?  One block for the rtbitmap, and (conservatively) two inodes
> > and a superblock?
> 
> The rtbitmap update might touch a lot more than one block. The newly
> allocated space in the rtbitmap inode is initialised to zeros, and
> so the xfs_rtfree_range() call from the growfs code to mark the new
> space free has to write all 1s to that range of the rtbitmap. This
> is all done in a single transaction, so we might actually be logging
> a *lot* of rtbitmap buffers here.
> 
> IIRC, there is a bit per rtextent, so in a 4kB buffer we can mark
> 32768 rtextents free. If they are 4kB each, then that's 128MB of
> space tracked per rtbitmap block. This adds up to roughly 3.5MB of
> log space for the rtbitmap updates per TB of grown rtdev space....
> 
> So, yeah, I think that calculation and comment is inaccurate, but we
> don't have to fix this right now.

The kernel only "frees" the new space one rbmblock at a time, so I think
that's why this calculation has never misfired.  I /think/ that means
that each transaction only ends up logging two rtsummary blocks at a
time?  One to decrement a counter, and again to increment one another
level up?

--D

> -Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
>
Dave Chinner Aug. 27, 2024, 10:16 p.m. UTC | #5
On Mon, Aug 26, 2024 at 09:27:24PM -0700, Darrick J. Wong wrote:
> On Tue, Aug 27, 2024 at 11:29:40AM +1000, Dave Chinner wrote:
> > On Mon, Aug 26, 2024 at 11:27:34AM -0700, Darrick J. Wong wrote:
> > > On Mon, Aug 26, 2024 at 12:06:58PM +1000, Dave Chinner wrote:
> > > > On Thu, Aug 22, 2024 at 05:20:07PM -0700, Darrick J. Wong wrote:
> > > > > From: Christoph Hellwig <hch@lst.de>
> > > > > 
> > > > > Split the check that the rtsummary fits into the log into a separate
> > > > > helper, and use xfs_growfs_rt_alloc_fake_mount to calculate the new RT
> > > > > geometry.
> > > > > 
> > > > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > > > > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > > > > [djwong: avoid division for the 0-rtx growfs check]
> > > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > > > ---
> > > > >  fs/xfs/xfs_rtalloc.c |   43 +++++++++++++++++++++++++++++--------------
> > > > >  1 file changed, 29 insertions(+), 14 deletions(-)
> > > > > 
> > > > > 
> > > > > diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
> > > > > index 61231b1dc4b79..78a3879ad6193 100644
> > > > > --- a/fs/xfs/xfs_rtalloc.c
> > > > > +++ b/fs/xfs/xfs_rtalloc.c
> > > > > @@ -1023,6 +1023,31 @@ xfs_growfs_rtg(
> > > > >  	return error;
> > > > >  }
> > > > >  
> > > > > +static int
> > > > > +xfs_growfs_check_rtgeom(
> > > > > +	const struct xfs_mount	*mp,
> > > > > +	xfs_rfsblock_t		rblocks,
> > > > > +	xfs_extlen_t		rextsize)
> > > > > +{
> > > > > +	struct xfs_mount	*nmp;
> > > > > +	int			error = 0;
> > > > > +
> > > > > +	nmp = xfs_growfs_rt_alloc_fake_mount(mp, rblocks, rextsize);
> > > > > +	if (!nmp)
> > > > > +		return -ENOMEM;
> > > > > +
> > > > > +	/*
> > > > > +	 * New summary size can't be more than half the size of the log.  This
> > > > > +	 * prevents us from getting a log overflow, since we'll log basically
> > > > > +	 * the whole summary file at once.
> > > > > +	 */
> > > > > +	if (nmp->m_rsumblocks > (mp->m_sb.sb_logblocks >> 1))
> > > > > +		error = -EINVAL;
> > > > 
> > > > FWIW, the new size needs to be smaller than that, because the "half
> > > > the log size" must to include all the log metadata needed to
> > > > encapsulate that object. The grwofs transaction also logs inodes and
> > > > the superblock, so that also takes away from the maximum size of
> > > > the summary file....
> > > 
> > > <shrug> It's the same logic as what's there now, and there haven't been
> > > any bug reports, have there? 
> > 
> > No, none that I know of - it was just an observation that the code
> > doesn't actually guarantee what the comment says it should do.
> > 
> > > Though I suppose that's just a reduction
> > > of what?  One block for the rtbitmap, and (conservatively) two inodes
> > > and a superblock?
> > 
> > The rtbitmap update might touch a lot more than one block. The newly
> > allocated space in the rtbitmap inode is initialised to zeros, and
> > so the xfs_rtfree_range() call from the growfs code to mark the new
> > space free has to write all 1s to that range of the rtbitmap. This
> > is all done in a single transaction, so we might actually be logging
> > a *lot* of rtbitmap buffers here.
> > 
> > IIRC, there is a bit per rtextent, so in a 4kB buffer we can mark
> > 32768 rtextents free. If they are 4kB each, then that's 128MB of
> > space tracked per rtbitmap block. This adds up to roughly 3.5MB of
> > log space for the rtbitmap updates per TB of grown rtdev space....
> > 
> > So, yeah, I think that calculation and comment is inaccurate, but we
> > don't have to fix this right now.
> 
> The kernel only "frees" the new space one rbmblock at a time, so I think
> that's why this calculation has never misfired.

not quite. It iterates all the rbmblocks in the given range (i.e.
the entire extent being freed) in xfs_rtmodify_range() in
a single transaction, but...

> I /think/ that means
> that each transaction only ends up logging two rtsummary blocks at a
> time?  One to decrement a counter, and again to increment one another
> level up?

... we only do one update of the summary blocks per extent being
freed (i.e. in xfs_rtfree_range() after the call to
xfs_rtmodify_range()). So, yes, we should only end up logging two
rtsummary blocks per extent being freed, but the number of rbmblocks
logged in that same transaction is O(extent length).

-Dave.
diff mbox series

Patch

diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 61231b1dc4b79..78a3879ad6193 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1023,6 +1023,31 @@  xfs_growfs_rtg(
 	return error;
 }
 
+static int
+xfs_growfs_check_rtgeom(
+	const struct xfs_mount	*mp,
+	xfs_rfsblock_t		rblocks,
+	xfs_extlen_t		rextsize)
+{
+	struct xfs_mount	*nmp;
+	int			error = 0;
+
+	nmp = xfs_growfs_rt_alloc_fake_mount(mp, rblocks, rextsize);
+	if (!nmp)
+		return -ENOMEM;
+
+	/*
+	 * New summary size can't be more than half the size of the log.  This
+	 * prevents us from getting a log overflow, since we'll log basically
+	 * the whole summary file at once.
+	 */
+	if (nmp->m_rsumblocks > (mp->m_sb.sb_logblocks >> 1))
+		error = -EINVAL;
+
+	kfree(nmp);
+	return error;
+}
+
 /*
  * Grow the realtime area of the filesystem.
  */
@@ -1031,9 +1056,6 @@  xfs_growfs_rt(
 	xfs_mount_t	*mp,		/* mount point for filesystem */
 	xfs_growfs_rt_t	*in)		/* growfs rt input struct */
 {
-	xfs_rtxnum_t		nrextents;
-	xfs_extlen_t		nrbmblocks;
-	xfs_extlen_t		nrsumblocks;
 	struct xfs_buf		*bp;
 	xfs_agblock_t		old_rextsize = mp->m_sb.sb_rextsize;
 	int			error;
@@ -1082,20 +1104,13 @@  xfs_growfs_rt(
 	/*
 	 * Calculate new parameters.  These are the final values to be reached.
 	 */
-	nrextents = div_u64(in->newblocks, in->extsize);
 	error = -EINVAL;
-	if (nrextents == 0)
+	if (in->newblocks < in->extsize)
 		goto out_unlock;
-	nrbmblocks = xfs_rtbitmap_blockcount(mp, nrextents);
-	nrsumblocks = xfs_rtsummary_blockcount(mp,
-			xfs_compute_rextslog(nrextents) + 1, nrbmblocks);
 
-	/*
-	 * New summary size can't be more than half the size of
-	 * the log.  This prevents us from getting a log overflow,
-	 * since we'll log basically the whole summary file at once.
-	 */
-	if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1))
+	/* Make sure the new fs size won't cause problems with the log. */
+	error = xfs_growfs_check_rtgeom(mp, in->newblocks, in->extsize);
+	if (error)
 		goto out_unlock;
 
 	error = xfs_growfs_rtg(mp, in->newblocks, in->extsize);