diff mbox series

[08/10] xfs: hoist the code that moves the incore inode fork broot memory

Message ID 172480131644.2291268.12671154009132010264.stgit@frogsfrogsfrogs (mailing list archive)
State Not Applicable, archived
Headers show
Series [01/10] xfs: fix C++ compilation errors in xfs_fs.h | expand

Commit Message

Darrick J. Wong Aug. 27, 2024, 11:35 p.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Whenever we change the size of the memory buffer holding an inode fork
btree root block, we have to copy the contents over.  Refactor all this
into a single function that handles both, in preparation for making
xfs_iroot_realloc more generic.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/libxfs/xfs_inode_fork.c |   87 ++++++++++++++++++++++++++--------------
 1 file changed, 56 insertions(+), 31 deletions(-)

Comments

Christoph Hellwig Aug. 28, 2024, 4:20 a.m. UTC | #1
On Tue, Aug 27, 2024 at 04:35:48PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Whenever we change the size of the memory buffer holding an inode fork
> btree root block, we have to copy the contents over.  Refactor all this
> into a single function that handles both, in preparation for making
> xfs_iroot_realloc more generic.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  fs/xfs/libxfs/xfs_inode_fork.c |   87 ++++++++++++++++++++++++++--------------
>  1 file changed, 56 insertions(+), 31 deletions(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
> index 60646a6c32ec7..307207473abdb 100644
> --- a/fs/xfs/libxfs/xfs_inode_fork.c
> +++ b/fs/xfs/libxfs/xfs_inode_fork.c
> @@ -387,6 +387,50 @@ xfs_iroot_free(
>  	ifp->if_broot = NULL;
>  }
>  
> +/* Move the bmap btree root from one incore buffer to another. */
> +static void
> +xfs_ifork_move_broot(
> +	struct xfs_inode	*ip,
> +	int			whichfork,
> +	struct xfs_btree_block	*dst_broot,
> +	size_t			dst_bytes,
> +	struct xfs_btree_block	*src_broot,
> +	size_t			src_bytes,
> +	unsigned int		numrecs)
> +{
> +	struct xfs_mount	*mp = ip->i_mount;
> +	void			*dptr;
> +	void			*sptr;
> +
> +	ASSERT(xfs_bmap_bmdr_space(src_broot) <= xfs_inode_fork_size(ip, whichfork));

Overly long line.

Otherwise looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
Dave Chinner Aug. 29, 2024, 2:54 a.m. UTC | #2
On Tue, Aug 27, 2024 at 04:35:48PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Whenever we change the size of the memory buffer holding an inode fork
> btree root block, we have to copy the contents over.  Refactor all this
> into a single function that handles both, in preparation for making
> xfs_iroot_realloc more generic.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  fs/xfs/libxfs/xfs_inode_fork.c |   87 ++++++++++++++++++++++++++--------------
>  1 file changed, 56 insertions(+), 31 deletions(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
> index 60646a6c32ec7..307207473abdb 100644
> --- a/fs/xfs/libxfs/xfs_inode_fork.c
> +++ b/fs/xfs/libxfs/xfs_inode_fork.c
> @@ -387,6 +387,50 @@ xfs_iroot_free(
>  	ifp->if_broot = NULL;
>  }
>  
> +/* Move the bmap btree root from one incore buffer to another. */
> +static void
> +xfs_ifork_move_broot(
> +	struct xfs_inode	*ip,
> +	int			whichfork,
> +	struct xfs_btree_block	*dst_broot,
> +	size_t			dst_bytes,
> +	struct xfs_btree_block	*src_broot,
> +	size_t			src_bytes,
> +	unsigned int		numrecs)
> +{
> +	struct xfs_mount	*mp = ip->i_mount;
> +	void			*dptr;
> +	void			*sptr;
> +
> +	ASSERT(xfs_bmap_bmdr_space(src_broot) <= xfs_inode_fork_size(ip, whichfork));

We pass whichfork just for this debug check. Can you pull this up
to the callers?

> +
> +	/*
> +	 * We always have to move the pointers because they are not butted
> +	 * against the btree block header.
> +	 */
> +	if (numrecs) {
> +		sptr = xfs_bmap_broot_ptr_addr(mp, src_broot, 1, src_bytes);
> +		dptr = xfs_bmap_broot_ptr_addr(mp, dst_broot, 1, dst_bytes);
> +		memmove(dptr, sptr, numrecs * sizeof(xfs_fsblock_t));
> +	}
> +
> +	if (src_broot == dst_broot)
> +		return;

Urk. So this is encoding caller logic directly into this function.
ie. the grow cases uses krealloc() which copies the keys and
pointers but still needs the pointers moved. The buffer is large
enough for that, so it passes src and dst as the same buffer and
this code then jumps out after copying the ptrs (a second time) to
their final resting place.

> +	/*
> +	 * If the root is being totally relocated, we have to migrate the block
> +	 * header and the keys that come after it.
> +	 */
> +	memcpy(dst_broot, src_broot, xfs_bmbt_block_len(mp));
> +
> +	/* Now copy the keys, which come right after the header. */
> +	if (numrecs) {
> +		sptr = xfs_bmbt_key_addr(mp, src_broot, 1);
> +		dptr = xfs_bmbt_key_addr(mp, dst_broot, 1);
> +		memcpy(dptr, sptr, numrecs * sizeof(struct xfs_bmbt_key));
> +	}

And here we do the key copy for the shrink case where we technically
don't need separate buffers but we really want to minimise memory
usage if we can so we reallocate a smaller buffer and free the
original larger one.

Given this, I think this code is more natural by doing all the
allocate/free/copy ourselves instead of using krealloc() and it's
implicit copy for one of the cases.

i.e. rename this function xfs_ifork_realloc_broot() and make it do
this:

{
	struct xfs_btree_block *src = ifp->if_broot;
	struct xfs_btree_block *dst = NULL;

	if (!numrecs)
		goto out_free_src;

	dst = kmalloc(new_size);

	/* copy block header */
	memcpy(dst, src, xfs_bmbt_block_len(mp));

	/* copy records */
	sptr = xfs_bmbt_key_addr(mp, src, 1);
	dptr = xfs_bmbt_key_addr(mp, dst, 1);
	memcpy(dptr, sptr, numrecs * sizeof(struct xfs_bmbt_key));

	/* copy pointers */
	sptr = xfs_bmap_broot_ptr_addr(mp, src_broot, 1, src_bytes);
	dptr = xfs_bmap_broot_ptr_addr(mp, dst_broot, 1, dst_bytes);
	memmove(dptr, sptr, numrecs * sizeof(xfs_fsblock_t));

out_free_src:
	kfree(src);
	ifp->if_broot = dst;
	ifp->if_broot_bytes = new_size;
}

And the callers are now both:

	xfs_ifork_realloc_broot(mp, ifp, new_size, old_size, numrecs);

This also naturally handles the "reduce to zero size" without
needing any special case code, it avoids the double pointer copy on
grow, and the operation logic is simple, obvious and easy to
understand...

-Dave.
Darrick J. Wong Aug. 29, 2024, 11:35 p.m. UTC | #3
On Thu, Aug 29, 2024 at 12:54:32PM +1000, Dave Chinner wrote:
> On Tue, Aug 27, 2024 at 04:35:48PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Whenever we change the size of the memory buffer holding an inode fork
> > btree root block, we have to copy the contents over.  Refactor all this
> > into a single function that handles both, in preparation for making
> > xfs_iroot_realloc more generic.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  fs/xfs/libxfs/xfs_inode_fork.c |   87 ++++++++++++++++++++++++++--------------
> >  1 file changed, 56 insertions(+), 31 deletions(-)
> > 
> > 
> > diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
> > index 60646a6c32ec7..307207473abdb 100644
> > --- a/fs/xfs/libxfs/xfs_inode_fork.c
> > +++ b/fs/xfs/libxfs/xfs_inode_fork.c
> > @@ -387,6 +387,50 @@ xfs_iroot_free(
> >  	ifp->if_broot = NULL;
> >  }
> >  
> > +/* Move the bmap btree root from one incore buffer to another. */
> > +static void
> > +xfs_ifork_move_broot(
> > +	struct xfs_inode	*ip,
> > +	int			whichfork,
> > +	struct xfs_btree_block	*dst_broot,
> > +	size_t			dst_bytes,
> > +	struct xfs_btree_block	*src_broot,
> > +	size_t			src_bytes,
> > +	unsigned int		numrecs)
> > +{
> > +	struct xfs_mount	*mp = ip->i_mount;
> > +	void			*dptr;
> > +	void			*sptr;
> > +
> > +	ASSERT(xfs_bmap_bmdr_space(src_broot) <= xfs_inode_fork_size(ip, whichfork));
> 
> We pass whichfork just for this debug check. Can you pull this up
> to the callers?

I guess I could do that, but the rtrmap patchset adds its own broot
shrink/grow function specific to rtrmap btree inodes:

static void
xfs_rtrmapbt_broot_move(...)
{
	...
	ASSERT(xfs_rtrmap_droot_space(src_broot) <=
	       xfs_inode_fork_size(ip, whichfork));

so I didn't want to add yet another indirect call just for an assertion.

> > +
> > +	/*
> > +	 * We always have to move the pointers because they are not butted
> > +	 * against the btree block header.
> > +	 */
> > +	if (numrecs) {
> > +		sptr = xfs_bmap_broot_ptr_addr(mp, src_broot, 1, src_bytes);
> > +		dptr = xfs_bmap_broot_ptr_addr(mp, dst_broot, 1, dst_bytes);
> > +		memmove(dptr, sptr, numrecs * sizeof(xfs_fsblock_t));
> > +	}
> > +
> > +	if (src_broot == dst_broot)
> > +		return;
> 
> Urk. So this is encoding caller logic directly into this function.
> ie. the grow cases uses krealloc() which copies the keys and
> pointers but still needs the pointers moved. The buffer is large
> enough for that, so it passes src and dst as the same buffer and
> this code then jumps out after copying the ptrs (a second time) to
> their final resting place.

<nod>

> > +	/*
> > +	 * If the root is being totally relocated, we have to migrate the block
> > +	 * header and the keys that come after it.
> > +	 */
> > +	memcpy(dst_broot, src_broot, xfs_bmbt_block_len(mp));
> > +
> > +	/* Now copy the keys, which come right after the header. */
> > +	if (numrecs) {
> > +		sptr = xfs_bmbt_key_addr(mp, src_broot, 1);
> > +		dptr = xfs_bmbt_key_addr(mp, dst_broot, 1);
> > +		memcpy(dptr, sptr, numrecs * sizeof(struct xfs_bmbt_key));
> > +	}
> 
> And here we do the key copy for the shrink case where we technically
> don't need separate buffers but we really want to minimise memory
> usage if we can so we reallocate a smaller buffer and free the
> original larger one.
> 
> Given this, I think this code is more natural by doing all the
> allocate/free/copy ourselves instead of using krealloc() and it's
> implicit copy for one of the cases.
> 
> i.e. rename this function xfs_ifork_realloc_broot() and make it do
> this:
> 
> {
> 	struct xfs_btree_block *src = ifp->if_broot;
> 	struct xfs_btree_block *dst = NULL;
> 
> 	if (!numrecs)
> 		goto out_free_src;
> 
> 	dst = kmalloc(new_size);
> 
> 	/* copy block header */
> 	memcpy(dst, src, xfs_bmbt_block_len(mp));

I'm not sure I like replacing krealloc with kmalloc here.  For a grow
operation, if the new and old object sizes are close enough that we
reuse the existing slab object, then we only have to move the pointers.
In the best case, the object expands, so all the bytes we had before are
still live and we touch fewer cachelines.  In the worst case we get a
new object, but that's roughly exponential.

For a shrink operation, we definitely want the alloc -> copy -> free
logic because there's no way to guarantee that krealloc-down isn't a nop
operation, which wastes memory.

But I see that this function isn't very cohesive and could be split into
separate ->grow and ->shrink functions that do their own allocations.

Or seeing how the only callers of xfs_iroot_realloc are the btree code
itself, maybe I should just refactor this into a single ->broot_realloc
function in the btree ops which will cut out a lot of indirect calls
from the iroot code.

Yeah.  I'm gonna go do that.  Disregard patch 5 onwards.

> 	/* copy records */
> 	sptr = xfs_bmbt_key_addr(mp, src, 1);
> 	dptr = xfs_bmbt_key_addr(mp, dst, 1);
> 	memcpy(dptr, sptr, numrecs * sizeof(struct xfs_bmbt_key));
> 
> 	/* copy pointers */
> 	sptr = xfs_bmap_broot_ptr_addr(mp, src_broot, 1, src_bytes);
> 	dptr = xfs_bmap_broot_ptr_addr(mp, dst_broot, 1, dst_bytes);
> 	memmove(dptr, sptr, numrecs * sizeof(xfs_fsblock_t));
> 
> out_free_src:
> 	kfree(src);
> 	ifp->if_broot = dst;
> 	ifp->if_broot_bytes = new_size;
> }
> 
> And the callers are now both:
> 
> 	xfs_ifork_realloc_broot(mp, ifp, new_size, old_size, numrecs);
> 
> This also naturally handles the "reduce to zero size" without
> needing any special case code, it avoids the double pointer copy on
> grow, and the operation logic is simple, obvious and easy to
> understand...

Hmm.  The rtrmap patchset starts by moving xfs_ifork_move_broot to
xfs_bmap_btree.c and virtualizes the broot grow/shrink operation to
become a per-btree type operation.  The rtreflink series expands this
usage.

--D

> -Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
>
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 60646a6c32ec7..307207473abdb 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -387,6 +387,50 @@  xfs_iroot_free(
 	ifp->if_broot = NULL;
 }
 
+/* Move the bmap btree root from one incore buffer to another. */
+static void
+xfs_ifork_move_broot(
+	struct xfs_inode	*ip,
+	int			whichfork,
+	struct xfs_btree_block	*dst_broot,
+	size_t			dst_bytes,
+	struct xfs_btree_block	*src_broot,
+	size_t			src_bytes,
+	unsigned int		numrecs)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	void			*dptr;
+	void			*sptr;
+
+	ASSERT(xfs_bmap_bmdr_space(src_broot) <= xfs_inode_fork_size(ip, whichfork));
+
+	/*
+	 * We always have to move the pointers because they are not butted
+	 * against the btree block header.
+	 */
+	if (numrecs) {
+		sptr = xfs_bmap_broot_ptr_addr(mp, src_broot, 1, src_bytes);
+		dptr = xfs_bmap_broot_ptr_addr(mp, dst_broot, 1, dst_bytes);
+		memmove(dptr, sptr, numrecs * sizeof(xfs_fsblock_t));
+	}
+
+	if (src_broot == dst_broot)
+		return;
+
+	/*
+	 * If the root is being totally relocated, we have to migrate the block
+	 * header and the keys that come after it.
+	 */
+	memcpy(dst_broot, src_broot, xfs_bmbt_block_len(mp));
+
+	/* Now copy the keys, which come right after the header. */
+	if (numrecs) {
+		sptr = xfs_bmbt_key_addr(mp, src_broot, 1);
+		dptr = xfs_bmbt_key_addr(mp, dst_broot, 1);
+		memcpy(dptr, sptr, numrecs * sizeof(struct xfs_bmbt_key));
+	}
+}
+
 /*
  * Reallocate the space for if_broot based on the number of records
  * being added or deleted as indicated in rec_diff.  Move the records
@@ -413,12 +457,11 @@  xfs_iroot_realloc(
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	int			cur_max;
-	struct xfs_ifork	*ifp;
+	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
 	struct xfs_btree_block	*new_broot;
 	int			new_max;
 	size_t			new_size;
-	char			*np;
-	char			*op;
+	size_t			old_size = ifp->if_broot_bytes;
 
 	/*
 	 * Handle the degenerate case quietly.
@@ -427,13 +470,12 @@  xfs_iroot_realloc(
 		return;
 	}
 
-	ifp = xfs_ifork_ptr(ip, whichfork);
 	if (rec_diff > 0) {
 		/*
 		 * If there wasn't any memory allocated before, just
 		 * allocate it now and get out.
 		 */
-		if (ifp->if_broot_bytes == 0) {
+		if (old_size == 0) {
 			new_size = xfs_bmap_broot_space_calc(mp, rec_diff);
 			xfs_iroot_alloc(ip, whichfork, new_size);
 			return;
@@ -442,22 +484,16 @@  xfs_iroot_realloc(
 		/*
 		 * If there is already an existing if_broot, then we need
 		 * to realloc() it and shift the pointers to their new
-		 * location.  The records don't change location because
-		 * they are kept butted up against the btree block header.
+		 * location.
 		 */
-		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
+		cur_max = xfs_bmbt_maxrecs(mp, old_size, 0);
 		new_max = cur_max + rec_diff;
 		new_size = xfs_bmap_broot_space_calc(mp, new_max);
 		ifp->if_broot = krealloc(ifp->if_broot, new_size,
 					 GFP_KERNEL | __GFP_NOFAIL);
-		op = (char *)xfs_bmap_broot_ptr_addr(mp, ifp->if_broot, 1,
-						     ifp->if_broot_bytes);
-		np = (char *)xfs_bmap_broot_ptr_addr(mp, ifp->if_broot, 1,
-						     (int)new_size);
-		ifp->if_broot_bytes = (int)new_size;
-		ASSERT(xfs_bmap_bmdr_space(ifp->if_broot) <=
-			xfs_inode_fork_size(ip, whichfork));
-		memmove(np, op, cur_max * (uint)sizeof(xfs_fsblock_t));
+		ifp->if_broot_bytes = new_size;
+		xfs_ifork_move_broot(ip, whichfork, ifp->if_broot, new_size,
+				ifp->if_broot, old_size, cur_max);
 		return;
 	}
 
@@ -466,8 +502,8 @@  xfs_iroot_realloc(
 	 * if_broot buffer.  It must already exist.  If we go to zero
 	 * records, just get rid of the root and clear the status bit.
 	 */
-	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
-	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
+	ASSERT((ifp->if_broot != NULL) && (old_size > 0));
+	cur_max = xfs_bmbt_maxrecs(mp, old_size, 0);
 	new_max = cur_max + rec_diff;
 	ASSERT(new_max >= 0);
 
@@ -478,22 +514,11 @@  xfs_iroot_realloc(
 	}
 
 	new_broot = kmalloc(new_size, GFP_KERNEL | __GFP_NOFAIL);
-	memcpy(new_broot, ifp->if_broot, xfs_bmbt_block_len(ip->i_mount));
-
-	op = (char *)xfs_bmbt_key_addr(mp, ifp->if_broot, 1);
-	np = (char *)xfs_bmbt_key_addr(mp, new_broot, 1);
-	memcpy(np, op, new_max * sizeof(xfs_bmbt_key_t));
-
-	op = (char *)xfs_bmap_broot_ptr_addr(mp, ifp->if_broot, 1,
-			ifp->if_broot_bytes);
-	np = (char *)xfs_bmap_broot_ptr_addr(mp, new_broot, 1, (int)new_size);
-	memcpy(np, op, new_max * sizeof(xfs_fsblock_t));
-
+	xfs_ifork_move_broot(ip, whichfork, new_broot, new_size, ifp->if_broot,
+			old_size, new_max);
 	kfree(ifp->if_broot);
 	ifp->if_broot = new_broot;
 	ifp->if_broot_bytes = (int)new_size;
-	ASSERT(xfs_bmap_bmdr_space(ifp->if_broot) <=
-	       xfs_inode_fork_size(ip, whichfork));
 }