diff mbox series

[14/16] xfs: optimize adding the first 8-byte inode to a shortform directory

Message ID 20240430124926.1775355-15-hch@lst.de (mailing list archive)
State Accepted, archived
Headers show
Series [01/16] xfs: allow non-empty forks in xfs_bmap_local_to_extents_empty | expand

Commit Message

Christoph Hellwig April 30, 2024, 12:49 p.m. UTC
When adding a new entry to a shortform directory we have to convert the
format of the entire inode fork if the new entry is the first 8-byte
inode number.

Instead of allocating a new buffer to convert the format, and then
possibly another one when doing an insert in the middle of the directory,
simply add the new entry while converting the format and avoid the
extra allocation.

For this to work, xfs_dir2_sf_addname_pick also has to return the
offset for the hard case, but this is entirely trivial.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_dir2_sf.c | 46 +++++++++++++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 5 deletions(-)

Comments

Darrick J. Wong May 1, 2024, 9:50 p.m. UTC | #1
On Tue, Apr 30, 2024 at 02:49:24PM +0200, Christoph Hellwig wrote:
> When adding a new entry to a shortform directory we have to convert the
> format of the entire inode fork if the new entry is the first 8-byte
> inode number.
> 
> Instead of allocating a new buffer to convert the format, and then
> possibly another one when doing an insert in the middle of the directory,
> simply add the new entry while converting the format and avoid the
> extra allocation.
> 
> For this to work, xfs_dir2_sf_addname_pick also has to return the
> offset for the hard case, but this is entirely trivial.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/libxfs/xfs_dir2_sf.c | 46 +++++++++++++++++++++++++++++++++----
>  1 file changed, 41 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
> index a9d614dfb9e43b..02aa176348a795 100644
> --- a/fs/xfs/libxfs/xfs_dir2_sf.c
> +++ b/fs/xfs/libxfs/xfs_dir2_sf.c
> @@ -35,7 +35,8 @@ static void xfs_dir2_sf_check(xfs_da_args_t *args);
>  #endif /* DEBUG */
>  
>  static void xfs_dir2_sf_toino4(struct xfs_da_args *args, bool remove);
> -static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
> +static void xfs_dir2_sf_toino8(struct xfs_da_args *args,
> +		xfs_dir2_data_aoff_t offset);

I noticed a few places where we pass offset == 0 here.  That's ok as a
null value because the start of a shortform directory is always the
header, correct?

>  
>  int
>  xfs_dir2_sf_entsize(
> @@ -450,6 +451,16 @@ xfs_dir2_sf_addname(
>  	 */
>  	if (args->op_flags & XFS_DA_OP_JUSTCHECK)
>  		return 0;
> +
> +	/*
> +	 * If we need convert to 8-byte inodes, piggy back adding the new entry
> +	 * to the rewrite of the fork to fit the large inode number.
> +	 */
> +	if (objchange) {
> +		xfs_dir2_sf_toino8(args, offset);
> +		return 0;
> +	}
> +
>  	/*
>  	 * Do it the easy way - just add it at the end.
>  	 */
> @@ -461,8 +472,6 @@ xfs_dir2_sf_addname(
>  	 */
>  	else {
>  		ASSERT(pick == 2);
> -		if (objchange)
> -			xfs_dir2_sf_toino8(args);

Ok, so this isn't needed anymore because the ino8 conversion now adds
the new dirent?

>  		xfs_dir2_sf_addname_hard(args, objchange, new_isize);
>  	}
>  
> @@ -622,6 +631,8 @@ xfs_dir2_sf_addname_pick(
>  	for (i = 0; i < sfp->count; i++) {
>  		if (!holefit)
>  			holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
> +		if (holefit)
> +			*offsetp = offset;
>  		offset = xfs_dir2_sf_get_offset(sfep) +
>  			 xfs_dir2_data_entsize(mp, sfep->namelen);
>  		sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
> @@ -1053,7 +1064,7 @@ xfs_dir2_sf_replace(
>  		/*
>  		 * Still fits, convert to 8-byte now.
>  		 */
> -		xfs_dir2_sf_toino8(args);
> +		xfs_dir2_sf_toino8(args, 0);

This is a replace, so we pass 0 here effectively as a null value?

>  		i8elevated = 1;
>  		sfp = dp->i_df.if_data;
>  	} else
> @@ -1205,7 +1216,8 @@ xfs_dir2_sf_toino4(
>   */
>  static void
>  xfs_dir2_sf_toino8(
> -	xfs_da_args_t		*args)		/* operation arguments */
> +	struct xfs_da_args	*args,
> +	xfs_dir2_data_aoff_t	new_offset)

Yeah, the comment for this function should note that new_offset!=0 means
to add the entry referenced in the args structure.

>  {
>  	struct xfs_inode	*dp = args->dp;
>  	struct xfs_mount	*mp = dp->i_mount;
> @@ -1213,6 +1225,7 @@ xfs_dir2_sf_toino8(
>  	int			oldsize = dp->i_df.if_bytes;
>  	int			i;		/* entry index */
>  	int			newsize;	/* new inode size */
> +	unsigned int		newent_size;
>  	xfs_dir2_sf_entry_t	*oldsfep;	/* old sf entry */
>  	xfs_dir2_sf_entry_t	*sfep;		/* new sf entry */
>  	xfs_dir2_sf_hdr_t	*sfp;		/* new sf directory */
> @@ -1225,6 +1238,18 @@ xfs_dir2_sf_toino8(
>  	 * Compute the new inode size (nb: entry count + 1 for parent)
>  	 */
>  	newsize = oldsize + (oldsfp->count + 1) * XFS_INO64_DIFF;
> +	if (new_offset) {
> +		/*
> +		 * Account for the bytes actually used.
> +		 */
> +		newsize += xfs_dir2_sf_entsize(mp, oldsfp, args->namelen);
> +
> +		/*
> +		 * But for the offset calculation use the bigger data entry
> +		 * format.
> +		 */
> +		newent_size = xfs_dir2_data_entsize(mp, args->namelen);

		/*
		 * Bump up the buffer size by the size of the new
		 * dirent.  Now that we've set i8count, compute the size
		 * of the new dirent.
		 */
		newsize += xfs_dir2_sf_entsize(mp, oldsfp, args->namelen);
		newent_size = xfs_dir2_data_entsize(mp, args->namelen);

> +	}
>  
>  	dp->i_df.if_data = sfp = kmalloc(newsize, GFP_KERNEL | __GFP_NOFAIL);
>  	dp->i_df.if_bytes = newsize;
> @@ -1250,6 +1275,17 @@ xfs_dir2_sf_toino8(
>  				xfs_dir2_sf_get_ino(mp, oldsfp, oldsfep));
>  		xfs_dir2_sf_put_ftype(mp, sfep,
>  				xfs_dir2_sf_get_ftype(mp, oldsfep));
> +
> +		/*
> +		 * If there is a new entry to add it once we reach the specified
> +		 * offset.

It took me a minute of staring at the if test logic to figure out what
we're doing here.  If, after reformatting a directory entry, the next
entry is the offset where _pick wants us to place the new dirent, we
should jump sfep to the next entry, and then add the new entry.

Is that right?  And we can't simplify the logic to:

	if (new_offset && new_offset = xfs_dir2_sf_get_offset(sfep))

Because _pick might want us to add the entry at the end of the directory
but we haven't incremented sfp->count yet, so the loop body will not be
executed in that case.

Is it ever the case that the entry gets added in the middle of a
shortform directory?

--D

> +		 */
> +		if (new_offset &&
> +		    new_offset == xfs_dir2_sf_get_offset(sfep) + newent_size) {
> +			sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
> +			xfs_dir2_sf_addname_common(args, sfep, new_offset,
> +					true);
> +		}
>  	}
>  
>  	kfree(oldsfp);
> -- 
> 2.39.2
> 
>
Christoph Hellwig May 2, 2024, 4:25 a.m. UTC | #2
On Wed, May 01, 2024 at 02:50:56PM -0700, Darrick J. Wong wrote:
> I noticed a few places where we pass offset == 0 here.  That's ok as a
> null value because the start of a shortform directory is always the
> header, correct?

The start of the "physical" layout has the header, but offset is the
"logical" d_offset offset.  The start of it is reserved for (but not
actually used by) the "." and ".." entries that will occupy the space
when converted out of the short form.  Probably also needs a comment.

> Ok, so this isn't needed anymore because the ino8 conversion now adds
> the new dirent?

Yes.

> > -		xfs_dir2_sf_toino8(args);
> > +		xfs_dir2_sf_toino8(args, 0);
> 
> This is a replace, so we pass 0 here effectively as a null value?

Exactly.

> > @@ -1250,6 +1275,17 @@ xfs_dir2_sf_toino8(
> >  				xfs_dir2_sf_get_ino(mp, oldsfp, oldsfep));
> >  		xfs_dir2_sf_put_ftype(mp, sfep,
> >  				xfs_dir2_sf_get_ftype(mp, oldsfep));
> > +
> > +		/*
> > +		 * If there is a new entry to add it once we reach the specified
> > +		 * offset.
> 
> It took me a minute of staring at the if test logic to figure out what
> we're doing here.  If, after reformatting a directory entry, the next
> entry is the offset where _pick wants us to place the new dirent, we
> should jump sfep to the next entry, and then add the new entry.
> 
> Is that right?  And we can't simplify the logic to:
> 
> 	if (new_offset && new_offset = xfs_dir2_sf_get_offset(sfep))

== ?

> Because _pick might want us to add the entry at the end of the directory
> but we haven't incremented sfp->count yet, so the loop body will not be
> executed in that case.
> 
> Is it ever the case that the entry gets added in the middle of a
> shortform directory?

Yes, that is the hard case.  There is no good reason to add it in
the middle, but we've encoded that the "logical" offset for a
shortform directory needs to fit into the physical size of a single
directory block when converted to block format in asserts and verifiers
and are stuck with it.  Otherwise we could have just always added it
at the end.
Darrick J. Wong May 2, 2024, 2:43 p.m. UTC | #3
On Thu, May 02, 2024 at 06:25:09AM +0200, Christoph Hellwig wrote:
> On Wed, May 01, 2024 at 02:50:56PM -0700, Darrick J. Wong wrote:
> > I noticed a few places where we pass offset == 0 here.  That's ok as a
> > null value because the start of a shortform directory is always the
> > header, correct?
> 
> The start of the "physical" layout has the header, but offset is the
> "logical" d_offset offset.  The start of it is reserved for (but not
> actually used by) the "." and ".." entries that will occupy the space
> when converted out of the short form.  Probably also needs a comment.
> 
> > Ok, so this isn't needed anymore because the ino8 conversion now adds
> > the new dirent?
> 
> Yes.
> 
> > > -		xfs_dir2_sf_toino8(args);
> > > +		xfs_dir2_sf_toino8(args, 0);
> > 
> > This is a replace, so we pass 0 here effectively as a null value?
> 
> Exactly.

ok good.

> > > @@ -1250,6 +1275,17 @@ xfs_dir2_sf_toino8(
> > >  				xfs_dir2_sf_get_ino(mp, oldsfp, oldsfep));
> > >  		xfs_dir2_sf_put_ftype(mp, sfep,
> > >  				xfs_dir2_sf_get_ftype(mp, oldsfep));
> > > +
> > > +		/*
> > > +		 * If there is a new entry to add it once we reach the specified
> > > +		 * offset.
> > 
> > It took me a minute of staring at the if test logic to figure out what
> > we're doing here.  If, after reformatting a directory entry, the next
> > entry is the offset where _pick wants us to place the new dirent, we
> > should jump sfep to the next entry, and then add the new entry.
> > 
> > Is that right?  And we can't simplify the logic to:
> > 
> > 	if (new_offset && new_offset = xfs_dir2_sf_get_offset(sfep))
> 
> == ?

Yes, double-equals, not single-equals.

> > Because _pick might want us to add the entry at the end of the directory
> > but we haven't incremented sfp->count yet, so the loop body will not be
> > executed in that case.
> > 
> > Is it ever the case that the entry gets added in the middle of a
> > shortform directory?
> 
> Yes, that is the hard case.  There is no good reason to add it in
> the middle, but we've encoded that the "logical" offset for a
> shortform directory needs to fit into the physical size of a single
> directory block when converted to block format in asserts and verifiers
> and are stuck with it.  Otherwise we could have just always added it
> at the end.

<nod> I think the mechanics of this patch look ok, but this:

		xfs_dir2_sf_toino8(args, 0);

worries me because the reader has to know that zero is never a valid
offset for adding a dirent, vs:

#define XFS_DIR2_DATA_AOFF_NULL	((xfs_dir2_data_aoff_t)0)

		xfs_dir2_sf_toino8(args, XFS_DIR2_DATA_AOFF_NULL);

shouts that we're not trying to add anything.

--D
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index a9d614dfb9e43b..02aa176348a795 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -35,7 +35,8 @@  static void xfs_dir2_sf_check(xfs_da_args_t *args);
 #endif /* DEBUG */
 
 static void xfs_dir2_sf_toino4(struct xfs_da_args *args, bool remove);
-static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
+static void xfs_dir2_sf_toino8(struct xfs_da_args *args,
+		xfs_dir2_data_aoff_t offset);
 
 int
 xfs_dir2_sf_entsize(
@@ -450,6 +451,16 @@  xfs_dir2_sf_addname(
 	 */
 	if (args->op_flags & XFS_DA_OP_JUSTCHECK)
 		return 0;
+
+	/*
+	 * If we need to convert to 8-byte inodes, piggy back adding the new
+	 * entry to the rewrite of the fork to fit the large inode number.
+	 */
+	if (objchange) {
+		xfs_dir2_sf_toino8(args, offset);
+		return 0;
+	}
+
 	/*
 	 * Do it the easy way - just add it at the end.
 	 */
@@ -461,8 +472,6 @@  xfs_dir2_sf_addname(
 	 */
 	else {
 		ASSERT(pick == 2);
-		if (objchange)
-			xfs_dir2_sf_toino8(args);
 		xfs_dir2_sf_addname_hard(args, objchange, new_isize);
 	}
 
@@ -622,6 +631,8 @@  xfs_dir2_sf_addname_pick(
 	for (i = 0; i < sfp->count; i++) {
 		if (!holefit)
 			holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
+		if (holefit)
+			*offsetp = offset;
 		offset = xfs_dir2_sf_get_offset(sfep) +
 			 xfs_dir2_data_entsize(mp, sfep->namelen);
 		sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
@@ -1053,7 +1064,7 @@  xfs_dir2_sf_replace(
 		/*
 		 * Still fits, convert to 8-byte now.
 		 */
-		xfs_dir2_sf_toino8(args);
+		xfs_dir2_sf_toino8(args, 0);
 		i8elevated = 1;
 		sfp = dp->i_df.if_data;
 	} else
@@ -1205,7 +1216,8 @@  xfs_dir2_sf_toino4(
  */
 static void
 xfs_dir2_sf_toino8(
-	xfs_da_args_t		*args)		/* operation arguments */
+	struct xfs_da_args	*args,
+	xfs_dir2_data_aoff_t	new_offset)
 {
 	struct xfs_inode	*dp = args->dp;
 	struct xfs_mount	*mp = dp->i_mount;
@@ -1213,6 +1225,7 @@  xfs_dir2_sf_toino8(
 	int			oldsize = dp->i_df.if_bytes;
 	int			i;		/* entry index */
 	int			newsize;	/* new inode size */
+	unsigned int		newent_size;
 	xfs_dir2_sf_entry_t	*oldsfep;	/* old sf entry */
 	xfs_dir2_sf_entry_t	*sfep;		/* new sf entry */
 	xfs_dir2_sf_hdr_t	*sfp;		/* new sf directory */
@@ -1225,6 +1238,18 @@  xfs_dir2_sf_toino8(
 	 * Compute the new inode size (nb: entry count + 1 for parent)
 	 */
 	newsize = oldsize + (oldsfp->count + 1) * XFS_INO64_DIFF;
+	if (new_offset) {
+		/*
+		 * Account for the bytes actually used.
+		 */
+		newsize += xfs_dir2_sf_entsize(mp, oldsfp, args->namelen);
+
+		/*
+		 * But for the offset calculation use the bigger data entry
+		 * format.
+		 */
+		newent_size = xfs_dir2_data_entsize(mp, args->namelen);
+	}
 
 	dp->i_df.if_data = sfp = kmalloc(newsize, GFP_KERNEL | __GFP_NOFAIL);
 	dp->i_df.if_bytes = newsize;
@@ -1250,6 +1275,17 @@  xfs_dir2_sf_toino8(
 				xfs_dir2_sf_get_ino(mp, oldsfp, oldsfep));
 		xfs_dir2_sf_put_ftype(mp, sfep,
 				xfs_dir2_sf_get_ftype(mp, oldsfep));
+
+		/*
+		 * If there is a new entry to add, insert it once we reach the
+		 * specified offset.
+		 */
+		if (new_offset &&
+		    new_offset == xfs_dir2_sf_get_offset(sfep) + newent_size) {
+			sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
+			xfs_dir2_sf_addname_common(args, sfep, new_offset,
+					true);
+		}
 	}
 
 	kfree(oldsfp);