diff mbox series

[16/16] xfs: make the hard case in xfs_dir2_sf_addname less hard

Message ID 20240430124926.1775355-17-hch@lst.de (mailing list archive)
State Accepted, archived
Headers show
Series [01/16] xfs: allow non-empty forks in xfs_bmap_local_to_extents_empty | expand

Commit Message

Christoph Hellwig April 30, 2024, 12:49 p.m. UTC
xfs_dir2_sf_addname tries and easy addition first where the new entry
is added to an end and a new higher offset is allocated to it.  If an
arbitrary limit on the offset is trigger it goes down the "hard" path
and instead looks for a hole in the offset space, which then also
implies inserting the new entry in the middle as the entries are sorted
by the d_offset offset.

The code to add an entry the hard way is way to complicated and
inefficient as it duplicates the linear search of the directory to
find the entry, full frees the old inode fork data and reallocates
it just to copy data into it from a temporary buffer.  Rewrite all
this to use the offset from the initial scan, do the usual idata
realloc and then just move the entries past the insertion point out
of the way in place using memmove.  This also happens to allow sharing
the code entirely with the easy case.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_dir2_sf.c | 319 ++++++++++--------------------------
 1 file changed, 89 insertions(+), 230 deletions(-)

Comments

Darrick J. Wong May 1, 2024, 10:10 p.m. UTC | #1
On Tue, Apr 30, 2024 at 02:49:26PM +0200, Christoph Hellwig wrote:
> xfs_dir2_sf_addname tries and easy addition first where the new entry
> is added to an end and a new higher offset is allocated to it.  If an
> arbitrary limit on the offset is trigger it goes down the "hard" path
> and instead looks for a hole in the offset space, which then also
> implies inserting the new entry in the middle as the entries are sorted
> by the d_offset offset.

/me smacks forehead.  Ahh, right.  This is how _pick can end up telling
us to insert a dirent in the middle of the shortform dirent.  The bytes
of the dirent are packed together, but the d_offset "address space" can
be sparse.  So if a directory contains:

u.sfdir2.list[0].namelen = 15
u.sfdir2.list[0].offset = 0x30
u.sfdir2.list[0].name = ”frame000000.tst”
u.sfdir2.list[0].inumber.i4 = 25165953

u.sfdir2.list[1].namelen = 15
u.sfdir2.list[1].offset = 0x90
u.sfdir2.list[1].name = ”frame000001.tst”
u.sfdir2.list[1].inumber.i4 = 25165954

Then the _pick function can decide that we should insert the dirent
at "offset" 0x50, which involves memmove'ing u.sfdir2.list[1] upwards,
and then writing a new dirent between the two:

u.sfdir2.list[0].namelen = 15
u.sfdir2.list[0].offset = 0x30
u.sfdir2.list[0].name = ”frame000000.tst”
u.sfdir2.list[0].inumber.i4 = 25165953

u.sfdir2.list[1].namelen = 15
u.sfdir2.list[1].offset = 0x50
u.sfdir2.list[1].name = ”mugwumpquux.tst”
u.sfdir2.list[1].inumber.i4 = 25165955

u.sfdir2.list[2].namelen = 15
u.sfdir2.list[2].offset = 0x90
u.sfdir2.list[2].name = ”frame000001.tst”
u.sfdir2.list[2].inumber.i4 = 25165954

and this is really what the "hard" case is doing.

> The code to add an entry the hard way is way to complicated and
> inefficient as it duplicates the linear search of the directory to
> find the entry, full frees the old inode fork data and reallocates
> it just to copy data into it from a temporary buffer.  Rewrite all
> this to use the offset from the initial scan, do the usual idata
> realloc and then just move the entries past the insertion point out
> of the way in place using memmove.  This also happens to allow sharing
> the code entirely with the easy case.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

If my understanding of that is correct, then
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

> ---
>  fs/xfs/libxfs/xfs_dir2_sf.c | 319 ++++++++++--------------------------
>  1 file changed, 89 insertions(+), 230 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
> index 0598465357cc3a..4ba93835dd847f 100644
> --- a/fs/xfs/libxfs/xfs_dir2_sf.c
> +++ b/fs/xfs/libxfs/xfs_dir2_sf.c
> @@ -16,18 +16,6 @@
>  #include "xfs_dir2_priv.h"
>  #include "xfs_trace.h"
>  
> -/*
> - * Prototypes for internal functions.
> - */
> -static void xfs_dir2_sf_addname_easy(xfs_da_args_t *args,
> -				     xfs_dir2_sf_entry_t *sfep,
> -				     xfs_dir2_data_aoff_t offset,
> -				     int new_isize);
> -static void xfs_dir2_sf_addname_hard(xfs_da_args_t *args, int objchange,
> -				     int new_isize);
> -static int xfs_dir2_sf_addname_pick(xfs_da_args_t *args, int objchange,
> -				    xfs_dir2_sf_entry_t **sfepp,
> -				    xfs_dir2_data_aoff_t *offsetp);
>  #ifdef DEBUG
>  static void xfs_dir2_sf_check(xfs_da_args_t *args);
>  #else
> @@ -361,6 +349,61 @@ xfs_dir2_try_block_to_sf(
>  	return error;
>  }
>  
> +static xfs_dir2_data_aoff_t
> +xfs_dir2_sf_addname_pick_offset(
> +	struct xfs_da_args	*args,
> +	unsigned int		*byteoffp)
> +{
> +	struct xfs_inode	*dp = args->dp;
> +	struct xfs_mount	*mp = dp->i_mount;
> +	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
> +	xfs_dir2_data_aoff_t	offset = args->geo->data_first_offset;
> +	struct xfs_dir2_sf_entry *sfep = xfs_dir2_sf_firstentry(sfp);
> +	unsigned int		data_size =
> +		xfs_dir2_data_entsize(mp, args->namelen);
> +	unsigned int		data_overhead =
> +		xfs_dir2_block_overhead(sfp->count + 1);
> +	xfs_dir2_data_aoff_t	hole_offset = 0;
> +	unsigned int		byteoff = 0;
> +	unsigned int		i;
> +
> +	/*
> +	 * Scan the directory to find the last offset and find a suitable
> +	 * hole that we can use if needed.
> +	 */
> +	for (i = 0; i < sfp->count; i++) {
> +		if (!hole_offset &&
> +		    offset + data_size <= xfs_dir2_sf_get_offset(sfep)) {
> +			hole_offset = offset;
> +			byteoff = (void *)sfep - dp->i_df.if_data;
> +		}
> +		offset = xfs_dir2_sf_get_offset(sfep) +
> +			xfs_dir2_data_entsize(mp, sfep->namelen);
> +		sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
> +	}
> +
> +	/*
> +	 * If just appending the entry would make the offset too large, use the
> +	 * first hole that fits it if there is one or else give up and convert
> +	 * to block format.
> +	 *
> +	 * Note that "too large" here is a completely arbitrary limit.  The
> +	 * offset is logical concept not related to the physical byteoff and
> +	 * there is no fundamental underlying limit to it.  But it has been
> +	 * encoded in asserts and verifiers for a long time so we have to
> +	 * respect it.
> +	 */
> +	if (offset + data_size + data_overhead > args->geo->blksize) {
> +		if (!hole_offset)
> +			return 0;
> +		*byteoffp = byteoff;
> +		return hole_offset;
> +	}
> +
> +	*byteoffp = dp->i_disk_size;
> +	return offset;
> +}
> +
>  static void
>  xfs_dir2_sf_addname_common(
>  	struct xfs_da_args	*args,
> @@ -384,23 +427,29 @@ xfs_dir2_sf_addname_common(
>  
>  /*
>   * Add a name to a shortform directory.
> - * There are two algorithms, "easy" and "hard" which we decide on
> - * before changing anything.
> - * Convert to block form if necessary, if the new entry won't fit.
> + *
> + * Shortform directories are always tighty packed, and we simply allocate
> + * a new higher offset and add the entry at the end.
> + *
> + * While we could search for a hole in the offset space there really isn't
> + * much of a a point in doing so and complicating the algorithm.
> + *
> + * Convert to block form if the new entry won't fit.
>   */
> -int						/* error */
> +int
>  xfs_dir2_sf_addname(
> -	xfs_da_args_t		*args)		/* operation arguments */
> +	struct xfs_da_args	*args)
>  {
>  	struct xfs_inode	*dp = args->dp;
> +	struct xfs_mount	*mp = dp->i_mount;
>  	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
> -	int			error;		/* error return value */
> -	int			incr_isize;	/* total change in size */
> -	int			new_isize;	/* size after adding name */
> -	int			objchange;	/* changing to 8-byte inodes */
> -	xfs_dir2_data_aoff_t	offset = 0;	/* offset for new entry */
> -	int			pick;		/* which algorithm to use */
> -	xfs_dir2_sf_entry_t	*sfep = NULL;	/* shortform entry */
> +	struct xfs_dir2_sf_entry *sfep;
> +	xfs_dir2_data_aoff_t	offset;
> +	unsigned int		entsize;
> +	unsigned int		new_isize;
> +	unsigned int		byteoff;
> +	bool			objchange = false;
> +	int			error;
>  
>  	trace_xfs_dir2_sf_addname(args);
>  
> @@ -408,11 +457,12 @@ xfs_dir2_sf_addname(
>  	ASSERT(dp->i_df.if_format == XFS_DINODE_FMT_LOCAL);
>  	ASSERT(dp->i_df.if_bytes == dp->i_disk_size);
>  	ASSERT(dp->i_disk_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
> +
>  	/*
> -	 * Compute entry (and change in) size.
> +	 * Compute entry size and new inode size.
>  	 */
> -	incr_isize = xfs_dir2_sf_entsize(dp->i_mount, sfp, args->namelen);
> -	objchange = 0;
> +	entsize = xfs_dir2_sf_entsize(mp, sfp, args->namelen);
> +	new_isize = dp->i_disk_size + entsize;
>  
>  	/*
>  	 * Do we have to change to 8 byte inodes?
> @@ -421,19 +471,17 @@ xfs_dir2_sf_addname(
>  		/*
>  		 * Yes, adjust the inode size.  old count + (parent + new)
>  		 */
> -		incr_isize += (sfp->count + 2) * XFS_INO64_DIFF;
> -		objchange = 1;
> +		new_isize += (sfp->count + 2) * XFS_INO64_DIFF;
> +		objchange = true;
>  	}
>  
> -	new_isize = (int)dp->i_disk_size + incr_isize;
> -
>  	/*
>  	 * Too large to fit into the inode fork or too large offset?
>  	 */
>  	if (new_isize > xfs_inode_data_fork_size(dp))
>  		goto convert;
> -	pick = xfs_dir2_sf_addname_pick(args, objchange, &sfep, &offset);
> -	if (pick == 0)
> +	offset = xfs_dir2_sf_addname_pick_offset(args, &byteoff);
> +	if (!offset)
>  		goto convert;
>  
>  	/*
> @@ -451,20 +499,17 @@ xfs_dir2_sf_addname(
>  		return 0;
>  	}
>  
> +	sfp = xfs_idata_realloc(dp, entsize, XFS_DATA_FORK);
> +	sfep = dp->i_df.if_data + byteoff;
> +
>  	/*
> -	 * Do it the easy way - just add it at the end.
> -	 */
> -	if (pick == 1)
> -		xfs_dir2_sf_addname_easy(args, sfep, offset, new_isize);
> -	/*
> -	 * Do it the hard way - look for a place to insert the new entry.
> -	 * Convert to 8 byte inode numbers first if necessary.
> +	 * If we're inserting in the middle, move the tail out of the way first.
>  	 */
> -	else {
> -		ASSERT(pick == 2);
> -		xfs_dir2_sf_addname_hard(args, objchange, new_isize);
> +	if (byteoff < dp->i_disk_size) {
> +		memmove(sfep, (void *)sfep + entsize,
> +			dp->i_disk_size - byteoff);
>  	}
> -
> +	xfs_dir2_sf_addname_common(args, sfep, offset, objchange);
>  	dp->i_disk_size = new_isize;
>  	xfs_dir2_sf_check(args);
>  	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
> @@ -482,192 +527,6 @@ xfs_dir2_sf_addname(
>  	return xfs_dir2_block_addname(args);
>  }
>  
> -/*
> - * Add the new entry the "easy" way.
> - * This is copying the old directory and adding the new entry at the end.
> - * Since it's sorted by "offset" we need room after the last offset
> - * that's already there, and then room to convert to a block directory.
> - * This is already checked by the pick routine.
> - */
> -static void
> -xfs_dir2_sf_addname_easy(
> -	xfs_da_args_t		*args,		/* operation arguments */
> -	xfs_dir2_sf_entry_t	*sfep,		/* pointer to new entry */
> -	xfs_dir2_data_aoff_t	offset,		/* offset to use for new ent */
> -	int			new_isize)	/* new directory size */
> -{
> -	struct xfs_inode	*dp = args->dp;
> -	struct xfs_mount	*mp = dp->i_mount;
> -	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
> -	int			byteoff = (int)((char *)sfep - (char *)sfp);
> -
> -	/*
> -	 * Grow the in-inode space.
> -	 */
> -	sfp = xfs_idata_realloc(dp, xfs_dir2_sf_entsize(mp, sfp, args->namelen),
> -			  XFS_DATA_FORK);
> -	/*
> -	 * Need to set up again due to realloc of the inode data.
> -	 */
> -	sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff);
> -	xfs_dir2_sf_addname_common(args, sfep, offset, false);
> -}
> -
> -/*
> - * Add the new entry the "hard" way.
> - * The caller has already converted to 8 byte inode numbers if necessary,
> - * in which case we need to leave the i8count at 1.
> - * Find a hole that the new entry will fit into, and copy
> - * the first part of the entries, the new entry, and the last part of
> - * the entries.
> - */
> -/* ARGSUSED */
> -static void
> -xfs_dir2_sf_addname_hard(
> -	xfs_da_args_t		*args,		/* operation arguments */
> -	int			objchange,	/* changing inode number size */
> -	int			new_isize)	/* new directory size */
> -{
> -	struct xfs_inode	*dp = args->dp;
> -	struct xfs_mount	*mp = dp->i_mount;
> -	int			add_datasize;	/* data size need for new ent */
> -	char			*buf;		/* buffer for old */
> -	int			eof;		/* reached end of old dir */
> -	int			nbytes;		/* temp for byte copies */
> -	xfs_dir2_data_aoff_t	new_offset;	/* next offset value */
> -	xfs_dir2_data_aoff_t	offset;		/* current offset value */
> -	int			old_isize;	/* previous size */
> -	xfs_dir2_sf_entry_t	*oldsfep;	/* entry in original dir */
> -	xfs_dir2_sf_hdr_t	*oldsfp;	/* original shortform dir */
> -	xfs_dir2_sf_entry_t	*sfep;		/* entry in new dir */
> -	xfs_dir2_sf_hdr_t	*sfp;		/* new shortform dir */
> -
> -	/*
> -	 * Copy the old directory to the stack buffer.
> -	 */
> -	old_isize = (int)dp->i_disk_size;
> -	buf = kmalloc(old_isize, GFP_KERNEL | __GFP_NOFAIL);
> -	oldsfp = (xfs_dir2_sf_hdr_t *)buf;
> -	memcpy(oldsfp, dp->i_df.if_data, old_isize);
> -	/*
> -	 * Loop over the old directory finding the place we're going
> -	 * to insert the new entry.
> -	 * If it's going to end up at the end then oldsfep will point there.
> -	 */
> -	for (offset = args->geo->data_first_offset,
> -	      oldsfep = xfs_dir2_sf_firstentry(oldsfp),
> -	      add_datasize = xfs_dir2_data_entsize(mp, args->namelen),
> -	      eof = (char *)oldsfep == &buf[old_isize];
> -	     !eof;
> -	     offset = new_offset + xfs_dir2_data_entsize(mp, oldsfep->namelen),
> -	      oldsfep = xfs_dir2_sf_nextentry(mp, oldsfp, oldsfep),
> -	      eof = (char *)oldsfep == &buf[old_isize]) {
> -		new_offset = xfs_dir2_sf_get_offset(oldsfep);
> -		if (offset + add_datasize <= new_offset)
> -			break;
> -	}
> -	/*
> -	 * Get rid of the old directory, then allocate space for
> -	 * the new one.  We do this so xfs_idata_realloc won't copy
> -	 * the data.
> -	 */
> -	xfs_idata_realloc(dp, -old_isize, XFS_DATA_FORK);
> -	sfp = xfs_idata_realloc(dp, new_isize, XFS_DATA_FORK);
> -
> -	/*
> -	 * Copy the first part of the directory, including the header.
> -	 */
> -	nbytes = (int)((char *)oldsfep - (char *)oldsfp);
> -	memcpy(sfp, oldsfp, nbytes);
> -	sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + nbytes);
> -
> -	/*
> -	 * Fill in the new entry, and update the header counts.
> -	 */
> -	xfs_dir2_sf_addname_common(args, sfep, offset, objchange);
> -
> -	/*
> -	 * If there's more left to copy, do that.
> -	 */
> -	if (!eof) {
> -		sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
> -		memcpy(sfep, oldsfep, old_isize - nbytes);
> -	}
> -	kfree(buf);
> -}
> -
> -/*
> - * Decide if the new entry will fit at all.
> - * If it will fit, pick between adding the new entry to the end (easy)
> - * or somewhere else (hard).
> - * Return 0 (won't fit), 1 (easy), 2 (hard).
> - */
> -/*ARGSUSED*/
> -static int					/* pick result */
> -xfs_dir2_sf_addname_pick(
> -	xfs_da_args_t		*args,		/* operation arguments */
> -	int			objchange,	/* inode # size changes */
> -	xfs_dir2_sf_entry_t	**sfepp,	/* out(1): new entry ptr */
> -	xfs_dir2_data_aoff_t	*offsetp)	/* out(1): new offset */
> -{
> -	struct xfs_inode	*dp = args->dp;
> -	struct xfs_mount	*mp = dp->i_mount;
> -	int			holefit;	/* found hole it will fit in */
> -	int			i;		/* entry number */
> -	xfs_dir2_data_aoff_t	offset;		/* data block offset */
> -	xfs_dir2_sf_entry_t	*sfep;		/* shortform entry */
> -	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
> -	int			size;		/* entry's data size */
> -	int			used;		/* data bytes used */
> -
> -	size = xfs_dir2_data_entsize(mp, args->namelen);
> -	offset = args->geo->data_first_offset;
> -	sfep = xfs_dir2_sf_firstentry(sfp);
> -	holefit = 0;
> -	/*
> -	 * Loop over sf entries.
> -	 * Keep track of data offset and whether we've seen a place
> -	 * to insert the new entry.
> -	 */
> -	for (i = 0; i < sfp->count; i++) {
> -		if (!holefit)
> -			holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
> -		if (holefit)
> -			*offsetp = offset;
> -		offset = xfs_dir2_sf_get_offset(sfep) +
> -			 xfs_dir2_data_entsize(mp, sfep->namelen);
> -		sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
> -	}
> -	/*
> -	 * Calculate data bytes used excluding the new entry, if this
> -	 * was a data block (block form directory).
> -	 */
> -	used = offset + xfs_dir2_block_overhead(sfp->count + 1);
> -	/*
> -	 * If it won't fit in a block form then we can't insert it,
> -	 * we'll go back, convert to block, then try the insert and convert
> -	 * to leaf.
> -	 */
> -	if (used + (holefit ? 0 : size) > args->geo->blksize)
> -		return 0;
> -	/*
> -	 * If changing the inode number size, do it the hard way.
> -	 */
> -	if (objchange)
> -		return 2;
> -	/*
> -	 * If it won't fit at the end then do it the hard way (use the hole).
> -	 */
> -	if (used + size > args->geo->blksize)
> -		return 2;
> -	/*
> -	 * Do it the easy way.
> -	 */
> -	*sfepp = sfep;
> -	*offsetp = offset;
> -	return 1;
> -}
> -
>  #ifdef DEBUG
>  /*
>   * Check consistency of shortform directory, assert if bad.
> -- 
> 2.39.2
> 
>
Yujie Liu May 10, 2024, 6:29 a.m. UTC | #2
Hello,

kernel test robot noticed "dmesg.WARNING:at_fs/xfs/xfs_message.c:#asswarn[xfs]" on:

commit: cbc81dd4e3e501eb8888bf412f7d4fdd8c416927 ("[PATCH 16/16] xfs: make the hard case in xfs_dir2_sf_addname less hard")
url: https://github.com/intel-lab-lkp/linux/commits/Christoph-Hellwig/xfs-remove-an-extra-buffer-allocation-in-xfs_attr_shortform_to_leaf/20240430-214950
base: https://git.kernel.org/cgit/fs/xfs/xfs-linux.git for-next
patch link: https://lore.kernel.org/all/20240430124926.1775355-17-hch@lst.de/
patch subject: [PATCH 16/16] xfs: make the hard case in xfs_dir2_sf_addname less hard

in testcase: filebench
version: filebench-x86_64-22620e6-1_20240224
with following parameters:

	disk: 1HDD
	fs: xfs
	test: fileserver.f
	cpufreq_governor: performance

compiler: gcc-13
test machine: 128 threads 2 sockets Intel(R) Xeon(R) Platinum 8358 CPU @ 2.60GHz (Ice Lake) with 128G memory

(please refer to attached dmesg/kmsg for entire log/backtrace)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <yujie.liu@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202405101352.bcf759f-lkp@intel.com


[   41.583616][ T4699] XFS: Assertion failed: xfs_dir2_sf_lookup(args) == -ENOENT, file: fs/xfs/libxfs/xfs_dir2_sf.c, line: 456
[   41.595114][ T4699] ------------[ cut here ]------------
[ 41.600675][ T4699] WARNING: CPU: 66 PID: 4699 at fs/xfs/xfs_message.c:89 asswarn (fs/xfs/xfs_message.c:89 (discriminator 1)) xfs
[   41.609938][ T4699] Modules linked in: xfs device_dax(+) nd_pmem nd_btt dax_pmem intel_rapl_msr intel_rapl_common btrfs x86_pkg_temp_thermal intel_powerclamp blake2b_generic xor coretemp raid6_pq libcrc32c sd_mod t10_pi crc64_rocksoft_generic crc64_rocksoft crc64 kvm_intel sg kvm crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel sha512_ssse3 rapl ahci ipmi_ssif ast libahci intel_cstate binfmt_misc acpi_ipmi drm_shmem_helper mei_me ipmi_si i2c_i801 ioatdma dax_hmem intel_uncore libata drm_kms_helper mei i2c_smbus ipmi_devintf intel_pch_thermal nfit wmi dca ipmi_msghandler libnvdimm acpi_pad acpi_power_meter joydev drm loop fuse dm_mod ip_tables
[   41.669439][ T4699] CPU: 66 PID: 4699 Comm: filebench Not tainted 6.9.0-rc4-00238-gcbc81dd4e3e5 #1
[ 41.678673][ T4699] RIP: 0010:asswarn (fs/xfs/xfs_message.c:89 (discriminator 1)) xfs
[ 41.684114][ T4699] Code: 90 90 66 0f 1f 00 0f 1f 44 00 00 49 89 d0 41 89 c9 48 c7 c2 18 cd 45 c1 48 89 f1 48 89 fe 48 c7 c7 c8 e6 44 c1 e8 18 fd ff ff <0f> 0b c3 cc cc cc cc 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
All code
========
   0:   90                      nop
   1:   90                      nop
   2:   66 0f 1f 00             nopw   (%rax)
   6:   0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
   b:   49 89 d0                mov    %rdx,%r8
   e:   41 89 c9                mov    %ecx,%r9d
  11:   48 c7 c2 18 cd 45 c1    mov    $0xffffffffc145cd18,%rdx
  18:   48 89 f1                mov    %rsi,%rcx
  1b:   48 89 fe                mov    %rdi,%rsi
  1e:   48 c7 c7 c8 e6 44 c1    mov    $0xffffffffc144e6c8,%rdi
  25:   e8 18 fd ff ff          call   0xfffffffffffffd42
  2a:*  0f 0b                   ud2             <-- trapping instruction
  2c:   c3                      ret
  2d:   cc                      int3
  2e:   cc                      int3
  2f:   cc                      int3
  30:   cc                      int3
  31:   90                      nop
  32:   90                      nop
  33:   90                      nop
  34:   90                      nop
  35:   90                      nop
  36:   90                      nop
  37:   90                      nop
  38:   90                      nop
  39:   90                      nop
  3a:   90                      nop
  3b:   90                      nop
  3c:   90                      nop
  3d:   90                      nop
  3e:   90                      nop
  3f:   90                      nop

Code starting with the faulting instruction
===========================================
   0:   0f 0b                   ud2
   2:   c3                      ret
   3:   cc                      int3
   4:   cc                      int3
   5:   cc                      int3
   6:   cc                      int3
   7:   90                      nop
   8:   90                      nop
   9:   90                      nop
   a:   90                      nop
   b:   90                      nop
   c:   90                      nop
   d:   90                      nop
   e:   90                      nop
   f:   90                      nop
  10:   90                      nop
  11:   90                      nop
  12:   90                      nop
  13:   90                      nop
  14:   90                      nop
  15:   90                      nop
[   41.704055][ T4699] RSP: 0018:ffa0000020c4faa0 EFLAGS: 00010246
[   41.710227][ T4699] RAX: 0000000000000000 RBX: ff11000105112300 RCX: 000000007fffffff
[   41.718304][ T4699] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffc144e6c8
[   41.726378][ T4699] RBP: ff1100010067c000 R08: 0000000000000000 R09: 000000000000000a
[   41.734449][ T4699] R10: 000000000000000a R11: 0fffffffffffffff R12: ffa0000020c4fc28
[   41.742526][ T4699] R13: ff1100013d163200 R14: ff11000107e5f000 R15: 0000000000000023
[   41.750622][ T4699] FS:  00007fffd14006c0(0000) GS:ff1100103fa80000(0000) knlGS:0000000000000000
[   41.759686][ T4699] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   41.766378][ T4699] CR2: 00007ffff7d50000 CR3: 0000000159594005 CR4: 0000000000771ef0
[   41.774460][ T4699] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   41.782532][ T4699] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   41.790619][ T4699] PKRU: 55555554
[   41.794261][ T4699] Call Trace:
[   41.797669][ T4699]  <TASK>
[ 41.800737][ T4699] ? __warn (kernel/panic.c:694)
[ 41.804898][ T4699] ? asswarn (fs/xfs/xfs_message.c:89 (discriminator 1)) xfs
[ 41.809761][ T4699] ? report_bug (lib/bug.c:180 lib/bug.c:219)
[ 41.814357][ T4699] ? handle_bug (arch/x86/kernel/traps.c:239)
[ 41.818819][ T4699] ? exc_invalid_op (arch/x86/kernel/traps.c:260 (discriminator 1))
[ 41.823593][ T4699] ? asm_exc_invalid_op (arch/x86/include/asm/idtentry.h:621)
[ 41.828743][ T4699] ? asswarn (fs/xfs/xfs_message.c:89 (discriminator 1)) xfs
[ 41.833541][ T4699] xfs_dir2_sf_addname (fs/xfs/libxfs/xfs_dir2_sf.c:457 (discriminator 1)) xfs
[ 41.839390][ T4699] xfs_dir_createname (fs/xfs/libxfs/xfs_dir2.c:357) xfs
[ 41.845125][ T4699] xfs_create (fs/xfs/xfs_inode.c:1116) xfs
[ 41.850168][ T4699] ? generic_permission (fs/namei.c:346 (discriminator 1) fs/namei.c:407 (discriminator 1))
[ 41.855344][ T4699] xfs_generic_create (fs/xfs/xfs_iops.c:202 (discriminator 1)) xfs
[ 41.861071][ T4699] ? generic_permission (fs/namei.c:346 (discriminator 1) fs/namei.c:407 (discriminator 1))
[ 41.866237][ T4699] lookup_open+0x4c8/0x570
[ 41.871311][ T4699] open_last_lookups (fs/namei.c:3567 (discriminator 1))
[ 41.876301][ T4699] path_openat (fs/namei.c:3796)
[ 41.880726][ T4699] do_filp_open (fs/namei.c:3826)
[ 41.885194][ T4699] do_sys_openat2 (fs/open.c:1406)
[ 41.889761][ T4699] __x64_sys_openat (fs/open.c:1432)
[ 41.894487][ T4699] do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1))
[ 41.899034][ T4699] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
[   41.904966][ T4699] RIP: 0033:0x7ffff7df0f80
[ 41.909418][ T4699] Code: 48 89 44 24 20 75 93 44 89 54 24 0c e8 39 d8 f8 ff 44 8b 54 24 0c 89 da 48 89 ee 41 89 c0 bf 9c ff ff ff b8 01 01 00 00 0f 05 <48> 3d 00 f0 ff ff 77 38 44 89 c7 89 44 24 0c e8 8c d8 f8 ff 8b 44
All code
========
   0:   48 89 44 24 20          mov    %rax,0x20(%rsp)
   5:   75 93                   jne    0xffffffffffffff9a
   7:   44 89 54 24 0c          mov    %r10d,0xc(%rsp)
   c:   e8 39 d8 f8 ff          call   0xfffffffffff8d84a
  11:   44 8b 54 24 0c          mov    0xc(%rsp),%r10d
  16:   89 da                   mov    %ebx,%edx
  18:   48 89 ee                mov    %rbp,%rsi
  1b:   41 89 c0                mov    %eax,%r8d
  1e:   bf 9c ff ff ff          mov    $0xffffff9c,%edi
  23:   b8 01 01 00 00          mov    $0x101,%eax
  28:   0f 05                   syscall
  2a:*  48 3d 00 f0 ff ff       cmp    $0xfffffffffffff000,%rax         <-- trapping instruction
  30:   77 38                   ja     0x6a
  32:   44 89 c7                mov    %r8d,%edi
  35:   89 44 24 0c             mov    %eax,0xc(%rsp)
  39:   e8 8c d8 f8 ff          call   0xfffffffffff8d8ca
  3e:   8b                      .byte 0x8b
  3f:   44                      rex.R

Code starting with the faulting instruction
===========================================
   0:   48 3d 00 f0 ff ff       cmp    $0xfffffffffffff000,%rax
   6:   77 38                   ja     0x40
   8:   44 89 c7                mov    %r8d,%edi
   b:   89 44 24 0c             mov    %eax,0xc(%rsp)
   f:   e8 8c d8 f8 ff          call   0xfffffffffff8d8a0
  14:   8b                      .byte 0x8b
  15:   44                      rex.R
[   41.929230][ T4699] RSP: 002b:00007fffd13fdcd0 EFLAGS: 00000293 ORIG_RAX: 0000000000000101
[   41.937705][ T4699] RAX: ffffffffffffffda RBX: 0000000000000042 RCX: 00007ffff7df0f80
[   41.945760][ T4699] RDX: 0000000000000042 RSI: 00007fffd13fde00 RDI: 00000000ffffff9c
[   41.953766][ T4699] RBP: 00007fffd13fde00 R08: 0000000000000000 R09: 00007ffeb4000b70
[   41.961774][ T4699] R10: 00000000000001b6 R11: 0000000000000293 R12: 0000000000000000
[   41.969773][ T4699] R13: 00007fffef5b6148 R14: 00007fffd13fee00 R15: 0000000000000040
[   41.977770][ T4699]  </TASK>
[   41.980813][ T4699] ---[ end trace 0000000000000000 ]---
[   42.196053][ T4696] XFS: Assertion failed: error != -ENOENT, file: fs/xfs/xfs_inode.c, line: 2827
[   42.205189][ T4696] ------------[ cut here ]------------
[ 42.210734][ T4696] WARNING: CPU: 28 PID: 4696 at fs/xfs/xfs_message.c:89 asswarn (fs/xfs/xfs_message.c:89 (discriminator 1)) xfs
[   42.219964][ T4696] Modules linked in: xfs device_dax(+) nd_pmem nd_btt dax_pmem intel_rapl_msr intel_rapl_common btrfs x86_pkg_temp_thermal intel_powerclamp blake2b_generic xor coretemp raid6_pq libcrc32c sd_mod t10_pi crc64_rocksoft_generic crc64_rocksoft crc64 kvm_intel sg kvm crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel sha512_ssse3 rapl ahci ipmi_ssif ast libahci intel_cstate binfmt_misc acpi_ipmi drm_shmem_helper mei_me ipmi_si i2c_i801 ioatdma dax_hmem intel_uncore libata drm_kms_helper mei i2c_smbus ipmi_devintf intel_pch_thermal nfit wmi dca ipmi_msghandler libnvdimm acpi_pad acpi_power_meter joydev drm loop fuse dm_mod ip_tables
[   42.279151][ T4696] CPU: 28 PID: 4696 Comm: filebench Tainted: G        W          6.9.0-rc4-00238-gcbc81dd4e3e5 #1
[ 42.289767][ T4696] RIP: 0010:asswarn (fs/xfs/xfs_message.c:89 (discriminator 1)) xfs
[ 42.295150][ T4696] Code: 90 90 66 0f 1f 00 0f 1f 44 00 00 49 89 d0 41 89 c9 48 c7 c2 18 cd 45 c1 48 89 f1 48 89 fe 48 c7 c7 c8 e6 44 c1 e8 18 fd ff ff <0f> 0b c3 cc cc cc cc 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
All code
========
   0:   90                      nop
   1:   90                      nop
   2:   66 0f 1f 00             nopw   (%rax)
   6:   0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
   b:   49 89 d0                mov    %rdx,%r8
   e:   41 89 c9                mov    %ecx,%r9d
  11:   48 c7 c2 18 cd 45 c1    mov    $0xffffffffc145cd18,%rdx
  18:   48 89 f1                mov    %rsi,%rcx
  1b:   48 89 fe                mov    %rdi,%rsi
  1e:   48 c7 c7 c8 e6 44 c1    mov    $0xffffffffc144e6c8,%rdi
  25:   e8 18 fd ff ff          call   0xfffffffffffffd42
  2a:*  0f 0b                   ud2             <-- trapping instruction
  2c:   c3                      ret
  2d:   cc                      int3
  2e:   cc                      int3
  2f:   cc                      int3
  30:   cc                      int3
  31:   90                      nop
  32:   90                      nop
  33:   90                      nop
  34:   90                      nop
  35:   90                      nop
  36:   90                      nop
  37:   90                      nop
  38:   90                      nop
  39:   90                      nop
  3a:   90                      nop
  3b:   90                      nop
  3c:   90                      nop
  3d:   90                      nop
  3e:   90                      nop
  3f:   90                      nop

Code starting with the faulting instruction
===========================================
   0:   0f 0b                   ud2
   2:   c3                      ret
   3:   cc                      int3
   4:   cc                      int3
   5:   cc                      int3
   6:   cc                      int3
   7:   90                      nop
   8:   90                      nop
   9:   90                      nop
   a:   90                      nop
   b:   90                      nop
   c:   90                      nop
   d:   90                      nop
   e:   90                      nop
   f:   90                      nop
  10:   90                      nop
  11:   90                      nop
  12:   90                      nop
  13:   90                      nop
  14:   90                      nop
  15:   90                      nop
[   42.314977][ T4696] RSP: 0018:ffa0000020c37db8 EFLAGS: 00010246
[   42.321096][ T4696] RAX: 0000000000000000 RBX: ff1100017eccc400 RCX: 000000007fffffff
[   42.329126][ T4696] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffc144e6c8
[   42.337154][ T4696] RBP: ffa0000020c37e40 R08: 0000000000000000 R09: 000000000000000a
[   42.345178][ T4696] R10: 000000000000000a R11: 0fffffffffffffff R12: ff11000107e5f000
[   42.353204][ T4696] R13: 0000000000008000 R14: 0000000000000000 R15: ff1100010067c000
[   42.361240][ T4696] FS:  00007fffd32006c0(0000) GS:ff1100103f900000(0000) knlGS:0000000000000000
[   42.370230][ T4696] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   42.376878][ T4696] CR2: 0000555555579cf0 CR3: 0000000159594005 CR4: 0000000000771ef0
[   42.384913][ T4696] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   42.392951][ T4696] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   42.400990][ T4696] PKRU: 55555554
[   42.404617][ T4696] Call Trace:
[   42.407980][ T4696]  <TASK>
[ 42.410992][ T4696] ? __warn (kernel/panic.c:694)
[ 42.415133][ T4696] ? asswarn (fs/xfs/xfs_message.c:89 (discriminator 1)) xfs
[ 42.419927][ T4696] ? report_bug (lib/bug.c:180 lib/bug.c:219)
[ 42.424498][ T4696] ? handle_bug (arch/x86/kernel/traps.c:239)
[ 42.428892][ T4696] ? exc_invalid_op (arch/x86/kernel/traps.c:260 (discriminator 1))
[ 42.433649][ T4696] ? asm_exc_invalid_op (arch/x86/include/asm/idtentry.h:621)
[ 42.438775][ T4696] ? asswarn (fs/xfs/xfs_message.c:89 (discriminator 1)) xfs
[ 42.443560][ T4696] ? asswarn (fs/xfs/xfs_message.c:89 (discriminator 1)) xfs
[ 42.448341][ T4696] xfs_remove (fs/xfs/xfs_inode.c:2827 (discriminator 1)) xfs
[ 42.453379][ T4696] xfs_vn_unlink (fs/xfs/xfs_iops.c:404) xfs
[ 42.458499][ T4696] vfs_unlink (fs/namei.c:4335)
[ 42.462895][ T4696] do_unlinkat (fs/namei.c:4399 (discriminator 1))
[ 42.467367][ T4696] __x64_sys_unlink (fs/namei.c:4445)
[ 42.472097][ T4696] do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1))
[ 42.476661][ T4696] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
[   42.482607][ T4696] RIP: 0033:0x7ffff7df2a07
[ 42.487068][ T4696] Code: f0 ff ff 73 01 c3 48 8b 0d f6 83 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 57 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d c9 83 0d 00 f7 d8 64 89 01 48
All code
========
   0:   f0 ff                   lock (bad)
   2:   ff 73 01                push   0x1(%rbx)
   5:   c3                      ret
   6:   48 8b 0d f6 83 0d 00    mov    0xd83f6(%rip),%rcx        # 0xd8403
   d:   f7 d8                   neg    %eax
   f:   64 89 01                mov    %eax,%fs:(%rcx)
  12:   48 83 c8 ff             or     $0xffffffffffffffff,%rax
  16:   c3                      ret
  17:   66 2e 0f 1f 84 00 00    cs nopw 0x0(%rax,%rax,1)
  1e:   00 00 00
  21:   66 90                   xchg   %ax,%ax
  23:   b8 57 00 00 00          mov    $0x57,%eax
  28:   0f 05                   syscall
  2a:*  48 3d 01 f0 ff ff       cmp    $0xfffffffffffff001,%rax         <-- trapping instruction
  30:   73 01                   jae    0x33
  32:   c3                      ret
  33:   48 8b 0d c9 83 0d 00    mov    0xd83c9(%rip),%rcx        # 0xd8403
  3a:   f7 d8                   neg    %eax
  3c:   64 89 01                mov    %eax,%fs:(%rcx)
  3f:   48                      rex.W

Code starting with the faulting instruction
===========================================
   0:   48 3d 01 f0 ff ff       cmp    $0xfffffffffffff001,%rax
   6:   73 01                   jae    0x9
   8:   c3                      ret
   9:   48 8b 0d c9 83 0d 00    mov    0xd83c9(%rip),%rcx        # 0xd83d9
  10:   f7 d8                   neg    %eax
  12:   64 89 01                mov    %eax,%fs:(%rcx)
  15:   48                      rex.W
[   42.506970][ T4696] RSP: 002b:00007fffd31fee38 EFLAGS: 00000206 ORIG_RAX: 0000000000000057
[   42.515426][ T4696] RAX: ffffffffffffffda RBX: 00007fffef5b5c08 RCX: 00007ffff7df2a07
[   42.523454][ T4696] RDX: 0000000000000000 RSI: 000000006638299e RDI: 00007fffd31fee60
[   42.531478][ T4696] RBP: 00007fffd31fee60 R08: 0000000000000007 R09: 00007ffed4000b70
[   42.539501][ T4696] R10: 00007fffd31fee00 R11: 0000000000000206 R12: 00007ffed4000b70
[   42.547525][ T4696] R13: 00007ffff55e3ae0 R14: 00007ffff5940c08 R15: 00007fffd2a00000
[   42.555540][ T4696]  </TASK>
[   42.558619][ T4696] ---[ end trace 0000000000000000 ]---
[ 42.564118][ T4696] XFS (sdd1): Internal error xfs_trans_cancel at line 1112 of file fs/xfs/xfs_trans.c. Caller xfs_remove (fs/xfs/xfs_inode.c:2865) xfs
[   42.577304][ T4696] CPU: 28 PID: 4696 Comm: filebench Tainted: G        W          6.9.0-rc4-00238-gcbc81dd4e3e5 #1
[   42.587946][ T4696] Call Trace:
[   42.591293][ T4696]  <TASK>
[ 42.594294][ T4696] dump_stack_lvl (lib/dump_stack.c:117)
[ 42.598846][ T4696] xfs_trans_cancel (fs/xfs/xfs_trans.c:1113) xfs
[ 42.604396][ T4696] xfs_remove (fs/xfs/xfs_inode.c:2865) xfs
[ 42.609419][ T4696] xfs_vn_unlink (fs/xfs/xfs_iops.c:404) xfs
[ 42.614520][ T4696] vfs_unlink (fs/namei.c:4335)
[ 42.618888][ T4696] do_unlinkat (fs/namei.c:4399 (discriminator 1))
[ 42.623340][ T4696] __x64_sys_unlink (fs/namei.c:4445)
[ 42.628051][ T4696] do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1))
[ 42.632585][ T4696] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
[   42.638504][ T4696] RIP: 0033:0x7ffff7df2a07
[ 42.642945][ T4696] Code: f0 ff ff 73 01 c3 48 8b 0d f6 83 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 57 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d c9 83 0d 00 f7 d8 64 89 01 48
All code
========
   0:   f0 ff                   lock (bad)
   2:   ff 73 01                push   0x1(%rbx)
   5:   c3                      ret
   6:   48 8b 0d f6 83 0d 00    mov    0xd83f6(%rip),%rcx        # 0xd8403
   d:   f7 d8                   neg    %eax
   f:   64 89 01                mov    %eax,%fs:(%rcx)
  12:   48 83 c8 ff             or     $0xffffffffffffffff,%rax
  16:   c3                      ret
  17:   66 2e 0f 1f 84 00 00    cs nopw 0x0(%rax,%rax,1)
  1e:   00 00 00
  21:   66 90                   xchg   %ax,%ax
  23:   b8 57 00 00 00          mov    $0x57,%eax
  28:   0f 05                   syscall
  2a:*  48 3d 01 f0 ff ff       cmp    $0xfffffffffffff001,%rax         <-- trapping instruction
  30:   73 01                   jae    0x33
  32:   c3                      ret
  33:   48 8b 0d c9 83 0d 00    mov    0xd83c9(%rip),%rcx        # 0xd8403
  3a:   f7 d8                   neg    %eax
  3c:   64 89 01                mov    %eax,%fs:(%rcx)
  3f:   48                      rex.W

Code starting with the faulting instruction
===========================================
   0:   48 3d 01 f0 ff ff       cmp    $0xfffffffffffff001,%rax
   6:   73 01                   jae    0x9
   8:   c3                      ret
   9:   48 8b 0d c9 83 0d 00    mov    0xd83c9(%rip),%rcx        # 0xd83d9
  10:   f7 d8                   neg    %eax
  12:   64 89 01                mov    %eax,%fs:(%rcx)
  15:   48                      rex.W
[   42.662759][ T4696] RSP: 002b:00007fffd31fee38 EFLAGS: 00000206 ORIG_RAX: 0000000000000057
[   42.671206][ T4696] RAX: ffffffffffffffda RBX: 00007fffef5b5c08 RCX: 00007ffff7df2a07
[   42.679223][ T4696] RDX: 0000000000000000 RSI: 000000006638299e RDI: 00007fffd31fee60
[   42.687242][ T4696] RBP: 00007fffd31fee60 R08: 0000000000000007 R09: 00007ffed4000b70
[   42.695260][ T4696] R10: 00007fffd31fee00 R11: 0000000000000206 R12: 00007ffed4000b70
[   42.703276][ T4696] R13: 00007ffff55e3ae0 R14: 00007ffff5940c08 R15: 00007fffd2a00000
[   42.711295][ T4696]  </TASK>


The kernel config is available at:
https://download.01.org/0day-ci/archive/20240510/202405101352.bcf759f-lkp@intel.com
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 0598465357cc3a..4ba93835dd847f 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -16,18 +16,6 @@ 
 #include "xfs_dir2_priv.h"
 #include "xfs_trace.h"
 
-/*
- * Prototypes for internal functions.
- */
-static void xfs_dir2_sf_addname_easy(xfs_da_args_t *args,
-				     xfs_dir2_sf_entry_t *sfep,
-				     xfs_dir2_data_aoff_t offset,
-				     int new_isize);
-static void xfs_dir2_sf_addname_hard(xfs_da_args_t *args, int objchange,
-				     int new_isize);
-static int xfs_dir2_sf_addname_pick(xfs_da_args_t *args, int objchange,
-				    xfs_dir2_sf_entry_t **sfepp,
-				    xfs_dir2_data_aoff_t *offsetp);
 #ifdef DEBUG
 static void xfs_dir2_sf_check(xfs_da_args_t *args);
 #else
@@ -361,6 +349,61 @@  xfs_dir2_try_block_to_sf(
 	return error;
 }
 
+static xfs_dir2_data_aoff_t
+xfs_dir2_sf_addname_pick_offset(
+	struct xfs_da_args	*args,
+	unsigned int		*byteoffp)
+{
+	struct xfs_inode	*dp = args->dp;
+	struct xfs_mount	*mp = dp->i_mount;
+	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
+	xfs_dir2_data_aoff_t	offset = args->geo->data_first_offset;
+	struct xfs_dir2_sf_entry *sfep = xfs_dir2_sf_firstentry(sfp);
+	unsigned int		data_size =
+		xfs_dir2_data_entsize(mp, args->namelen);
+	unsigned int		data_overhead =
+		xfs_dir2_block_overhead(sfp->count + 1);
+	xfs_dir2_data_aoff_t	hole_offset = 0;
+	unsigned int		byteoff = 0;
+	unsigned int		i;
+
+	/*
+	 * Scan the directory to find the last offset and find a suitable
+	 * hole that we can use if needed.
+	 */
+	for (i = 0; i < sfp->count; i++) {
+		if (!hole_offset &&
+		    offset + data_size <= xfs_dir2_sf_get_offset(sfep)) {
+			hole_offset = offset;
+			byteoff = (void *)sfep - dp->i_df.if_data;
+		}
+		offset = xfs_dir2_sf_get_offset(sfep) +
+			xfs_dir2_data_entsize(mp, sfep->namelen);
+		sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
+	}
+
+	/*
+	 * If just appending the entry would make the offset too large, use the
+	 * first hole that fits it if there is one or else give up and convert
+	 * to block format.
+	 *
+	 * Note that "too large" here is a completely arbitrary limit.  The
+	 * offset is logical concept not related to the physical byteoff and
+	 * there is no fundamental underlying limit to it.  But it has been
+	 * encoded in asserts and verifiers for a long time so we have to
+	 * respect it.
+	 */
+	if (offset + data_size + data_overhead > args->geo->blksize) {
+		if (!hole_offset)
+			return 0;
+		*byteoffp = byteoff;
+		return hole_offset;
+	}
+
+	*byteoffp = dp->i_disk_size;
+	return offset;
+}
+
 static void
 xfs_dir2_sf_addname_common(
 	struct xfs_da_args	*args,
@@ -384,23 +427,29 @@  xfs_dir2_sf_addname_common(
 
 /*
  * Add a name to a shortform directory.
- * There are two algorithms, "easy" and "hard" which we decide on
- * before changing anything.
- * Convert to block form if necessary, if the new entry won't fit.
+ *
+ * Shortform directories are always tighty packed, and we simply allocate
+ * a new higher offset and add the entry at the end.
+ *
+ * While we could search for a hole in the offset space there really isn't
+ * much of a a point in doing so and complicating the algorithm.
+ *
+ * Convert to block form if the new entry won't fit.
  */
-int						/* error */
+int
 xfs_dir2_sf_addname(
-	xfs_da_args_t		*args)		/* operation arguments */
+	struct xfs_da_args	*args)
 {
 	struct xfs_inode	*dp = args->dp;
+	struct xfs_mount	*mp = dp->i_mount;
 	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
-	int			error;		/* error return value */
-	int			incr_isize;	/* total change in size */
-	int			new_isize;	/* size after adding name */
-	int			objchange;	/* changing to 8-byte inodes */
-	xfs_dir2_data_aoff_t	offset = 0;	/* offset for new entry */
-	int			pick;		/* which algorithm to use */
-	xfs_dir2_sf_entry_t	*sfep = NULL;	/* shortform entry */
+	struct xfs_dir2_sf_entry *sfep;
+	xfs_dir2_data_aoff_t	offset;
+	unsigned int		entsize;
+	unsigned int		new_isize;
+	unsigned int		byteoff;
+	bool			objchange = false;
+	int			error;
 
 	trace_xfs_dir2_sf_addname(args);
 
@@ -408,11 +457,12 @@  xfs_dir2_sf_addname(
 	ASSERT(dp->i_df.if_format == XFS_DINODE_FMT_LOCAL);
 	ASSERT(dp->i_df.if_bytes == dp->i_disk_size);
 	ASSERT(dp->i_disk_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
+
 	/*
-	 * Compute entry (and change in) size.
+	 * Compute entry size and new inode size.
 	 */
-	incr_isize = xfs_dir2_sf_entsize(dp->i_mount, sfp, args->namelen);
-	objchange = 0;
+	entsize = xfs_dir2_sf_entsize(mp, sfp, args->namelen);
+	new_isize = dp->i_disk_size + entsize;
 
 	/*
 	 * Do we have to change to 8 byte inodes?
@@ -421,19 +471,17 @@  xfs_dir2_sf_addname(
 		/*
 		 * Yes, adjust the inode size.  old count + (parent + new)
 		 */
-		incr_isize += (sfp->count + 2) * XFS_INO64_DIFF;
-		objchange = 1;
+		new_isize += (sfp->count + 2) * XFS_INO64_DIFF;
+		objchange = true;
 	}
 
-	new_isize = (int)dp->i_disk_size + incr_isize;
-
 	/*
 	 * Too large to fit into the inode fork or too large offset?
 	 */
 	if (new_isize > xfs_inode_data_fork_size(dp))
 		goto convert;
-	pick = xfs_dir2_sf_addname_pick(args, objchange, &sfep, &offset);
-	if (pick == 0)
+	offset = xfs_dir2_sf_addname_pick_offset(args, &byteoff);
+	if (!offset)
 		goto convert;
 
 	/*
@@ -451,20 +499,17 @@  xfs_dir2_sf_addname(
 		return 0;
 	}
 
+	sfp = xfs_idata_realloc(dp, entsize, XFS_DATA_FORK);
+	sfep = dp->i_df.if_data + byteoff;
+
 	/*
-	 * Do it the easy way - just add it at the end.
-	 */
-	if (pick == 1)
-		xfs_dir2_sf_addname_easy(args, sfep, offset, new_isize);
-	/*
-	 * Do it the hard way - look for a place to insert the new entry.
-	 * Convert to 8 byte inode numbers first if necessary.
+	 * If we're inserting in the middle, move the tail out of the way first.
 	 */
-	else {
-		ASSERT(pick == 2);
-		xfs_dir2_sf_addname_hard(args, objchange, new_isize);
+	if (byteoff < dp->i_disk_size) {
+		memmove(sfep, (void *)sfep + entsize,
+			dp->i_disk_size - byteoff);
 	}
-
+	xfs_dir2_sf_addname_common(args, sfep, offset, objchange);
 	dp->i_disk_size = new_isize;
 	xfs_dir2_sf_check(args);
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
@@ -482,192 +527,6 @@  xfs_dir2_sf_addname(
 	return xfs_dir2_block_addname(args);
 }
 
-/*
- * Add the new entry the "easy" way.
- * This is copying the old directory and adding the new entry at the end.
- * Since it's sorted by "offset" we need room after the last offset
- * that's already there, and then room to convert to a block directory.
- * This is already checked by the pick routine.
- */
-static void
-xfs_dir2_sf_addname_easy(
-	xfs_da_args_t		*args,		/* operation arguments */
-	xfs_dir2_sf_entry_t	*sfep,		/* pointer to new entry */
-	xfs_dir2_data_aoff_t	offset,		/* offset to use for new ent */
-	int			new_isize)	/* new directory size */
-{
-	struct xfs_inode	*dp = args->dp;
-	struct xfs_mount	*mp = dp->i_mount;
-	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
-	int			byteoff = (int)((char *)sfep - (char *)sfp);
-
-	/*
-	 * Grow the in-inode space.
-	 */
-	sfp = xfs_idata_realloc(dp, xfs_dir2_sf_entsize(mp, sfp, args->namelen),
-			  XFS_DATA_FORK);
-	/*
-	 * Need to set up again due to realloc of the inode data.
-	 */
-	sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff);
-	xfs_dir2_sf_addname_common(args, sfep, offset, false);
-}
-
-/*
- * Add the new entry the "hard" way.
- * The caller has already converted to 8 byte inode numbers if necessary,
- * in which case we need to leave the i8count at 1.
- * Find a hole that the new entry will fit into, and copy
- * the first part of the entries, the new entry, and the last part of
- * the entries.
- */
-/* ARGSUSED */
-static void
-xfs_dir2_sf_addname_hard(
-	xfs_da_args_t		*args,		/* operation arguments */
-	int			objchange,	/* changing inode number size */
-	int			new_isize)	/* new directory size */
-{
-	struct xfs_inode	*dp = args->dp;
-	struct xfs_mount	*mp = dp->i_mount;
-	int			add_datasize;	/* data size need for new ent */
-	char			*buf;		/* buffer for old */
-	int			eof;		/* reached end of old dir */
-	int			nbytes;		/* temp for byte copies */
-	xfs_dir2_data_aoff_t	new_offset;	/* next offset value */
-	xfs_dir2_data_aoff_t	offset;		/* current offset value */
-	int			old_isize;	/* previous size */
-	xfs_dir2_sf_entry_t	*oldsfep;	/* entry in original dir */
-	xfs_dir2_sf_hdr_t	*oldsfp;	/* original shortform dir */
-	xfs_dir2_sf_entry_t	*sfep;		/* entry in new dir */
-	xfs_dir2_sf_hdr_t	*sfp;		/* new shortform dir */
-
-	/*
-	 * Copy the old directory to the stack buffer.
-	 */
-	old_isize = (int)dp->i_disk_size;
-	buf = kmalloc(old_isize, GFP_KERNEL | __GFP_NOFAIL);
-	oldsfp = (xfs_dir2_sf_hdr_t *)buf;
-	memcpy(oldsfp, dp->i_df.if_data, old_isize);
-	/*
-	 * Loop over the old directory finding the place we're going
-	 * to insert the new entry.
-	 * If it's going to end up at the end then oldsfep will point there.
-	 */
-	for (offset = args->geo->data_first_offset,
-	      oldsfep = xfs_dir2_sf_firstentry(oldsfp),
-	      add_datasize = xfs_dir2_data_entsize(mp, args->namelen),
-	      eof = (char *)oldsfep == &buf[old_isize];
-	     !eof;
-	     offset = new_offset + xfs_dir2_data_entsize(mp, oldsfep->namelen),
-	      oldsfep = xfs_dir2_sf_nextentry(mp, oldsfp, oldsfep),
-	      eof = (char *)oldsfep == &buf[old_isize]) {
-		new_offset = xfs_dir2_sf_get_offset(oldsfep);
-		if (offset + add_datasize <= new_offset)
-			break;
-	}
-	/*
-	 * Get rid of the old directory, then allocate space for
-	 * the new one.  We do this so xfs_idata_realloc won't copy
-	 * the data.
-	 */
-	xfs_idata_realloc(dp, -old_isize, XFS_DATA_FORK);
-	sfp = xfs_idata_realloc(dp, new_isize, XFS_DATA_FORK);
-
-	/*
-	 * Copy the first part of the directory, including the header.
-	 */
-	nbytes = (int)((char *)oldsfep - (char *)oldsfp);
-	memcpy(sfp, oldsfp, nbytes);
-	sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + nbytes);
-
-	/*
-	 * Fill in the new entry, and update the header counts.
-	 */
-	xfs_dir2_sf_addname_common(args, sfep, offset, objchange);
-
-	/*
-	 * If there's more left to copy, do that.
-	 */
-	if (!eof) {
-		sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
-		memcpy(sfep, oldsfep, old_isize - nbytes);
-	}
-	kfree(buf);
-}
-
-/*
- * Decide if the new entry will fit at all.
- * If it will fit, pick between adding the new entry to the end (easy)
- * or somewhere else (hard).
- * Return 0 (won't fit), 1 (easy), 2 (hard).
- */
-/*ARGSUSED*/
-static int					/* pick result */
-xfs_dir2_sf_addname_pick(
-	xfs_da_args_t		*args,		/* operation arguments */
-	int			objchange,	/* inode # size changes */
-	xfs_dir2_sf_entry_t	**sfepp,	/* out(1): new entry ptr */
-	xfs_dir2_data_aoff_t	*offsetp)	/* out(1): new offset */
-{
-	struct xfs_inode	*dp = args->dp;
-	struct xfs_mount	*mp = dp->i_mount;
-	int			holefit;	/* found hole it will fit in */
-	int			i;		/* entry number */
-	xfs_dir2_data_aoff_t	offset;		/* data block offset */
-	xfs_dir2_sf_entry_t	*sfep;		/* shortform entry */
-	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
-	int			size;		/* entry's data size */
-	int			used;		/* data bytes used */
-
-	size = xfs_dir2_data_entsize(mp, args->namelen);
-	offset = args->geo->data_first_offset;
-	sfep = xfs_dir2_sf_firstentry(sfp);
-	holefit = 0;
-	/*
-	 * Loop over sf entries.
-	 * Keep track of data offset and whether we've seen a place
-	 * to insert the new entry.
-	 */
-	for (i = 0; i < sfp->count; i++) {
-		if (!holefit)
-			holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
-		if (holefit)
-			*offsetp = offset;
-		offset = xfs_dir2_sf_get_offset(sfep) +
-			 xfs_dir2_data_entsize(mp, sfep->namelen);
-		sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
-	}
-	/*
-	 * Calculate data bytes used excluding the new entry, if this
-	 * was a data block (block form directory).
-	 */
-	used = offset + xfs_dir2_block_overhead(sfp->count + 1);
-	/*
-	 * If it won't fit in a block form then we can't insert it,
-	 * we'll go back, convert to block, then try the insert and convert
-	 * to leaf.
-	 */
-	if (used + (holefit ? 0 : size) > args->geo->blksize)
-		return 0;
-	/*
-	 * If changing the inode number size, do it the hard way.
-	 */
-	if (objchange)
-		return 2;
-	/*
-	 * If it won't fit at the end then do it the hard way (use the hole).
-	 */
-	if (used + size > args->geo->blksize)
-		return 2;
-	/*
-	 * Do it the easy way.
-	 */
-	*sfepp = sfep;
-	*offsetp = offset;
-	return 1;
-}
-
 #ifdef DEBUG
 /*
  * Check consistency of shortform directory, assert if bad.