[1/7] xfs: zero length symlinks are not valid
diff mbox series

Message ID 20181119210459.8506-2-david@fromorbit.com
State Accepted
Headers show
Series
  • xfs: various fixes for 4.20
Related show

Commit Message

Dave Chinner Nov. 19, 2018, 9:04 p.m. UTC
From: Dave Chinner <dchinner@redhat.com>

A log recovery failure has been reproduced where a symlink inode has
a zero length in extent form. It was caused by a shutdown during a
combined fstress+fsmark workload.

The underlying problem is the issue in xfs_inactive_symlink(): the
inode is unlocked between the symlink inactivation/truncation and
the inode being freed. This opens a window for the inode to be
written to disk before it xfs_ifree() removes it from the unlinked
list, marks it free in the inobt and zeros the mode.

For shortform inodes, the fix is simple. xfs_ifree() clears the data
fork state, so there's no need to do it in xfs_inactive_symlink().
This means the shortform fork verifier will not see a zero length
data fork as it mirrors the inode size through to xfs_ifree()), and
hence if the inode gets written back and the fork verifiers are run
they will still see a fork that matches the on-disk inode size.

For extent form (remote) symlinks, it is a little more tricky. Here
we explicitly set the inode size to zero, so the above race can lead
to zero length symlinks on disk. Because the inode is unlinked at
this point (i.e. on the unlinked list) and unreferenced, it can
never be seen again by a user. Hence when we set the inode size to
zeor, also change the type to S_IFREG. xfs_ifree() expects S_IFREG
inodes to be of zero length, and so this avoids all the problems of
zero length symlinks ever hitting the disk. It also avoids the
problem of needing to handle zero length symlink inodes in log
recovery to replay the extent free intents and the remaining
deferops to free the extents the symlink used.

Also add a couple of asserts to warn us if zero length symlinks end
up in either the symlink create or inactivation paths.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/libxfs/xfs_symlink_remote.c | 14 +++++++++----
 fs/xfs/xfs_symlink.c               | 33 +++++++++++++++---------------
 2 files changed, 27 insertions(+), 20 deletions(-)

Comments

hch@infradead.org Nov. 20, 2018, 8:12 a.m. UTC | #1
>  	/*
> -	 * Lock the inode, fix the size, and join it to the transaction.
> -	 * Hold it so in the normal path, we still have it locked for
> -	 * the second transaction.  In the error paths we need it
> +	 * Lock the inode, fix the size, turn it into a regular file and join it
> +	 * to the transaction.  Hold it so in the normal path, we still have it
> +	 * locked for the second transaction.  In the error paths we need it
>  	 * held so the cancel won't rele it, see below.
>  	 */

Most of this comment seems rather pointless as it explains the how and
not the why.  Additionally it is out of data (we don't hold inodes for
transactions anymore), and partially below the code it claims to
document.  Given that you explain the why pretty well in the top of the
function document I'd drop this entirely.

> +	/*
> +	 * Inline fork state gets removed by xfs_difree() so we have nothing to
> +	 * do here in that case.
> +	 */
>  	if (ip->i_df.if_flags & XFS_IFINLINE) {
> -		if (ip->i_df.if_bytes > 0) 
> -			xfs_idata_realloc(ip, -(ip->i_df.if_bytes),
> -					  XFS_DATA_FORK);
>  		xfs_iunlock(ip, XFS_ILOCK_EXCL);
> -		ASSERT(ip->i_df.if_bytes == 0);
>  		return 0;
>  	}

The ilock here is rather pointless now that we do nothing underneath
it but checking two different integral values.

Something like this additional cleanup might be nice now, with
the function split being rather pointless now:

diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index b2c1177c717f..8e2e12e34098 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -386,29 +386,41 @@ xfs_symlink(
  * userspace cannot find this inode anymore, so this change is not user visible
  * but allows us to catch corrupt zero-length symlinks in the verifiers.
  */
-STATIC int
-xfs_inactive_symlink_rmt(
-	struct xfs_inode *ip)
+
+int
+xfs_inactive_symlink(
+	struct xfs_inode	*ip)
 {
-	xfs_buf_t	*bp;
-	int		done;
-	int		error;
-	int		i;
-	xfs_mount_t	*mp;
-	xfs_bmbt_irec_t	mval[XFS_SYMLINK_MAPS];
-	int		nmaps;
-	int		size;
-	xfs_trans_t	*tp;
-
-	mp = ip->i_mount;
-	ASSERT(ip->i_df.if_flags & XFS_IFEXTENTS);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_buf		*bp;
+	int			done;
+	int			error;
+	int			i;
+	struct xfs_bmbt_irec	mval[XFS_SYMLINK_MAPS];
+	int			nmaps;
+	int			size;
+	struct xfs_trans	*tp;
+
+	trace_xfs_inactive_symlink(ip);
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
+	ASSERT(ip->i_d.di_size > 0 && ip->i_d.di_size <= XFS_SYMLINK_MAXLEN);
+
 	/*
-	 * We're freeing a symlink that has some
-	 * blocks allocated to it.  Free the
-	 * blocks here.  We know that we've got
-	 * either 1 or 2 extents and that we can
-	 * free them all in one bunmapi call.
+	 * Inline fork state gets removed by xfs_difree() so we have nothing to
+	 * do here in that case.
 	 */
+	if (ip->i_df.if_flags & XFS_IFINLINE)
+		return 0;
+
+	/*
+	 * We're freeing a symlink that has some blocks allocated to it.  Free
+	 * the blocks here.  We know that we've got either 1 or 2 extents and
+	 * that we can free them all in one bunmapi call.
+	 */
+	ASSERT(ip->i_df.if_flags & XFS_IFEXTENTS);
 	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
@@ -419,10 +431,7 @@ xfs_inactive_symlink_rmt(
 	xfs_trans_ijoin(tp, ip, 0);
 
 	/*
-	 * Lock the inode, fix the size, turn it into a regular file and join it
-	 * to the transaction.  Hold it so in the normal path, we still have it
-	 * locked for the second transaction.  In the error paths we need it
-	 * held so the cancel won't rele it, see below.
+	 * Fix the size, turn it into a regular file in the same transaction.
 	 */
 	size = (int)ip->i_d.di_size;
 	ip->i_d.di_size = 0;
@@ -485,45 +494,3 @@ xfs_inactive_symlink_rmt(
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
-
-/*
- * xfs_inactive_symlink - free a symlink
- */
-int
-xfs_inactive_symlink(
-	struct xfs_inode	*ip)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	int			pathlen;
-
-	trace_xfs_inactive_symlink(ip);
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return -EIO;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	pathlen = (int)ip->i_d.di_size;
-	ASSERT(pathlen);
-
-	if (pathlen <= 0 || pathlen > XFS_SYMLINK_MAXLEN) {
-		xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)",
-			 __func__, (unsigned long long)ip->i_ino, pathlen);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		ASSERT(0);
-		return -EFSCORRUPTED;
-	}
-
-	/*
-	 * Inline fork state gets removed by xfs_difree() so we have nothing to
-	 * do here in that case.
-	 */
-	if (ip->i_df.if_flags & XFS_IFINLINE) {
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		return 0;
-	}
-
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-	/* remove the remote symlink */
-	return xfs_inactive_symlink_rmt(ip);
-}
Brian Foster Nov. 20, 2018, 1:44 p.m. UTC | #2
On Tue, Nov 20, 2018 at 08:04:53AM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> A log recovery failure has been reproduced where a symlink inode has
> a zero length in extent form. It was caused by a shutdown during a
> combined fstress+fsmark workload.
> 
> The underlying problem is the issue in xfs_inactive_symlink(): the
> inode is unlocked between the symlink inactivation/truncation and
> the inode being freed. This opens a window for the inode to be
> written to disk before it xfs_ifree() removes it from the unlinked
> list, marks it free in the inobt and zeros the mode.
> 
> For shortform inodes, the fix is simple. xfs_ifree() clears the data
> fork state, so there's no need to do it in xfs_inactive_symlink().
> This means the shortform fork verifier will not see a zero length
> data fork as it mirrors the inode size through to xfs_ifree()), and
> hence if the inode gets written back and the fork verifiers are run
> they will still see a fork that matches the on-disk inode size.
> 
> For extent form (remote) symlinks, it is a little more tricky. Here
> we explicitly set the inode size to zero, so the above race can lead
> to zero length symlinks on disk. Because the inode is unlinked at
> this point (i.e. on the unlinked list) and unreferenced, it can
> never be seen again by a user. Hence when we set the inode size to
> zeor, also change the type to S_IFREG. xfs_ifree() expects S_IFREG
> inodes to be of zero length, and so this avoids all the problems of
> zero length symlinks ever hitting the disk. It also avoids the
> problem of needing to handle zero length symlink inodes in log
> recovery to replay the extent free intents and the remaining
> deferops to free the extents the symlink used.
> 
> Also add a couple of asserts to warn us if zero length symlinks end
> up in either the symlink create or inactivation paths.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---

Hmm, I saw this and thought this was something we had already fixed.
Looking back, I see this was actually posted[1] months ago and there was
a fairly nuanced discussion. On a quick skim, that appears to have
concluded with the patch being mostly sane, but requiring a couple minor
tweaks and commit log updates. This patch looks exactly the same to me,
however. Hm?

Brian

[1] https://marc.info/?l=linux-xfs&m=152930146405930&w=2

>  fs/xfs/libxfs/xfs_symlink_remote.c | 14 +++++++++----
>  fs/xfs/xfs_symlink.c               | 33 +++++++++++++++---------------
>  2 files changed, 27 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
> index 95374ab2dee7..77d80106f989 100644
> --- a/fs/xfs/libxfs/xfs_symlink_remote.c
> +++ b/fs/xfs/libxfs/xfs_symlink_remote.c
> @@ -199,7 +199,10 @@ xfs_symlink_local_to_remote(
>  					ifp->if_bytes - 1);
>  }
>  
> -/* Verify the consistency of an inline symlink. */
> +/*
> + * Verify the in-memory consistency of an inline symlink data fork. This
> + * does not do on-disk format checks.
> + */
>  xfs_failaddr_t
>  xfs_symlink_shortform_verify(
>  	struct xfs_inode	*ip)
> @@ -215,9 +218,12 @@ xfs_symlink_shortform_verify(
>  	size = ifp->if_bytes;
>  	endp = sfp + size;
>  
> -	/* Zero length symlinks can exist while we're deleting a remote one. */
> -	if (size == 0)
> -		return NULL;
> +	/*
> +	 * Zero length symlinks should never occur in memory as they are
> +	 * never alllowed to exist on disk.
> +	 */
> +	if (!size)
> +		return __this_address;
>  
>  	/* No negative sizes or overly long symlink targets. */
>  	if (size < 0 || size > XFS_SYMLINK_MAXLEN)
> diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
> index a3e98c64b6e3..b2c1177c717f 100644
> --- a/fs/xfs/xfs_symlink.c
> +++ b/fs/xfs/xfs_symlink.c
> @@ -192,6 +192,7 @@ xfs_symlink(
>  	pathlen = strlen(target_path);
>  	if (pathlen >= XFS_SYMLINK_MAXLEN)      /* total string too long */
>  		return -ENAMETOOLONG;
> +	ASSERT(pathlen > 0);
>  
>  	udqp = gdqp = NULL;
>  	prid = xfs_get_initial_prid(dp);
> @@ -378,6 +379,12 @@ xfs_symlink(
>  
>  /*
>   * Free a symlink that has blocks associated with it.
> + *
> + * Note: zero length symlinks are not allowed to exist. When we set the size to
> + * zero, also change it to a regular file so that it does not get written to
> + * disk as a zero length symlink. The inode is on the unlinked list already, so
> + * userspace cannot find this inode anymore, so this change is not user visible
> + * but allows us to catch corrupt zero-length symlinks in the verifiers.
>   */
>  STATIC int
>  xfs_inactive_symlink_rmt(
> @@ -412,13 +419,14 @@ xfs_inactive_symlink_rmt(
>  	xfs_trans_ijoin(tp, ip, 0);
>  
>  	/*
> -	 * Lock the inode, fix the size, and join it to the transaction.
> -	 * Hold it so in the normal path, we still have it locked for
> -	 * the second transaction.  In the error paths we need it
> +	 * Lock the inode, fix the size, turn it into a regular file and join it
> +	 * to the transaction.  Hold it so in the normal path, we still have it
> +	 * locked for the second transaction.  In the error paths we need it
>  	 * held so the cancel won't rele it, see below.
>  	 */
>  	size = (int)ip->i_d.di_size;
>  	ip->i_d.di_size = 0;
> +	VFS_I(ip)->i_mode = (VFS_I(ip)->i_mode & ~S_IFMT) | S_IFREG;
>  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
>  	/*
>  	 * Find the block(s) so we can inval and unmap them.
> @@ -494,17 +502,10 @@ xfs_inactive_symlink(
>  		return -EIO;
>  
>  	xfs_ilock(ip, XFS_ILOCK_EXCL);
> -
> -	/*
> -	 * Zero length symlinks _can_ exist.
> -	 */
>  	pathlen = (int)ip->i_d.di_size;
> -	if (!pathlen) {
> -		xfs_iunlock(ip, XFS_ILOCK_EXCL);
> -		return 0;
> -	}
> +	ASSERT(pathlen);
>  
> -	if (pathlen < 0 || pathlen > XFS_SYMLINK_MAXLEN) {
> +	if (pathlen <= 0 || pathlen > XFS_SYMLINK_MAXLEN) {
>  		xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)",
>  			 __func__, (unsigned long long)ip->i_ino, pathlen);
>  		xfs_iunlock(ip, XFS_ILOCK_EXCL);
> @@ -512,12 +513,12 @@ xfs_inactive_symlink(
>  		return -EFSCORRUPTED;
>  	}
>  
> +	/*
> +	 * Inline fork state gets removed by xfs_difree() so we have nothing to
> +	 * do here in that case.
> +	 */
>  	if (ip->i_df.if_flags & XFS_IFINLINE) {
> -		if (ip->i_df.if_bytes > 0) 
> -			xfs_idata_realloc(ip, -(ip->i_df.if_bytes),
> -					  XFS_DATA_FORK);
>  		xfs_iunlock(ip, XFS_ILOCK_EXCL);
> -		ASSERT(ip->i_df.if_bytes == 0);
>  		return 0;
>  	}
>  
> -- 
> 2.19.1
>
Dave Chinner Nov. 20, 2018, 9:19 p.m. UTC | #3
On Tue, Nov 20, 2018 at 08:44:39AM -0500, Brian Foster wrote:
> On Tue, Nov 20, 2018 at 08:04:53AM +1100, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > A log recovery failure has been reproduced where a symlink inode has
> > a zero length in extent form. It was caused by a shutdown during a
> > combined fstress+fsmark workload.
> > 
> > The underlying problem is the issue in xfs_inactive_symlink(): the
> > inode is unlocked between the symlink inactivation/truncation and
> > the inode being freed. This opens a window for the inode to be
> > written to disk before it xfs_ifree() removes it from the unlinked
> > list, marks it free in the inobt and zeros the mode.
> > 
> > For shortform inodes, the fix is simple. xfs_ifree() clears the data
> > fork state, so there's no need to do it in xfs_inactive_symlink().
> > This means the shortform fork verifier will not see a zero length
> > data fork as it mirrors the inode size through to xfs_ifree()), and
> > hence if the inode gets written back and the fork verifiers are run
> > they will still see a fork that matches the on-disk inode size.
> > 
> > For extent form (remote) symlinks, it is a little more tricky. Here
> > we explicitly set the inode size to zero, so the above race can lead
> > to zero length symlinks on disk. Because the inode is unlinked at
> > this point (i.e. on the unlinked list) and unreferenced, it can
> > never be seen again by a user. Hence when we set the inode size to
> > zeor, also change the type to S_IFREG. xfs_ifree() expects S_IFREG
> > inodes to be of zero length, and so this avoids all the problems of
> > zero length symlinks ever hitting the disk. It also avoids the
> > problem of needing to handle zero length symlink inodes in log
> > recovery to replay the extent free intents and the remaining
> > deferops to free the extents the symlink used.
> > 
> > Also add a couple of asserts to warn us if zero length symlinks end
> > up in either the symlink create or inactivation paths.
> > 
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > ---
> 
> Hmm, I saw this and thought this was something we had already fixed.
> Looking back, I see this was actually posted[1] months ago and there was
> a fairly nuanced discussion. On a quick skim, that appears to have
> concluded with the patch being mostly sane, but requiring a couple minor
> tweaks and commit log updates. This patch looks exactly the same to me,
> however. Hm?

Hmmm, I did all that, months ago. The code barely changed, it was
all just commit message updates. maybe I picked up the wrong version
of the patch - it had been sitting around for a while. I had even
gone back and checked the discussion before concluding that "it 
doesn't need code changes" before adding it to this stack...

I'll go back and see if I can find a more recent version...

Cheers,

Dave.
Brian Foster Nov. 21, 2018, 12:01 p.m. UTC | #4
On Wed, Nov 21, 2018 at 08:19:18AM +1100, Dave Chinner wrote:
> On Tue, Nov 20, 2018 at 08:44:39AM -0500, Brian Foster wrote:
> > On Tue, Nov 20, 2018 at 08:04:53AM +1100, Dave Chinner wrote:
> > > From: Dave Chinner <dchinner@redhat.com>
> > > 
> > > A log recovery failure has been reproduced where a symlink inode has
> > > a zero length in extent form. It was caused by a shutdown during a
> > > combined fstress+fsmark workload.
> > > 
> > > The underlying problem is the issue in xfs_inactive_symlink(): the
> > > inode is unlocked between the symlink inactivation/truncation and
> > > the inode being freed. This opens a window for the inode to be
> > > written to disk before it xfs_ifree() removes it from the unlinked
> > > list, marks it free in the inobt and zeros the mode.
> > > 
> > > For shortform inodes, the fix is simple. xfs_ifree() clears the data
> > > fork state, so there's no need to do it in xfs_inactive_symlink().
> > > This means the shortform fork verifier will not see a zero length
> > > data fork as it mirrors the inode size through to xfs_ifree()), and
> > > hence if the inode gets written back and the fork verifiers are run
> > > they will still see a fork that matches the on-disk inode size.
> > > 
> > > For extent form (remote) symlinks, it is a little more tricky. Here
> > > we explicitly set the inode size to zero, so the above race can lead
> > > to zero length symlinks on disk. Because the inode is unlinked at
> > > this point (i.e. on the unlinked list) and unreferenced, it can
> > > never be seen again by a user. Hence when we set the inode size to
> > > zeor, also change the type to S_IFREG. xfs_ifree() expects S_IFREG
> > > inodes to be of zero length, and so this avoids all the problems of
> > > zero length symlinks ever hitting the disk. It also avoids the
> > > problem of needing to handle zero length symlink inodes in log
> > > recovery to replay the extent free intents and the remaining
> > > deferops to free the extents the symlink used.
> > > 
> > > Also add a couple of asserts to warn us if zero length symlinks end
> > > up in either the symlink create or inactivation paths.
> > > 
> > > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > > ---
> > 
> > Hmm, I saw this and thought this was something we had already fixed.
> > Looking back, I see this was actually posted[1] months ago and there was
> > a fairly nuanced discussion. On a quick skim, that appears to have
> > concluded with the patch being mostly sane, but requiring a couple minor
> > tweaks and commit log updates. This patch looks exactly the same to me,
> > however. Hm?
> 
> Hmmm, I did all that, months ago. The code barely changed, it was
> all just commit message updates. maybe I picked up the wrong version
> of the patch - it had been sitting around for a while. I had even
> gone back and checked the discussion before concluding that "it 
> doesn't need code changes" before adding it to this stack...
> 
> I'll go back and see if I can find a more recent version...
> 

It was mostly commit log changes except for an
s/xfs_difree()/xfs_ifree()/ needed in the code comment. IIRC, there was
additional context worth describing for log recovery in the commit log.

Brian

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com

Patch
diff mbox series

diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index 95374ab2dee7..77d80106f989 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -199,7 +199,10 @@  xfs_symlink_local_to_remote(
 					ifp->if_bytes - 1);
 }
 
-/* Verify the consistency of an inline symlink. */
+/*
+ * Verify the in-memory consistency of an inline symlink data fork. This
+ * does not do on-disk format checks.
+ */
 xfs_failaddr_t
 xfs_symlink_shortform_verify(
 	struct xfs_inode	*ip)
@@ -215,9 +218,12 @@  xfs_symlink_shortform_verify(
 	size = ifp->if_bytes;
 	endp = sfp + size;
 
-	/* Zero length symlinks can exist while we're deleting a remote one. */
-	if (size == 0)
-		return NULL;
+	/*
+	 * Zero length symlinks should never occur in memory as they are
+	 * never alllowed to exist on disk.
+	 */
+	if (!size)
+		return __this_address;
 
 	/* No negative sizes or overly long symlink targets. */
 	if (size < 0 || size > XFS_SYMLINK_MAXLEN)
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index a3e98c64b6e3..b2c1177c717f 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -192,6 +192,7 @@  xfs_symlink(
 	pathlen = strlen(target_path);
 	if (pathlen >= XFS_SYMLINK_MAXLEN)      /* total string too long */
 		return -ENAMETOOLONG;
+	ASSERT(pathlen > 0);
 
 	udqp = gdqp = NULL;
 	prid = xfs_get_initial_prid(dp);
@@ -378,6 +379,12 @@  xfs_symlink(
 
 /*
  * Free a symlink that has blocks associated with it.
+ *
+ * Note: zero length symlinks are not allowed to exist. When we set the size to
+ * zero, also change it to a regular file so that it does not get written to
+ * disk as a zero length symlink. The inode is on the unlinked list already, so
+ * userspace cannot find this inode anymore, so this change is not user visible
+ * but allows us to catch corrupt zero-length symlinks in the verifiers.
  */
 STATIC int
 xfs_inactive_symlink_rmt(
@@ -412,13 +419,14 @@  xfs_inactive_symlink_rmt(
 	xfs_trans_ijoin(tp, ip, 0);
 
 	/*
-	 * Lock the inode, fix the size, and join it to the transaction.
-	 * Hold it so in the normal path, we still have it locked for
-	 * the second transaction.  In the error paths we need it
+	 * Lock the inode, fix the size, turn it into a regular file and join it
+	 * to the transaction.  Hold it so in the normal path, we still have it
+	 * locked for the second transaction.  In the error paths we need it
 	 * held so the cancel won't rele it, see below.
 	 */
 	size = (int)ip->i_d.di_size;
 	ip->i_d.di_size = 0;
+	VFS_I(ip)->i_mode = (VFS_I(ip)->i_mode & ~S_IFMT) | S_IFREG;
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 	/*
 	 * Find the block(s) so we can inval and unmap them.
@@ -494,17 +502,10 @@  xfs_inactive_symlink(
 		return -EIO;
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	/*
-	 * Zero length symlinks _can_ exist.
-	 */
 	pathlen = (int)ip->i_d.di_size;
-	if (!pathlen) {
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		return 0;
-	}
+	ASSERT(pathlen);
 
-	if (pathlen < 0 || pathlen > XFS_SYMLINK_MAXLEN) {
+	if (pathlen <= 0 || pathlen > XFS_SYMLINK_MAXLEN) {
 		xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)",
 			 __func__, (unsigned long long)ip->i_ino, pathlen);
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -512,12 +513,12 @@  xfs_inactive_symlink(
 		return -EFSCORRUPTED;
 	}
 
+	/*
+	 * Inline fork state gets removed by xfs_difree() so we have nothing to
+	 * do here in that case.
+	 */
 	if (ip->i_df.if_flags & XFS_IFINLINE) {
-		if (ip->i_df.if_bytes > 0) 
-			xfs_idata_realloc(ip, -(ip->i_df.if_bytes),
-					  XFS_DATA_FORK);
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		ASSERT(ip->i_df.if_bytes == 0);
 		return 0;
 	}