[v4] xfs: Fix deadlock between AGI and AGF when target_ip exists in xfs_rename()
diff mbox series

Message ID 1573557210-6241-1-git-send-email-kaixuxia@tencent.com
State Accepted
Headers show
Series
  • [v4] xfs: Fix deadlock between AGI and AGF when target_ip exists in xfs_rename()
Related show

Commit Message

kaixuxia Nov. 12, 2019, 11:13 a.m. UTC
When target_ip exists in xfs_rename(), the xfs_dir_replace() call may
need to take the AGF lock to allocate more blocks, and the subsequent
xfs_droplink() call then takes the AGI lock to drop target_ip onto the
unlinked list, so we get the lock order AGF->AGI. This breaks the
ordering constraint on AGI and AGF locking - inode allocation locks
the AGI, then can allocate a new extent for new inodes, locking the
AGF after the AGI.

In this patch we first check whether the replace operation needs more
blocks. If so, we acquire the AGI lock first to preserve the locking
order (AGI -> AGF). Actually, the locking order problem only
occurs when we are locking the AGI/AGF of the same AG. For multiple
AGs the AGI lock will be released after the transaction is committed.

Signed-off-by: kaixuxia <kaixuxia@tencent.com>
---
Changes in v4:
 -Remove the typedef usages.
 -Invoke xfs_dir2_sf_replace_needblock() in
  xfs_dir2_sf_replace() directly.

 fs/xfs/libxfs/xfs_dir2.h    |  2 ++
 fs/xfs/libxfs/xfs_dir2_sf.c | 28 +++++++++++++++++++++++-----
 fs/xfs/xfs_inode.c          | 15 +++++++++++++++
 3 files changed, 40 insertions(+), 5 deletions(-)

Comments

Darrick J. Wong Nov. 12, 2019, 4:34 p.m. UTC | #1
On Tue, Nov 12, 2019 at 07:13:30PM +0800, kaixuxia wrote:
> When target_ip exists in xfs_rename(), the xfs_dir_replace() call may
> need to hold the AGF lock to allocate more blocks, and then invoking
> the xfs_droplink() call to hold AGI lock to drop target_ip onto the
> unlinked list, so we get the lock order AGF->AGI. This would break the
> ordering constraint on AGI and AGF locking - inode allocation locks
> the AGI, then can allocate a new extent for new inodes, locking the
> AGF after the AGI.
> 
> In this patch we check whether the replace operation need more
> blocks firstly. If so, acquire the agi lock firstly to preserve
> locking order(AGI/AGF). Actually, the locking order problem only
> occurs when we are locking the AGI/AGF of the same AG. For multiple
> AGs the AGI lock will be released after the transaction committed.
> 
> Signed-off-by: kaixuxia <kaixuxia@tencent.com>
> ---
> Changes in v4:
>  -Remove the typedef usages.
>  -Invoke xfs_dir2_sf_replace_needblock() in
>   xfs_dir2_sf_replace() directly.
> 
>  fs/xfs/libxfs/xfs_dir2.h    |  2 ++
>  fs/xfs/libxfs/xfs_dir2_sf.c | 28 +++++++++++++++++++++++-----
>  fs/xfs/xfs_inode.c          | 15 +++++++++++++++
>  3 files changed, 40 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
> index f542447..01b1722 100644
> --- a/fs/xfs/libxfs/xfs_dir2.h
> +++ b/fs/xfs/libxfs/xfs_dir2.h
> @@ -124,6 +124,8 @@ extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
>  extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
>  				struct xfs_name *name, xfs_ino_t ino,
>  				xfs_extlen_t tot);
> +extern bool xfs_dir2_sf_replace_needblock(struct xfs_inode *dp,
> +				xfs_ino_t inum);
>  extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
>  				struct xfs_name *name, xfs_ino_t inum,
>  				xfs_extlen_t tot);
> diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
> index 85f14fc..0e112e1 100644
> --- a/fs/xfs/libxfs/xfs_dir2_sf.c
> +++ b/fs/xfs/libxfs/xfs_dir2_sf.c
> @@ -945,6 +945,27 @@ static int xfs_dir2_sf_addname_pick(xfs_da_args_t *args, int objchange,
>  }
>  
>  /*
> + * Check whether the sf dir replace operation need more blocks.
> + */
> +bool
> +xfs_dir2_sf_replace_needblock(
> +	struct xfs_inode	*dp,
> +	xfs_ino_t		inum)
> +{
> +	int			newsize;
> +	struct xfs_dir2_sf_hdr	*sfp;
> +
> +	if (dp->i_d.di_format != XFS_DINODE_FMT_LOCAL)
> +		return false;
> +
> +	sfp = (struct xfs_dir2_sf_hdr *)dp->i_df.if_u1.if_data;
> +	newsize = dp->i_df.if_bytes + (sfp->count + 1) * XFS_INO64_DIFF;
> +
> +	return inum > XFS_DIR2_MAX_SHORT_INUM &&
> +	       sfp->i8count == 0 && newsize > XFS_IFORK_DSIZE(dp);
> +}
> +
> +/*
>   * Replace the inode number of an entry in a shortform directory.
>   */
>  int						/* error */
> @@ -980,17 +1001,14 @@ static int xfs_dir2_sf_addname_pick(xfs_da_args_t *args, int objchange,
>  	 */
>  	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
>  		int	error;			/* error return value */
> -		int	newsize;		/* new inode size */
>  
> -		newsize = dp->i_df.if_bytes + (sfp->count + 1) * XFS_INO64_DIFF;
>  		/*
>  		 * Won't fit as shortform, convert to block then do replace.
>  		 */
> -		if (newsize > XFS_IFORK_DSIZE(dp)) {
> +		if (xfs_dir2_sf_replace_needblock(dp, args->inumber)) {
>  			error = xfs_dir2_sf_to_block(args);
> -			if (error) {
> +			if (error)
>  				return error;
> -			}
>  			return xfs_dir2_block_replace(args);
>  		}
>  		/*
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 18f4b26..5dc3796 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -3196,6 +3196,7 @@ struct xfs_iunlink {
>  	struct xfs_trans	*tp;
>  	struct xfs_inode	*wip = NULL;		/* whiteout inode */
>  	struct xfs_inode	*inodes[__XFS_SORT_INODES];
> +	struct xfs_buf		*agibp;
>  	int			num_inodes = __XFS_SORT_INODES;
>  	bool			new_parent = (src_dp != target_dp);
>  	bool			src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
> @@ -3361,6 +3362,20 @@ struct xfs_iunlink {
>  		 * In case there is already an entry with the same
>  		 * name at the destination directory, remove it first.
>  		 */
> +
> +		/*
> +		 * Check whether the replace operation need more blocks.
> +		 * If so, acquire the agi lock firstly to preserve locking
> +		 * order (AGI/AGF). Only convert the shortform directory to
> +		 * block form maybe need more blocks.

The comment still seems a little clunky.  How about:

"Check whether the replace operation will need to allocate blocks.  This
happens when the shortform directory lacks space and we have to convert
it to a block format directory.  When more blocks are necessary we must
lock the AGI first to preserve locking order (AGI -> AGF)."

> +		 */
> +		if (xfs_dir2_sf_replace_needblock(target_dp, src_ip->i_ino)) {
> +			error = xfs_read_agi(mp, tp,
> +				XFS_INO_TO_AGNO(mp, target_ip->i_ino), &agibp);

The second line needs a double indent.

I can fix both of these on commit if Brian doesn't have any further
suggestions.

Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> +			if (error)
> +				goto out_trans_cancel;
> +		}
> +
>  		error = xfs_dir_replace(tp, target_dp, target_name,
>  					src_ip->i_ino, spaceres);
>  		if (error)
> -- 
> 1.8.3.1
>
Brian Foster Nov. 12, 2019, 4:59 p.m. UTC | #2
On Tue, Nov 12, 2019 at 08:34:14AM -0800, Darrick J. Wong wrote:
> On Tue, Nov 12, 2019 at 07:13:30PM +0800, kaixuxia wrote:
> > When target_ip exists in xfs_rename(), the xfs_dir_replace() call may
> > need to hold the AGF lock to allocate more blocks, and then invoking
> > the xfs_droplink() call to hold AGI lock to drop target_ip onto the
> > unlinked list, so we get the lock order AGF->AGI. This would break the
> > ordering constraint on AGI and AGF locking - inode allocation locks
> > the AGI, then can allocate a new extent for new inodes, locking the
> > AGF after the AGI.
> > 
> > In this patch we check whether the replace operation need more
> > blocks firstly. If so, acquire the agi lock firstly to preserve
> > locking order(AGI/AGF). Actually, the locking order problem only
> > occurs when we are locking the AGI/AGF of the same AG. For multiple
> > AGs the AGI lock will be released after the transaction committed.
> > 
> > Signed-off-by: kaixuxia <kaixuxia@tencent.com>
> > ---
> > Changes in v4:
> >  -Remove the typedef usages.
> >  -Invoke xfs_dir2_sf_replace_needblock() in
> >   xfs_dir2_sf_replace() directly.
> > 
> >  fs/xfs/libxfs/xfs_dir2.h    |  2 ++
> >  fs/xfs/libxfs/xfs_dir2_sf.c | 28 +++++++++++++++++++++++-----
> >  fs/xfs/xfs_inode.c          | 15 +++++++++++++++
> >  3 files changed, 40 insertions(+), 5 deletions(-)
> > 
> > diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
> > index f542447..01b1722 100644
> > --- a/fs/xfs/libxfs/xfs_dir2.h
> > +++ b/fs/xfs/libxfs/xfs_dir2.h
> > @@ -124,6 +124,8 @@ extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
> >  extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
> >  				struct xfs_name *name, xfs_ino_t ino,
> >  				xfs_extlen_t tot);
> > +extern bool xfs_dir2_sf_replace_needblock(struct xfs_inode *dp,
> > +				xfs_ino_t inum);
> >  extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
> >  				struct xfs_name *name, xfs_ino_t inum,
> >  				xfs_extlen_t tot);
> > diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
> > index 85f14fc..0e112e1 100644
> > --- a/fs/xfs/libxfs/xfs_dir2_sf.c
> > +++ b/fs/xfs/libxfs/xfs_dir2_sf.c
> > @@ -945,6 +945,27 @@ static int xfs_dir2_sf_addname_pick(xfs_da_args_t *args, int objchange,
> >  }
> >  
> >  /*
> > + * Check whether the sf dir replace operation need more blocks.
> > + */
> > +bool
> > +xfs_dir2_sf_replace_needblock(
> > +	struct xfs_inode	*dp,
> > +	xfs_ino_t		inum)
> > +{
> > +	int			newsize;
> > +	struct xfs_dir2_sf_hdr	*sfp;
> > +
> > +	if (dp->i_d.di_format != XFS_DINODE_FMT_LOCAL)
> > +		return false;
> > +
> > +	sfp = (struct xfs_dir2_sf_hdr *)dp->i_df.if_u1.if_data;
> > +	newsize = dp->i_df.if_bytes + (sfp->count + 1) * XFS_INO64_DIFF;
> > +
> > +	return inum > XFS_DIR2_MAX_SHORT_INUM &&
> > +	       sfp->i8count == 0 && newsize > XFS_IFORK_DSIZE(dp);
> > +}
> > +
> > +/*
> >   * Replace the inode number of an entry in a shortform directory.
> >   */
> >  int						/* error */
> > @@ -980,17 +1001,14 @@ static int xfs_dir2_sf_addname_pick(xfs_da_args_t *args, int objchange,
> >  	 */
> >  	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
> >  		int	error;			/* error return value */
> > -		int	newsize;		/* new inode size */
> >  
> > -		newsize = dp->i_df.if_bytes + (sfp->count + 1) * XFS_INO64_DIFF;
> >  		/*
> >  		 * Won't fit as shortform, convert to block then do replace.
> >  		 */
> > -		if (newsize > XFS_IFORK_DSIZE(dp)) {
> > +		if (xfs_dir2_sf_replace_needblock(dp, args->inumber)) {
> >  			error = xfs_dir2_sf_to_block(args);
> > -			if (error) {
> > +			if (error)
> >  				return error;
> > -			}
> >  			return xfs_dir2_block_replace(args);
> >  		}
> >  		/*
> > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > index 18f4b26..5dc3796 100644
> > --- a/fs/xfs/xfs_inode.c
> > +++ b/fs/xfs/xfs_inode.c
> > @@ -3196,6 +3196,7 @@ struct xfs_iunlink {
> >  	struct xfs_trans	*tp;
> >  	struct xfs_inode	*wip = NULL;		/* whiteout inode */
> >  	struct xfs_inode	*inodes[__XFS_SORT_INODES];
> > +	struct xfs_buf		*agibp;
> >  	int			num_inodes = __XFS_SORT_INODES;
> >  	bool			new_parent = (src_dp != target_dp);
> >  	bool			src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
> > @@ -3361,6 +3362,20 @@ struct xfs_iunlink {
> >  		 * In case there is already an entry with the same
> >  		 * name at the destination directory, remove it first.
> >  		 */
> > +
> > +		/*
> > +		 * Check whether the replace operation need more blocks.
> > +		 * If so, acquire the agi lock firstly to preserve locking
> > +		 * order (AGI/AGF). Only convert the shortform directory to
> > +		 * block form maybe need more blocks.
> 
> The comment still seems a little clunky.  How about:
> 
> "Check whether the replace operation will need to allocate blocks.  This
> happens when the shortform directory lacks space and we have to convert
> it to a block format directory.  When more blocks are necessary we must
> lock the AGI first to preserve locking order (AGI -> AGF)."
> 
> > +		 */
> > +		if (xfs_dir2_sf_replace_needblock(target_dp, src_ip->i_ino)) {
> > +			error = xfs_read_agi(mp, tp,
> > +				XFS_INO_TO_AGNO(mp, target_ip->i_ino), &agibp);
> 
> The second line needs a double indent.
> 
> I can fix both of these on commit if Brian doesn't have any further
> suggestions.
> 
> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
> 

I was hoping for a little more cleanup of the existing
xfs_dir2_sf_replace() logic, but that can always come later. This looks
correct to me and we might as well get this bug fixed. With Darrick's
adjustments:

Reviewed-by: Brian Foster <bfoster@redhat.com>

> --D
> 
> > +			if (error)
> > +				goto out_trans_cancel;
> > +		}
> > +
> >  		error = xfs_dir_replace(tp, target_dp, target_name,
> >  					src_ip->i_ino, spaceres);
> >  		if (error)
> > -- 
> > 1.8.3.1
> > 
>

Patch
diff mbox series

diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index f542447..01b1722 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -124,6 +124,8 @@  extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
 extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_name *name, xfs_ino_t ino,
 				xfs_extlen_t tot);
+extern bool xfs_dir2_sf_replace_needblock(struct xfs_inode *dp,
+				xfs_ino_t inum);
 extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_name *name, xfs_ino_t inum,
 				xfs_extlen_t tot);
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 85f14fc..0e112e1 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -945,6 +945,27 @@  static int xfs_dir2_sf_addname_pick(xfs_da_args_t *args, int objchange,
 }
 
 /*
+ * Check whether the sf dir replace operation needs more blocks.
+ */
+bool
+xfs_dir2_sf_replace_needblock(
+	struct xfs_inode	*dp,
+	xfs_ino_t		inum)
+{
+	int			newsize;
+	struct xfs_dir2_sf_hdr	*sfp;
+
+	if (dp->i_d.di_format != XFS_DINODE_FMT_LOCAL)
+		return false;
+
+	sfp = (struct xfs_dir2_sf_hdr *)dp->i_df.if_u1.if_data;
+	newsize = dp->i_df.if_bytes + (sfp->count + 1) * XFS_INO64_DIFF;
+
+	return inum > XFS_DIR2_MAX_SHORT_INUM &&
+	       sfp->i8count == 0 && newsize > XFS_IFORK_DSIZE(dp);
+}
+
+/*
  * Replace the inode number of an entry in a shortform directory.
  */
 int						/* error */
@@ -980,17 +1001,14 @@  static int xfs_dir2_sf_addname_pick(xfs_da_args_t *args, int objchange,
 	 */
 	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
 		int	error;			/* error return value */
-		int	newsize;		/* new inode size */
 
-		newsize = dp->i_df.if_bytes + (sfp->count + 1) * XFS_INO64_DIFF;
 		/*
 		 * Won't fit as shortform, convert to block then do replace.
 		 */
-		if (newsize > XFS_IFORK_DSIZE(dp)) {
+		if (xfs_dir2_sf_replace_needblock(dp, args->inumber)) {
 			error = xfs_dir2_sf_to_block(args);
-			if (error) {
+			if (error)
 				return error;
-			}
 			return xfs_dir2_block_replace(args);
 		}
 		/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 18f4b26..5dc3796 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3196,6 +3196,7 @@  struct xfs_iunlink {
 	struct xfs_trans	*tp;
 	struct xfs_inode	*wip = NULL;		/* whiteout inode */
 	struct xfs_inode	*inodes[__XFS_SORT_INODES];
+	struct xfs_buf		*agibp;
 	int			num_inodes = __XFS_SORT_INODES;
 	bool			new_parent = (src_dp != target_dp);
 	bool			src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
@@ -3361,6 +3362,20 @@  struct xfs_iunlink {
 		 * In case there is already an entry with the same
 		 * name at the destination directory, remove it first.
 		 */
+
+		/*
+		 * Check whether the replace operation will need to allocate
+		 * blocks, which happens when a shortform directory lacks
+		 * space and must be converted to block format.  If so, lock
+		 * the AGI first to preserve the locking order (AGI -> AGF).
+		 */
+		if (xfs_dir2_sf_replace_needblock(target_dp, src_ip->i_ino)) {
+			error = xfs_read_agi(mp, tp,
+				XFS_INO_TO_AGNO(mp, target_ip->i_ino), &agibp);
+			if (error)
+				goto out_trans_cancel;
+		}
+
 		error = xfs_dir_replace(tp, target_dp, target_name,
 					src_ip->i_ino, spaceres);
 		if (error)