[RFC,2/2] The new attribute leaf buffer is not held locked across the transaction
diff mbox

Message ID 20171130175810.GJ21412@magnolia
State New
Headers show

Commit Message

Darrick J. Wong Nov. 30, 2017, 5:58 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

The new attribute leaf buffer is not held locked across the transaction
roll between the shortform->leaf modification and the addition of the
new entry.  As a result, the attribute buffer modification being made is
not atomic from an operational perspective.  Hence the AIL push can grab
it in the transient state of "just created" after the initial
transaction is rolled, because the buffer has been released.  This leads
to xfs_attr3_leaf_verify() asserting that hdr.count is zero, treating
this as in-memory corruption, and shutting down the filesystem.

Darrick ported the original patch to 4.15 and reworked it use the
xfs_defer_bjoin helper and hold/join the buffer correctly across the
second transaction roll.

Signed-off-by: Alex Lyakas <alex@zadarastorage.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr.c      |   19 +++++++++++++++----
 fs/xfs/libxfs/xfs_attr_leaf.c |    9 ++++++---
 fs/xfs/libxfs/xfs_attr_leaf.h |    3 ++-
 3 files changed, 23 insertions(+), 8 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Brian Foster Dec. 1, 2017, 1:37 p.m. UTC | #1
On Thu, Nov 30, 2017 at 09:58:10AM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> The new attribute leaf buffer is not held locked across the transaction
> roll between the shortform->leaf modification and the addition of the
> new entry.  As a result, the attribute buffer modification being made is
> not atomic from an operational perspective.  Hence the AIL push can grab
> it in the transient state of "just created" after the initial
> transaction is rolled, because the buffer has been released.  This leads
> to xfs_attr3_leaf_verify() asserting that hdr.count is zero, treating
> this as in-memory corruption, and shutting down the filesystem.
> 
> Darrick ported the original patch to 4.15 and reworked it use the
> xfs_defer_bjoin helper and hold/join the buffer correctly across the
> second transaction roll.
> 
> Signed-off-by: Alex Lyakas <alex@zadarastorage.com>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_attr.c      |   19 +++++++++++++++----
>  fs/xfs/libxfs/xfs_attr_leaf.c |    9 ++++++---
>  fs/xfs/libxfs/xfs_attr_leaf.h |    3 ++-
>  3 files changed, 23 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
> index 6249c92..dc7bcf7 100644
> --- a/fs/xfs/libxfs/xfs_attr.c
> +++ b/fs/xfs/libxfs/xfs_attr.c
> @@ -212,6 +212,7 @@ xfs_attr_set(
>  	int			flags)
>  {
>  	struct xfs_mount	*mp = dp->i_mount;
> +	struct xfs_buf		*leaf_bp = NULL;
>  	struct xfs_da_args	args;
>  	struct xfs_defer_ops	dfops;
>  	struct xfs_trans_res	tres;
> @@ -327,9 +328,15 @@ xfs_attr_set(
>  		 * GROT: another possible req'mt for a double-split btree op.
>  		 */
>  		xfs_defer_init(args.dfops, args.firstblock);
> -		error = xfs_attr_shortform_to_leaf(&args);
> +		error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
>  		if (error)
>  			goto out_defer_cancel;
> +		/*
> +		 * Prevent the leaf buffer from being unlocked so that a
> +		 * concurrent AIL push cannot grab the half-baked leaf
> +		 * buffer and run into problems with the write verifier.
> +		 */
> +		xfs_defer_bjoin(args.dfops, leaf_bp);
>  		xfs_defer_ijoin(args.dfops, dp);
>  		error = xfs_defer_finish(&args.trans, args.dfops);
>  		if (error)
> @@ -337,13 +344,15 @@ xfs_attr_set(
>  
>  		/*
>  		 * Commit the leaf transformation.  We'll need another (linked)
> -		 * transaction to add the new attribute to the leaf.
> +		 * transaction to add the new attribute to the leaf, which
> +		 * means that we have to hold & join the leaf buffer here too.
>  		 */
> -
> +		xfs_trans_bhold(args.trans, leaf_bp);

If I follow correctly, I think the change to xfs_defer_trans_roll()
would mean we'd need to dirty the buffer here to relog it (rather than
bhold it). That is essentially what xfs_trans_roll_inode() does for the
inode, so we could refactor that function into a more generic helper
that also receives an optional bp to dirty in the tx and rejoin after
the roll. That would also eliminate the need for the bjoin call below
(though we should retain an update to the comment above to remind us of
the behavior ;).

Brian

>  		error = xfs_trans_roll_inode(&args.trans, dp);
>  		if (error)
>  			goto out;
> -
> +		xfs_trans_bjoin(args.trans, leaf_bp);
> +		leaf_bp = NULL;
>  	}
>  
>  	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
> @@ -376,6 +385,8 @@ xfs_attr_set(
>  	xfs_defer_cancel(&dfops);
>  	args.trans = NULL;
>  out:
> +	if (leaf_bp)
> +		xfs_trans_brelse(args.trans, leaf_bp);
>  	if (args.trans)
>  		xfs_trans_cancel(args.trans);
>  	xfs_iunlock(dp, XFS_ILOCK_EXCL);
> diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
> index 8e59375..ebfa477 100644
> --- a/fs/xfs/libxfs/xfs_attr_leaf.c
> +++ b/fs/xfs/libxfs/xfs_attr_leaf.c
> @@ -739,10 +739,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
>  }
>  
>  /*
> - * Convert from using the shortform to the leaf.
> + * Convert from using the shortform to the leaf.  On success, return the
> + * buffer so that we can keep it locked until we're totally done with it.
>   */
>  int
> -xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
> +xfs_attr_shortform_to_leaf(
> +	struct xfs_da_args	*args,
> +	struct xfs_buf		**leaf_bp)
>  {
>  	xfs_inode_t *dp;
>  	xfs_attr_shortform_t *sf;
> @@ -822,7 +825,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
>  		sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
>  	}
>  	error = 0;
> -
> +	*leaf_bp = bp;
>  out:
>  	kmem_free(tmpbuffer);
>  	return error;
> diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
> index c97f1fe..4da08af 100644
> --- a/fs/xfs/libxfs/xfs_attr_leaf.h
> +++ b/fs/xfs/libxfs/xfs_attr_leaf.h
> @@ -48,7 +48,8 @@ void	xfs_attr_shortform_create(struct xfs_da_args *args);
>  void	xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
>  int	xfs_attr_shortform_lookup(struct xfs_da_args *args);
>  int	xfs_attr_shortform_getvalue(struct xfs_da_args *args);
> -int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
> +int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
> +			struct xfs_buf **leaf_bp);
>  int	xfs_attr_shortform_remove(struct xfs_da_args *args);
>  int	xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
>  int	xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 6249c92..dc7bcf7 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -212,6 +212,7 @@  xfs_attr_set(
 	int			flags)
 {
 	struct xfs_mount	*mp = dp->i_mount;
+	struct xfs_buf		*leaf_bp = NULL;
 	struct xfs_da_args	args;
 	struct xfs_defer_ops	dfops;
 	struct xfs_trans_res	tres;
@@ -327,9 +328,15 @@  xfs_attr_set(
 		 * GROT: another possible req'mt for a double-split btree op.
 		 */
 		xfs_defer_init(args.dfops, args.firstblock);
-		error = xfs_attr_shortform_to_leaf(&args);
+		error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
 		if (error)
 			goto out_defer_cancel;
+		/*
+		 * Prevent the leaf buffer from being unlocked so that a
+		 * concurrent AIL push cannot grab the half-baked leaf
+		 * buffer and run into problems with the write verifier.
+		 */
+		xfs_defer_bjoin(args.dfops, leaf_bp);
 		xfs_defer_ijoin(args.dfops, dp);
 		error = xfs_defer_finish(&args.trans, args.dfops);
 		if (error)
@@ -337,13 +344,15 @@  xfs_attr_set(
 
 		/*
 		 * Commit the leaf transformation.  We'll need another (linked)
-		 * transaction to add the new attribute to the leaf.
+		 * transaction to add the new attribute to the leaf, which
+		 * means that we have to hold & join the leaf buffer here too.
 		 */
-
+		xfs_trans_bhold(args.trans, leaf_bp);
 		error = xfs_trans_roll_inode(&args.trans, dp);
 		if (error)
 			goto out;
-
+		xfs_trans_bjoin(args.trans, leaf_bp);
+		leaf_bp = NULL;
 	}
 
 	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
@@ -376,6 +385,8 @@  xfs_attr_set(
 	xfs_defer_cancel(&dfops);
 	args.trans = NULL;
 out:
+	if (leaf_bp)
+		xfs_trans_brelse(args.trans, leaf_bp);
 	if (args.trans)
 		xfs_trans_cancel(args.trans);
 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 8e59375..ebfa477 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -739,10 +739,13 @@  xfs_attr_shortform_getvalue(xfs_da_args_t *args)
 }
 
 /*
- * Convert from using the shortform to the leaf.
+ * Convert from using the shortform to the leaf.  On success, return the
+ * buffer so that we can keep it locked until we're totally done with it.
  */
 int
-xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
+xfs_attr_shortform_to_leaf(
+	struct xfs_da_args	*args,
+	struct xfs_buf		**leaf_bp)
 {
 	xfs_inode_t *dp;
 	xfs_attr_shortform_t *sf;
@@ -822,7 +825,7 @@  xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 		sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
 	}
 	error = 0;
-
+	*leaf_bp = bp;
 out:
 	kmem_free(tmpbuffer);
 	return error;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index c97f1fe..4da08af 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -48,7 +48,8 @@  void	xfs_attr_shortform_create(struct xfs_da_args *args);
 void	xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
 int	xfs_attr_shortform_lookup(struct xfs_da_args *args);
 int	xfs_attr_shortform_getvalue(struct xfs_da_args *args);
-int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
+int	xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
+			struct xfs_buf **leaf_bp);
 int	xfs_attr_shortform_remove(struct xfs_da_args *args);
 int	xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int	xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);