diff mbox series

[v4,16/17] xfs: Add delay ready attr remove routines

Message ID 20191107012801.22863-17-allison.henderson@oracle.com (mailing list archive)
State Superseded
Headers show
Series xfs: Delay Ready Attributes | expand

Commit Message

Allison Henderson Nov. 7, 2019, 1:28 a.m. UTC
This patch modifies the attr remove routines to be delay ready.
This means they no longer roll or commit transactions, but instead
return -EAGAIN to have the calling routine roll and refresh the
transaction.  In this series, xfs_attr_remove_args has become
xfs_attr_remove_later, which uses a state machine to keep track
of where it was when EAGAIN was returned.  xfs_attr_node_removename
has also been modified to use the state machine, and a  new version of
xfs_attr_remove_args consists of a simple loop to refresh the
transaction until the operation is completed.

Signed-off-by: Allison Collins <allison.henderson@oracle.com>
---
 fs/xfs/libxfs/xfs_attr.c | 123 +++++++++++++++++++++++++++++++++++++++--------
 fs/xfs/libxfs/xfs_attr.h |   1 +
 2 files changed, 104 insertions(+), 20 deletions(-)

Comments

Darrick J. Wong Nov. 8, 2019, 9:37 p.m. UTC | #1
On Wed, Nov 06, 2019 at 06:28:00PM -0700, Allison Collins wrote:
> This patch modifies the attr remove routines to be delay ready.
> This means they no longer roll or commit transactions, but instead
> return -EAGAIN to have the calling routine roll and refresh the
> transaction.  In this series, xfs_attr_remove_args has become
> xfs_attr_remove_later, which uses a state machine to keep track
> of where it was when EAGAIN was returned.  xfs_attr_node_removename
> has also been modified to use the state machine, and a  new version of
> xfs_attr_remove_args consists of a simple loop to refresh the
> transaction until the operation is completed.
> 
> Signed-off-by: Allison Collins <allison.henderson@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_attr.c | 123 +++++++++++++++++++++++++++++++++++++++--------
>  fs/xfs/libxfs/xfs_attr.h |   1 +
>  2 files changed, 104 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
> index 626d4a98..38d5c5c 100644
> --- a/fs/xfs/libxfs/xfs_attr.c
> +++ b/fs/xfs/libxfs/xfs_attr.c
> @@ -369,10 +369,56 @@ xfs_has_attr(
>   */
>  int
>  xfs_attr_remove_args(
> +	struct xfs_da_args	*args)
> +{
> +	int			error = 0;
> +	int			err2 = 0;
> +
> +	do {
> +		error = xfs_attr_remove_later(args);
> +		if (error && error != -EAGAIN)
> +			goto out;
> +
> +		xfs_trans_log_inode(args->trans, args->dp,
> +			XFS_ILOG_CORE | XFS_ILOG_ADATA);

Don't the individual pieces of attr removal log ADATA on their own, when
needed?

If it weren't for that, this could just be xfs_trans_roll_inode, right?

--D

> +
> +		err2 = xfs_trans_roll(&args->trans);
> +		if (err2) {
> +			error = err2;
> +			goto out;
> +		}
> +
> +		/* Rejoin inode */
> +		xfs_trans_ijoin(args->trans, args->dp, 0);
> +
> +	} while (error == -EAGAIN);
> +out:
> +	return error;
> +}
> +
> +/*
> + * Remove the attribute specified in @args.
> + * This routine is meant to function as a delayed operation, and may return
> + * -EGAIN when the transaction needs to be rolled.  Calling functions will need
> + * to handle this, and recall the function until a successful error code is
> + * returned.
> + */
> +int
> +xfs_attr_remove_later(
>  	struct xfs_da_args      *args)
>  {
>  	struct xfs_inode	*dp = args->dp;
> -	int			error;
> +	int			error = 0;
> +
> +	/* State machine switch */
> +	switch (args->dc.dc_state) {
> +	case XFS_DC_RM_INVALIDATE:
> +	case XFS_DC_RM_SHRINK:
> +	case XFS_DC_RM_NODE_BLKS:
> +		goto node;
> +	default:
> +		break;
> +	}
>  
>  	if (!xfs_inode_hasattr(dp)) {
>  		error = -ENOATTR;
> @@ -382,6 +428,7 @@ xfs_attr_remove_args(
>  	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
>  		error = xfs_attr_leaf_removename(args);
>  	} else {
> +node:
>  		error = xfs_attr_node_removename(args);
>  	}
>  
> @@ -892,9 +939,6 @@ xfs_attr_leaf_removename(
>  		/* bp is gone due to xfs_da_shrink_inode */
>  		if (error)
>  			return error;
> -		error = xfs_defer_finish(&args->trans);
> -		if (error)
> -			return error;
>  	}
>  	return 0;
>  }
> @@ -1212,6 +1256,11 @@ xfs_attr_node_addname(
>   * This will involve walking down the Btree, and may involve joining
>   * leaf nodes and even joining intermediate nodes up to and including
>   * the root node (a special case of an intermediate node).
> + *
> + * This routine is meant to function as a delayed operation, and may return
> + * -EAGAIN when the transaction needs to be rolled.  Calling functions
> + * will need to handle this, and recall the function until a successful error
> + * code is returned.
>   */
>  STATIC int
>  xfs_attr_node_removename(
> @@ -1222,12 +1271,29 @@ xfs_attr_node_removename(
>  	struct xfs_buf		*bp;
>  	int			retval, error, forkoff;
>  	struct xfs_inode	*dp = args->dp;
> +	int			done = 0;
>  
>  	trace_xfs_attr_node_removename(args);
> +	state = args->dc.da_state;
> +	blk = args->dc.blk;
> +
> +	/* State machine switch */
> +	switch (args->dc.dc_state) {
> +	case XFS_DC_RM_NODE_BLKS:
> +		goto rm_node_blks;
> +	case XFS_DC_RM_INVALIDATE:
> +		goto rm_invalidate;
> +	case XFS_DC_RM_SHRINK:
> +		goto rm_shrink;
> +	default:
> +		break;
> +	}
>  
>  	error = xfs_attr_node_hasname(args, &state);
>  	if (error != -EEXIST)
>  		goto out;
> +	else
> +		error = 0;
>  
>  	/*
>  	 * If there is an out-of-line value, de-allocate the blocks.
> @@ -1237,6 +1303,14 @@ xfs_attr_node_removename(
>  	blk = &state->path.blk[ state->path.active-1 ];
>  	ASSERT(blk->bp != NULL);
>  	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
> +
> +	/*
> +	 * Store blk and state in the context incase we need to cycle out the
> +	 * transaction
> +	 */
> +	args->dc.blk = blk;
> +	args->dc.da_state = state;
> +
>  	if (args->rmtblkno > 0) {
>  		/*
>  		 * Fill in disk block numbers in the state structure
> @@ -1255,13 +1329,30 @@ xfs_attr_node_removename(
>  		if (error)
>  			goto out;
>  
> -		error = xfs_trans_roll_inode(&args->trans, args->dp);
> +		args->dc.dc_state = XFS_DC_RM_INVALIDATE;
> +		return -EAGAIN;
> +rm_invalidate:
> +		error = xfs_attr_rmtval_invalidate(args);
>  		if (error)
>  			goto out;
> +rm_node_blks:
> +		/*
> +		 * Unmap value blocks for this attr.  This is similar to
> +		 * xfs_attr_rmtval_remove, but open coded here to return EAGAIN
> +		 * for new transactions
> +		 */
> +		while (!done && !error) {
> +			error = xfs_bunmapi(args->trans, args->dp,
> +				    args->rmtblkno, args->rmtblkcnt,
> +				    XFS_BMAPI_ATTRFORK, 1, &done);
> +			if (error)
> +				return error;
>  
> -		error = xfs_attr_rmtval_remove(args);
> -		if (error)
> -			goto out;
> +			if (!done) {
> +				args->dc.dc_state = XFS_DC_RM_NODE_BLKS;
> +				return -EAGAIN;
> +			}
> +		}
>  
>  		/*
>  		 * Refill the state structure with buffers, the prior calls
> @@ -1287,17 +1378,12 @@ xfs_attr_node_removename(
>  		error = xfs_da3_join(state);
>  		if (error)
>  			goto out;
> -		error = xfs_defer_finish(&args->trans);
> -		if (error)
> -			goto out;
> -		/*
> -		 * Commit the Btree join operation and start a new trans.
> -		 */
> -		error = xfs_trans_roll_inode(&args->trans, dp);
> -		if (error)
> -			goto out;
> +
> +		args->dc.dc_state = XFS_DC_RM_SHRINK;
> +		return -EAGAIN;
>  	}
>  
> +rm_shrink:
>  	/*
>  	 * If the result is small enough, push it all into the inode.
>  	 */
> @@ -1319,9 +1405,6 @@ xfs_attr_node_removename(
>  			/* bp is gone due to xfs_da_shrink_inode */
>  			if (error)
>  				goto out;
> -			error = xfs_defer_finish(&args->trans);
> -			if (error)
> -				goto out;
>  		} else
>  			xfs_trans_brelse(args->trans, bp);
>  	}
> diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
> index 3b5dad4..fb8bf5b 100644
> --- a/fs/xfs/libxfs/xfs_attr.h
> +++ b/fs/xfs/libxfs/xfs_attr.h
> @@ -152,6 +152,7 @@ int xfs_attr_set_args(struct xfs_da_args *args);
>  int xfs_attr_remove(struct xfs_inode *dp, struct xfs_name *name, int flags);
>  int xfs_has_attr(struct xfs_da_args *args);
>  int xfs_attr_remove_args(struct xfs_da_args *args);
> +int xfs_attr_remove_later(struct xfs_da_args *args);
>  int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
>  		  int flags, struct attrlist_cursor_kern *cursor);
>  bool xfs_attr_namecheck(const void *name, size_t length);
> -- 
> 2.7.4
>
Allison Henderson Nov. 9, 2019, 12:25 a.m. UTC | #2
On 11/8/19 2:37 PM, Darrick J. Wong wrote:
> On Wed, Nov 06, 2019 at 06:28:00PM -0700, Allison Collins wrote:
>> This patch modifies the attr remove routines to be delay ready.
>> This means they no longer roll or commit transactions, but instead
>> return -EAGAIN to have the calling routine roll and refresh the
>> transaction.  In this series, xfs_attr_remove_args has become
>> xfs_attr_remove_later, which uses a state machine to keep track
>> of where it was when EAGAIN was returned.  xfs_attr_node_removename
>> has also been modified to use the state machine, and a  new version of
>> xfs_attr_remove_args consists of a simple loop to refresh the
>> transaction until the operation is completed.
>>
>> Signed-off-by: Allison Collins <allison.henderson@oracle.com>
>> ---
>>   fs/xfs/libxfs/xfs_attr.c | 123 +++++++++++++++++++++++++++++++++++++++--------
>>   fs/xfs/libxfs/xfs_attr.h |   1 +
>>   2 files changed, 104 insertions(+), 20 deletions(-)
>>
>> diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
>> index 626d4a98..38d5c5c 100644
>> --- a/fs/xfs/libxfs/xfs_attr.c
>> +++ b/fs/xfs/libxfs/xfs_attr.c
>> @@ -369,10 +369,56 @@ xfs_has_attr(
>>    */
>>   int
>>   xfs_attr_remove_args(
>> +	struct xfs_da_args	*args)
>> +{
>> +	int			error = 0;
>> +	int			err2 = 0;
>> +
>> +	do {
>> +		error = xfs_attr_remove_later(args);
>> +		if (error && error != -EAGAIN)
>> +			goto out;
>> +
>> +		xfs_trans_log_inode(args->trans, args->dp,
>> +			XFS_ILOG_CORE | XFS_ILOG_ADATA);
> 
> Don't the individual pieces of attr removal log ADATA on their own, when
> needed?
> 
> If it weren't for that, this could just be xfs_trans_roll_inode, right?

Oh i see.  I will see if I can simplify that.  Thanks!
Allison

> 
> --D
> 
>> +
>> +		err2 = xfs_trans_roll(&args->trans);
>> +		if (err2) {
>> +			error = err2;
>> +			goto out;
>> +		}
>> +
>> +		/* Rejoin inode */
>> +		xfs_trans_ijoin(args->trans, args->dp, 0);
>> +
>> +	} while (error == -EAGAIN);
>> +out:
>> +	return error;
>> +}
>> +
>> +/*
>> + * Remove the attribute specified in @args.
>> + * This routine is meant to function as a delayed operation, and may return
>> + * -EGAIN when the transaction needs to be rolled.  Calling functions will need
>> + * to handle this, and recall the function until a successful error code is
>> + * returned.
>> + */
>> +int
>> +xfs_attr_remove_later(
>>   	struct xfs_da_args      *args)
>>   {
>>   	struct xfs_inode	*dp = args->dp;
>> -	int			error;
>> +	int			error = 0;
>> +
>> +	/* State machine switch */
>> +	switch (args->dc.dc_state) {
>> +	case XFS_DC_RM_INVALIDATE:
>> +	case XFS_DC_RM_SHRINK:
>> +	case XFS_DC_RM_NODE_BLKS:
>> +		goto node;
>> +	default:
>> +		break;
>> +	}
>>   
>>   	if (!xfs_inode_hasattr(dp)) {
>>   		error = -ENOATTR;
>> @@ -382,6 +428,7 @@ xfs_attr_remove_args(
>>   	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
>>   		error = xfs_attr_leaf_removename(args);
>>   	} else {
>> +node:
>>   		error = xfs_attr_node_removename(args);
>>   	}
>>   
>> @@ -892,9 +939,6 @@ xfs_attr_leaf_removename(
>>   		/* bp is gone due to xfs_da_shrink_inode */
>>   		if (error)
>>   			return error;
>> -		error = xfs_defer_finish(&args->trans);
>> -		if (error)
>> -			return error;
>>   	}
>>   	return 0;
>>   }
>> @@ -1212,6 +1256,11 @@ xfs_attr_node_addname(
>>    * This will involve walking down the Btree, and may involve joining
>>    * leaf nodes and even joining intermediate nodes up to and including
>>    * the root node (a special case of an intermediate node).
>> + *
>> + * This routine is meant to function as a delayed operation, and may return
>> + * -EAGAIN when the transaction needs to be rolled.  Calling functions
>> + * will need to handle this, and recall the function until a successful error
>> + * code is returned.
>>    */
>>   STATIC int
>>   xfs_attr_node_removename(
>> @@ -1222,12 +1271,29 @@ xfs_attr_node_removename(
>>   	struct xfs_buf		*bp;
>>   	int			retval, error, forkoff;
>>   	struct xfs_inode	*dp = args->dp;
>> +	int			done = 0;
>>   
>>   	trace_xfs_attr_node_removename(args);
>> +	state = args->dc.da_state;
>> +	blk = args->dc.blk;
>> +
>> +	/* State machine switch */
>> +	switch (args->dc.dc_state) {
>> +	case XFS_DC_RM_NODE_BLKS:
>> +		goto rm_node_blks;
>> +	case XFS_DC_RM_INVALIDATE:
>> +		goto rm_invalidate;
>> +	case XFS_DC_RM_SHRINK:
>> +		goto rm_shrink;
>> +	default:
>> +		break;
>> +	}
>>   
>>   	error = xfs_attr_node_hasname(args, &state);
>>   	if (error != -EEXIST)
>>   		goto out;
>> +	else
>> +		error = 0;
>>   
>>   	/*
>>   	 * If there is an out-of-line value, de-allocate the blocks.
>> @@ -1237,6 +1303,14 @@ xfs_attr_node_removename(
>>   	blk = &state->path.blk[ state->path.active-1 ];
>>   	ASSERT(blk->bp != NULL);
>>   	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
>> +
>> +	/*
>> +	 * Store blk and state in the context incase we need to cycle out the
>> +	 * transaction
>> +	 */
>> +	args->dc.blk = blk;
>> +	args->dc.da_state = state;
>> +
>>   	if (args->rmtblkno > 0) {
>>   		/*
>>   		 * Fill in disk block numbers in the state structure
>> @@ -1255,13 +1329,30 @@ xfs_attr_node_removename(
>>   		if (error)
>>   			goto out;
>>   
>> -		error = xfs_trans_roll_inode(&args->trans, args->dp);
>> +		args->dc.dc_state = XFS_DC_RM_INVALIDATE;
>> +		return -EAGAIN;
>> +rm_invalidate:
>> +		error = xfs_attr_rmtval_invalidate(args);
>>   		if (error)
>>   			goto out;
>> +rm_node_blks:
>> +		/*
>> +		 * Unmap value blocks for this attr.  This is similar to
>> +		 * xfs_attr_rmtval_remove, but open coded here to return EAGAIN
>> +		 * for new transactions
>> +		 */
>> +		while (!done && !error) {
>> +			error = xfs_bunmapi(args->trans, args->dp,
>> +				    args->rmtblkno, args->rmtblkcnt,
>> +				    XFS_BMAPI_ATTRFORK, 1, &done);
>> +			if (error)
>> +				return error;
>>   
>> -		error = xfs_attr_rmtval_remove(args);
>> -		if (error)
>> -			goto out;
>> +			if (!done) {
>> +				args->dc.dc_state = XFS_DC_RM_NODE_BLKS;
>> +				return -EAGAIN;
>> +			}
>> +		}
>>   
>>   		/*
>>   		 * Refill the state structure with buffers, the prior calls
>> @@ -1287,17 +1378,12 @@ xfs_attr_node_removename(
>>   		error = xfs_da3_join(state);
>>   		if (error)
>>   			goto out;
>> -		error = xfs_defer_finish(&args->trans);
>> -		if (error)
>> -			goto out;
>> -		/*
>> -		 * Commit the Btree join operation and start a new trans.
>> -		 */
>> -		error = xfs_trans_roll_inode(&args->trans, dp);
>> -		if (error)
>> -			goto out;
>> +
>> +		args->dc.dc_state = XFS_DC_RM_SHRINK;
>> +		return -EAGAIN;
>>   	}
>>   
>> +rm_shrink:
>>   	/*
>>   	 * If the result is small enough, push it all into the inode.
>>   	 */
>> @@ -1319,9 +1405,6 @@ xfs_attr_node_removename(
>>   			/* bp is gone due to xfs_da_shrink_inode */
>>   			if (error)
>>   				goto out;
>> -			error = xfs_defer_finish(&args->trans);
>> -			if (error)
>> -				goto out;
>>   		} else
>>   			xfs_trans_brelse(args->trans, bp);
>>   	}
>> diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
>> index 3b5dad4..fb8bf5b 100644
>> --- a/fs/xfs/libxfs/xfs_attr.h
>> +++ b/fs/xfs/libxfs/xfs_attr.h
>> @@ -152,6 +152,7 @@ int xfs_attr_set_args(struct xfs_da_args *args);
>>   int xfs_attr_remove(struct xfs_inode *dp, struct xfs_name *name, int flags);
>>   int xfs_has_attr(struct xfs_da_args *args);
>>   int xfs_attr_remove_args(struct xfs_da_args *args);
>> +int xfs_attr_remove_later(struct xfs_da_args *args);
>>   int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
>>   		  int flags, struct attrlist_cursor_kern *cursor);
>>   bool xfs_attr_namecheck(const void *name, size_t length);
>> -- 
>> 2.7.4
>>
Brian Foster Nov. 12, 2019, 1:37 p.m. UTC | #3
On Wed, Nov 06, 2019 at 06:28:00PM -0700, Allison Collins wrote:
> This patch modifies the attr remove routines to be delay ready.
> This means they no longer roll or commit transactions, but instead
> return -EAGAIN to have the calling routine roll and refresh the
> transaction.  In this series, xfs_attr_remove_args has become
> xfs_attr_remove_later, which uses a state machine to keep track
> of where it was when EAGAIN was returned.  xfs_attr_node_removename
> has also been modified to use the state machine, and a  new version of
> xfs_attr_remove_args consists of a simple loop to refresh the
> transaction until the operation is completed.
> 
> Signed-off-by: Allison Collins <allison.henderson@oracle.com>
> ---

On a cursory look, this is definitely more along the lines of what I was
thinking on the previous revisions. I would like to see if we can get a
bit more refactoring/cleanup before this point though. Further thoughts
inline..

>  fs/xfs/libxfs/xfs_attr.c | 123 +++++++++++++++++++++++++++++++++++++++--------
>  fs/xfs/libxfs/xfs_attr.h |   1 +
>  2 files changed, 104 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
> index 626d4a98..38d5c5c 100644
> --- a/fs/xfs/libxfs/xfs_attr.c
> +++ b/fs/xfs/libxfs/xfs_attr.c
> @@ -369,10 +369,56 @@ xfs_has_attr(
>   */
>  int
>  xfs_attr_remove_args(
> +	struct xfs_da_args	*args)
> +{
> +	int			error = 0;
> +	int			err2 = 0;
> +
> +	do {
> +		error = xfs_attr_remove_later(args);
> +		if (error && error != -EAGAIN)
> +			goto out;

xfs_attr_remove_later() strikes me as an odd name with respect to the
functionality. Perhaps something like xfs_attr_remove_step() is
(slightly) more accurate..?

> +
> +		xfs_trans_log_inode(args->trans, args->dp,
> +			XFS_ILOG_CORE | XFS_ILOG_ADATA);
> +
> +		err2 = xfs_trans_roll(&args->trans);
> +		if (err2) {
> +			error = err2;

Also do we really need two error codes in this function? It seems like
we should be able to write this with one, but I haven't tried it..

> +			goto out;
> +		}
> +
> +		/* Rejoin inode */
> +		xfs_trans_ijoin(args->trans, args->dp, 0);
> +
> +	} while (error == -EAGAIN);
> +out:
> +	return error;
> +}
> +
> +/*
> + * Remove the attribute specified in @args.
> + * This routine is meant to function as a delayed operation, and may return
> + * -EGAIN when the transaction needs to be rolled.  Calling functions will need
> + * to handle this, and recall the function until a successful error code is
> + * returned.
> + */
> +int
> +xfs_attr_remove_later(
>  	struct xfs_da_args      *args)
>  {
>  	struct xfs_inode	*dp = args->dp;
> -	int			error;
> +	int			error = 0;
> +
> +	/* State machine switch */
> +	switch (args->dc.dc_state) {
> +	case XFS_DC_RM_INVALIDATE:
> +	case XFS_DC_RM_SHRINK:
> +	case XFS_DC_RM_NODE_BLKS:
> +		goto node;
> +	default:
> +		break;
> +	}
>  
>  	if (!xfs_inode_hasattr(dp)) {
>  		error = -ENOATTR;
> @@ -382,6 +428,7 @@ xfs_attr_remove_args(
>  	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
>  		error = xfs_attr_leaf_removename(args);
>  	} else {
> +node:
>  		error = xfs_attr_node_removename(args);
>  	}
>  
> @@ -892,9 +939,6 @@ xfs_attr_leaf_removename(
>  		/* bp is gone due to xfs_da_shrink_inode */
>  		if (error)
>  			return error;
> -		error = xfs_defer_finish(&args->trans);
> -		if (error)
> -			return error;
>  	}
>  	return 0;
>  }
> @@ -1212,6 +1256,11 @@ xfs_attr_node_addname(
>   * This will involve walking down the Btree, and may involve joining
>   * leaf nodes and even joining intermediate nodes up to and including
>   * the root node (a special case of an intermediate node).
> + *
> + * This routine is meant to function as a delayed operation, and may return
> + * -EAGAIN when the transaction needs to be rolled.  Calling functions
> + * will need to handle this, and recall the function until a successful error
> + * code is returned.
>   */
>  STATIC int
>  xfs_attr_node_removename(
> @@ -1222,12 +1271,29 @@ xfs_attr_node_removename(
>  	struct xfs_buf		*bp;
>  	int			retval, error, forkoff;
>  	struct xfs_inode	*dp = args->dp;
> +	int			done = 0;
>  
>  	trace_xfs_attr_node_removename(args);
> +	state = args->dc.da_state;
> +	blk = args->dc.blk;
> +
> +	/* State machine switch */
> +	switch (args->dc.dc_state) {
> +	case XFS_DC_RM_NODE_BLKS:
> +		goto rm_node_blks;
> +	case XFS_DC_RM_INVALIDATE:
> +		goto rm_invalidate;
> +	case XFS_DC_RM_SHRINK:
> +		goto rm_shrink;
> +	default:
> +		break;

I wonder if it's worth having an explicit state for the initial path.
That could be useful for readability and debuggability in the future.

> +	}
>  
>  	error = xfs_attr_node_hasname(args, &state);
>  	if (error != -EEXIST)
>  		goto out;
> +	else
> +		error = 0;
>  
>  	/*
>  	 * If there is an out-of-line value, de-allocate the blocks.
> @@ -1237,6 +1303,14 @@ xfs_attr_node_removename(
>  	blk = &state->path.blk[ state->path.active-1 ];
>  	ASSERT(blk->bp != NULL);
>  	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
> +
> +	/*
> +	 * Store blk and state in the context incase we need to cycle out the
> +	 * transaction
> +	 */
> +	args->dc.blk = blk;
> +	args->dc.da_state = state;
> +
>  	if (args->rmtblkno > 0) {
>  		/*
>  		 * Fill in disk block numbers in the state structure
> @@ -1255,13 +1329,30 @@ xfs_attr_node_removename(
>  		if (error)
>  			goto out;
>  
> -		error = xfs_trans_roll_inode(&args->trans, args->dp);
> +		args->dc.dc_state = XFS_DC_RM_INVALIDATE;
> +		return -EAGAIN;
> +rm_invalidate:
> +		error = xfs_attr_rmtval_invalidate(args);
>  		if (error)
>  			goto out;
> +rm_node_blks:

While I think the design is the right idea, jumping down into a function
like this is pretty hairy. I think we should try to further break this
function down into smaller elements one way or another that model the
steps defined by the state structure. There's probably multiple ways to
do that. For example, the remote attr bits could be broken down into
a subfunction that processes the couple of states associated with remote
blocks. That said, ISTM it might be wiser to try and keep the state
processing in one place if possible. That would imply to break the
remote processing loop down into a couple functions. All in all, this
function might end up looking something like:

xfs_attr_node_removename()
{
	/* switch statement and comment to document each state */

	error = xfs_attr_node_hasname(args, &state);
	...

	if (remote) {
		error = do_setflag();
		if (error)
			return error;

		/* roll */
		state = INVALIDATE;
		return -EAGAIN;
	}

rmt_invalidate:
	state = INVALIDATE;
	if (remote)
		do_invalidate();
	/* fallthru */

rmt_rm_blks:
	state = RM_NODE_BLKS;
	if (remote) {
		/* loops and returns -EAGAIN until we fallthru */
		error = rmt_remove_step();
		if (error)
			return error;

		xfs_attr_refillstate();
	}

/* maybe worth a new state here? */
removename:
	state = REMOVENAME;
	xfs_attr3_leaf_remove();
	...
	if (...) {
		state = SHRINK;
		return -EAGAIN;
	}

shrink:
	state = SHRINK;
	error = do_shrink();

	return 0;
}

I'm not totally sold on the idea of rolling the state forward explicitly
like this, but it seems like it could be a bit more maintainable. All in
all this is still fairly ugly, but this is mostly a mechanical attempt
to keep state management isolated and we can polish it up from there.
Thoughts?

Brian

> +		/*
> +		 * Unmap value blocks for this attr.  This is similar to
> +		 * xfs_attr_rmtval_remove, but open coded here to return EAGAIN
> +		 * for new transactions
> +		 */
> +		while (!done && !error) {
> +			error = xfs_bunmapi(args->trans, args->dp,
> +				    args->rmtblkno, args->rmtblkcnt,
> +				    XFS_BMAPI_ATTRFORK, 1, &done);
> +			if (error)
> +				return error;
>  
> -		error = xfs_attr_rmtval_remove(args);
> -		if (error)
> -			goto out;
> +			if (!done) {
> +				args->dc.dc_state = XFS_DC_RM_NODE_BLKS;
> +				return -EAGAIN;
> +			}
> +		}
>  
>  		/*
>  		 * Refill the state structure with buffers, the prior calls
> @@ -1287,17 +1378,12 @@ xfs_attr_node_removename(
>  		error = xfs_da3_join(state);
>  		if (error)
>  			goto out;
> -		error = xfs_defer_finish(&args->trans);
> -		if (error)
> -			goto out;
> -		/*
> -		 * Commit the Btree join operation and start a new trans.
> -		 */
> -		error = xfs_trans_roll_inode(&args->trans, dp);
> -		if (error)
> -			goto out;
> +
> +		args->dc.dc_state = XFS_DC_RM_SHRINK;
> +		return -EAGAIN;
>  	}
>  
> +rm_shrink:
>  	/*
>  	 * If the result is small enough, push it all into the inode.
>  	 */
> @@ -1319,9 +1405,6 @@ xfs_attr_node_removename(
>  			/* bp is gone due to xfs_da_shrink_inode */
>  			if (error)
>  				goto out;
> -			error = xfs_defer_finish(&args->trans);
> -			if (error)
> -				goto out;
>  		} else
>  			xfs_trans_brelse(args->trans, bp);
>  	}
> diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
> index 3b5dad4..fb8bf5b 100644
> --- a/fs/xfs/libxfs/xfs_attr.h
> +++ b/fs/xfs/libxfs/xfs_attr.h
> @@ -152,6 +152,7 @@ int xfs_attr_set_args(struct xfs_da_args *args);
>  int xfs_attr_remove(struct xfs_inode *dp, struct xfs_name *name, int flags);
>  int xfs_has_attr(struct xfs_da_args *args);
>  int xfs_attr_remove_args(struct xfs_da_args *args);
> +int xfs_attr_remove_later(struct xfs_da_args *args);
>  int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
>  		  int flags, struct attrlist_cursor_kern *cursor);
>  bool xfs_attr_namecheck(const void *name, size_t length);
> -- 
> 2.7.4
>
Allison Henderson Nov. 13, 2019, 12:43 a.m. UTC | #4
On 11/12/19 6:37 AM, Brian Foster wrote:
> On Wed, Nov 06, 2019 at 06:28:00PM -0700, Allison Collins wrote:
>> This patch modifies the attr remove routines to be delay ready.
>> This means they no longer roll or commit transactions, but instead
>> return -EAGAIN to have the calling routine roll and refresh the
>> transaction.  In this series, xfs_attr_remove_args has become
>> xfs_attr_remove_later, which uses a state machine to keep track
>> of where it was when EAGAIN was returned.  xfs_attr_node_removename
>> has also been modified to use the state machine, and a  new version of
>> xfs_attr_remove_args consists of a simple loop to refresh the
>> transaction until the operation is completed.
>>
>> Signed-off-by: Allison Collins <allison.henderson@oracle.com>
>> ---
> 
> On a cursory look, this is definitely more along the lines of what I was
> thinking on the previous revisions. I would like to see if we can get a
> bit more refactoring/cleanup before this point though. Further thoughts
> inline..
> 
>>   fs/xfs/libxfs/xfs_attr.c | 123 +++++++++++++++++++++++++++++++++++++++--------
>>   fs/xfs/libxfs/xfs_attr.h |   1 +
>>   2 files changed, 104 insertions(+), 20 deletions(-)
>>
>> diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
>> index 626d4a98..38d5c5c 100644
>> --- a/fs/xfs/libxfs/xfs_attr.c
>> +++ b/fs/xfs/libxfs/xfs_attr.c
>> @@ -369,10 +369,56 @@ xfs_has_attr(
>>    */
>>   int
>>   xfs_attr_remove_args(
>> +	struct xfs_da_args	*args)
>> +{
>> +	int			error = 0;
>> +	int			err2 = 0;
>> +
>> +	do {
>> +		error = xfs_attr_remove_later(args);
>> +		if (error && error != -EAGAIN)
>> +			goto out;
> 
> xfs_attr_remove_later() strikes me as an odd name with respect to the
> functionality. Perhaps something like xfs_attr_remove_step() is
> (slightly) more accurate..?
Sure that's fine.  I think Darrick had proposed the *_later scheme in an 
earlier review but that was when the code paths were split.  Darrick, 
does the *_step scheme work for you?

> 
>> +
>> +		xfs_trans_log_inode(args->trans, args->dp,
>> +			XFS_ILOG_CORE | XFS_ILOG_ADATA);
>> +
>> +		err2 = xfs_trans_roll(&args->trans);
>> +		if (err2) {
>> +			error = err2;
> 
> Also do we really need two error codes in this function? It seems like
> we should be able to write this with one, but I haven't tried it..
No, because then we'll loose the xfs_attr_remove_later return code, 
which is either 0 or EAGAIN at this point.  And we need that to drive 
the loop.  To get rid of err2, we'd need another "not_done" variable or 
something.  Like:

do {
	...
	not_done = (error == -EAGAIN);
	...
} while (not_done)


Not sure if not_done is really preferable to err2?

> 
>> +			goto out;
>> +		}
>> +
>> +		/* Rejoin inode */
>> +		xfs_trans_ijoin(args->trans, args->dp, 0);
>> +
>> +	} while (error == -EAGAIN);
>> +out:
>> +	return error;
>> +}
>> +
>> +/*
>> + * Remove the attribute specified in @args.
>> + * This routine is meant to function as a delayed operation, and may return
>> + * -EGAIN when the transaction needs to be rolled.  Calling functions will need
>> + * to handle this, and recall the function until a successful error code is
>> + * returned.
>> + */
>> +int
>> +xfs_attr_remove_later(
>>   	struct xfs_da_args      *args)
>>   {
>>   	struct xfs_inode	*dp = args->dp;
>> -	int			error;
>> +	int			error = 0;
>> +
>> +	/* State machine switch */
>> +	switch (args->dc.dc_state) {
>> +	case XFS_DC_RM_INVALIDATE:
>> +	case XFS_DC_RM_SHRINK:
>> +	case XFS_DC_RM_NODE_BLKS:
>> +		goto node;
>> +	default:
>> +		break;
>> +	}
>>   
>>   	if (!xfs_inode_hasattr(dp)) {
>>   		error = -ENOATTR;
>> @@ -382,6 +428,7 @@ xfs_attr_remove_args(
>>   	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
>>   		error = xfs_attr_leaf_removename(args);
>>   	} else {
>> +node:
>>   		error = xfs_attr_node_removename(args);
>>   	}
>>   
>> @@ -892,9 +939,6 @@ xfs_attr_leaf_removename(
>>   		/* bp is gone due to xfs_da_shrink_inode */
>>   		if (error)
>>   			return error;
>> -		error = xfs_defer_finish(&args->trans);
>> -		if (error)
>> -			return error;
>>   	}
>>   	return 0;
>>   }
>> @@ -1212,6 +1256,11 @@ xfs_attr_node_addname(
>>    * This will involve walking down the Btree, and may involve joining
>>    * leaf nodes and even joining intermediate nodes up to and including
>>    * the root node (a special case of an intermediate node).
>> + *
>> + * This routine is meant to function as a delayed operation, and may return
>> + * -EAGAIN when the transaction needs to be rolled.  Calling functions
>> + * will need to handle this, and recall the function until a successful error
>> + * code is returned.
>>    */
>>   STATIC int
>>   xfs_attr_node_removename(
>> @@ -1222,12 +1271,29 @@ xfs_attr_node_removename(
>>   	struct xfs_buf		*bp;
>>   	int			retval, error, forkoff;
>>   	struct xfs_inode	*dp = args->dp;
>> +	int			done = 0;
>>   
>>   	trace_xfs_attr_node_removename(args);
>> +	state = args->dc.da_state;
>> +	blk = args->dc.blk;
>> +
>> +	/* State machine switch */
>> +	switch (args->dc.dc_state) {
>> +	case XFS_DC_RM_NODE_BLKS:
>> +		goto rm_node_blks;
>> +	case XFS_DC_RM_INVALIDATE:
>> +		goto rm_invalidate;
>> +	case XFS_DC_RM_SHRINK:
>> +		goto rm_shrink;
>> +	default:
>> +		break;
> 
> I wonder if it's worth having an explicit state for the initial path.
> That could be useful for readability and debuggability in the future.
We could, it will just require to the calling function to set that 
before state calling it.

Mechanically, I dont think it would hurt anything, but it may lead to 
developer wonderments like... "Where's the EAGAIN for this state?" 
"Shouldnt this state appear in the switch up top too?"  Or if it does 
"Why do we have it here, if it never executes?"  "I wonder if i should 
sent a patch to take it out..." :-)

Puzzlement aside though, I cant quite think of what condition it would 
help to debug?  It's not an error for the statemachine to hold a value 
outside of the helpers scope.  It just means the caller was using it up 
to this point.  Helpers really shouldnt have enough context about their 
callers to know or care what the caller states mean.  If we added a 
special init state, all the default statement would really mean is: "The 
caller forgot to set the init state".

Thoughts?

> 
>> +	}
>>   
>>   	error = xfs_attr_node_hasname(args, &state);
>>   	if (error != -EEXIST)
>>   		goto out;
>> +	else
>> +		error = 0;
>>   
>>   	/*
>>   	 * If there is an out-of-line value, de-allocate the blocks.
>> @@ -1237,6 +1303,14 @@ xfs_attr_node_removename(
>>   	blk = &state->path.blk[ state->path.active-1 ];
>>   	ASSERT(blk->bp != NULL);
>>   	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
>> +
>> +	/*
>> +	 * Store blk and state in the context incase we need to cycle out the
>> +	 * transaction
>> +	 */
>> +	args->dc.blk = blk;
>> +	args->dc.da_state = state;
>> +
>>   	if (args->rmtblkno > 0) {
>>   		/*
>>   		 * Fill in disk block numbers in the state structure
>> @@ -1255,13 +1329,30 @@ xfs_attr_node_removename(
>>   		if (error)
>>   			goto out;
>>   
>> -		error = xfs_trans_roll_inode(&args->trans, args->dp);
>> +		args->dc.dc_state = XFS_DC_RM_INVALIDATE;
>> +		return -EAGAIN;
>> +rm_invalidate:
>> +		error = xfs_attr_rmtval_invalidate(args);
>>   		if (error)
>>   			goto out;
>> +rm_node_blks:
> 
> While I think the design is the right idea, jumping down into a function
> like this is pretty hairy. I think we should try to further break this
> function down into smaller elements one way or another that model the
> steps defined by the state structure. There's probably multiple ways to
> do that. For example, the remote attr bits could be broken down into
> a subfunction that processes the couple of states associated with remote
> blocks. That said, ISTM it might be wiser to try and keep the state
> processing in one place if possible. That would imply to break the
> remote processing loop down into a couple functions. All in all, this
> function might end up looking something like:
> 
> xfs_attr_node_removename()
> {
> 	/* switch statement and comment to document each state */
> 
> 	error = xfs_attr_node_hasname(args, &state);
> 	...
> 
> 	if (remote) {
> 		error = do_setflag();
> 		if (error)
> 			return error;
> 
> 		/* roll */
> 		state = INVALIDATE;
> 		return -EAGAIN;
> 	}
> 
> rmt_invalidate:
> 	state = INVALIDATE;
> 	if (remote)
> 		do_invalidate();
> 	/* fallthru */
> 
> rmt_rm_blks:
> 	state = RM_NODE_BLKS;
> 	if (remote) {
> 		/* loops and returns -EAGAIN until we fallthru */
> 		error = rmt_remove_step();
> 		if (error)
> 			return error;
> 
> 		xfs_attr_refillstate();
> 	}
> 
> /* maybe worth a new state here? */
> removename:
> 	state = REMOVENAME;
> 	xfs_attr3_leaf_remove();
> 	...
> 	if (...) {
> 		state = SHRINK;
> 		return -EAGAIN;
> 	}
> 
> shrink:
> 	state = SHRINK;
> 	error = do_shrink();
> 
> 	return 0;
> }
Ok, I had to go over this a few times, but I think I understand what 
you're describing.  Will update in the next version
> 
> I'm not totally sold on the idea of rolling the state forward explicitly
> like this, but it seems like it could be a bit more maintainable.
I think it is.  Having a dedicated struct just for this purpose 
alleviates a lot of struggle with trying to grab onto things like the 
fork or the incomplete flags to represent what we're trying to do here. 
Doing so also overloads their original intent in that if these 
structures ever change in the future, it may break something that the 
state machine depends on.  In this solution, they remain disjoint 
concepts dedicated to their purpose.  And anyway, I couldn't completely 
escape the state machine in the previous set.  I still had to add the 
extra flag space which functioned more or less like "i was here" tick 
marks.  If we have to have it, we may as well leverage what it can do. 
For example I can drop patch 11 from this set because I don't need the 
extra isset helpers to see if it's already been done.

  All in
> all this is still fairly ugly, but this is mostly a mechanical attempt
> to keep state management isolated and we can polish it up from there.
> Thoughts?

Yes, at this point, I do kind of feel like it's the least of the ugly 
prototypes.  So I'm just kind of proceeding, with caution. :-)

Thanks for the in depths reviews!!  I know its a lot!  Much appreciated!!

Allison

> 
> Brian
> 
>> +		/*
>> +		 * Unmap value blocks for this attr.  This is similar to
>> +		 * xfs_attr_rmtval_remove, but open coded here to return EAGAIN
>> +		 * for new transactions
>> +		 */
>> +		while (!done && !error) {
>> +			error = xfs_bunmapi(args->trans, args->dp,
>> +				    args->rmtblkno, args->rmtblkcnt,
>> +				    XFS_BMAPI_ATTRFORK, 1, &done);
>> +			if (error)
>> +				return error;
>>   
>> -		error = xfs_attr_rmtval_remove(args);
>> -		if (error)
>> -			goto out;
>> +			if (!done) {
>> +				args->dc.dc_state = XFS_DC_RM_NODE_BLKS;
>> +				return -EAGAIN;
>> +			}
>> +		}
>>   
>>   		/*
>>   		 * Refill the state structure with buffers, the prior calls
>> @@ -1287,17 +1378,12 @@ xfs_attr_node_removename(
>>   		error = xfs_da3_join(state);
>>   		if (error)
>>   			goto out;
>> -		error = xfs_defer_finish(&args->trans);
>> -		if (error)
>> -			goto out;
>> -		/*
>> -		 * Commit the Btree join operation and start a new trans.
>> -		 */
>> -		error = xfs_trans_roll_inode(&args->trans, dp);
>> -		if (error)
>> -			goto out;
>> +
>> +		args->dc.dc_state = XFS_DC_RM_SHRINK;
>> +		return -EAGAIN;
>>   	}
>>   
>> +rm_shrink:
>>   	/*
>>   	 * If the result is small enough, push it all into the inode.
>>   	 */
>> @@ -1319,9 +1405,6 @@ xfs_attr_node_removename(
>>   			/* bp is gone due to xfs_da_shrink_inode */
>>   			if (error)
>>   				goto out;
>> -			error = xfs_defer_finish(&args->trans);
>> -			if (error)
>> -				goto out;
>>   		} else
>>   			xfs_trans_brelse(args->trans, bp);
>>   	}
>> diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
>> index 3b5dad4..fb8bf5b 100644
>> --- a/fs/xfs/libxfs/xfs_attr.h
>> +++ b/fs/xfs/libxfs/xfs_attr.h
>> @@ -152,6 +152,7 @@ int xfs_attr_set_args(struct xfs_da_args *args);
>>   int xfs_attr_remove(struct xfs_inode *dp, struct xfs_name *name, int flags);
>>   int xfs_has_attr(struct xfs_da_args *args);
>>   int xfs_attr_remove_args(struct xfs_da_args *args);
>> +int xfs_attr_remove_later(struct xfs_da_args *args);
>>   int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
>>   		  int flags, struct attrlist_cursor_kern *cursor);
>>   bool xfs_attr_namecheck(const void *name, size_t length);
>> -- 
>> 2.7.4
>>
>
Brian Foster Nov. 13, 2019, 11:54 a.m. UTC | #5
On Tue, Nov 12, 2019 at 05:43:04PM -0700, Allison Collins wrote:
> 
> 
> On 11/12/19 6:37 AM, Brian Foster wrote:
> > On Wed, Nov 06, 2019 at 06:28:00PM -0700, Allison Collins wrote:
> > > This patch modifies the attr remove routines to be delay ready.
> > > This means they no longer roll or commit transactions, but instead
> > > return -EAGAIN to have the calling routine roll and refresh the
> > > transaction.  In this series, xfs_attr_remove_args has become
> > > xfs_attr_remove_later, which uses a state machine to keep track
> > > of where it was when EAGAIN was returned.  xfs_attr_node_removename
> > > has also been modified to use the state machine, and a  new version of
> > > xfs_attr_remove_args consists of a simple loop to refresh the
> > > transaction until the operation is completed.
> > > 
> > > Signed-off-by: Allison Collins <allison.henderson@oracle.com>
> > > ---
> > 
> > On a cursory look, this is definitely more along the lines of what I was
> > thinking on the previous revisions. I would like to see if we can get a
> > bit more refactoring/cleanup before this point though. Further thoughts
> > inline..
> > 
> > >   fs/xfs/libxfs/xfs_attr.c | 123 +++++++++++++++++++++++++++++++++++++++--------
> > >   fs/xfs/libxfs/xfs_attr.h |   1 +
> > >   2 files changed, 104 insertions(+), 20 deletions(-)
> > > 
> > > diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
> > > index 626d4a98..38d5c5c 100644
> > > --- a/fs/xfs/libxfs/xfs_attr.c
> > > +++ b/fs/xfs/libxfs/xfs_attr.c
> > > @@ -369,10 +369,56 @@ xfs_has_attr(
> > >    */
> > >   int
> > >   xfs_attr_remove_args(
> > > +	struct xfs_da_args	*args)
> > > +{
> > > +	int			error = 0;
> > > +	int			err2 = 0;
> > > +
> > > +	do {
> > > +		error = xfs_attr_remove_later(args);
> > > +		if (error && error != -EAGAIN)
> > > +			goto out;
> > 
> > xfs_attr_remove_later() strikes me as an odd name with respect to the
> > functionality. Perhaps something like xfs_attr_remove_step() is
> > (slightly) more accurate..?
> Sure that's fine.  I think Darrick had proposed the *_later scheme in an
> earlier review but that was when the code paths were split.  Darrick, does
> the *_step scheme work for you?
> 

FWIW, xfs_attr_remove_iter() also came to mind after sending the
previous mail.

> > 
> > > +
> > > +		xfs_trans_log_inode(args->trans, args->dp,
> > > +			XFS_ILOG_CORE | XFS_ILOG_ADATA);
> > > +
> > > +		err2 = xfs_trans_roll(&args->trans);
> > > +		if (err2) {
> > > +			error = err2;
> > 
> > Also do we really need two error codes in this function? It seems like
> > we should be able to write this with one, but I haven't tried it..
> No, because then we'll loose the xfs_attr_remove_later return code, which is
> either 0 or EAGAIN at this point.  And we need that to drive the loop.  To
> get rid of err2, we'd need another "not_done" variable or something.  Like:
> 
> do {
> 	...
> 	not_done = (error == -EAGAIN);
> 	...
> } while (not_done)
> 
> 
> Not sure if not_done is really preferable to err2?
> 

Eh, NBD. It just looked like potentially verbose logic on a quick scan.
On a closer look, I see that we want to handle both error == 0 and error
== -EAGAIN, so this seems fine as is to me.

> > 
> > > +			goto out;
> > > +		}
> > > +
> > > +		/* Rejoin inode */
> > > +		xfs_trans_ijoin(args->trans, args->dp, 0);
> > > +
> > > +	} while (error == -EAGAIN);
> > > +out:
> > > +	return error;
> > > +}
> > > +
> > > +/*
> > > + * Remove the attribute specified in @args.
> > > + * This routine is meant to function as a delayed operation, and may return
> > > + * -EGAIN when the transaction needs to be rolled.  Calling functions will need
> > > + * to handle this, and recall the function until a successful error code is
> > > + * returned.
> > > + */
> > > +int
> > > +xfs_attr_remove_later(
> > >   	struct xfs_da_args      *args)
> > >   {
> > >   	struct xfs_inode	*dp = args->dp;
> > > -	int			error;
> > > +	int			error = 0;
> > > +
> > > +	/* State machine switch */
> > > +	switch (args->dc.dc_state) {
> > > +	case XFS_DC_RM_INVALIDATE:
> > > +	case XFS_DC_RM_SHRINK:
> > > +	case XFS_DC_RM_NODE_BLKS:
> > > +		goto node;
> > > +	default:
> > > +		break;
> > > +	}
> > >   	if (!xfs_inode_hasattr(dp)) {
> > >   		error = -ENOATTR;
> > > @@ -382,6 +428,7 @@ xfs_attr_remove_args(
> > >   	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
> > >   		error = xfs_attr_leaf_removename(args);
> > >   	} else {
> > > +node:
> > >   		error = xfs_attr_node_removename(args);
> > >   	}
> > > @@ -892,9 +939,6 @@ xfs_attr_leaf_removename(
> > >   		/* bp is gone due to xfs_da_shrink_inode */
> > >   		if (error)
> > >   			return error;
> > > -		error = xfs_defer_finish(&args->trans);
> > > -		if (error)
> > > -			return error;
> > >   	}
> > >   	return 0;
> > >   }
> > > @@ -1212,6 +1256,11 @@ xfs_attr_node_addname(
> > >    * This will involve walking down the Btree, and may involve joining
> > >    * leaf nodes and even joining intermediate nodes up to and including
> > >    * the root node (a special case of an intermediate node).
> > > + *
> > > + * This routine is meant to function as a delayed operation, and may return
> > > + * -EAGAIN when the transaction needs to be rolled.  Calling functions
> > > + * will need to handle this, and recall the function until a successful error
> > > + * code is returned.
> > >    */
> > >   STATIC int
> > >   xfs_attr_node_removename(
> > > @@ -1222,12 +1271,29 @@ xfs_attr_node_removename(
> > >   	struct xfs_buf		*bp;
> > >   	int			retval, error, forkoff;
> > >   	struct xfs_inode	*dp = args->dp;
> > > +	int			done = 0;
> > >   	trace_xfs_attr_node_removename(args);
> > > +	state = args->dc.da_state;
> > > +	blk = args->dc.blk;
> > > +
> > > +	/* State machine switch */
> > > +	switch (args->dc.dc_state) {
> > > +	case XFS_DC_RM_NODE_BLKS:
> > > +		goto rm_node_blks;
> > > +	case XFS_DC_RM_INVALIDATE:
> > > +		goto rm_invalidate;
> > > +	case XFS_DC_RM_SHRINK:
> > > +		goto rm_shrink;
> > > +	default:
> > > +		break;
> > 
> > I wonder if it's worth having an explicit state for the initial path.
> > That could be useful for readability and debuggability in the future.
> We could, it will just require to the calling function to set that before
> state calling it.
> 

Looking back, I see we have XFS_DC_INIT in the enum, but it isn't used.
What is that particular state for? Any reason the enum doesn't start
with a value of 0?

> Mechanically, I dont think it would hurt anything, but it may lead to
> developer wonderments like... "Where's the EAGAIN for this state?" "Shouldnt
> this state appear in the switch up top too?"  Or if it does "Why do we have
> it here, if it never executes?"  "I wonder if i should sent a patch to take
> it out..." :-)
> 

Perhaps, though some of those questions already exist with the current
code. Not every function cares about every state from what I can tell.
Also, not every state that can return -EAGAIN is guaranteed to do so.

> Puzzlement aside though, I cant quite think of what condition it would help
> to debug?  It's not an error for the statemachine to hold a value outside of
> the helpers scope.  It just means the caller was using it up to this point.
> Helpers really shouldnt have enough context about their callers to know or
> care what the caller states mean.  If we added a special init state, all the
> default statement would really mean is: "The caller forgot to set the init
> state".
> 

See my question above around the existing init state. However that is
intended to be used is not really clear to me. BTW, it might be better
to introduce the core structures in one patch and add the individual
states as they are used by the subsequent patches that use them.

WRT to readability/debuggability, I suppose a simple example would be to
consider if we had a tracepoint somewhere in the higher level code that
printed state and error code and we wanted to use that to identify an
unexpected error that occurs during an attr op. With the current code,
if that printed something like:

	state INVALIDATE error -EIO

... that doesn't tell us a whole lot about what happened beyond
something failed sometime after the setflag state. The attr itself might
not even have remote blocks to invalidate, which is slightly confusing.
This is handwavy/subjective, of course..

> Thoughts?
> 
> > 
> > > +	}
> > >   	error = xfs_attr_node_hasname(args, &state);
> > >   	if (error != -EEXIST)
> > >   		goto out;
> > > +	else
> > > +		error = 0;
> > >   	/*
> > >   	 * If there is an out-of-line value, de-allocate the blocks.
> > > @@ -1237,6 +1303,14 @@ xfs_attr_node_removename(
> > >   	blk = &state->path.blk[ state->path.active-1 ];
> > >   	ASSERT(blk->bp != NULL);
> > >   	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
> > > +
> > > +	/*
> > > +	 * Store blk and state in the context incase we need to cycle out the
> > > +	 * transaction
> > > +	 */
> > > +	args->dc.blk = blk;
> > > +	args->dc.da_state = state;
> > > +
> > >   	if (args->rmtblkno > 0) {
> > >   		/*
> > >   		 * Fill in disk block numbers in the state structure
> > > @@ -1255,13 +1329,30 @@ xfs_attr_node_removename(
> > >   		if (error)
> > >   			goto out;
> > > -		error = xfs_trans_roll_inode(&args->trans, args->dp);
> > > +		args->dc.dc_state = XFS_DC_RM_INVALIDATE;
> > > +		return -EAGAIN;
> > > +rm_invalidate:
> > > +		error = xfs_attr_rmtval_invalidate(args);
> > >   		if (error)
> > >   			goto out;
> > > +rm_node_blks:
> > 
> > While I think the design is the right idea, jumping down into a function
> > like this is pretty hairy. I think we should try to further break this
> > function down into smaller elements one way or another that model the
> > steps defined by the state structure. There's probably multiple ways to
> > do that. For example, the remote attr bits could be broken down into
> > a subfunction that processes the couple of states associated with remote
> > blocks. That said, ISTM it might be wiser to try and keep the state
> > processing in one place if possible. That would imply to break the
> > remote processing loop down into a couple functions. All in all, this
> > function might end up looking something like:
> > 
> > xfs_attr_node_removename()
> > {
> > 	/* switch statement and comment to document each state */
> > 
> > 	error = xfs_attr_node_hasname(args, &state);
> > 	...
> > 
> > 	if (remote) {
> > 		error = do_setflag();
> > 		if (error)
> > 			return error;
> > 
> > 		/* roll */
> > 		state = INVALIDATE;
> > 		return -EAGAIN;
> > 	}
> > 
> > rmt_invalidate:
> > 	state = INVALIDATE;
> > 	if (remote)
> > 		do_invalidate();
> > 	/* fallthru */
> > 
> > rmt_rm_blks:
> > 	state = RM_NODE_BLKS;
> > 	if (remote) {
> > 		/* loops and returns -EAGAIN until we fallthru */
> > 		error = rmt_remove_step();
> > 		if (error)
> > 			return error;
> > 
> > 		xfs_attr_refillstate();
> > 	}
> > 
> > /* maybe worth a new state here? */
> > removename:
> > 	state = REMOVENAME;
> > 	xfs_attr3_leaf_remove();
> > 	...
> > 	if (...) {
> > 		state = SHRINK;
> > 		return -EAGAIN;
> > 	}
> > 
> > shrink:
> > 	state = SHRINK;
> > 	error = do_shrink();
> > 
> > 	return 0;
> > }
> Ok, I had to go over this a few times, but I think I understand what you're
> describing.  Will update in the next version
> > 
> > I'm not totally sold on the idea of rolling the state forward explicitly
> > like this, but it seems like it could be a bit more maintainable.
> I think it is.  Having a dedicated struct just for this purpose alleviates a
> lot of struggle with trying to grab onto things like the fork or the
> incomplete flags to represent what we're trying to do here. Doing so also
> overloads their original intent in that if these structures ever change in
> the future, it may break something that the state machine depends on.  In
> this solution, they remain disjoint concepts dedicated to their purpose.
> And anyway, I couldn't completely escape the state machine in the previous
> set.  I still had to add the extra flag space which functioned more or less
> like "i was here" tick marks.  If we have to have it, we may as well
> leverage what it can do. For example I can drop patch 11 from this set
> because I don't need the extra isset helpers to see if it's already been
> done.
> 

Right.. I agree that the "bookmark" like approach in the current scheme
makes the state implementation (and not necessarily the operational
implementation) a little hard to follow and review. Note again that what
I wrote up here was just a quick example for that higher level feedback
of somehow or another trying to isolate state updates from state
implementation, so I'm not necessarily tied to that specific approach if
there are other ways to similarly simplify things.

I do think fixing up the code to avoid jumping into loops and whatnot is
more important. It could also be that just continuing to break things
down into as small functions as possible (i.e. with a goal of 1 function
per state) kind of forces a natural separation.

Brian

>  All in
> > all this is still fairly ugly, but this is mostly a mechanical attempt
> > to keep state management isolated and we can polish it up from there.
> > Thoughts?
> 
> Yes, at this point, I do kind of feel like it's the least of the ugly
> prototypes.  So I'm just kind of proceeding, with caution. :-)
> 
> Thanks for the in depths reviews!!  I know its a lot!  Much appreciated!!
> 
> Allison
> 
> > 
> > Brian
> > 
> > > +		/*
> > > +		 * Unmap value blocks for this attr.  This is similar to
> > > +		 * xfs_attr_rmtval_remove, but open coded here to return EAGAIN
> > > +		 * for new transactions
> > > +		 */
> > > +		while (!done && !error) {
> > > +			error = xfs_bunmapi(args->trans, args->dp,
> > > +				    args->rmtblkno, args->rmtblkcnt,
> > > +				    XFS_BMAPI_ATTRFORK, 1, &done);
> > > +			if (error)
> > > +				return error;
> > > -		error = xfs_attr_rmtval_remove(args);
> > > -		if (error)
> > > -			goto out;
> > > +			if (!done) {
> > > +				args->dc.dc_state = XFS_DC_RM_NODE_BLKS;
> > > +				return -EAGAIN;
> > > +			}
> > > +		}
> > >   		/*
> > >   		 * Refill the state structure with buffers, the prior calls
> > > @@ -1287,17 +1378,12 @@ xfs_attr_node_removename(
> > >   		error = xfs_da3_join(state);
> > >   		if (error)
> > >   			goto out;
> > > -		error = xfs_defer_finish(&args->trans);
> > > -		if (error)
> > > -			goto out;
> > > -		/*
> > > -		 * Commit the Btree join operation and start a new trans.
> > > -		 */
> > > -		error = xfs_trans_roll_inode(&args->trans, dp);
> > > -		if (error)
> > > -			goto out;
> > > +
> > > +		args->dc.dc_state = XFS_DC_RM_SHRINK;
> > > +		return -EAGAIN;
> > >   	}
> > > +rm_shrink:
> > >   	/*
> > >   	 * If the result is small enough, push it all into the inode.
> > >   	 */
> > > @@ -1319,9 +1405,6 @@ xfs_attr_node_removename(
> > >   			/* bp is gone due to xfs_da_shrink_inode */
> > >   			if (error)
> > >   				goto out;
> > > -			error = xfs_defer_finish(&args->trans);
> > > -			if (error)
> > > -				goto out;
> > >   		} else
> > >   			xfs_trans_brelse(args->trans, bp);
> > >   	}
> > > diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
> > > index 3b5dad4..fb8bf5b 100644
> > > --- a/fs/xfs/libxfs/xfs_attr.h
> > > +++ b/fs/xfs/libxfs/xfs_attr.h
> > > @@ -152,6 +152,7 @@ int xfs_attr_set_args(struct xfs_da_args *args);
> > >   int xfs_attr_remove(struct xfs_inode *dp, struct xfs_name *name, int flags);
> > >   int xfs_has_attr(struct xfs_da_args *args);
> > >   int xfs_attr_remove_args(struct xfs_da_args *args);
> > > +int xfs_attr_remove_later(struct xfs_da_args *args);
> > >   int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
> > >   		  int flags, struct attrlist_cursor_kern *cursor);
> > >   bool xfs_attr_namecheck(const void *name, size_t length);
> > > -- 
> > > 2.7.4
> > > 
> > 
>
Allison Henderson Nov. 13, 2019, 11:39 p.m. UTC | #6
On 11/13/19 4:54 AM, Brian Foster wrote:
> On Tue, Nov 12, 2019 at 05:43:04PM -0700, Allison Collins wrote:
>>
>>
>> On 11/12/19 6:37 AM, Brian Foster wrote:
>>> On Wed, Nov 06, 2019 at 06:28:00PM -0700, Allison Collins wrote:
>>>> This patch modifies the attr remove routines to be delay ready.
>>>> This means they no longer roll or commit transactions, but instead
>>>> return -EAGAIN to have the calling routine roll and refresh the
>>>> transaction.  In this series, xfs_attr_remove_args has become
>>>> xfs_attr_remove_later, which uses a state machine to keep track
>>>> of where it was when EAGAIN was returned.  xfs_attr_node_removename
>>>> has also been modified to use the state machine, and a  new version of
>>>> xfs_attr_remove_args consists of a simple loop to refresh the
>>>> transaction until the operation is completed.
>>>>
>>>> Signed-off-by: Allison Collins <allison.henderson@oracle.com>
>>>> ---
>>>
>>> On a cursory look, this is definitely more along the lines of what I was
>>> thinking on the previous revisions. I would like to see if we can get a
>>> bit more refactoring/cleanup before this point though. Further thoughts
>>> inline..
>>>
>>>>    fs/xfs/libxfs/xfs_attr.c | 123 +++++++++++++++++++++++++++++++++++++++--------
>>>>    fs/xfs/libxfs/xfs_attr.h |   1 +
>>>>    2 files changed, 104 insertions(+), 20 deletions(-)
>>>>
>>>> diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
>>>> index 626d4a98..38d5c5c 100644
>>>> --- a/fs/xfs/libxfs/xfs_attr.c
>>>> +++ b/fs/xfs/libxfs/xfs_attr.c
>>>> @@ -369,10 +369,56 @@ xfs_has_attr(
>>>>     */
>>>>    int
>>>>    xfs_attr_remove_args(
>>>> +	struct xfs_da_args	*args)
>>>> +{
>>>> +	int			error = 0;
>>>> +	int			err2 = 0;
>>>> +
>>>> +	do {
>>>> +		error = xfs_attr_remove_later(args);
>>>> +		if (error && error != -EAGAIN)
>>>> +			goto out;
>>>
>>> xfs_attr_remove_later() strikes me as an odd name with respect to the
>>> functionality. Perhaps something like xfs_attr_remove_step() is
>>> (slightly) more accurate..?
>> Sure that's fine.  I think Darrick had proposed the *_later scheme in an
>> earlier review but that was when the code paths were split.  Darrick, does
>> the *_step scheme work for you?
>>
> 
> FWIW, xfs_attr_remove_iter() also came to mind after sending the
> previous mail.
Alrighty, I am fine with a *_iter scheme then

> 
>>>
>>>> +
>>>> +		xfs_trans_log_inode(args->trans, args->dp,
>>>> +			XFS_ILOG_CORE | XFS_ILOG_ADATA);
>>>> +
>>>> +		err2 = xfs_trans_roll(&args->trans);
>>>> +		if (err2) {
>>>> +			error = err2;
>>>
>>> Also do we really need two error codes in this function? It seems like
>>> we should be able to write this with one, but I haven't tried it..
>> No, because then we'll loose the xfs_attr_remove_later return code, which is
>> either 0 or EAGAIN at this point.  And we need that to drive the loop.  To
>> get rid of err2, we'd need another "not_done" variable or something.  Like:
>>
>> do {
>> 	...
>> 	not_done = (error == -EAGAIN);
>> 	...
>> } while (not_done)
>>
>>
>> Not sure if not_done is really preferable to err2?
>>
> 
> Eh, NBD. It just looked like potentially verbose logic on a quick scan.
> On a closer look, I see that we want to handle both error == 0 and error
> == -EAGAIN, so this seems fine as is to me.
> 
>>>
>>>> +			goto out;
>>>> +		}
>>>> +
>>>> +		/* Rejoin inode */
>>>> +		xfs_trans_ijoin(args->trans, args->dp, 0);
>>>> +
>>>> +	} while (error == -EAGAIN);
>>>> +out:
>>>> +	return error;
>>>> +}
>>>> +
>>>> +/*
>>>> + * Remove the attribute specified in @args.
>>>> + * This routine is meant to function as a delayed operation, and may return
>>>> + * -EGAIN when the transaction needs to be rolled.  Calling functions will need
>>>> + * to handle this, and recall the function until a successful error code is
>>>> + * returned.
>>>> + */
>>>> +int
>>>> +xfs_attr_remove_later(
>>>>    	struct xfs_da_args      *args)
>>>>    {
>>>>    	struct xfs_inode	*dp = args->dp;
>>>> -	int			error;
>>>> +	int			error = 0;
>>>> +
>>>> +	/* State machine switch */
>>>> +	switch (args->dc.dc_state) {
>>>> +	case XFS_DC_RM_INVALIDATE:
>>>> +	case XFS_DC_RM_SHRINK:
>>>> +	case XFS_DC_RM_NODE_BLKS:
>>>> +		goto node;
>>>> +	default:
>>>> +		break;
>>>> +	}
>>>>    	if (!xfs_inode_hasattr(dp)) {
>>>>    		error = -ENOATTR;
>>>> @@ -382,6 +428,7 @@ xfs_attr_remove_args(
>>>>    	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
>>>>    		error = xfs_attr_leaf_removename(args);
>>>>    	} else {
>>>> +node:
>>>>    		error = xfs_attr_node_removename(args);
>>>>    	}
>>>> @@ -892,9 +939,6 @@ xfs_attr_leaf_removename(
>>>>    		/* bp is gone due to xfs_da_shrink_inode */
>>>>    		if (error)
>>>>    			return error;
>>>> -		error = xfs_defer_finish(&args->trans);
>>>> -		if (error)
>>>> -			return error;
>>>>    	}
>>>>    	return 0;
>>>>    }
>>>> @@ -1212,6 +1256,11 @@ xfs_attr_node_addname(
>>>>     * This will involve walking down the Btree, and may involve joining
>>>>     * leaf nodes and even joining intermediate nodes up to and including
>>>>     * the root node (a special case of an intermediate node).
>>>> + *
>>>> + * This routine is meant to function as a delayed operation, and may return
>>>> + * -EAGAIN when the transaction needs to be rolled.  Calling functions
>>>> + * will need to handle this, and recall the function until a successful error
>>>> + * code is returned.
>>>>     */
>>>>    STATIC int
>>>>    xfs_attr_node_removename(
>>>> @@ -1222,12 +1271,29 @@ xfs_attr_node_removename(
>>>>    	struct xfs_buf		*bp;
>>>>    	int			retval, error, forkoff;
>>>>    	struct xfs_inode	*dp = args->dp;
>>>> +	int			done = 0;
>>>>    	trace_xfs_attr_node_removename(args);
>>>> +	state = args->dc.da_state;
>>>> +	blk = args->dc.blk;
>>>> +
>>>> +	/* State machine switch */
>>>> +	switch (args->dc.dc_state) {
>>>> +	case XFS_DC_RM_NODE_BLKS:
>>>> +		goto rm_node_blks;
>>>> +	case XFS_DC_RM_INVALIDATE:
>>>> +		goto rm_invalidate;
>>>> +	case XFS_DC_RM_SHRINK:
>>>> +		goto rm_shrink;
>>>> +	default:
>>>> +		break;
>>>
>>> I wonder if it's worth having an explicit state for the initial path.
>>> That could be useful for readability and debuggability in the future.
>> We could, it will just require to the calling function to set that before
>> state calling it.
>>
> 
> Looking back, I see we have XFS_DC_INIT in the enum, but it isn't used.
> What is that particular state for? Any reason the enum doesn't start
> with a value of 0?
Yes, sorry, it's used in the next patch series when we get into to the 
real delayed attr infrastructure.  I just forgot to take it out of the 
smaller subset here.

Basically all the use cases thus far already have args initialized, but 
in the log replay, we have to construct args from the log entry.  So we 
need the extra state to not re-initialize it every time and wipe out the 
state.  That use case is little more unique in that we have to set up 
the state machine after the ping pong has already started.

> 
>> Mechanically, I dont think it would hurt anything, but it may lead to
>> developer wonderments like... "Where's the EAGAIN for this state?" "Shouldnt
>> this state appear in the switch up top too?"  Or if it does "Why do we have
>> it here, if it never executes?"  "I wonder if i should sent a patch to take
>> it out..." :-)
>>
> 
> Perhaps, though some of those questions already exist with the current
> code. Not every function cares about every state from what I can tell.
> Also, not every state that can return -EAGAIN is guaranteed to do so.
> 
>> Puzzlement aside though, I cant quite think of what condition it would help
>> to debug?  It's not an error for the statemachine to hold a value outside of
>> the helpers scope.  It just means the caller was using it up to this point.
>> Helpers really shouldnt have enough context about their callers to know or
>> care what the caller states mean.  If we added a special init state, all the
>> default statement would really mean is: "The caller forgot to set the init
>> state".
>>
> 
> See my question above around the existing init state. However that is
> intended to be used is not really clear to me. BTW, it might be better
> to introduce the core structures in one patch and add the individual
> states as they are used by the subsequent patches that use them.
Ok, I will introduce the states progressively then, I think that caused 
the confusion

> 
> WRT to readability/debuggability, I suppose a simple example would be to
> consider if we had a tracepoint somewhere in the higher level code that
> printed state and error code and we wanted to use that to identify an
> unexpected error that occurs during an attr op. With the current code,
> if that printed something like:
> 
> 	state INVALIDATE error -EIO
> 
> ... that doesn't tell us a whole lot about what happened beyond
> something failed sometime after the setflag state. The attr itself might
> not even have remote blocks to invalidate, which is slightly confusing.
> This is handwavy/subjective, of course..
Yes, as it is, it doesn't really have any way of detecting a corrupt 
state.  Really though, if we want to do that what we need is another 
layer of modeling where we enforce the sequence of the states.  So for 
example, some sort of mapping or tree that defines: XFS_DC_LEAF_TO_NODE 
and XFS_DC_FOUND_LBLK as being children (or mappings) of 
XFS_DC_SF_TO_LEAF.

In other words, if the state is XFS_DC_SF_TO_LEAF, you can change it to 
XFS_DC_FOUND_LBLK or XFS_DC_LEAF_TO_NODE, but you can't go to say 
XFS_DC_RM_SHRINK, that would generate an error.  And then we have some 
sort of set_state() routine to enforce these rules.

That's a lot of extra overhead though, and could be more of a burden 
than a help.  But it's something we could explore if people think it's 
worth the pursuit.

Allison
> 
>> Thoughts?
>>
>>>
>>>> +	}
>>>>    	error = xfs_attr_node_hasname(args, &state);
>>>>    	if (error != -EEXIST)
>>>>    		goto out;
>>>> +	else
>>>> +		error = 0;
>>>>    	/*
>>>>    	 * If there is an out-of-line value, de-allocate the blocks.
>>>> @@ -1237,6 +1303,14 @@ xfs_attr_node_removename(
>>>>    	blk = &state->path.blk[ state->path.active-1 ];
>>>>    	ASSERT(blk->bp != NULL);
>>>>    	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
>>>> +
>>>> +	/*
>>>> +	 * Store blk and state in the context incase we need to cycle out the
>>>> +	 * transaction
>>>> +	 */
>>>> +	args->dc.blk = blk;
>>>> +	args->dc.da_state = state;
>>>> +
>>>>    	if (args->rmtblkno > 0) {
>>>>    		/*
>>>>    		 * Fill in disk block numbers in the state structure
>>>> @@ -1255,13 +1329,30 @@ xfs_attr_node_removename(
>>>>    		if (error)
>>>>    			goto out;
>>>> -		error = xfs_trans_roll_inode(&args->trans, args->dp);
>>>> +		args->dc.dc_state = XFS_DC_RM_INVALIDATE;
>>>> +		return -EAGAIN;
>>>> +rm_invalidate:
>>>> +		error = xfs_attr_rmtval_invalidate(args);
>>>>    		if (error)
>>>>    			goto out;
>>>> +rm_node_blks:
>>>
>>> While I think the design is the right idea, jumping down into a function
>>> like this is pretty hairy. I think we should try to further break this
>>> function down into smaller elements one way or another that model the
>>> steps defined by the state structure. There's probably multiple ways to
>>> do that. For example, the remote attr bits could be broken down into
>>> a subfunction that processes the couple of states associated with remote
>>> blocks. That said, ISTM it might be wiser to try and keep the state
>>> processing in one place if possible. That would imply to break the
>>> remote processing loop down into a couple functions. All in all, this
>>> function might end up looking something like:
>>>
>>> xfs_attr_node_removename()
>>> {
>>> 	/* switch statement and comment to document each state */
>>>
>>> 	error = xfs_attr_node_hasname(args, &state);
>>> 	...
>>>
>>> 	if (remote) {
>>> 		error = do_setflag();
>>> 		if (error)
>>> 			return error;
>>>
>>> 		/* roll */
>>> 		state = INVALIDATE;
>>> 		return -EAGAIN;
>>> 	}
>>>
>>> rmt_invalidate:
>>> 	state = INVALIDATE;
>>> 	if (remote)
>>> 		do_invalidate();
>>> 	/* fallthru */
>>>
>>> rmt_rm_blks:
>>> 	state = RM_NODE_BLKS;
>>> 	if (remote) {
>>> 		/* loops and returns -EAGAIN until we fallthru */
>>> 		error = rmt_remove_step();
>>> 		if (error)
>>> 			return error;
>>>
>>> 		xfs_attr_refillstate();
>>> 	}
>>>
>>> /* maybe worth a new state here? */
>>> removename:
>>> 	state = REMOVENAME;
>>> 	xfs_attr3_leaf_remove();
>>> 	...
>>> 	if (...) {
>>> 		state = SHRINK;
>>> 		return -EAGAIN;
>>> 	}
>>>
>>> shrink:
>>> 	state = SHRINK;
>>> 	error = do_shrink();
>>>
>>> 	return 0;
>>> }
>> Ok, I had to go over this a few times, but I think I understand what you're
>> describing.  Will update in the next version
>>>
>>> I'm not totally sold on the idea of rolling the state forward explicitly
>>> like this, but it seems like it could be a bit more maintainable.
>> I think it is.  Having a dedicated struct just for this purpose alleviates a
>> lot of struggle with trying to grab onto things like the fork or the
>> incomplete flags to represent what we're trying to do here. Doing so also
>> overloads their original intent in that if these structures ever change in
>> the future, it may break something that the state machine depends on.  In
>> this solution, they remain disjoint concepts dedicated to their purpose.
>> And anyway, I couldn't completely escape the state machine in the previous
>> set.  I still had to add the extra flag space which functioned more or less
>> like "i was here" tick marks.  If we have to have it, we may as well
>> leverage what it can do. For example I can drop patch 11 from this set
>> because I don't need the extra isset helpers to see if it's already been
>> done.
>>
> 
> Right.. I agree that the "bookmark" like approach in the current scheme
> makes the state implementation (and not necessarily the operational
> implementation) a little hard to follow and review. Note again that what
> I wrote up here was just a quick example for that higher level feedback
> of somehow or another trying to isolate state updates from state
> implementation, so I'm not necessarily tied to that specific approach if
> there are other ways to similarly simplify things.
> 
> I do think fixing up the code to avoid jumping into loops and whatnot is
> more important. It could also be that just continuing to break things
> down into as small functions as possible (i.e. with a goal of 1 function
> per state) kind of forces a natural separation.
> 
> Brian
> 
>>   All in
>>> all this is still fairly ugly, but this is mostly a mechanical attempt
>>> to keep state management isolated and we can polish it up from there.
>>> Thoughts?
>>
>> Yes, at this point, I do kind of feel like it's the least of the ugly
>> prototypes.  So I'm just kind of proceeding, with caution. :-)
>>
>> Thanks for the in depths reviews!!  I know its a lot!  Much appreciated!!
>>
>> Allison
>>
>>>
>>> Brian
>>>
>>>> +		/*
>>>> +		 * Unmap value blocks for this attr.  This is similar to
>>>> +		 * xfs_attr_rmtval_remove, but open coded here to return EAGAIN
>>>> +		 * for new transactions
>>>> +		 */
>>>> +		while (!done && !error) {
>>>> +			error = xfs_bunmapi(args->trans, args->dp,
>>>> +				    args->rmtblkno, args->rmtblkcnt,
>>>> +				    XFS_BMAPI_ATTRFORK, 1, &done);
>>>> +			if (error)
>>>> +				return error;
>>>> -		error = xfs_attr_rmtval_remove(args);
>>>> -		if (error)
>>>> -			goto out;
>>>> +			if (!done) {
>>>> +				args->dc.dc_state = XFS_DC_RM_NODE_BLKS;
>>>> +				return -EAGAIN;
>>>> +			}
>>>> +		}
>>>>    		/*
>>>>    		 * Refill the state structure with buffers, the prior calls
>>>> @@ -1287,17 +1378,12 @@ xfs_attr_node_removename(
>>>>    		error = xfs_da3_join(state);
>>>>    		if (error)
>>>>    			goto out;
>>>> -		error = xfs_defer_finish(&args->trans);
>>>> -		if (error)
>>>> -			goto out;
>>>> -		/*
>>>> -		 * Commit the Btree join operation and start a new trans.
>>>> -		 */
>>>> -		error = xfs_trans_roll_inode(&args->trans, dp);
>>>> -		if (error)
>>>> -			goto out;
>>>> +
>>>> +		args->dc.dc_state = XFS_DC_RM_SHRINK;
>>>> +		return -EAGAIN;
>>>>    	}
>>>> +rm_shrink:
>>>>    	/*
>>>>    	 * If the result is small enough, push it all into the inode.
>>>>    	 */
>>>> @@ -1319,9 +1405,6 @@ xfs_attr_node_removename(
>>>>    			/* bp is gone due to xfs_da_shrink_inode */
>>>>    			if (error)
>>>>    				goto out;
>>>> -			error = xfs_defer_finish(&args->trans);
>>>> -			if (error)
>>>> -				goto out;
>>>>    		} else
>>>>    			xfs_trans_brelse(args->trans, bp);
>>>>    	}
>>>> diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
>>>> index 3b5dad4..fb8bf5b 100644
>>>> --- a/fs/xfs/libxfs/xfs_attr.h
>>>> +++ b/fs/xfs/libxfs/xfs_attr.h
>>>> @@ -152,6 +152,7 @@ int xfs_attr_set_args(struct xfs_da_args *args);
>>>>    int xfs_attr_remove(struct xfs_inode *dp, struct xfs_name *name, int flags);
>>>>    int xfs_has_attr(struct xfs_da_args *args);
>>>>    int xfs_attr_remove_args(struct xfs_da_args *args);
>>>> +int xfs_attr_remove_later(struct xfs_da_args *args);
>>>>    int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
>>>>    		  int flags, struct attrlist_cursor_kern *cursor);
>>>>    bool xfs_attr_namecheck(const void *name, size_t length);
>>>> -- 
>>>> 2.7.4
>>>>
>>>
>>
>
Brian Foster Nov. 14, 2019, 12:48 p.m. UTC | #7
On Wed, Nov 13, 2019 at 04:39:18PM -0700, Allison Collins wrote:
> 
> 
> On 11/13/19 4:54 AM, Brian Foster wrote:
> > On Tue, Nov 12, 2019 at 05:43:04PM -0700, Allison Collins wrote:
> > > 
> > > 
> > > On 11/12/19 6:37 AM, Brian Foster wrote:
> > > > On Wed, Nov 06, 2019 at 06:28:00PM -0700, Allison Collins wrote:
> > > > > This patch modifies the attr remove routines to be delay ready.
> > > > > This means they no longer roll or commit transactions, but instead
> > > > > return -EAGAIN to have the calling routine roll and refresh the
> > > > > transaction.  In this series, xfs_attr_remove_args has become
> > > > > xfs_attr_remove_later, which uses a state machine to keep track
> > > > > of where it was when EAGAIN was returned.  xfs_attr_node_removename
> > > > > has also been modified to use the state machine, and a  new version of
> > > > > xfs_attr_remove_args consists of a simple loop to refresh the
> > > > > transaction until the operation is completed.
> > > > > 
> > > > > Signed-off-by: Allison Collins <allison.henderson@oracle.com>
> > > > > ---
> > > > 
> > > > On a cursory look, this is definitely more along the lines of what I was
> > > > thinking on the previous revisions. I would like to see if we can get a
> > > > bit more refactoring/cleanup before this point though. Further thoughts
> > > > inline..
> > > > 
> > > > >    fs/xfs/libxfs/xfs_attr.c | 123 +++++++++++++++++++++++++++++++++++++++--------
> > > > >    fs/xfs/libxfs/xfs_attr.h |   1 +
> > > > >    2 files changed, 104 insertions(+), 20 deletions(-)
> > > > > 
> > > > > diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
> > > > > index 626d4a98..38d5c5c 100644
> > > > > --- a/fs/xfs/libxfs/xfs_attr.c
> > > > > +++ b/fs/xfs/libxfs/xfs_attr.c
> > > > > @@ -369,10 +369,56 @@ xfs_has_attr(
> > > > >     */
> > > > >    int
> > > > >    xfs_attr_remove_args(
> > > > > +	struct xfs_da_args	*args)
> > > > > +{
> > > > > +	int			error = 0;
> > > > > +	int			err2 = 0;
> > > > > +
> > > > > +	do {
> > > > > +		error = xfs_attr_remove_later(args);
> > > > > +		if (error && error != -EAGAIN)
> > > > > +			goto out;
> > > > 
> > > > xfs_attr_remove_later() strikes me as an odd name with respect to the
> > > > functionality. Perhaps something like xfs_attr_remove_step() is
> > > > (slightly) more accurate..?
> > > Sure that's fine.  I think Darrick had proposed the *_later scheme in an
> > > earlier review but that was when the code paths were split.  Darrick, does
> > > the *_step scheme work for you?
> > > 
> > 
> > FWIW, xfs_attr_remove_iter() also came to mind after sending the
> > previous mail.
> Alrighty, I am fine with a *_iter scheme then
> 
> > 
> > > > 
> > > > > +
> > > > > +		xfs_trans_log_inode(args->trans, args->dp,
> > > > > +			XFS_ILOG_CORE | XFS_ILOG_ADATA);
> > > > > +
> > > > > +		err2 = xfs_trans_roll(&args->trans);
> > > > > +		if (err2) {
> > > > > +			error = err2;
> > > > 
> > > > Also do we really need two error codes in this function? It seems like
> > > > we should be able to write this with one, but I haven't tried it..
> > > No, because then we'll loose the xfs_attr_remove_later return code, which is
> > > either 0 or EAGAIN at this point.  And we need that to drive the loop.  To
> > > get rid of err2, we'd need another "not_done" variable or something.  Like:
> > > 
> > > do {
> > > 	...
> > > 	not_done = (error == -EAGAIN);
> > > 	...
> > > } while (not_done)
> > > 
> > > 
> > > Not sure if not_done is really preferable to err2?
> > > 
> > 
> > Eh, NBD. It just looked like potentially verbose logic on a quick scan.
> > On a closer look, I see that we want to handle both error == 0 and error
> > == -EAGAIN, so this seems fine as is to me.
> > 
> > > > 
> > > > > +			goto out;
> > > > > +		}
> > > > > +
> > > > > +		/* Rejoin inode */
> > > > > +		xfs_trans_ijoin(args->trans, args->dp, 0);
> > > > > +
> > > > > +	} while (error == -EAGAIN);
> > > > > +out:
> > > > > +	return error;
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * Remove the attribute specified in @args.
> > > > > + * This routine is meant to function as a delayed operation, and may return
> > > > > + * -EGAIN when the transaction needs to be rolled.  Calling functions will need
> > > > > + * to handle this, and recall the function until a successful error code is
> > > > > + * returned.
> > > > > + */
> > > > > +int
> > > > > +xfs_attr_remove_later(
> > > > >    	struct xfs_da_args      *args)
> > > > >    {
> > > > >    	struct xfs_inode	*dp = args->dp;
> > > > > -	int			error;
> > > > > +	int			error = 0;
> > > > > +
> > > > > +	/* State machine switch */
> > > > > +	switch (args->dc.dc_state) {
> > > > > +	case XFS_DC_RM_INVALIDATE:
> > > > > +	case XFS_DC_RM_SHRINK:
> > > > > +	case XFS_DC_RM_NODE_BLKS:
> > > > > +		goto node;
> > > > > +	default:
> > > > > +		break;
> > > > > +	}
> > > > >    	if (!xfs_inode_hasattr(dp)) {
> > > > >    		error = -ENOATTR;
> > > > > @@ -382,6 +428,7 @@ xfs_attr_remove_args(
> > > > >    	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
> > > > >    		error = xfs_attr_leaf_removename(args);
> > > > >    	} else {
> > > > > +node:
> > > > >    		error = xfs_attr_node_removename(args);
> > > > >    	}
> > > > > @@ -892,9 +939,6 @@ xfs_attr_leaf_removename(
> > > > >    		/* bp is gone due to xfs_da_shrink_inode */
> > > > >    		if (error)
> > > > >    			return error;
> > > > > -		error = xfs_defer_finish(&args->trans);
> > > > > -		if (error)
> > > > > -			return error;
> > > > >    	}
> > > > >    	return 0;
> > > > >    }
> > > > > @@ -1212,6 +1256,11 @@ xfs_attr_node_addname(
> > > > >     * This will involve walking down the Btree, and may involve joining
> > > > >     * leaf nodes and even joining intermediate nodes up to and including
> > > > >     * the root node (a special case of an intermediate node).
> > > > > + *
> > > > > + * This routine is meant to function as a delayed operation, and may return
> > > > > + * -EAGAIN when the transaction needs to be rolled.  Calling functions
> > > > > + * will need to handle this, and recall the function until a successful error
> > > > > + * code is returned.
> > > > >     */
> > > > >    STATIC int
> > > > >    xfs_attr_node_removename(
> > > > > @@ -1222,12 +1271,29 @@ xfs_attr_node_removename(
> > > > >    	struct xfs_buf		*bp;
> > > > >    	int			retval, error, forkoff;
> > > > >    	struct xfs_inode	*dp = args->dp;
> > > > > +	int			done = 0;
> > > > >    	trace_xfs_attr_node_removename(args);
> > > > > +	state = args->dc.da_state;
> > > > > +	blk = args->dc.blk;
> > > > > +
> > > > > +	/* State machine switch */
> > > > > +	switch (args->dc.dc_state) {
> > > > > +	case XFS_DC_RM_NODE_BLKS:
> > > > > +		goto rm_node_blks;
> > > > > +	case XFS_DC_RM_INVALIDATE:
> > > > > +		goto rm_invalidate;
> > > > > +	case XFS_DC_RM_SHRINK:
> > > > > +		goto rm_shrink;
> > > > > +	default:
> > > > > +		break;
> > > > 
> > > > I wonder if it's worth having an explicit state for the initial path.
> > > > That could be useful for readability and debuggability in the future.
> > > We could, it will just require to the calling function to set that before
> > > state calling it.
> > > 
> > 
> > Looking back, I see we have XFS_DC_INIT in the enum, but it isn't used.
> > What is that particular state for? Any reason the enum doesn't start
> > with a value of 0?
> Yes, sorry, it's used in the next patch series when we get into to the real
> delayed attr infrastructure.  I just forgot to take it out of the smaller
> subset here.
> 
> Basically all the use cases thus far already have args initialized, but in
> the log replay, we have to construct args from the log entry.  So we need
> the extra state to not re-initialize it every time and wipe out the state.
> That use case is little more unique in that we have to set up the state
> machine after the ping pong has already started.
> 
> > 
> > > Mechanically, I dont think it would hurt anything, but it may lead to
> > > developer wonderments like... "Where's the EAGAIN for this state?" "Shouldnt
> > > this state appear in the switch up top too?"  Or if it does "Why do we have
> > > it here, if it never executes?"  "I wonder if i should sent a patch to take
> > > it out..." :-)
> > > 
> > 
> > Perhaps, though some of those questions already exist with the current
> > code. Not every function cares about every state from what I can tell.
> > Also, not every state that can return -EAGAIN is guaranteed to do so.
> > 
> > > Puzzlement aside though, I cant quite think of what condition it would help
> > > to debug?  It's not an error for the statemachine to hold a value outside of
> > > the helpers scope.  It just means the caller was using it up to this point.
> > > Helpers really shouldnt have enough context about their callers to know or
> > > care what the caller states mean.  If we added a special init state, all the
> > > default statement would really mean is: "The caller forgot to set the init
> > > state".
> > > 
> > 
> > See my question above around the existing init state. However that is
> > intended to be used is not really clear to me. BTW, it might be better
> > to introduce the core structures in one patch and add the individual
> > states as they are used by the subsequent patches that use them.
> Ok, I will introduce the states progressively then, I think that caused the
> confusion
> 
> > 
> > WRT to readability/debuggability, I suppose a simple example would be to
> > consider if we had a tracepoint somewhere in the higher level code that
> > printed state and error code and we wanted to use that to identify an
> > unexpected error that occurs during an attr op. With the current code,
> > if that printed something like:
> > 
> > 	state INVALIDATE error -EIO
> > 
> > ... that doesn't tell us a whole lot about what happened beyond
> > something failed sometime after the setflag state. The attr itself might
> > not even have remote blocks to invalidate, which is slightly confusing.
> > This is handwavy/subjective, of course..
> Yes, as it is, it doesn't really have any way of detecting a corrupt state.
> Really though, if we want to do that what we need is another layer of
> modeling where we enforce the sequence of the states.  So for example, some
> sort of mapping or tree that defines: XFS_DC_LEAF_TO_NODE and
> XFS_DC_FOUND_LBLK as being children (or mappings) of XFS_DC_SF_TO_LEAF.
> 
> In other words, if the state is XFS_DC_SF_TO_LEAF, you can change it to
> XFS_DC_FOUND_LBLK or XFS_DC_LEAF_TO_NODE, but you can't go to say
> XFS_DC_RM_SHRINK, that would generate an error.  And then we have some sort
> of set_state() routine to enforce these rules.
> 
> That's a lot of extra overhead though, and could be more of a burden than a
> help.  But it's something we could explore if people think it's worth the
> pursuit.
> 

Yeah, I don't think it's necessary to go so far as to create a complex
set of rules and whatnot purely for the purpose of validating state
management. I'm just handwaving about potentially structuring the code
such that the state management is clear and easy to follow. :)

Brian

> Allison
> > 
> > > Thoughts?
> > > 
> > > > 
> > > > > +	}
> > > > >    	error = xfs_attr_node_hasname(args, &state);
> > > > >    	if (error != -EEXIST)
> > > > >    		goto out;
> > > > > +	else
> > > > > +		error = 0;
> > > > >    	/*
> > > > >    	 * If there is an out-of-line value, de-allocate the blocks.
> > > > > @@ -1237,6 +1303,14 @@ xfs_attr_node_removename(
> > > > >    	blk = &state->path.blk[ state->path.active-1 ];
> > > > >    	ASSERT(blk->bp != NULL);
> > > > >    	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
> > > > > +
> > > > > +	/*
> > > > > +	 * Store blk and state in the context incase we need to cycle out the
> > > > > +	 * transaction
> > > > > +	 */
> > > > > +	args->dc.blk = blk;
> > > > > +	args->dc.da_state = state;
> > > > > +
> > > > >    	if (args->rmtblkno > 0) {
> > > > >    		/*
> > > > >    		 * Fill in disk block numbers in the state structure
> > > > > @@ -1255,13 +1329,30 @@ xfs_attr_node_removename(
> > > > >    		if (error)
> > > > >    			goto out;
> > > > > -		error = xfs_trans_roll_inode(&args->trans, args->dp);
> > > > > +		args->dc.dc_state = XFS_DC_RM_INVALIDATE;
> > > > > +		return -EAGAIN;
> > > > > +rm_invalidate:
> > > > > +		error = xfs_attr_rmtval_invalidate(args);
> > > > >    		if (error)
> > > > >    			goto out;
> > > > > +rm_node_blks:
> > > > 
> > > > While I think the design is the right idea, jumping down into a function
> > > > like this is pretty hairy. I think we should try to further break this
> > > > function down into smaller elements one way or another that model the
> > > > steps defined by the state structure. There's probably multiple ways to
> > > > do that. For example, the remote attr bits could be broken down into
> > > > a subfunction that processes the couple of states associated with remote
> > > > blocks. That said, ISTM it might be wiser to try and keep the state
> > > > processing in one place if possible. That would imply to break the
> > > > remote processing loop down into a couple functions. All in all, this
> > > > function might end up looking something like:
> > > > 
> > > > xfs_attr_node_removename()
> > > > {
> > > > 	/* switch statement and comment to document each state */
> > > > 
> > > > 	error = xfs_attr_node_hasname(args, &state);
> > > > 	...
> > > > 
> > > > 	if (remote) {
> > > > 		error = do_setflag();
> > > > 		if (error)
> > > > 			return error;
> > > > 
> > > > 		/* roll */
> > > > 		state = INVALIDATE;
> > > > 		return -EAGAIN;
> > > > 	}
> > > > 
> > > > rmt_invalidate:
> > > > 	state = INVALIDATE;
> > > > 	if (remote)
> > > > 		do_invalidate();
> > > > 	/* fallthru */
> > > > 
> > > > rmt_rm_blks:
> > > > 	state = RM_NODE_BLKS;
> > > > 	if (remote) {
> > > > 		/* loops and returns -EAGAIN until we fallthru */
> > > > 		error = rmt_remove_step();
> > > > 		if (error)
> > > > 			return error;
> > > > 
> > > > 		xfs_attr_refillstate();
> > > > 	}
> > > > 
> > > > /* maybe worth a new state here? */
> > > > removename:
> > > > 	state = REMOVENAME;
> > > > 	xfs_attr3_leaf_remove();
> > > > 	...
> > > > 	if (...) {
> > > > 		state = SHRINK;
> > > > 		return -EAGAIN;
> > > > 	}
> > > > 
> > > > shrink:
> > > > 	state = SHRINK;
> > > > 	error = do_shrink();
> > > > 
> > > > 	return 0;
> > > > }
> > > Ok, I had to go over this a few times, but I think I understand what you're
> > > describing.  Will update in the next version
> > > > 
> > > > I'm not totally sold on the idea of rolling the state forward explicitly
> > > > like this, but it seems like it could be a bit more maintainable.
> > > I think it is.  Having a dedicated struct just for this purpose alleviates a
> > > lot of struggle with trying to grab onto things like the fork or the
> > > incomplete flags to represent what we're trying to do here. Doing so also
> > > overloads their original intent in that if these structures ever change in
> > > the future, it may break something that the state machine depends on.  In
> > > this solution, they remain disjoint concepts dedicated to their purpose.
> > > And anyway, I couldn't completely escape the state machine in the previous
> > > set.  I still had to add the extra flag space which functioned more or less
> > > like "i was here" tick marks.  If we have to have it, we may as well
> > > leverage what it can do. For example I can drop patch 11 from this set
> > > because I don't need the extra isset helpers to see if it's already been
> > > done.
> > > 
> > 
> > Right.. I agree that the "bookmark" like approach in the current scheme
> > makes the state implementation (and not necessarily the operational
> > implementation) a little hard to follow and review. Note again that what
> > I wrote up here was just a quick example for that higher level feedback
> > of somehow or another trying to isolate state updates from state
> > implementation, so I'm not necessarily tied to that specific approach if
> > there are other ways to similarly simplify things.
> > 
> > I do think fixing up the code to avoid jumping into loops and whatnot is
> > more important. It could also be that just continuing to break things
> > down into as small functions as possible (i.e. with a goal of 1 function
> > per state) kind of forces a natural separation.
> > 
> > Brian
> > 
> > >   All in
> > > > all this is still fairly ugly, but this is mostly a mechanical attempt
> > > > to keep state management isolated and we can polish it up from there.
> > > > Thoughts?
> > > 
> > > Yes, at this point, I do kind of feel like it's the least of the ugly
> > > prototypes.  So I'm just kind of proceeding, with caution. :-)
> > > 
> > > Thanks for the in depths reviews!!  I know its a lot!  Much appreciated!!
> > > 
> > > Allison
> > > 
> > > > 
> > > > Brian
> > > > 
> > > > > +		/*
> > > > > +		 * Unmap value blocks for this attr.  This is similar to
> > > > > +		 * xfs_attr_rmtval_remove, but open coded here to return EAGAIN
> > > > > +		 * for new transactions
> > > > > +		 */
> > > > > +		while (!done && !error) {
> > > > > +			error = xfs_bunmapi(args->trans, args->dp,
> > > > > +				    args->rmtblkno, args->rmtblkcnt,
> > > > > +				    XFS_BMAPI_ATTRFORK, 1, &done);
> > > > > +			if (error)
> > > > > +				return error;
> > > > > -		error = xfs_attr_rmtval_remove(args);
> > > > > -		if (error)
> > > > > -			goto out;
> > > > > +			if (!done) {
> > > > > +				args->dc.dc_state = XFS_DC_RM_NODE_BLKS;
> > > > > +				return -EAGAIN;
> > > > > +			}
> > > > > +		}
> > > > >    		/*
> > > > >    		 * Refill the state structure with buffers, the prior calls
> > > > > @@ -1287,17 +1378,12 @@ xfs_attr_node_removename(
> > > > >    		error = xfs_da3_join(state);
> > > > >    		if (error)
> > > > >    			goto out;
> > > > > -		error = xfs_defer_finish(&args->trans);
> > > > > -		if (error)
> > > > > -			goto out;
> > > > > -		/*
> > > > > -		 * Commit the Btree join operation and start a new trans.
> > > > > -		 */
> > > > > -		error = xfs_trans_roll_inode(&args->trans, dp);
> > > > > -		if (error)
> > > > > -			goto out;
> > > > > +
> > > > > +		args->dc.dc_state = XFS_DC_RM_SHRINK;
> > > > > +		return -EAGAIN;
> > > > >    	}
> > > > > +rm_shrink:
> > > > >    	/*
> > > > >    	 * If the result is small enough, push it all into the inode.
> > > > >    	 */
> > > > > @@ -1319,9 +1405,6 @@ xfs_attr_node_removename(
> > > > >    			/* bp is gone due to xfs_da_shrink_inode */
> > > > >    			if (error)
> > > > >    				goto out;
> > > > > -			error = xfs_defer_finish(&args->trans);
> > > > > -			if (error)
> > > > > -				goto out;
> > > > >    		} else
> > > > >    			xfs_trans_brelse(args->trans, bp);
> > > > >    	}
> > > > > diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
> > > > > index 3b5dad4..fb8bf5b 100644
> > > > > --- a/fs/xfs/libxfs/xfs_attr.h
> > > > > +++ b/fs/xfs/libxfs/xfs_attr.h
> > > > > @@ -152,6 +152,7 @@ int xfs_attr_set_args(struct xfs_da_args *args);
> > > > >    int xfs_attr_remove(struct xfs_inode *dp, struct xfs_name *name, int flags);
> > > > >    int xfs_has_attr(struct xfs_da_args *args);
> > > > >    int xfs_attr_remove_args(struct xfs_da_args *args);
> > > > > +int xfs_attr_remove_later(struct xfs_da_args *args);
> > > > >    int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
> > > > >    		  int flags, struct attrlist_cursor_kern *cursor);
> > > > >    bool xfs_attr_namecheck(const void *name, size_t length);
> > > > > -- 
> > > > > 2.7.4
> > > > > 
> > > > 
> > > 
> > 
>
Allison Henderson Nov. 14, 2019, 5:58 p.m. UTC | #8
On 11/14/19 5:48 AM, Brian Foster wrote:
> On Wed, Nov 13, 2019 at 04:39:18PM -0700, Allison Collins wrote:
>>
>>
>> On 11/13/19 4:54 AM, Brian Foster wrote:

<snip>

>> Yes, as it is, it doesn't really have any way of detecting a corrupt state.
>> Really though, if we want to do that what we need is another layer of
>> modeling where we enforce the sequence of the states.  So for example, some
>> sort of mapping or tree that defines: XFS_DC_LEAF_TO_NODE and
>> XFS_DC_FOUND_LBLK as being children (or mappings) of XFS_DC_SF_TO_LEAF.
>>
>> In other words, if the state is XFS_DC_SF_TO_LEAF, you can change it to
>> XFS_DC_FOUND_LBLK or XFS_DC_LEAF_TO_NODE, but you can't go to say
>> XFS_DC_RM_SHRINK, that would generate an error.  And then we have some sort
>> of set_state() routine to enforce these rules.
>>
>> That's a lot of extra overhead though, and could be more of a burden than a
>> help.  But it's something we could explore if people think it's worth the
>> pursuit.
>>
> 
> Yeah, I don't think it's necessary to go so far as to create a complex
> set of rules and whatnot purely for the purpose of validating state
> management. I'm just handwaving about potentially structuring the code
> such that the state management is clear and easy to follow. :)
> 
> Brian

Yeah, that's kinda how I felt too.  Maybe later we could come back and 
reconsider it depending on how the refactoring goes.  I've done a little 
research online around about state machine designs, and usually what I 
see is some sort of mapping of state codes to function pointers.  So if 
it's possible to modularize everything into helpers, then an extra layer 
of abstraction starts to make a lot more sense.  But what we wouldn't 
want is so much refactoring that we have a bunch of little helpers so 
small and fragmented that the code flow is a headache to follow.  So I 
figure maybe for now we could just stick to refactoring snippets under 
the switch approach, and if it starts to look like something that could 
really benefit from a fully formed state machine model, we can consider 
it then :-)

Allison

> 
>> Allison
>>>
>>>> Thoughts?
>>>>
>>>>>
>>>>>> +	}
>>>>>>     	error = xfs_attr_node_hasname(args, &state);
>>>>>>     	if (error != -EEXIST)
>>>>>>     		goto out;
>>>>>> +	else
>>>>>> +		error = 0;
>>>>>>     	/*
>>>>>>     	 * If there is an out-of-line value, de-allocate the blocks.
>>>>>> @@ -1237,6 +1303,14 @@ xfs_attr_node_removename(
>>>>>>     	blk = &state->path.blk[ state->path.active-1 ];
>>>>>>     	ASSERT(blk->bp != NULL);
>>>>>>     	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
>>>>>> +
>>>>>> +	/*
>>>>>> +	 * Store blk and state in the context incase we need to cycle out the
>>>>>> +	 * transaction
>>>>>> +	 */
>>>>>> +	args->dc.blk = blk;
>>>>>> +	args->dc.da_state = state;
>>>>>> +
>>>>>>     	if (args->rmtblkno > 0) {
>>>>>>     		/*
>>>>>>     		 * Fill in disk block numbers in the state structure
>>>>>> @@ -1255,13 +1329,30 @@ xfs_attr_node_removename(
>>>>>>     		if (error)
>>>>>>     			goto out;
>>>>>> -		error = xfs_trans_roll_inode(&args->trans, args->dp);
>>>>>> +		args->dc.dc_state = XFS_DC_RM_INVALIDATE;
>>>>>> +		return -EAGAIN;
>>>>>> +rm_invalidate:
>>>>>> +		error = xfs_attr_rmtval_invalidate(args);
>>>>>>     		if (error)
>>>>>>     			goto out;
>>>>>> +rm_node_blks:
>>>>>
>>>>> While I think the design is the right idea, jumping down into a function
>>>>> like this is pretty hairy. I think we should try to further break this
>>>>> function down into smaller elements one way or another that model the
>>>>> steps defined by the state structure. There's probably multiple ways to
>>>>> do that. For example, the remote attr bits could be broken down into
>>>>> a subfunction that processes the couple of states associated with remote
>>>>> blocks. That said, ISTM it might be wiser to try and keep the state
>>>>> processing in one place if possible. That would imply to break the
>>>>> remote processing loop down into a couple functions. All in all, this
>>>>> function might end up looking something like:
>>>>>
>>>>> xfs_attr_node_removename()
>>>>> {
>>>>> 	/* switch statement and comment to document each state */
>>>>>
>>>>> 	error = xfs_attr_node_hasname(args, &state);
>>>>> 	...
>>>>>
>>>>> 	if (remote) {
>>>>> 		error = do_setflag();
>>>>> 		if (error)
>>>>> 			return error;
>>>>>
>>>>> 		/* roll */
>>>>> 		state = INVALIDATE;
>>>>> 		return -EAGAIN;
>>>>> 	}
>>>>>
>>>>> rmt_invalidate:
>>>>> 	state = INVALIDATE;
>>>>> 	if (remote)
>>>>> 		do_invalidate();
>>>>> 	/* fallthru */
>>>>>
>>>>> rmt_rm_blks:
>>>>> 	state = RM_NODE_BLKS;
>>>>> 	if (remote) {
>>>>> 		/* loops and returns -EAGAIN until we fallthru */
>>>>> 		error = rmt_remove_step();
>>>>> 		if (error)
>>>>> 			return error;
>>>>>
>>>>> 		xfs_attr_refillstate();
>>>>> 	}
>>>>>
>>>>> /* maybe worth a new state here? */
>>>>> removename:
>>>>> 	state = REMOVENAME;
>>>>> 	xfs_attr3_leaf_remove();
>>>>> 	...
>>>>> 	if (...) {
>>>>> 		state = SHRINK;
>>>>> 		return -EAGAIN;
>>>>> 	}
>>>>>
>>>>> shrink:
>>>>> 	state = SHRINK;
>>>>> 	error = do_shrink();
>>>>>
>>>>> 	return 0;
>>>>> }
>>>> Ok, I had to go over this a few times, but I think I understand what you're
>>>> describing.  Will update in the next version
>>>>>
>>>>> I'm not totally sold on the idea of rolling the state forward explicitly
>>>>> like this, but it seems like it could be a bit more maintainable.
>>>> I think it is.  Having a dedicated struct just for this purpose alleviates a
>>>> lot of struggle with trying to grab onto things like the fork or the
>>>> incomplete flags to represent what we're trying to do here. Doing so also
>>>> overloads their original intent in that if these structures ever change in
>>>> the future, it may break something that the state machine depends on.  In
>>>> this solution, they remain disjoint concepts dedicated to their purpose.
>>>> And anyway, I couldn't completely escape the state machine in the previous
>>>> set.  I still had to add the extra flag space which functioned more or less
>>>> like "i was here" tick marks.  If we have to have it, we may as well
>>>> leverage what it can do. For example I can drop patch 11 from this set
>>>> because I don't need the extra isset helpers to see if it's already been
>>>> done.
>>>>
>>>
>>> Right.. I agree that the "bookmark" like approach in the current scheme
>>> makes the state implementation (and not necessarily the operational
>>> implementation) a little hard to follow and review. Note again that what
>>> I wrote up here was just a quick example for that higher level feedback
>>> of somehow or another trying to isolate state updates from state
>>> implementation, so I'm not necessarily tied to that specific approach if
>>> there are other ways to similarly simplify things.
>>>
>>> I do think fixing up the code to avoid jumping into loops and whatnot is
>>> more important. It could also be that just continuing to break things
>>> down into as small functions as possible (i.e. with a goal of 1 function
>>> per state) kind of forces a natural separation.
>>>
>>> Brian
>>>
>>>>    All in
>>>>> all this is still fairly ugly, but this is mostly a mechanical attempt
>>>>> to keep state management isolated and we can polish it up from there.
>>>>> Thoughts?
>>>>
>>>> Yes, at this point, I do kind of feel like it's the least of the ugly
>>>> prototypes.  So I'm just kind of proceeding, with caution. :-)
>>>>
>>>> Thanks for the in depths reviews!!  I know its a lot!  Much appreciated!!
>>>>
>>>> Allison
>>>>
>>>>>
>>>>> Brian
>>>>>
>>>>>> +		/*
>>>>>> +		 * Unmap value blocks for this attr.  This is similar to
>>>>>> +		 * xfs_attr_rmtval_remove, but open coded here to return EAGAIN
>>>>>> +		 * for new transactions
>>>>>> +		 */
>>>>>> +		while (!done && !error) {
>>>>>> +			error = xfs_bunmapi(args->trans, args->dp,
>>>>>> +				    args->rmtblkno, args->rmtblkcnt,
>>>>>> +				    XFS_BMAPI_ATTRFORK, 1, &done);
>>>>>> +			if (error)
>>>>>> +				return error;
>>>>>> -		error = xfs_attr_rmtval_remove(args);
>>>>>> -		if (error)
>>>>>> -			goto out;
>>>>>> +			if (!done) {
>>>>>> +				args->dc.dc_state = XFS_DC_RM_NODE_BLKS;
>>>>>> +				return -EAGAIN;
>>>>>> +			}
>>>>>> +		}
>>>>>>     		/*
>>>>>>     		 * Refill the state structure with buffers, the prior calls
>>>>>> @@ -1287,17 +1378,12 @@ xfs_attr_node_removename(
>>>>>>     		error = xfs_da3_join(state);
>>>>>>     		if (error)
>>>>>>     			goto out;
>>>>>> -		error = xfs_defer_finish(&args->trans);
>>>>>> -		if (error)
>>>>>> -			goto out;
>>>>>> -		/*
>>>>>> -		 * Commit the Btree join operation and start a new trans.
>>>>>> -		 */
>>>>>> -		error = xfs_trans_roll_inode(&args->trans, dp);
>>>>>> -		if (error)
>>>>>> -			goto out;
>>>>>> +
>>>>>> +		args->dc.dc_state = XFS_DC_RM_SHRINK;
>>>>>> +		return -EAGAIN;
>>>>>>     	}
>>>>>> +rm_shrink:
>>>>>>     	/*
>>>>>>     	 * If the result is small enough, push it all into the inode.
>>>>>>     	 */
>>>>>> @@ -1319,9 +1405,6 @@ xfs_attr_node_removename(
>>>>>>     			/* bp is gone due to xfs_da_shrink_inode */
>>>>>>     			if (error)
>>>>>>     				goto out;
>>>>>> -			error = xfs_defer_finish(&args->trans);
>>>>>> -			if (error)
>>>>>> -				goto out;
>>>>>>     		} else
>>>>>>     			xfs_trans_brelse(args->trans, bp);
>>>>>>     	}
>>>>>> diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
>>>>>> index 3b5dad4..fb8bf5b 100644
>>>>>> --- a/fs/xfs/libxfs/xfs_attr.h
>>>>>> +++ b/fs/xfs/libxfs/xfs_attr.h
>>>>>> @@ -152,6 +152,7 @@ int xfs_attr_set_args(struct xfs_da_args *args);
>>>>>>     int xfs_attr_remove(struct xfs_inode *dp, struct xfs_name *name, int flags);
>>>>>>     int xfs_has_attr(struct xfs_da_args *args);
>>>>>>     int xfs_attr_remove_args(struct xfs_da_args *args);
>>>>>> +int xfs_attr_remove_later(struct xfs_da_args *args);
>>>>>>     int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
>>>>>>     		  int flags, struct attrlist_cursor_kern *cursor);
>>>>>>     bool xfs_attr_namecheck(const void *name, size_t length);
>>>>>> -- 
>>>>>> 2.7.4
>>>>>>
>>>>>
>>>>
>>>
>>
>
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 626d4a98..38d5c5c 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -369,10 +369,56 @@  xfs_has_attr(
  */
 int
 xfs_attr_remove_args(
+	struct xfs_da_args	*args)
+{
+	int			error = 0;
+	int			err2 = 0;
+
+	do {
+		error = xfs_attr_remove_later(args);
+		if (error && error != -EAGAIN)
+			goto out;
+
+		xfs_trans_log_inode(args->trans, args->dp,
+			XFS_ILOG_CORE | XFS_ILOG_ADATA);
+
+		err2 = xfs_trans_roll(&args->trans);
+		if (err2) {
+			error = err2;
+			goto out;
+		}
+
+		/* Rejoin inode */
+		xfs_trans_ijoin(args->trans, args->dp, 0);
+
+	} while (error == -EAGAIN);
+out:
+	return error;
+}
+
+/*
+ * Remove the attribute specified in @args.
+ * This routine is meant to function as a delayed operation, and may return
+ * -EGAIN when the transaction needs to be rolled.  Calling functions will need
+ * to handle this, and recall the function until a successful error code is
+ * returned.
+ */
+int
+xfs_attr_remove_later(
 	struct xfs_da_args      *args)
 {
 	struct xfs_inode	*dp = args->dp;
-	int			error;
+	int			error = 0;
+
+	/* State machine switch */
+	switch (args->dc.dc_state) {
+	case XFS_DC_RM_INVALIDATE:
+	case XFS_DC_RM_SHRINK:
+	case XFS_DC_RM_NODE_BLKS:
+		goto node;
+	default:
+		break;
+	}
 
 	if (!xfs_inode_hasattr(dp)) {
 		error = -ENOATTR;
@@ -382,6 +428,7 @@  xfs_attr_remove_args(
 	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
 		error = xfs_attr_leaf_removename(args);
 	} else {
+node:
 		error = xfs_attr_node_removename(args);
 	}
 
@@ -892,9 +939,6 @@  xfs_attr_leaf_removename(
 		/* bp is gone due to xfs_da_shrink_inode */
 		if (error)
 			return error;
-		error = xfs_defer_finish(&args->trans);
-		if (error)
-			return error;
 	}
 	return 0;
 }
@@ -1212,6 +1256,11 @@  xfs_attr_node_addname(
  * This will involve walking down the Btree, and may involve joining
  * leaf nodes and even joining intermediate nodes up to and including
  * the root node (a special case of an intermediate node).
+ *
+ * This routine is meant to function as a delayed operation, and may return
+ * -EAGAIN when the transaction needs to be rolled.  Calling functions
+ * will need to handle this, and recall the function until a successful error
+ * code is returned.
  */
 STATIC int
 xfs_attr_node_removename(
@@ -1222,12 +1271,29 @@  xfs_attr_node_removename(
 	struct xfs_buf		*bp;
 	int			retval, error, forkoff;
 	struct xfs_inode	*dp = args->dp;
+	int			done = 0;
 
 	trace_xfs_attr_node_removename(args);
+	state = args->dc.da_state;
+	blk = args->dc.blk;
+
+	/* State machine switch */
+	switch (args->dc.dc_state) {
+	case XFS_DC_RM_NODE_BLKS:
+		goto rm_node_blks;
+	case XFS_DC_RM_INVALIDATE:
+		goto rm_invalidate;
+	case XFS_DC_RM_SHRINK:
+		goto rm_shrink;
+	default:
+		break;
+	}
 
 	error = xfs_attr_node_hasname(args, &state);
 	if (error != -EEXIST)
 		goto out;
+	else
+		error = 0;
 
 	/*
 	 * If there is an out-of-line value, de-allocate the blocks.
@@ -1237,6 +1303,14 @@  xfs_attr_node_removename(
 	blk = &state->path.blk[ state->path.active-1 ];
 	ASSERT(blk->bp != NULL);
 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
+
+	/*
+	 * Store blk and state in the context incase we need to cycle out the
+	 * transaction
+	 */
+	args->dc.blk = blk;
+	args->dc.da_state = state;
+
 	if (args->rmtblkno > 0) {
 		/*
 		 * Fill in disk block numbers in the state structure
@@ -1255,13 +1329,30 @@  xfs_attr_node_removename(
 		if (error)
 			goto out;
 
-		error = xfs_trans_roll_inode(&args->trans, args->dp);
+		args->dc.dc_state = XFS_DC_RM_INVALIDATE;
+		return -EAGAIN;
+rm_invalidate:
+		error = xfs_attr_rmtval_invalidate(args);
 		if (error)
 			goto out;
+rm_node_blks:
+		/*
+		 * Unmap value blocks for this attr.  This is similar to
+		 * xfs_attr_rmtval_remove, but open coded here to return EAGAIN
+		 * for new transactions
+		 */
+		while (!done && !error) {
+			error = xfs_bunmapi(args->trans, args->dp,
+				    args->rmtblkno, args->rmtblkcnt,
+				    XFS_BMAPI_ATTRFORK, 1, &done);
+			if (error)
+				return error;
 
-		error = xfs_attr_rmtval_remove(args);
-		if (error)
-			goto out;
+			if (!done) {
+				args->dc.dc_state = XFS_DC_RM_NODE_BLKS;
+				return -EAGAIN;
+			}
+		}
 
 		/*
 		 * Refill the state structure with buffers, the prior calls
@@ -1287,17 +1378,12 @@  xfs_attr_node_removename(
 		error = xfs_da3_join(state);
 		if (error)
 			goto out;
-		error = xfs_defer_finish(&args->trans);
-		if (error)
-			goto out;
-		/*
-		 * Commit the Btree join operation and start a new trans.
-		 */
-		error = xfs_trans_roll_inode(&args->trans, dp);
-		if (error)
-			goto out;
+
+		args->dc.dc_state = XFS_DC_RM_SHRINK;
+		return -EAGAIN;
 	}
 
+rm_shrink:
 	/*
 	 * If the result is small enough, push it all into the inode.
 	 */
@@ -1319,9 +1405,6 @@  xfs_attr_node_removename(
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (error)
 				goto out;
-			error = xfs_defer_finish(&args->trans);
-			if (error)
-				goto out;
 		} else
 			xfs_trans_brelse(args->trans, bp);
 	}
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 3b5dad4..fb8bf5b 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -152,6 +152,7 @@  int xfs_attr_set_args(struct xfs_da_args *args);
 int xfs_attr_remove(struct xfs_inode *dp, struct xfs_name *name, int flags);
 int xfs_has_attr(struct xfs_da_args *args);
 int xfs_attr_remove_args(struct xfs_da_args *args);
+int xfs_attr_remove_later(struct xfs_da_args *args);
 int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
 		  int flags, struct attrlist_cursor_kern *cursor);
 bool xfs_attr_namecheck(const void *name, size_t length);