diff mbox series

[v21,06/13] xfs: Implement attr logging and replay

Message ID 20210707222111.16339-7-allison.henderson@oracle.com (mailing list archive)
State Superseded
Headers show
Series Delayed Attributes | expand

Commit Message

Allison Henderson July 7, 2021, 10:21 p.m. UTC
This patch adds the needed routines to create, log and replay attr
intents

Signed-off-by: Allison Henderson <allison.henderson@oracle.com>
---
 fs/xfs/libxfs/xfs_defer.c  |   1 +
 fs/xfs/libxfs/xfs_defer.h  |   1 +
 fs/xfs/libxfs/xfs_format.h |   4 +-
 fs/xfs/xfs_attr_item.c     | 394 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 399 insertions(+), 1 deletion(-)

Comments

Darrick J. Wong July 10, 2021, 1:08 a.m. UTC | #1
On Wed, Jul 07, 2021 at 03:21:04PM -0700, Allison Henderson wrote:
> This patch adds the needed routines to create, log and replay attr
> intents

I might call that last part "...and recover logged extended attribute
intents."

> 
> Signed-off-by: Allison Henderson <allison.henderson@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_defer.c  |   1 +
>  fs/xfs/libxfs/xfs_defer.h  |   1 +
>  fs/xfs/libxfs/xfs_format.h |   4 +-
>  fs/xfs/xfs_attr_item.c     | 394 +++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 399 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
> index eff4a12..e9caff7 100644
> --- a/fs/xfs/libxfs/xfs_defer.c
> +++ b/fs/xfs/libxfs/xfs_defer.c
> @@ -178,6 +178,7 @@ static const struct xfs_defer_op_type *defer_op_types[] = {
>  	[XFS_DEFER_OPS_TYPE_RMAP]	= &xfs_rmap_update_defer_type,
>  	[XFS_DEFER_OPS_TYPE_FREE]	= &xfs_extent_free_defer_type,
>  	[XFS_DEFER_OPS_TYPE_AGFL_FREE]	= &xfs_agfl_free_defer_type,
> +	[XFS_DEFER_OPS_TYPE_ATTR]	= &xfs_attr_defer_type,
>  };
>  
>  static void
> diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
> index 0ed9dfa..72a5789 100644
> --- a/fs/xfs/libxfs/xfs_defer.h
> +++ b/fs/xfs/libxfs/xfs_defer.h
> @@ -19,6 +19,7 @@ enum xfs_defer_ops_type {
>  	XFS_DEFER_OPS_TYPE_RMAP,
>  	XFS_DEFER_OPS_TYPE_FREE,
>  	XFS_DEFER_OPS_TYPE_AGFL_FREE,
> +	XFS_DEFER_OPS_TYPE_ATTR,
>  	XFS_DEFER_OPS_TYPE_MAX,
>  };
>  
> diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
> index 3a4da111..477e815 100644
> --- a/fs/xfs/libxfs/xfs_format.h
> +++ b/fs/xfs/libxfs/xfs_format.h
> @@ -485,7 +485,9 @@ xfs_sb_has_incompat_feature(
>  	return (sbp->sb_features_incompat & feature) != 0;
>  }
>  
> -#define XFS_SB_FEAT_INCOMPAT_LOG_ALL 0
> +#define XFS_SB_FEAT_INCOMPAT_LOG_DELATTR   (1 << 0)	/* Delayed Attributes */
> +#define XFS_SB_FEAT_INCOMPAT_LOG_ALL \
> +	(XFS_SB_FEAT_INCOMPAT_LOG_DELATTR)
>  #define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN	~XFS_SB_FEAT_INCOMPAT_LOG_ALL
>  static inline bool
>  xfs_sb_has_incompat_log_feature(
> diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
> index c9033e2..eda6ae3 100644
> --- a/fs/xfs/xfs_attr_item.c
> +++ b/fs/xfs/xfs_attr_item.c
> @@ -282,6 +282,183 @@ xfs_attrd_item_release(
>  	xfs_attrd_item_free(attrdp);
>  }
>  
> +/*
> + * Performs one step of an attribute update intent and marks the attrd item
> + * dirty..  An attr operation may be a set or a remove.  Note that the
> + * transaction is marked dirty regardless of whether the operation succeeds or
> + * fails to support the ATTRI/ATTRD lifecycle rules.
> + */
> +int
> +xfs_trans_attr_finish_update(
> +	struct xfs_delattr_context	*dac,
> +	struct xfs_attrd_log_item	*attrdp,
> +	struct xfs_buf			**leaf_bp,
> +	uint32_t			op_flags)
> +{
> +	struct xfs_da_args		*args = dac->da_args;
> +	int				error;
> +
> +	error = xfs_qm_dqattach_locked(args->dp, 0);

Hmm, this function is called at runtime and recovery time.  It's
/really/ late to be attaching dquots.

OH, this is for the recovery case, isn't it?

Hmm, xfs_attri_item_recover really ought to share the same
iget/dqattach/set-irecovery thing that bmap and swapext use.  I should
really post that cleanup as a patch.

> +	if (error)
> +		return error;
> +
> +	switch (op_flags) {

op_flags ought to be masked with XFS_ATTR_OP_FLAGS_TYPE_MASK.

> +	case XFS_ATTR_OP_FLAGS_SET:
> +		args->op_flags |= XFS_DA_OP_ADDNAME;
> +		error = xfs_attr_set_iter(dac, leaf_bp);
> +		break;
> +	case XFS_ATTR_OP_FLAGS_REMOVE:
> +		ASSERT(XFS_IFORK_Q(args->dp));
> +		error = xfs_attr_remove_iter(dac);
> +		break;
> +	default:
> +		error = -EFSCORRUPTED;
> +		break;
> +	}
> +
> +	/*
> +	 * Mark the transaction dirty, even on error. This ensures the
> +	 * transaction is aborted, which:
> +	 *
> +	 * 1.) releases the ATTRI and frees the ATTRD
> +	 * 2.) shuts down the filesystem
> +	 */
> +	args->trans->t_flags |= XFS_TRANS_DIRTY;
> +
> +	/*
> +	 * attr intent/done items are null when delayed attributes are disabled
> +	 */
> +	if (attrdp)
> +		set_bit(XFS_LI_DIRTY, &attrdp->attrd_item.li_flags);
> +
> +	return error;
> +}
> +
> +/* Log an attr to the intent item. */
> +STATIC void
> +xfs_attr_log_item(
> +	struct xfs_trans		*tp,
> +	struct xfs_attri_log_item	*attrip,
> +	struct xfs_attr_item		*attr)
> +{
> +	struct xfs_attri_log_format	*attrp;
> +
> +	tp->t_flags |= XFS_TRANS_DIRTY;
> +	set_bit(XFS_LI_DIRTY, &attrip->attri_item.li_flags);
> +
> +	/*
> +	 * At this point the xfs_attr_item has been constructed, and we've
> +	 * created the log intent. Fill in the attri log item and log format
> +	 * structure with fields from this xfs_attr_item
> +	 */
> +	attrp = &attrip->attri_format;
> +	attrp->alfi_ino = attr->xattri_dac.da_args->dp->i_ino;
> +	attrp->alfi_op_flags = attr->xattri_op_flags;
> +	attrp->alfi_value_len = attr->xattri_dac.da_args->valuelen;
> +	attrp->alfi_name_len = attr->xattri_dac.da_args->namelen;
> +	attrp->alfi_attr_flags = attr->xattri_dac.da_args->attr_filter;
> +
> +	attrip->attri_name = (void *)attr->xattri_dac.da_args->name;
> +	attrip->attri_value = attr->xattri_dac.da_args->value;
> +	attrip->attri_name_len = attr->xattri_dac.da_args->namelen;
> +	attrip->attri_value_len = attr->xattri_dac.da_args->valuelen;
> +}
> +
> +/* Get an ATTRI. */
> +static struct xfs_log_item *
> +xfs_attr_create_intent(
> +	struct xfs_trans		*tp,
> +	struct list_head		*items,
> +	unsigned int			count,
> +	bool				sort)
> +{
> +	struct xfs_mount		*mp = tp->t_mountp;
> +	struct xfs_attri_log_item	*attrip;
> +	struct xfs_attr_item		*attr;
> +
> +	ASSERT(count == 1);
> +
> +	if (!xfs_hasdelattr(mp))
> +		return NULL;
> +
> +	xfs_sb_add_incompat_log_features(&mp->m_sb,
> +					 XFS_SB_FEAT_INCOMPAT_LOG_DELATTR);

Um, I think I see a bug here--

I don't see anything in this series that calls xfs_add_incompat_log_feature.

Adding a log incompat feature on a filesystem is a tricky dance, because
one has to write the primary superblock (which means logging the sb
change, forcing the log, and pushing the AIL) /before/ logging the intent.

All this is to prevent an old kernel from tripping over a logged item
that it doesn't understand, because an earlier recovered transaction
contained the bit it needed to conclude that it shouldn't have tried to
recover.


/*
 * Get permission to use log-assisted atomic exchange of file extents.
 *
 * Callers must not be running any transactions or hold any inode locks, and
 * they must release the permission by calling xlog_drop_incompat_feat
 * when they're done.
 */
int
xfs_attr_use_log_assist(
	struct xfs_mount	*mp)
{
	int			error = 0;

	/*
	 * Protect ourselves from an idle log clearing the logged xattrs
	 * log incompat feature bit.
	 */
	xlog_use_incompat_feat(mp->m_log);

	/*
	 * If log-assisted xattrs are already enabled, the caller can use the
	 * log assisted swap functions with the log-incompat reference we got.
	 */
	if (xfs_sb_version_hasdelattr(&mp->m_sb))
		return 0;

	/* Enable log-assisted xattrs. */
	xfs_warn_once(mp,
 "EXPERIMENTAL logged extended attributes feature added. Use at your own risk!");
	error = xfs_add_incompat_log_feature(mp,
			XFS_SB_FEAT_INCOMPAT_LOG_DELATTR);
	if (error)
		goto drop_incompat;

	return 0;
drop_incompat:
	xlog_drop_incompat_feat(mp->m_log);
	return error;
}

...and then in xfs_attr_set, you need something like:

	/* Signal the log that we want to use logged xattrs. */
	use_log_items = xfs_hasdelattr(mp);
	if (use_log_items) {
		error = xfs_attr_use_log_assist(mp);
		if (error)
			goto barf;
	}

	error = xfs_trans_alloc(..., &tp);

	if (valuelen > 0)
		error = xfs_attr_set_args(...);
	else
		error = xfs_attr_remove_args(...);

	error = xfs_trans_commit(...);

	/* Signal the log that we're done with using logged xattrs. */
	if (use_log_items)
		xlog_drop_incompat_feat(mp->m_log);

The xfs_add_incompat_log_feature function ensures that the log has been
pushed out to disk and that the ondisk primary superblock has been
written (manually!) with the new incompat feature set.  The
xlog_use_incompat_feat prevents the log from clearing the feature bit
until we're done using it.

Once xfs_attr_use_log_assist completes, we know that it is safe to begin
using deferred xattrs, because there's no possible way that an old
kernel can screw things up.

> +
> +	attrip = xfs_attri_init(mp, 0);
> +	if (attrip == NULL)
> +		return NULL;
> +
> +	xfs_trans_add_item(tp, &attrip->attri_item);
> +	list_for_each_entry(attr, items, xattri_list)
> +		xfs_attr_log_item(tp, attrip, attr);
> +	return &attrip->attri_item;
> +}
> +
> +/* Process an attr. */
> +STATIC int
> +xfs_attr_finish_item(
> +	struct xfs_trans		*tp,
> +	struct xfs_log_item		*done,
> +	struct list_head		*item,
> +	struct xfs_btree_cur		**state)
> +{
> +	struct xfs_attr_item		*attr;
> +	struct xfs_attrd_log_item	*done_item = NULL;
> +	int				error;
> +	struct xfs_delattr_context	*dac;
> +
> +	attr = container_of(item, struct xfs_attr_item, xattri_list);
> +	dac = &attr->xattri_dac;
> +	if (done)
> +		done_item = ATTRD_ITEM(done);
> +
> +	/*
> +	 * Corner case that can happen during a recovery.  Because the first
> +	 * iteration of a multi part delay op happens in xfs_attri_item_recover
> +	 * to maintain the order of the log replay items.  But the new
> +	 * transactions do not automatically rejoin during a recovery as they do
> +	 * in a standard delay op, so we need to catch this here and rejoin the
> +	 * leaf to the new transaction
> +	 */
> +	if (attr->xattri_dac.leaf_bp &&
> +	    attr->xattri_dac.leaf_bp->b_transp != tp) {
> +		xfs_trans_bjoin(tp, attr->xattri_dac.leaf_bp);
> +		xfs_trans_bhold(tp, attr->xattri_dac.leaf_bp);
> +	}
> +
> +	/*
> +	 * Always reset trans after EAGAIN cycle
> +	 * since the transaction is new
> +	 */
> +	dac->da_args->trans = tp;
> +
> +	error = xfs_trans_attr_finish_update(dac, done_item, &dac->leaf_bp,
> +					     attr->xattri_op_flags);
> +	if (error != -EAGAIN)
> +		kmem_free(attr);
> +
> +	return error;
> +}
> +
> +/* Abort all pending ATTRs. */
> +STATIC void
> +xfs_attr_abort_intent(
> +	struct xfs_log_item		*intent)
> +{
> +	xfs_attri_release(ATTRI_ITEM(intent));
> +}
> +
> +/* Cancel an attr */
> +STATIC void
> +xfs_attr_cancel_item(
> +	struct list_head		*item)
> +{
> +	struct xfs_attr_item		*attr;
> +
> +	attr = container_of(item, struct xfs_attr_item, xattri_list);
> +	kmem_free(attr);
> +}
> +
>  STATIC xfs_lsn_t
>  xfs_attri_item_committed(
>  	struct xfs_log_item		*lip,
> @@ -313,6 +490,30 @@ xfs_attri_item_match(
>  	return ATTRI_ITEM(lip)->attri_format.alfi_id == intent_id;
>  }
>  
> +/*
> + * This routine is called to allocate an "attr free done" log item.
> + */
> +struct xfs_attrd_log_item *
> +xfs_trans_get_attrd(struct xfs_trans		*tp,
> +		  struct xfs_attri_log_item	*attrip)
> +{
> +	struct xfs_attrd_log_item		*attrdp;
> +	uint					size;
> +
> +	ASSERT(tp != NULL);
> +
> +	size = sizeof(struct xfs_attrd_log_item);
> +	attrdp = kmem_zalloc(size, 0);
> +
> +	xfs_log_item_init(tp->t_mountp, &attrdp->attrd_item, XFS_LI_ATTRD,
> +			  &xfs_attrd_item_ops);
> +	attrdp->attrd_attrip = attrip;
> +	attrdp->attrd_format.alfd_alf_id = attrip->attri_format.alfi_id;
> +
> +	xfs_trans_add_item(tp, &attrdp->attrd_item);
> +	return attrdp;
> +}
> +
>  static const struct xfs_item_ops xfs_attrd_item_ops = {
>  	.flags		= XFS_ITEM_RELEASE_WHEN_COMMITTED,
>  	.iop_size	= xfs_attrd_item_size,
> @@ -320,6 +521,29 @@ static const struct xfs_item_ops xfs_attrd_item_ops = {
>  	.iop_release    = xfs_attrd_item_release,
>  };
>  
> +
> +/* Get an ATTRD so we can process all the attrs. */
> +static struct xfs_log_item *
> +xfs_attr_create_done(
> +	struct xfs_trans		*tp,
> +	struct xfs_log_item		*intent,
> +	unsigned int			count)
> +{
> +	if (!intent)
> +		return NULL;
> +
> +	return &xfs_trans_get_attrd(tp, ATTRI_ITEM(intent))->attrd_item;
> +}
> +
> +const struct xfs_defer_op_type xfs_attr_defer_type = {
> +	.max_items	= 1,
> +	.create_intent	= xfs_attr_create_intent,
> +	.abort_intent	= xfs_attr_abort_intent,
> +	.create_done	= xfs_attr_create_done,
> +	.finish_item	= xfs_attr_finish_item,
> +	.cancel_item	= xfs_attr_cancel_item,
> +};
> +
>  /* Is this recovered ATTRI ok? */
>  static inline bool
>  xfs_attri_validate(
> @@ -346,13 +570,183 @@ xfs_attri_validate(
>  	return xfs_hasdelattr(mp);
>  }
>  
> +/*
> + * Process an attr intent item that was recovered from the log.  We need to
> + * delete the attr that it describes.
> + */
> +STATIC int
> +xfs_attri_item_recover(
> +	struct xfs_log_item		*lip,
> +	struct list_head		*capture_list)
> +{
> +	struct xfs_attri_log_item	*attrip = ATTRI_ITEM(lip);
> +	struct xfs_attr_item		*new_attr;
> +	struct xfs_mount		*mp = lip->li_mountp;
> +	struct xfs_inode		*ip;
> +	struct xfs_da_args		args;
> +	struct xfs_da_args		*new_args;
> +	struct xfs_trans_res		tres;
> +	bool				rsvd;
> +	struct xfs_attri_log_format	*attrp;
> +	int				error;
> +	int				total;
> +	int				local;
> +	struct xfs_attrd_log_item	*done_item = NULL;
> +	struct xfs_attr_item		attr = {
> +		.xattri_op_flags	= attrip->attri_format.alfi_op_flags,
> +		.xattri_dac.da_args	= &args,
> +	};
> +
> +	/*
> +	 * First check the validity of the attr described by the ATTRI.  If any
> +	 * are bad, then assume that all are bad and just toss the ATTRI.
> +	 */
> +	attrp = &attrip->attri_format;
> +	if (!xfs_attri_validate(mp, attrip))
> +		return -EFSCORRUPTED;
> +

Everything from here...

> +	error = xfs_iget(mp, 0, attrp->alfi_ino, 0, 0, &ip);
> +	if (error)
> +		return error;
> +
> +	if (VFS_I(ip)->i_nlink == 0)
> +		xfs_iflags_set(ip, XFS_IRECOVERY);

...to here should be a call to xlog_recover_iget(), which is in this
patch:

https://djwong.org/docs/kernel/171-xfs-log-recovery-refactor-iget.patch

> +
> +	memset(&args, 0, sizeof(struct xfs_da_args));
> +	args.dp = ip;
> +	args.geo = mp->m_attr_geo;
> +	args.op_flags = attrp->alfi_op_flags;
> +	args.whichfork = XFS_ATTR_FORK;
> +	args.name = attrip->attri_name;
> +	args.namelen = attrp->alfi_name_len;
> +	args.hashval = xfs_da_hashname(args.name, args.namelen);
> +	args.attr_filter = attrp->alfi_attr_flags;
> +
> +	if (attrp->alfi_op_flags == XFS_ATTR_OP_FLAGS_SET) {
> +		args.value = attrip->attri_value;
> +		args.valuelen = attrp->alfi_value_len;
> +		args.total = xfs_attr_calc_size(&args, &local);
> +
> +		tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
> +				 M_RES(mp)->tr_attrsetrt.tr_logres *
> +					args.total;
> +		tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
> +		tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
> +		total = args.total;
> +	} else {
> +		tres = M_RES(mp)->tr_attrrm;
> +		total = XFS_ATTRRM_SPACE_RES(mp);
> +	}
> +	error = xfs_trans_alloc(mp, &tres, total, 0,
> +				rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
> +	if (error)
> +		return error;
> +
> +	done_item = xfs_trans_get_attrd(args.trans, attrip);
> +
> +	xfs_ilock(ip, XFS_ILOCK_EXCL);
> +	xfs_trans_ijoin(args.trans, ip, 0);
> +
> +	error = xfs_trans_attr_finish_update(&attr.xattri_dac, done_item,
> +					     &attr.xattri_dac.leaf_bp,
> +					     attrp->alfi_op_flags);
> +	if (error == -EAGAIN) {
> +		/*
> +		 * There's more work to do, so make a new xfs_attr_item and add
> +		 * it to this transaction.  We don't use xfs_attr_item_init here
> +		 * because we need the info stored in the current attr to
> +		 * continue with this multi-part operation.  So, alloc space
> +		 * for it and the args and copy everything there.
> +		 */
> +		new_attr = kmem_zalloc(sizeof(struct xfs_attr_item) +
> +				       sizeof(struct xfs_da_args), KM_NOFS);
> +		new_args = (struct xfs_da_args *)((char *)new_attr +
> +			   sizeof(struct xfs_attr_item));
> +
> +		memcpy(new_args, &args, sizeof(struct xfs_da_args));
> +		memcpy(new_attr, &attr, sizeof(struct xfs_attr_item));
> +
> +		new_attr->xattri_dac.da_args = new_args;
> +		memset(&new_attr->xattri_list, 0, sizeof(struct list_head));
> +
> +		xfs_defer_add(args.trans, XFS_DEFER_OPS_TYPE_ATTR,
> +			      &new_attr->xattri_list);

Rather than allocating a huge struct xfs_attr_item (aka the incore state
tracking mechanism) on the stack, filling it out, and copying it if
there's more work to do, why not kmem_zalloc it from the start, and
either attach it to the transaction if there's more work to do, or free
it if (unlikely) we actually finished the xattr update?

> +
> +		/* Do not send -EAGAIN back to caller */
> +		error = 0;
> +	} else if (error) {
> +		xfs_trans_cancel(args.trans);
> +		goto out;
> +	}
> +
> +	xfs_defer_ops_capture_and_commit(args.trans, ip, capture_list);

No error checking here?

--D

> +
> +out:
> +	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +	xfs_irele(ip);
> +	return error;
> +}
> +
> +/* Re-log an intent item to push the log tail forward. */
> +static struct xfs_log_item *
> +xfs_attri_item_relog(
> +	struct xfs_log_item		*intent,
> +	struct xfs_trans		*tp)
> +{
> +	struct xfs_attrd_log_item	*attrdp;
> +	struct xfs_attri_log_item	*old_attrip;
> +	struct xfs_attri_log_item	*new_attrip;
> +	struct xfs_attri_log_format	*new_attrp;
> +	struct xfs_attri_log_format	*old_attrp;
> +	int				buffer_size;
> +
> +	old_attrip = ATTRI_ITEM(intent);
> +	old_attrp = &old_attrip->attri_format;
> +	buffer_size = old_attrp->alfi_value_len + old_attrp->alfi_name_len;
> +
> +	tp->t_flags |= XFS_TRANS_DIRTY;
> +	attrdp = xfs_trans_get_attrd(tp, old_attrip);
> +	set_bit(XFS_LI_DIRTY, &attrdp->attrd_item.li_flags);
> +
> +	new_attrip = xfs_attri_init(tp->t_mountp, buffer_size);
> +	new_attrp = &new_attrip->attri_format;
> +
> +	new_attrp->alfi_ino = old_attrp->alfi_ino;
> +	new_attrp->alfi_op_flags = old_attrp->alfi_op_flags;
> +	new_attrp->alfi_value_len = old_attrp->alfi_value_len;
> +	new_attrp->alfi_name_len = old_attrp->alfi_name_len;
> +	new_attrp->alfi_attr_flags = old_attrp->alfi_attr_flags;
> +
> +	new_attrip->attri_name_len = old_attrip->attri_name_len;
> +	new_attrip->attri_name = ((char *)new_attrip) +
> +				 sizeof(struct xfs_attri_log_item);
> +	memcpy(new_attrip->attri_name, old_attrip->attri_name,
> +		new_attrip->attri_name_len);
> +
> +	new_attrip->attri_value_len = old_attrip->attri_value_len;
> +	if (new_attrip->attri_value_len > 0) {
> +		new_attrip->attri_value = new_attrip->attri_name +
> +					  new_attrip->attri_name_len;
> +
> +		memcpy(new_attrip->attri_value, old_attrip->attri_value,
> +		       new_attrip->attri_value_len);
> +	}
> +
> +	xfs_trans_add_item(tp, &new_attrip->attri_item);
> +	set_bit(XFS_LI_DIRTY, &new_attrip->attri_item.li_flags);
> +
> +	return &new_attrip->attri_item;
> +}
> +
>  static const struct xfs_item_ops xfs_attri_item_ops = {
>  	.iop_size	= xfs_attri_item_size,
>  	.iop_format	= xfs_attri_item_format,
>  	.iop_unpin	= xfs_attri_item_unpin,
>  	.iop_committed	= xfs_attri_item_committed,
>  	.iop_release    = xfs_attri_item_release,
> +	.iop_recover	= xfs_attri_item_recover,
>  	.iop_match	= xfs_attri_item_match,
> +	.iop_relog	= xfs_attri_item_relog,
>  };
>  
>  
> -- 
> 2.7.4
>
Allison Henderson July 13, 2021, 6:52 p.m. UTC | #2
On 7/9/21 6:08 PM, Darrick J. Wong wrote:
> On Wed, Jul 07, 2021 at 03:21:04PM -0700, Allison Henderson wrote:
>> This patch adds the needed routines to create, log and replay attr
>> intents
> 
> I might call that last part "...and recover logged extended attribute
> intents."
Sure, will update the commit message here

> 
>>
>> Signed-off-by: Allison Henderson <allison.henderson@oracle.com>
>> ---
>>   fs/xfs/libxfs/xfs_defer.c  |   1 +
>>   fs/xfs/libxfs/xfs_defer.h  |   1 +
>>   fs/xfs/libxfs/xfs_format.h |   4 +-
>>   fs/xfs/xfs_attr_item.c     | 394 +++++++++++++++++++++++++++++++++++++++++++++
>>   4 files changed, 399 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
>> index eff4a12..e9caff7 100644
>> --- a/fs/xfs/libxfs/xfs_defer.c
>> +++ b/fs/xfs/libxfs/xfs_defer.c
>> @@ -178,6 +178,7 @@ static const struct xfs_defer_op_type *defer_op_types[] = {
>>   	[XFS_DEFER_OPS_TYPE_RMAP]	= &xfs_rmap_update_defer_type,
>>   	[XFS_DEFER_OPS_TYPE_FREE]	= &xfs_extent_free_defer_type,
>>   	[XFS_DEFER_OPS_TYPE_AGFL_FREE]	= &xfs_agfl_free_defer_type,
>> +	[XFS_DEFER_OPS_TYPE_ATTR]	= &xfs_attr_defer_type,
>>   };
>>   
>>   static void
>> diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
>> index 0ed9dfa..72a5789 100644
>> --- a/fs/xfs/libxfs/xfs_defer.h
>> +++ b/fs/xfs/libxfs/xfs_defer.h
>> @@ -19,6 +19,7 @@ enum xfs_defer_ops_type {
>>   	XFS_DEFER_OPS_TYPE_RMAP,
>>   	XFS_DEFER_OPS_TYPE_FREE,
>>   	XFS_DEFER_OPS_TYPE_AGFL_FREE,
>> +	XFS_DEFER_OPS_TYPE_ATTR,
>>   	XFS_DEFER_OPS_TYPE_MAX,
>>   };
>>   
>> diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
>> index 3a4da111..477e815 100644
>> --- a/fs/xfs/libxfs/xfs_format.h
>> +++ b/fs/xfs/libxfs/xfs_format.h
>> @@ -485,7 +485,9 @@ xfs_sb_has_incompat_feature(
>>   	return (sbp->sb_features_incompat & feature) != 0;
>>   }
>>   
>> -#define XFS_SB_FEAT_INCOMPAT_LOG_ALL 0
>> +#define XFS_SB_FEAT_INCOMPAT_LOG_DELATTR   (1 << 0)	/* Delayed Attributes */
>> +#define XFS_SB_FEAT_INCOMPAT_LOG_ALL \
>> +	(XFS_SB_FEAT_INCOMPAT_LOG_DELATTR)
>>   #define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN	~XFS_SB_FEAT_INCOMPAT_LOG_ALL
>>   static inline bool
>>   xfs_sb_has_incompat_log_feature(
>> diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
>> index c9033e2..eda6ae3 100644
>> --- a/fs/xfs/xfs_attr_item.c
>> +++ b/fs/xfs/xfs_attr_item.c
>> @@ -282,6 +282,183 @@ xfs_attrd_item_release(
>>   	xfs_attrd_item_free(attrdp);
>>   }
>>   
>> +/*
>> + * Performs one step of an attribute update intent and marks the attrd item
>> + * dirty..  An attr operation may be a set or a remove.  Note that the
>> + * transaction is marked dirty regardless of whether the operation succeeds or
>> + * fails to support the ATTRI/ATTRD lifecycle rules.
>> + */
>> +int
>> +xfs_trans_attr_finish_update(
>> +	struct xfs_delattr_context	*dac,
>> +	struct xfs_attrd_log_item	*attrdp,
>> +	struct xfs_buf			**leaf_bp,
>> +	uint32_t			op_flags)
>> +{
>> +	struct xfs_da_args		*args = dac->da_args;
>> +	int				error;
>> +
>> +	error = xfs_qm_dqattach_locked(args->dp, 0);
> 
> Hmm, this function is called at runtime and recovery time.  It's
> /really/ late to be attaching dquots.
> 
> OH, this is for the recovery case, isn't it?
> 
> Hmm, xfs_attri_item_recover really ought to share the same
> iget/dqattach/set-irecovery thing that bmap and swapext use.  I should
> really post that cleanup as a patch.

Sure, I'll put it on my todo was well, that one I think can happen 
independantly of this set.

> 
>> +	if (error)
>> +		return error;
>> +
>> +	switch (op_flags) {
> 
> op_flags ought to be masked with XFS_ATTR_OP_FLAGS_TYPE_MASK.
Will fix

> 
>> +	case XFS_ATTR_OP_FLAGS_SET:
>> +		args->op_flags |= XFS_DA_OP_ADDNAME;
>> +		error = xfs_attr_set_iter(dac, leaf_bp);
>> +		break;
>> +	case XFS_ATTR_OP_FLAGS_REMOVE:
>> +		ASSERT(XFS_IFORK_Q(args->dp));
>> +		error = xfs_attr_remove_iter(dac);
>> +		break;
>> +	default:
>> +		error = -EFSCORRUPTED;
>> +		break;
>> +	}
>> +
>> +	/*
>> +	 * Mark the transaction dirty, even on error. This ensures the
>> +	 * transaction is aborted, which:
>> +	 *
>> +	 * 1.) releases the ATTRI and frees the ATTRD
>> +	 * 2.) shuts down the filesystem
>> +	 */
>> +	args->trans->t_flags |= XFS_TRANS_DIRTY;
>> +
>> +	/*
>> +	 * attr intent/done items are null when delayed attributes are disabled
>> +	 */
>> +	if (attrdp)
>> +		set_bit(XFS_LI_DIRTY, &attrdp->attrd_item.li_flags);
>> +
>> +	return error;
>> +}
>> +
>> +/* Log an attr to the intent item. */
>> +STATIC void
>> +xfs_attr_log_item(
>> +	struct xfs_trans		*tp,
>> +	struct xfs_attri_log_item	*attrip,
>> +	struct xfs_attr_item		*attr)
>> +{
>> +	struct xfs_attri_log_format	*attrp;
>> +
>> +	tp->t_flags |= XFS_TRANS_DIRTY;
>> +	set_bit(XFS_LI_DIRTY, &attrip->attri_item.li_flags);
>> +
>> +	/*
>> +	 * At this point the xfs_attr_item has been constructed, and we've
>> +	 * created the log intent. Fill in the attri log item and log format
>> +	 * structure with fields from this xfs_attr_item
>> +	 */
>> +	attrp = &attrip->attri_format;
>> +	attrp->alfi_ino = attr->xattri_dac.da_args->dp->i_ino;
>> +	attrp->alfi_op_flags = attr->xattri_op_flags;
>> +	attrp->alfi_value_len = attr->xattri_dac.da_args->valuelen;
>> +	attrp->alfi_name_len = attr->xattri_dac.da_args->namelen;
>> +	attrp->alfi_attr_flags = attr->xattri_dac.da_args->attr_filter;
>> +
>> +	attrip->attri_name = (void *)attr->xattri_dac.da_args->name;
>> +	attrip->attri_value = attr->xattri_dac.da_args->value;
>> +	attrip->attri_name_len = attr->xattri_dac.da_args->namelen;
>> +	attrip->attri_value_len = attr->xattri_dac.da_args->valuelen;
>> +}
>> +
>> +/* Get an ATTRI. */
>> +static struct xfs_log_item *
>> +xfs_attr_create_intent(
>> +	struct xfs_trans		*tp,
>> +	struct list_head		*items,
>> +	unsigned int			count,
>> +	bool				sort)
>> +{
>> +	struct xfs_mount		*mp = tp->t_mountp;
>> +	struct xfs_attri_log_item	*attrip;
>> +	struct xfs_attr_item		*attr;
>> +
>> +	ASSERT(count == 1);
>> +
>> +	if (!xfs_hasdelattr(mp))
>> +		return NULL;
>> +
>> +	xfs_sb_add_incompat_log_features(&mp->m_sb,
>> +					 XFS_SB_FEAT_INCOMPAT_LOG_DELATTR);
> 
> Um, I think I see a bug here--
> 
> I don't see anything in this series that calls xfs_add_incompat_log_feature.
> 
> Adding a log incompat feature on a filesystem is a tricky dance, because
> one has to write the primary superblock (which means logging the sb
> change, forcing the log, and pushing the AIL) /before/ logging the intent.
> 
> All this is to prevent an old kernel from tripping over a logged item
> that it doesn't understand, because an earlier recovered transaction
> contained the bit it needed to conclude that it shouldn't have tried to
> recover.
> 
> 
> /*
>   * Get permission to use log-assisted atomic exchange of file extents.
>   *
>   * Callers must not be running any transactions or hold any inode locks, and
>   * they must release the permission by calling xlog_drop_incompat_feat
>   * when they're done.
>   */
> int
> xfs_attr_use_log_assist(
> 	struct xfs_mount	*mp)
> {
> 	int			error = 0;
> 
> 	/*
> 	 * Protect ourselves from an idle log clearing the logged xattrs
> 	 * log incompat feature bit.
> 	 */
> 	xlog_use_incompat_feat(mp->m_log);
> 
> 	/*
> 	 * If log-assisted xattrs are already enabled, the caller can use the
> 	 * log assisted swap functions with the log-incompat reference we got.
> 	 */
> 	if (xfs_sb_version_hasdelattr(&mp->m_sb))
> 		return 0;
> 
> 	/* Enable log-assisted xattrs. */
> 	xfs_warn_once(mp,
>   "EXPERIMENTAL logged extended attributes feature added. Use at your own risk!");
> 	error = xfs_add_incompat_log_feature(mp,
> 			XFS_SB_FEAT_INCOMPAT_LOG_DELATTR);
> 	if (error)
> 		goto drop_incompat;
> 
> 	return 0;
> drop_incompat:
> 	xlog_drop_incompat_feat(mp->m_log);
> 	return error;
> }
> 
> ...and then in xfs_attr_set, you need something like:
> 
> 	/* Signal the log that we want to use logged xattrs. */
> 	use_log_items = xfs_hasdelattr(mp);
> 	if (use_log_items) {
> 		error = xfs_attr_use_log_assist(mp);
> 		if (error)
> 			goto barf;
> 	}
> 
> 	error = xfs_trans_alloc(..., &tp);
> 
> 	if (valuelen > 0)
> 		error = xfs_attr_set_args(...);
> 	else
> 		error = xfs_attr_remove_args(...);
> 
> 	error = xfs_trans_commit(...);
> 
> 	/* Signal the log that we're done with using logged xattrs. */
> 	if (use_log_items)
> 		xlog_drop_incompat_feat(mp->m_log);
> 
> The xfs_add_incompat_log_feature function ensures that the log has been
> pushed out to disk and that the ondisk primary superblock has been
> written (manually!) with the new incompat feature set.  The
> xlog_use_incompat_feat prevents the log from clearing the feature bit
> until we're done using it.
> 
> Once xfs_attr_use_log_assist completes, we know that it is safe to begin
> using deferred xattrs, because there's no possible way that an old
> kernel can screw things up.
> 
Ah, ok.  I'll see if I can find a place to work that in.  Maybe patch 8 
is a good place to do this since that is where the xfs_attr_set 
modifications happen.


>> +
>> +	attrip = xfs_attri_init(mp, 0);
>> +	if (attrip == NULL)
>> +		return NULL;
>> +
>> +	xfs_trans_add_item(tp, &attrip->attri_item);
>> +	list_for_each_entry(attr, items, xattri_list)
>> +		xfs_attr_log_item(tp, attrip, attr);
>> +	return &attrip->attri_item;
>> +}
>> +
>> +/* Process an attr. */
>> +STATIC int
>> +xfs_attr_finish_item(
>> +	struct xfs_trans		*tp,
>> +	struct xfs_log_item		*done,
>> +	struct list_head		*item,
>> +	struct xfs_btree_cur		**state)
>> +{
>> +	struct xfs_attr_item		*attr;
>> +	struct xfs_attrd_log_item	*done_item = NULL;
>> +	int				error;
>> +	struct xfs_delattr_context	*dac;
>> +
>> +	attr = container_of(item, struct xfs_attr_item, xattri_list);
>> +	dac = &attr->xattri_dac;
>> +	if (done)
>> +		done_item = ATTRD_ITEM(done);
>> +
>> +	/*
>> +	 * Corner case that can happen during a recovery.  Because the first
>> +	 * iteration of a multi part delay op happens in xfs_attri_item_recover
>> +	 * to maintain the order of the log replay items.  But the new
>> +	 * transactions do not automatically rejoin during a recovery as they do
>> +	 * in a standard delay op, so we need to catch this here and rejoin the
>> +	 * leaf to the new transaction
>> +	 */
>> +	if (attr->xattri_dac.leaf_bp &&
>> +	    attr->xattri_dac.leaf_bp->b_transp != tp) {
>> +		xfs_trans_bjoin(tp, attr->xattri_dac.leaf_bp);
>> +		xfs_trans_bhold(tp, attr->xattri_dac.leaf_bp);
>> +	}
>> +
>> +	/*
>> +	 * Always reset trans after EAGAIN cycle
>> +	 * since the transaction is new
>> +	 */
>> +	dac->da_args->trans = tp;
>> +
>> +	error = xfs_trans_attr_finish_update(dac, done_item, &dac->leaf_bp,
>> +					     attr->xattri_op_flags);
>> +	if (error != -EAGAIN)
>> +		kmem_free(attr);
>> +
>> +	return error;
>> +}
>> +
>> +/* Abort all pending ATTRs. */
>> +STATIC void
>> +xfs_attr_abort_intent(
>> +	struct xfs_log_item		*intent)
>> +{
>> +	xfs_attri_release(ATTRI_ITEM(intent));
>> +}
>> +
>> +/* Cancel an attr */
>> +STATIC void
>> +xfs_attr_cancel_item(
>> +	struct list_head		*item)
>> +{
>> +	struct xfs_attr_item		*attr;
>> +
>> +	attr = container_of(item, struct xfs_attr_item, xattri_list);
>> +	kmem_free(attr);
>> +}
>> +
>>   STATIC xfs_lsn_t
>>   xfs_attri_item_committed(
>>   	struct xfs_log_item		*lip,
>> @@ -313,6 +490,30 @@ xfs_attri_item_match(
>>   	return ATTRI_ITEM(lip)->attri_format.alfi_id == intent_id;
>>   }
>>   
>> +/*
>> + * This routine is called to allocate an "attr free done" log item.
>> + */
>> +struct xfs_attrd_log_item *
>> +xfs_trans_get_attrd(struct xfs_trans		*tp,
>> +		  struct xfs_attri_log_item	*attrip)
>> +{
>> +	struct xfs_attrd_log_item		*attrdp;
>> +	uint					size;
>> +
>> +	ASSERT(tp != NULL);
>> +
>> +	size = sizeof(struct xfs_attrd_log_item);
>> +	attrdp = kmem_zalloc(size, 0);
>> +
>> +	xfs_log_item_init(tp->t_mountp, &attrdp->attrd_item, XFS_LI_ATTRD,
>> +			  &xfs_attrd_item_ops);
>> +	attrdp->attrd_attrip = attrip;
>> +	attrdp->attrd_format.alfd_alf_id = attrip->attri_format.alfi_id;
>> +
>> +	xfs_trans_add_item(tp, &attrdp->attrd_item);
>> +	return attrdp;
>> +}
>> +
>>   static const struct xfs_item_ops xfs_attrd_item_ops = {
>>   	.flags		= XFS_ITEM_RELEASE_WHEN_COMMITTED,
>>   	.iop_size	= xfs_attrd_item_size,
>> @@ -320,6 +521,29 @@ static const struct xfs_item_ops xfs_attrd_item_ops = {
>>   	.iop_release    = xfs_attrd_item_release,
>>   };
>>   
>> +
>> +/* Get an ATTRD so we can process all the attrs. */
>> +static struct xfs_log_item *
>> +xfs_attr_create_done(
>> +	struct xfs_trans		*tp,
>> +	struct xfs_log_item		*intent,
>> +	unsigned int			count)
>> +{
>> +	if (!intent)
>> +		return NULL;
>> +
>> +	return &xfs_trans_get_attrd(tp, ATTRI_ITEM(intent))->attrd_item;
>> +}
>> +
>> +const struct xfs_defer_op_type xfs_attr_defer_type = {
>> +	.max_items	= 1,
>> +	.create_intent	= xfs_attr_create_intent,
>> +	.abort_intent	= xfs_attr_abort_intent,
>> +	.create_done	= xfs_attr_create_done,
>> +	.finish_item	= xfs_attr_finish_item,
>> +	.cancel_item	= xfs_attr_cancel_item,
>> +};
>> +
>>   /* Is this recovered ATTRI ok? */
>>   static inline bool
>>   xfs_attri_validate(
>> @@ -346,13 +570,183 @@ xfs_attri_validate(
>>   	return xfs_hasdelattr(mp);
>>   }
>>   
>> +/*
>> + * Process an attr intent item that was recovered from the log.  We need to
>> + * delete the attr that it describes.
>> + */
>> +STATIC int
>> +xfs_attri_item_recover(
>> +	struct xfs_log_item		*lip,
>> +	struct list_head		*capture_list)
>> +{
>> +	struct xfs_attri_log_item	*attrip = ATTRI_ITEM(lip);
>> +	struct xfs_attr_item		*new_attr;
>> +	struct xfs_mount		*mp = lip->li_mountp;
>> +	struct xfs_inode		*ip;
>> +	struct xfs_da_args		args;
>> +	struct xfs_da_args		*new_args;
>> +	struct xfs_trans_res		tres;
>> +	bool				rsvd;
>> +	struct xfs_attri_log_format	*attrp;
>> +	int				error;
>> +	int				total;
>> +	int				local;
>> +	struct xfs_attrd_log_item	*done_item = NULL;
>> +	struct xfs_attr_item		attr = {
>> +		.xattri_op_flags	= attrip->attri_format.alfi_op_flags,
>> +		.xattri_dac.da_args	= &args,
>> +	};
>> +
>> +	/*
>> +	 * First check the validity of the attr described by the ATTRI.  If any
>> +	 * are bad, then assume that all are bad and just toss the ATTRI.
>> +	 */
>> +	attrp = &attrip->attri_format;
>> +	if (!xfs_attri_validate(mp, attrip))
>> +		return -EFSCORRUPTED;
>> +
> 
> Everything from here...
> 
>> +	error = xfs_iget(mp, 0, attrp->alfi_ino, 0, 0, &ip);
>> +	if (error)
>> +		return error;
>> +
>> +	if (VFS_I(ip)->i_nlink == 0)
>> +		xfs_iflags_set(ip, XFS_IRECOVERY);
> 
> ...to here should be a call to xlog_recover_iget(), which is in this
> patch:
> 
> https://djwong.org/docs/kernel/171-xfs-log-recovery-refactor-iget.patch
Alrighty, I will pick up that patch as well and add it to the set

> 
>> +
>> +	memset(&args, 0, sizeof(struct xfs_da_args));
>> +	args.dp = ip;
>> +	args.geo = mp->m_attr_geo;
>> +	args.op_flags = attrp->alfi_op_flags;
>> +	args.whichfork = XFS_ATTR_FORK;
>> +	args.name = attrip->attri_name;
>> +	args.namelen = attrp->alfi_name_len;
>> +	args.hashval = xfs_da_hashname(args.name, args.namelen);
>> +	args.attr_filter = attrp->alfi_attr_flags;
>> +
>> +	if (attrp->alfi_op_flags == XFS_ATTR_OP_FLAGS_SET) {
>> +		args.value = attrip->attri_value;
>> +		args.valuelen = attrp->alfi_value_len;
>> +		args.total = xfs_attr_calc_size(&args, &local);
>> +
>> +		tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
>> +				 M_RES(mp)->tr_attrsetrt.tr_logres *
>> +					args.total;
>> +		tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
>> +		tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
>> +		total = args.total;
>> +	} else {
>> +		tres = M_RES(mp)->tr_attrrm;
>> +		total = XFS_ATTRRM_SPACE_RES(mp);
>> +	}
>> +	error = xfs_trans_alloc(mp, &tres, total, 0,
>> +				rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
>> +	if (error)
>> +		return error;
>> +
>> +	done_item = xfs_trans_get_attrd(args.trans, attrip);
>> +
>> +	xfs_ilock(ip, XFS_ILOCK_EXCL);
>> +	xfs_trans_ijoin(args.trans, ip, 0);
>> +
>> +	error = xfs_trans_attr_finish_update(&attr.xattri_dac, done_item,
>> +					     &attr.xattri_dac.leaf_bp,
>> +					     attrp->alfi_op_flags);
>> +	if (error == -EAGAIN) {
>> +		/*
>> +		 * There's more work to do, so make a new xfs_attr_item and add
>> +		 * it to this transaction.  We don't use xfs_attr_item_init here
>> +		 * because we need the info stored in the current attr to
>> +		 * continue with this multi-part operation.  So, alloc space
>> +		 * for it and the args and copy everything there.
>> +		 */
>> +		new_attr = kmem_zalloc(sizeof(struct xfs_attr_item) +
>> +				       sizeof(struct xfs_da_args), KM_NOFS);
>> +		new_args = (struct xfs_da_args *)((char *)new_attr +
>> +			   sizeof(struct xfs_attr_item));
>> +
>> +		memcpy(new_args, &args, sizeof(struct xfs_da_args));
>> +		memcpy(new_attr, &attr, sizeof(struct xfs_attr_item));
>> +
>> +		new_attr->xattri_dac.da_args = new_args;
>> +		memset(&new_attr->xattri_list, 0, sizeof(struct list_head));
>> +
>> +		xfs_defer_add(args.trans, XFS_DEFER_OPS_TYPE_ATTR,
>> +			      &new_attr->xattri_list);
> 
> Rather than allocating a huge struct xfs_attr_item (aka the incore state
> tracking mechanism) on the stack, filling it out, and copying it if
> there's more work to do, why not kmem_zalloc it from the start, and
> either attach it to the transaction if there's more work to do, or free
> it if (unlikely) we actually finished the xattr update?
That should work too.  I guess I just assumed people would want to see 
alloc/free minimized if it can be, but most likley there will be some 
back and forth with most attr operations.  will update.

> 
>> +
>> +		/* Do not send -EAGAIN back to caller */
>> +		error = 0;
>> +	} else if (error) {
>> +		xfs_trans_cancel(args.trans);
>> +		goto out;
>> +	}
>> +
>> +	xfs_defer_ops_capture_and_commit(args.trans, ip, capture_list);
> 
> No error checking here?
will fix.

Thanks for the reviews!
Allison

> 
> --D
> 
>> +
>> +out:
>> +	xfs_iunlock(ip, XFS_ILOCK_EXCL);
>> +	xfs_irele(ip);
>> +	return error;
>> +}
>> +
>> +/* Re-log an intent item to push the log tail forward. */
>> +static struct xfs_log_item *
>> +xfs_attri_item_relog(
>> +	struct xfs_log_item		*intent,
>> +	struct xfs_trans		*tp)
>> +{
>> +	struct xfs_attrd_log_item	*attrdp;
>> +	struct xfs_attri_log_item	*old_attrip;
>> +	struct xfs_attri_log_item	*new_attrip;
>> +	struct xfs_attri_log_format	*new_attrp;
>> +	struct xfs_attri_log_format	*old_attrp;
>> +	int				buffer_size;
>> +
>> +	old_attrip = ATTRI_ITEM(intent);
>> +	old_attrp = &old_attrip->attri_format;
>> +	buffer_size = old_attrp->alfi_value_len + old_attrp->alfi_name_len;
>> +
>> +	tp->t_flags |= XFS_TRANS_DIRTY;
>> +	attrdp = xfs_trans_get_attrd(tp, old_attrip);
>> +	set_bit(XFS_LI_DIRTY, &attrdp->attrd_item.li_flags);
>> +
>> +	new_attrip = xfs_attri_init(tp->t_mountp, buffer_size);
>> +	new_attrp = &new_attrip->attri_format;
>> +
>> +	new_attrp->alfi_ino = old_attrp->alfi_ino;
>> +	new_attrp->alfi_op_flags = old_attrp->alfi_op_flags;
>> +	new_attrp->alfi_value_len = old_attrp->alfi_value_len;
>> +	new_attrp->alfi_name_len = old_attrp->alfi_name_len;
>> +	new_attrp->alfi_attr_flags = old_attrp->alfi_attr_flags;
>> +
>> +	new_attrip->attri_name_len = old_attrip->attri_name_len;
>> +	new_attrip->attri_name = ((char *)new_attrip) +
>> +				 sizeof(struct xfs_attri_log_item);
>> +	memcpy(new_attrip->attri_name, old_attrip->attri_name,
>> +		new_attrip->attri_name_len);
>> +
>> +	new_attrip->attri_value_len = old_attrip->attri_value_len;
>> +	if (new_attrip->attri_value_len > 0) {
>> +		new_attrip->attri_value = new_attrip->attri_name +
>> +					  new_attrip->attri_name_len;
>> +
>> +		memcpy(new_attrip->attri_value, old_attrip->attri_value,
>> +		       new_attrip->attri_value_len);
>> +	}
>> +
>> +	xfs_trans_add_item(tp, &new_attrip->attri_item);
>> +	set_bit(XFS_LI_DIRTY, &new_attrip->attri_item.li_flags);
>> +
>> +	return &new_attrip->attri_item;
>> +}
>> +
>>   static const struct xfs_item_ops xfs_attri_item_ops = {
>>   	.iop_size	= xfs_attri_item_size,
>>   	.iop_format	= xfs_attri_item_format,
>>   	.iop_unpin	= xfs_attri_item_unpin,
>>   	.iop_committed	= xfs_attri_item_committed,
>>   	.iop_release    = xfs_attri_item_release,
>> +	.iop_recover	= xfs_attri_item_recover,
>>   	.iop_match	= xfs_attri_item_match,
>> +	.iop_relog	= xfs_attri_item_relog,
>>   };
>>   
>>   
>> -- 
>> 2.7.4
>>
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index eff4a12..e9caff7 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -178,6 +178,7 @@  static const struct xfs_defer_op_type *defer_op_types[] = {
 	[XFS_DEFER_OPS_TYPE_RMAP]	= &xfs_rmap_update_defer_type,
 	[XFS_DEFER_OPS_TYPE_FREE]	= &xfs_extent_free_defer_type,
 	[XFS_DEFER_OPS_TYPE_AGFL_FREE]	= &xfs_agfl_free_defer_type,
+	[XFS_DEFER_OPS_TYPE_ATTR]	= &xfs_attr_defer_type,
 };
 
 static void
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 0ed9dfa..72a5789 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -19,6 +19,7 @@  enum xfs_defer_ops_type {
 	XFS_DEFER_OPS_TYPE_RMAP,
 	XFS_DEFER_OPS_TYPE_FREE,
 	XFS_DEFER_OPS_TYPE_AGFL_FREE,
+	XFS_DEFER_OPS_TYPE_ATTR,
 	XFS_DEFER_OPS_TYPE_MAX,
 };
 
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 3a4da111..477e815 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -485,7 +485,9 @@  xfs_sb_has_incompat_feature(
 	return (sbp->sb_features_incompat & feature) != 0;
 }
 
-#define XFS_SB_FEAT_INCOMPAT_LOG_ALL 0
+#define XFS_SB_FEAT_INCOMPAT_LOG_DELATTR   (1 << 0)	/* Delayed Attributes */
+#define XFS_SB_FEAT_INCOMPAT_LOG_ALL \
+	(XFS_SB_FEAT_INCOMPAT_LOG_DELATTR)
 #define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN	~XFS_SB_FEAT_INCOMPAT_LOG_ALL
 static inline bool
 xfs_sb_has_incompat_log_feature(
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index c9033e2..eda6ae3 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -282,6 +282,183 @@  xfs_attrd_item_release(
 	xfs_attrd_item_free(attrdp);
 }
 
+/*
+ * Performs one step of an attribute update intent and marks the attrd item
+ * dirty..  An attr operation may be a set or a remove.  Note that the
+ * transaction is marked dirty regardless of whether the operation succeeds or
+ * fails to support the ATTRI/ATTRD lifecycle rules.
+ */
+int
+xfs_trans_attr_finish_update(
+	struct xfs_delattr_context	*dac,
+	struct xfs_attrd_log_item	*attrdp,
+	struct xfs_buf			**leaf_bp,
+	uint32_t			op_flags)
+{
+	struct xfs_da_args		*args = dac->da_args;
+	int				error;
+
+	error = xfs_qm_dqattach_locked(args->dp, 0);
+	if (error)
+		return error;
+
+	switch (op_flags) {
+	case XFS_ATTR_OP_FLAGS_SET:
+		args->op_flags |= XFS_DA_OP_ADDNAME;
+		error = xfs_attr_set_iter(dac, leaf_bp);
+		break;
+	case XFS_ATTR_OP_FLAGS_REMOVE:
+		ASSERT(XFS_IFORK_Q(args->dp));
+		error = xfs_attr_remove_iter(dac);
+		break;
+	default:
+		error = -EFSCORRUPTED;
+		break;
+	}
+
+	/*
+	 * Mark the transaction dirty, even on error. This ensures the
+	 * transaction is aborted, which:
+	 *
+	 * 1.) releases the ATTRI and frees the ATTRD
+	 * 2.) shuts down the filesystem
+	 */
+	args->trans->t_flags |= XFS_TRANS_DIRTY;
+
+	/*
+	 * attr intent/done items are null when delayed attributes are disabled
+	 */
+	if (attrdp)
+		set_bit(XFS_LI_DIRTY, &attrdp->attrd_item.li_flags);
+
+	return error;
+}
+
+/* Log an attr to the intent item. */
+STATIC void
+xfs_attr_log_item(
+	struct xfs_trans		*tp,
+	struct xfs_attri_log_item	*attrip,
+	struct xfs_attr_item		*attr)
+{
+	struct xfs_attri_log_format	*attrp;
+
+	tp->t_flags |= XFS_TRANS_DIRTY;
+	set_bit(XFS_LI_DIRTY, &attrip->attri_item.li_flags);
+
+	/*
+	 * At this point the xfs_attr_item has been constructed, and we've
+	 * created the log intent. Fill in the attri log item and log format
+	 * structure with fields from this xfs_attr_item
+	 */
+	attrp = &attrip->attri_format;
+	attrp->alfi_ino = attr->xattri_dac.da_args->dp->i_ino;
+	attrp->alfi_op_flags = attr->xattri_op_flags;
+	attrp->alfi_value_len = attr->xattri_dac.da_args->valuelen;
+	attrp->alfi_name_len = attr->xattri_dac.da_args->namelen;
+	attrp->alfi_attr_flags = attr->xattri_dac.da_args->attr_filter;
+
+	attrip->attri_name = (void *)attr->xattri_dac.da_args->name;
+	attrip->attri_value = attr->xattri_dac.da_args->value;
+	attrip->attri_name_len = attr->xattri_dac.da_args->namelen;
+	attrip->attri_value_len = attr->xattri_dac.da_args->valuelen;
+}
+
+/* Get an ATTRI. */
+static struct xfs_log_item *
+xfs_attr_create_intent(
+	struct xfs_trans		*tp,
+	struct list_head		*items,
+	unsigned int			count,
+	bool				sort)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_attri_log_item	*attrip;
+	struct xfs_attr_item		*attr;
+
+	ASSERT(count == 1);
+
+	if (!xfs_hasdelattr(mp))
+		return NULL;
+
+	xfs_sb_add_incompat_log_features(&mp->m_sb,
+					 XFS_SB_FEAT_INCOMPAT_LOG_DELATTR);
+
+	attrip = xfs_attri_init(mp, 0);
+	if (attrip == NULL)
+		return NULL;
+
+	xfs_trans_add_item(tp, &attrip->attri_item);
+	list_for_each_entry(attr, items, xattri_list)
+		xfs_attr_log_item(tp, attrip, attr);
+	return &attrip->attri_item;
+}
+
+/* Process an attr. */
+STATIC int
+xfs_attr_finish_item(
+	struct xfs_trans		*tp,
+	struct xfs_log_item		*done,
+	struct list_head		*item,
+	struct xfs_btree_cur		**state)
+{
+	struct xfs_attr_item		*attr;
+	struct xfs_attrd_log_item	*done_item = NULL;
+	int				error;
+	struct xfs_delattr_context	*dac;
+
+	attr = container_of(item, struct xfs_attr_item, xattri_list);
+	dac = &attr->xattri_dac;
+	if (done)
+		done_item = ATTRD_ITEM(done);
+
+	/*
+	 * Corner case that can happen during a recovery.  Because the first
+	 * iteration of a multi part delay op happens in xfs_attri_item_recover
+	 * to maintain the order of the log replay items.  But the new
+	 * transactions do not automatically rejoin during a recovery as they do
+	 * in a standard delay op, so we need to catch this here and rejoin the
+	 * leaf to the new transaction
+	 */
+	if (attr->xattri_dac.leaf_bp &&
+	    attr->xattri_dac.leaf_bp->b_transp != tp) {
+		xfs_trans_bjoin(tp, attr->xattri_dac.leaf_bp);
+		xfs_trans_bhold(tp, attr->xattri_dac.leaf_bp);
+	}
+
+	/*
+	 * Always reset trans after EAGAIN cycle
+	 * since the transaction is new
+	 */
+	dac->da_args->trans = tp;
+
+	error = xfs_trans_attr_finish_update(dac, done_item, &dac->leaf_bp,
+					     attr->xattri_op_flags);
+	if (error != -EAGAIN)
+		kmem_free(attr);
+
+	return error;
+}
+
+/* Abort all pending ATTRs. */
+STATIC void
+xfs_attr_abort_intent(
+	struct xfs_log_item		*intent)
+{
+	xfs_attri_release(ATTRI_ITEM(intent));
+}
+
+/* Cancel an attr */
+STATIC void
+xfs_attr_cancel_item(
+	struct list_head		*item)
+{
+	struct xfs_attr_item		*attr;
+
+	attr = container_of(item, struct xfs_attr_item, xattri_list);
+	kmem_free(attr);
+}
+
 STATIC xfs_lsn_t
 xfs_attri_item_committed(
 	struct xfs_log_item		*lip,
@@ -313,6 +490,30 @@  xfs_attri_item_match(
 	return ATTRI_ITEM(lip)->attri_format.alfi_id == intent_id;
 }
 
+/*
+ * This routine is called to allocate an "attr free done" log item.
+ */
+struct xfs_attrd_log_item *
+xfs_trans_get_attrd(struct xfs_trans		*tp,
+		  struct xfs_attri_log_item	*attrip)
+{
+	struct xfs_attrd_log_item		*attrdp;
+	uint					size;
+
+	ASSERT(tp != NULL);
+
+	size = sizeof(struct xfs_attrd_log_item);
+	attrdp = kmem_zalloc(size, 0);
+
+	xfs_log_item_init(tp->t_mountp, &attrdp->attrd_item, XFS_LI_ATTRD,
+			  &xfs_attrd_item_ops);
+	attrdp->attrd_attrip = attrip;
+	attrdp->attrd_format.alfd_alf_id = attrip->attri_format.alfi_id;
+
+	xfs_trans_add_item(tp, &attrdp->attrd_item);
+	return attrdp;
+}
+
 static const struct xfs_item_ops xfs_attrd_item_ops = {
 	.flags		= XFS_ITEM_RELEASE_WHEN_COMMITTED,
 	.iop_size	= xfs_attrd_item_size,
@@ -320,6 +521,29 @@  static const struct xfs_item_ops xfs_attrd_item_ops = {
 	.iop_release    = xfs_attrd_item_release,
 };
 
+
+/* Get an ATTRD so we can process all the attrs. */
+static struct xfs_log_item *
+xfs_attr_create_done(
+	struct xfs_trans		*tp,
+	struct xfs_log_item		*intent,
+	unsigned int			count)
+{
+	if (!intent)
+		return NULL;
+
+	return &xfs_trans_get_attrd(tp, ATTRI_ITEM(intent))->attrd_item;
+}
+
+const struct xfs_defer_op_type xfs_attr_defer_type = {
+	.max_items	= 1,
+	.create_intent	= xfs_attr_create_intent,
+	.abort_intent	= xfs_attr_abort_intent,
+	.create_done	= xfs_attr_create_done,
+	.finish_item	= xfs_attr_finish_item,
+	.cancel_item	= xfs_attr_cancel_item,
+};
+
 /* Is this recovered ATTRI ok? */
 static inline bool
 xfs_attri_validate(
@@ -346,13 +570,183 @@  xfs_attri_validate(
 	return xfs_hasdelattr(mp);
 }
 
+/*
+ * Process an attr intent item that was recovered from the log.  We need to
+ * delete the attr that it describes.
+ */
+STATIC int
+xfs_attri_item_recover(
+	struct xfs_log_item		*lip,
+	struct list_head		*capture_list)
+{
+	struct xfs_attri_log_item	*attrip = ATTRI_ITEM(lip);
+	struct xfs_attr_item		*new_attr;
+	struct xfs_mount		*mp = lip->li_mountp;
+	struct xfs_inode		*ip;
+	struct xfs_da_args		args;
+	struct xfs_da_args		*new_args;
+	struct xfs_trans_res		tres;
+	bool				rsvd;
+	struct xfs_attri_log_format	*attrp;
+	int				error;
+	int				total;
+	int				local;
+	struct xfs_attrd_log_item	*done_item = NULL;
+	struct xfs_attr_item		attr = {
+		.xattri_op_flags	= attrip->attri_format.alfi_op_flags,
+		.xattri_dac.da_args	= &args,
+	};
+
+	/*
+	 * First check the validity of the attr described by the ATTRI.  If any
+	 * are bad, then assume that all are bad and just toss the ATTRI.
+	 */
+	attrp = &attrip->attri_format;
+	if (!xfs_attri_validate(mp, attrip))
+		return -EFSCORRUPTED;
+
+	error = xfs_iget(mp, 0, attrp->alfi_ino, 0, 0, &ip);
+	if (error)
+		return error;
+
+	if (VFS_I(ip)->i_nlink == 0)
+		xfs_iflags_set(ip, XFS_IRECOVERY);
+
+	memset(&args, 0, sizeof(struct xfs_da_args));
+	args.dp = ip;
+	args.geo = mp->m_attr_geo;
+	args.op_flags = attrp->alfi_op_flags;
+	args.whichfork = XFS_ATTR_FORK;
+	args.name = attrip->attri_name;
+	args.namelen = attrp->alfi_name_len;
+	args.hashval = xfs_da_hashname(args.name, args.namelen);
+	args.attr_filter = attrp->alfi_attr_flags;
+
+	if (attrp->alfi_op_flags == XFS_ATTR_OP_FLAGS_SET) {
+		args.value = attrip->attri_value;
+		args.valuelen = attrp->alfi_value_len;
+		args.total = xfs_attr_calc_size(&args, &local);
+
+		tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
+				 M_RES(mp)->tr_attrsetrt.tr_logres *
+					args.total;
+		tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
+		tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
+		total = args.total;
+	} else {
+		tres = M_RES(mp)->tr_attrrm;
+		total = XFS_ATTRRM_SPACE_RES(mp);
+	}
+	error = xfs_trans_alloc(mp, &tres, total, 0,
+				rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
+	if (error)
+		return error;
+
+	done_item = xfs_trans_get_attrd(args.trans, attrip);
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(args.trans, ip, 0);
+
+	error = xfs_trans_attr_finish_update(&attr.xattri_dac, done_item,
+					     &attr.xattri_dac.leaf_bp,
+					     attrp->alfi_op_flags);
+	if (error == -EAGAIN) {
+		/*
+		 * There's more work to do, so make a new xfs_attr_item and add
+		 * it to this transaction.  We don't use xfs_attr_item_init here
+		 * because we need the info stored in the current attr to
+		 * continue with this multi-part operation.  So, alloc space
+		 * for it and the args and copy everything there.
+		 */
+		new_attr = kmem_zalloc(sizeof(struct xfs_attr_item) +
+				       sizeof(struct xfs_da_args), KM_NOFS);
+		new_args = (struct xfs_da_args *)((char *)new_attr +
+			   sizeof(struct xfs_attr_item));
+
+		memcpy(new_args, &args, sizeof(struct xfs_da_args));
+		memcpy(new_attr, &attr, sizeof(struct xfs_attr_item));
+
+		new_attr->xattri_dac.da_args = new_args;
+		memset(&new_attr->xattri_list, 0, sizeof(struct list_head));
+
+		xfs_defer_add(args.trans, XFS_DEFER_OPS_TYPE_ATTR,
+			      &new_attr->xattri_list);
+
+		/* Do not send -EAGAIN back to caller */
+		error = 0;
+	} else if (error) {
+		xfs_trans_cancel(args.trans);
+		goto out;
+	}
+
+	xfs_defer_ops_capture_and_commit(args.trans, ip, capture_list);
+
+out:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	xfs_irele(ip);
+	return error;
+}
+
+/* Re-log an intent item to push the log tail forward. */
+static struct xfs_log_item *
+xfs_attri_item_relog(
+	struct xfs_log_item		*intent,
+	struct xfs_trans		*tp)
+{
+	struct xfs_attrd_log_item	*attrdp;
+	struct xfs_attri_log_item	*old_attrip;
+	struct xfs_attri_log_item	*new_attrip;
+	struct xfs_attri_log_format	*new_attrp;
+	struct xfs_attri_log_format	*old_attrp;
+	int				buffer_size;
+
+	old_attrip = ATTRI_ITEM(intent);
+	old_attrp = &old_attrip->attri_format;
+	buffer_size = old_attrp->alfi_value_len + old_attrp->alfi_name_len;
+
+	tp->t_flags |= XFS_TRANS_DIRTY;
+	attrdp = xfs_trans_get_attrd(tp, old_attrip);
+	set_bit(XFS_LI_DIRTY, &attrdp->attrd_item.li_flags);
+
+	new_attrip = xfs_attri_init(tp->t_mountp, buffer_size);
+	new_attrp = &new_attrip->attri_format;
+
+	new_attrp->alfi_ino = old_attrp->alfi_ino;
+	new_attrp->alfi_op_flags = old_attrp->alfi_op_flags;
+	new_attrp->alfi_value_len = old_attrp->alfi_value_len;
+	new_attrp->alfi_name_len = old_attrp->alfi_name_len;
+	new_attrp->alfi_attr_flags = old_attrp->alfi_attr_flags;
+
+	new_attrip->attri_name_len = old_attrip->attri_name_len;
+	new_attrip->attri_name = ((char *)new_attrip) +
+				 sizeof(struct xfs_attri_log_item);
+	memcpy(new_attrip->attri_name, old_attrip->attri_name,
+		new_attrip->attri_name_len);
+
+	new_attrip->attri_value_len = old_attrip->attri_value_len;
+	if (new_attrip->attri_value_len > 0) {
+		new_attrip->attri_value = new_attrip->attri_name +
+					  new_attrip->attri_name_len;
+
+		memcpy(new_attrip->attri_value, old_attrip->attri_value,
+		       new_attrip->attri_value_len);
+	}
+
+	xfs_trans_add_item(tp, &new_attrip->attri_item);
+	set_bit(XFS_LI_DIRTY, &new_attrip->attri_item.li_flags);
+
+	return &new_attrip->attri_item;
+}
+
 static const struct xfs_item_ops xfs_attri_item_ops = {
 	.iop_size	= xfs_attri_item_size,
 	.iop_format	= xfs_attri_item_format,
 	.iop_unpin	= xfs_attri_item_unpin,
 	.iop_committed	= xfs_attri_item_committed,
 	.iop_release    = xfs_attri_item_release,
+	.iop_recover	= xfs_attri_item_recover,
 	.iop_match	= xfs_attri_item_match,
+	.iop_relog	= xfs_attri_item_relog,
 };