diff mbox series

[41/45] xfs: move CIL ordering to the logvec chain

Message ID 20210305051143.182133-42-david@fromorbit.com (mailing list archive)
State New
Headers show
Series xfs: consolidated log and optimisation changes | expand

Commit Message

Dave Chinner March 5, 2021, 5:11 a.m. UTC
From: Dave Chinner <dchinner@redhat.com>

Adding a list_sort() call to the CIL push work while the xc_ctx_lock
is held exclusively has resulted in fairly long lock hold times and
that stops all front end transaction commits from making progress.

We can move the sorting out of the xc_ctx_lock if we can transfer
the ordering information to the log vectors as they are detached
from the log items and then we can sort the log vectors. This
requires log vectors to use a list_head rather than a singly linked
list and to hold an order ID field. With these changes, we can move
the list_sort() call to just before we call xlog_write() when we
aren't holding any locks at all.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_log.c        | 46 +++++++++++++++++++++---------
 fs/xfs/xfs_log.h        |  3 +-
 fs/xfs/xfs_log_cil.c    | 63 +++++++++++++++++++++++++----------------
 fs/xfs/xfs_log_priv.h   |  4 +--
 fs/xfs/xfs_trans.c      |  4 +--
 fs/xfs/xfs_trans_priv.h |  4 +--
 6 files changed, 78 insertions(+), 46 deletions(-)

Comments

Darrick J. Wong March 11, 2021, 1:34 a.m. UTC | #1
On Fri, Mar 05, 2021 at 04:11:39PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> Adding a list_sort() call to the CIL push work while the xc_ctx_lock
> is held exclusively has resulted in fairly long lock hold times and
> that stops all front end transaction commits from making progress.

Heh, nice solution. :)

> We can move the sorting out of the xc_ctx_lock if we can transfer
> the ordering information to the log vectors as they are detached
> from the log items and then we can sort the log vectors. This
> requires log vectors to use a list_head rather than a single linked
> list

Ergh, could you pull out the list conversion into a separate piece?
Some of the lv_chain usage is ... not entirely textbook.

> and to hold an order ID field. With these changes, we can move
> the list_sort() call to just before we call xlog_write() when we
> aren't holding any locks at all.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/xfs_log.c        | 46 +++++++++++++++++++++---------
>  fs/xfs/xfs_log.h        |  3 +-
>  fs/xfs/xfs_log_cil.c    | 63 +++++++++++++++++++++++++----------------
>  fs/xfs/xfs_log_priv.h   |  4 +--
>  fs/xfs/xfs_trans.c      |  4 +--
>  fs/xfs/xfs_trans_priv.h |  4 +--
>  6 files changed, 78 insertions(+), 46 deletions(-)
> 
> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> index 46a006d41184..fd58c3213ebf 100644
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
> @@ -846,6 +846,9 @@ xlog_write_unmount_record(
>  		.lv_niovecs = 1,
>  		.lv_iovecp = &reg,
>  	};
> +	LIST_HEAD(lv_chain);
> +	INIT_LIST_HEAD(&vec.lv_chain);
> +	list_add(&vec.lv_chain, &lv_chain);
>  
>  	/* account for space used by record data */
>  	ticket->t_curr_res -= sizeof(unmount_rec);
> @@ -857,8 +860,8 @@ xlog_write_unmount_record(
>  	 */
>  	if (log->l_targ != log->l_mp->m_ddev_targp)
>  		blkdev_issue_flush(log->l_targ->bt_bdev);
> -	return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS,
> -				reg.i_len);
> +	return xlog_write(log, &lv_chain, ticket, NULL, NULL,
> +				XLOG_UNMOUNT_TRANS, reg.i_len);
>  }
>  
>  /*
> @@ -1571,14 +1574,17 @@ xlog_commit_record(
>  		.lv_iovecp = &reg,
>  	};
>  	int	error;
> +	LIST_HEAD(lv_chain);
> +	INIT_LIST_HEAD(&vec.lv_chain);
> +	list_add(&vec.lv_chain, &lv_chain);
>  
>  	if (XLOG_FORCED_SHUTDOWN(log))
>  		return -EIO;
>  
>  	/* account for space used by record data */
>  	ticket->t_curr_res -= reg.i_len;
> -	error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS,
> -				reg.i_len);
> +	error = xlog_write(log, &lv_chain, ticket, lsn, iclog,
> +				XLOG_COMMIT_TRANS, reg.i_len);
>  	if (error)
>  		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
>  	return error;
> @@ -2109,6 +2115,7 @@ xlog_print_trans(
>   */
>  static struct xfs_log_vec *
>  xlog_write_single(
> +	struct list_head	*lv_chain,
>  	struct xfs_log_vec	*log_vector,
>  	struct xlog_ticket	*ticket,
>  	struct xlog_in_core	*iclog,
> @@ -2117,7 +2124,7 @@ xlog_write_single(
>  	uint32_t		*record_cnt,
>  	uint32_t		*data_cnt)
>  {
> -	struct xfs_log_vec	*lv = log_vector;
> +	struct xfs_log_vec	*lv;
>  	void			*ptr;
>  	int			index;
>  
> @@ -2125,10 +2132,13 @@ xlog_write_single(
>  		iclog->ic_state == XLOG_STATE_WANT_SYNC);
>  
>  	ptr = iclog->ic_datap + *log_offset;
> -	for (lv = log_vector; lv; lv = lv->lv_next) {
> +	for (lv = log_vector;
> +	     !list_entry_is_head(lv, lv_chain, lv_chain);
> +	     lv = list_next_entry(lv, lv_chain)) {
>  		/*
> -		 * If the entire log vec does not fit in the iclog, punt it to
> -		 * the partial copy loop which can handle this case.
> +		 * If the log vec contains data that needs to be copied and does
> +		 * not entirely fit in the iclog, punt it to the partial copy
> +		 * loop which can handle this case.
>  		 */
>  		if (lv->lv_niovecs &&
>  		    lv->lv_bytes > iclog->ic_size - *log_offset)
> @@ -2154,6 +2164,8 @@ xlog_write_single(
>  			*data_cnt += reg->i_len;
>  		}
>  	}
> +	if (list_entry_is_head(lv, lv_chain, lv_chain))
> +		lv = NULL;
>  	ASSERT(*len == 0 || lv);
>  	return lv;
>  }
> @@ -2199,6 +2211,7 @@ xlog_write_get_more_iclog_space(
>  static struct xfs_log_vec *
>  xlog_write_partial(
>  	struct xlog		*log,
> +	struct list_head	*lv_chain,
>  	struct xfs_log_vec	*log_vector,
>  	struct xlog_ticket	*ticket,
>  	struct xlog_in_core	**iclogp,
> @@ -2338,7 +2351,10 @@ xlog_write_partial(
>  	 * the caller so it can go back to fast path copying.
>  	 */
>  	*iclogp = iclog;
> -	return lv->lv_next;
> +	lv = list_next_entry(lv, lv_chain);
> +	if (list_entry_is_head(lv, lv_chain, lv_chain))
> +		return NULL;
> +	return lv;
>  }
>  
>  /*
> @@ -2384,7 +2400,7 @@ xlog_write_partial(
>  int
>  xlog_write(
>  	struct xlog		*log,
> -	struct xfs_log_vec	*log_vector,
> +	struct list_head	*lv_chain,
>  	struct xlog_ticket	*ticket,
>  	xfs_lsn_t		*start_lsn,
>  	struct xlog_in_core	**commit_iclog,
> @@ -2392,7 +2408,7 @@ xlog_write(
>  	uint32_t		len)
>  {
>  	struct xlog_in_core	*iclog = NULL;
> -	struct xfs_log_vec	*lv = log_vector;
> +	struct xfs_log_vec	*lv;
>  	int			record_cnt = 0;
>  	int			data_cnt = 0;
>  	int			error = 0;
> @@ -2424,15 +2440,17 @@ xlog_write(
>  	if (optype & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
>  		iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
>  
> +	lv = list_first_entry_or_null(lv_chain, struct xfs_log_vec, lv_chain);
>  	while (lv) {
> -		lv = xlog_write_single(lv, ticket, iclog, &log_offset,
> +		lv = xlog_write_single(lv_chain, lv, ticket, iclog, &log_offset,
>  					&len, &record_cnt, &data_cnt);
>  		if (!lv)
>  			break;
>  
>  		ASSERT(!(optype & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)));
> -		lv = xlog_write_partial(log, lv, ticket, &iclog, &log_offset,
> -					&len, &record_cnt, &data_cnt);
> +		lv = xlog_write_partial(log, lv_chain, lv, ticket, &iclog,
> +					&log_offset, &len, &record_cnt,
> +					&data_cnt);
>  		if (IS_ERR_OR_NULL(lv)) {
>  			error = PTR_ERR_OR_ZERO(lv);
>  			break;
> diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
> index af54ea3f8c90..0445dd6acbce 100644
> --- a/fs/xfs/xfs_log.h
> +++ b/fs/xfs/xfs_log.h
> @@ -9,7 +9,8 @@
>  struct xfs_cil_ctx;
>  
>  struct xfs_log_vec {
> -	struct xfs_log_vec	*lv_next;	/* next lv in build list */
> +	struct list_head	lv_chain;	/* lv chain ptrs */
> +	int			lv_order_id;	/* chain ordering info */

uint32_t to match li_order_id?

>  	int			lv_niovecs;	/* number of iovecs in lv */
>  	struct xfs_log_iovec	*lv_iovecp;	/* iovec array */
>  	struct xfs_log_item	*lv_item;	/* owner */
> diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
> index 3d43a5088154..6dcc23829bef 100644
> --- a/fs/xfs/xfs_log_cil.c
> +++ b/fs/xfs/xfs_log_cil.c
> @@ -72,6 +72,7 @@ xlog_cil_ctx_alloc(void)
>  	ctx = kmem_zalloc(sizeof(*ctx), KM_NOFS);
>  	INIT_LIST_HEAD(&ctx->committing);
>  	INIT_LIST_HEAD(&ctx->busy_extents);
> +	INIT_LIST_HEAD(&ctx->lv_chain);
>  	INIT_WORK(&ctx->push_work, xlog_cil_push_work);
>  	return ctx;
>  }
> @@ -237,6 +238,7 @@ xlog_cil_alloc_shadow_bufs(
>  			lv = kmem_alloc_large(buf_size, KM_NOFS);
>  			memset(lv, 0, xlog_cil_iovec_space(niovecs));
>  
> +			INIT_LIST_HEAD(&lv->lv_chain);
>  			lv->lv_item = lip;
>  			lv->lv_size = buf_size;
>  			if (ordered)
> @@ -252,7 +254,6 @@ xlog_cil_alloc_shadow_bufs(
>  			else
>  				lv->lv_buf_len = 0;
>  			lv->lv_bytes = 0;
> -			lv->lv_next = NULL;
>  		}
>  
>  		/* Ensure the lv is set up according to ->iop_size */
> @@ -379,8 +380,6 @@ xlog_cil_insert_format_items(
>  		if (lip->li_lv && shadow->lv_size <= lip->li_lv->lv_size) {
>  			/* same or smaller, optimise common overwrite case */
>  			lv = lip->li_lv;
> -			lv->lv_next = NULL;

What /did/ these null assignments do?

> -
>  			if (ordered)
>  				goto insert;
>  
> @@ -547,14 +546,14 @@ xlog_cil_insert_items(
>  
>  static void
>  xlog_cil_free_logvec(
> -	struct xfs_log_vec	*log_vector)
> +	struct list_head	*lv_chain)
>  {
>  	struct xfs_log_vec	*lv;
>  
> -	for (lv = log_vector; lv; ) {
> -		struct xfs_log_vec *next = lv->lv_next;
> +	while(!list_empty(lv_chain)) {

Nit: space after "while".

> +		lv = list_first_entry(lv_chain, struct xfs_log_vec, lv_chain);
> +		list_del_init(&lv->lv_chain);
>  		kmem_free(lv);
> -		lv = next;
>  	}
>  }
>  
> @@ -653,7 +652,7 @@ xlog_cil_committed(
>  		spin_unlock(&ctx->cil->xc_push_lock);
>  	}
>  
> -	xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
> +	xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, &ctx->lv_chain,
>  					ctx->start_lsn, abort);
>  
>  	xfs_extent_busy_sort(&ctx->busy_extents);
> @@ -664,7 +663,7 @@ xlog_cil_committed(
>  	list_del(&ctx->committing);
>  	spin_unlock(&ctx->cil->xc_push_lock);
>  
> -	xlog_cil_free_logvec(ctx->lv_chain);
> +	xlog_cil_free_logvec(&ctx->lv_chain);
>  
>  	if (!list_empty(&ctx->busy_extents))
>  		xlog_discard_busy_extents(mp, ctx);
> @@ -744,7 +743,7 @@ xlog_cil_build_trans_hdr(
>  	lvhdr->lv_niovecs = 2;
>  	lvhdr->lv_iovecp = &hdr->lhdr[0];
>  	lvhdr->lv_bytes = hdr->lhdr[0].i_len + hdr->lhdr[1].i_len;
> -	lvhdr->lv_next = ctx->lv_chain;
> +	list_add(&lvhdr->lv_chain, &ctx->lv_chain);
>  
>  	tic->t_curr_res -= lvhdr->lv_bytes;
>  }
> @@ -755,12 +754,14 @@ xlog_cil_order_cmp(
>  	struct list_head	*a,
>  	struct list_head	*b)
>  {
> -	struct xfs_log_item	*l1 = container_of(a, struct xfs_log_item, li_cil);
> -	struct xfs_log_item	*l2 = container_of(b, struct xfs_log_item, li_cil);
> +	struct xfs_log_vec	*l1 = container_of(a, struct xfs_log_vec,
> +							lv_chain);
> +	struct xfs_log_vec	*l2 = container_of(b, struct xfs_log_vec,
> +							lv_chain);
>  
> -	if (l1->li_order_id > l2->li_order_id)
> +	if (l1->lv_order_id > l2->lv_order_id)
>  		return 1;
> -	if (l1->li_order_id < l2->li_order_id)
> +	if (l1->lv_order_id < l2->lv_order_id)
>  		return -1;
>  	return 0;
>  }
> @@ -907,26 +908,25 @@ xlog_cil_push_work(
>  	 * needed on the transaction commit side which is currently locked out
>  	 * by the flush lock.
>  	 */
> -	list_sort(NULL, &log_items, xlog_cil_order_cmp);
>  	lv = NULL;
>  	while (!list_empty(&log_items)) {
>  		struct xfs_log_item	*item;
>  
>  		item = list_first_entry(&log_items,
>  					struct xfs_log_item, li_cil);
> -		list_del_init(&item->li_cil);
> -		item->li_order_id = 0;
> -		if (!ctx->lv_chain)
> -			ctx->lv_chain = item->li_lv;
> -		else
> -			lv->lv_next = item->li_lv;
> +
>  		lv = item->li_lv;
> -		item->li_lv = NULL;
> +		lv->lv_order_id = item->li_order_id;
>  		num_iovecs += lv->lv_niovecs;
> -
>  		/* we don't write ordered log vectors */
>  		if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED)
>  			num_bytes += lv->lv_bytes;
> +		list_add_tail(&lv->lv_chain, &ctx->lv_chain);
> +
> +		list_del_init(&item->li_cil);

Do the list manipulations need moving, or could they have stayed further
up in the loop body for a cleaner patch?

> +		item->li_order_id = 0;
> +		item->li_lv = NULL;
> +
>  	}
>  
>  	/*
> @@ -959,6 +959,13 @@ xlog_cil_push_work(
>  	spin_unlock(&cil->xc_push_lock);
>  	up_write(&cil->xc_ctx_lock);
>  
> +	/*
> +	 * Sort the log vector chain before we add the transaction headers.
> +	 * This ensures we always have the transaction headers at the start
> +	 * of the chain.
> +	 */
> +	list_sort(NULL, &ctx->lv_chain, xlog_cil_order_cmp);
> +
>  	/*
>  	 * Build a checkpoint transaction header and write it to the log to
>  	 * begin the transaction. We need to account for the space used by the
> @@ -981,8 +988,14 @@ xlog_cil_push_work(
>  	 * use the commit record lsn then we can move the tail beyond the grant
>  	 * write head.
>  	 */
> -	error = xlog_write(log, &lvhdr, ctx->ticket, &ctx->start_lsn, NULL,
> -				XLOG_START_TRANS, num_bytes);
> +	error = xlog_write(log, &ctx->lv_chain, ctx->ticket, &ctx->start_lsn,
> +				NULL, XLOG_START_TRANS, num_bytes);
> +
> +	/*
> +	 * Take the lvhdr back off the lv_chain as it should not be passed
> +	 * to log IO completion.
> +	 */
> +	list_del(&lvhdr.lv_chain);
>  	if (error)
>  		goto out_abort_free_ticket;
>  
> diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
> index 12a1a36eef7e..6a4160200417 100644
> --- a/fs/xfs/xfs_log_priv.h
> +++ b/fs/xfs/xfs_log_priv.h
> @@ -224,7 +224,7 @@ struct xfs_cil_ctx {
>  	int			nvecs;		/* number of regions */
>  	atomic_t		space_used;	/* aggregate size of regions */
>  	struct list_head	busy_extents;	/* busy extents in chkpt */
> -	struct xfs_log_vec	*lv_chain;	/* logvecs being pushed */
> +	struct list_head	lv_chain;	/* logvecs being pushed */
>  	struct list_head	iclog_entry;
>  	struct list_head	committing;	/* ctx committing list */
>  	struct work_struct	discard_endio_work;
> @@ -480,7 +480,7 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
>  
>  void	xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
>  void	xlog_print_trans(struct xfs_trans *);
> -int	xlog_write(struct xlog *log, struct xfs_log_vec *log_vector,
> +int	xlog_write(struct xlog *log, struct list_head *lv_chain,
>  		struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
>  		struct xlog_in_core **commit_iclog, uint optype, uint32_t len);
>  int	xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket,
> diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
> index 83c2b7f22eb7..b20e68279808 100644
> --- a/fs/xfs/xfs_trans.c
> +++ b/fs/xfs/xfs_trans.c
> @@ -747,7 +747,7 @@ xfs_log_item_batch_insert(
>  void
>  xfs_trans_committed_bulk(
>  	struct xfs_ail		*ailp,
> -	struct xfs_log_vec	*log_vector,
> +	struct list_head	*lv_chain,
>  	xfs_lsn_t		commit_lsn,
>  	bool			aborted)
>  {
> @@ -762,7 +762,7 @@ xfs_trans_committed_bulk(
>  	spin_unlock(&ailp->ail_lock);
>  
>  	/* unpin all the log items */
> -	for (lv = log_vector; lv; lv = lv->lv_next ) {
> +	list_for_each_entry(lv, lv_chain, lv_chain) {
>  		struct xfs_log_item	*lip = lv->lv_item;
>  		xfs_lsn_t		item_lsn;
>  
> diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
> index 3004aeac9110..b0bf78e6ff76 100644
> --- a/fs/xfs/xfs_trans_priv.h
> +++ b/fs/xfs/xfs_trans_priv.h
> @@ -18,8 +18,8 @@ void	xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
>  void	xfs_trans_del_item(struct xfs_log_item *);
>  void	xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
>  
> -void	xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
> -				xfs_lsn_t commit_lsn, bool aborted);
> +void	xfs_trans_committed_bulk(struct xfs_ail *ailp,
> +		struct list_head *lv_chain, xfs_lsn_t commit_lsn, bool aborted);
>  /*
>   * AIL traversal cursor.
>   *
> -- 
> 2.28.0
>
Dave Chinner March 12, 2021, 2:29 a.m. UTC | #2
On Wed, Mar 10, 2021 at 05:34:52PM -0800, Darrick J. Wong wrote:
> On Fri, Mar 05, 2021 at 04:11:39PM +1100, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > Adding a list_sort() call to the CIL push work while the xc_ctx_lock
> > is held exclusively has resulted in fairly long lock hold times and
> > that stops all front end transaction commits from making progress.
> 
> Heh, nice solution. :)
> 
> > We can move the sorting out of the xc_ctx_lock if we can transfer
> > the ordering information to the log vectors as they are detached
> > from the log items and then we can sort the log vectors. This
> > requires log vectors to use a list_head rather than a single linked
> > list
> 
> Ergh, could pull out the list conversion into a separate piece?
> Some of the lv_chain usage is ... not entirely textbook.

Yes, I can probably do that.

> > diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
> > index af54ea3f8c90..0445dd6acbce 100644
> > --- a/fs/xfs/xfs_log.h
> > +++ b/fs/xfs/xfs_log.h
> > @@ -9,7 +9,8 @@
> >  struct xfs_cil_ctx;
> >  
> >  struct xfs_log_vec {
> > -	struct xfs_log_vec	*lv_next;	/* next lv in build list */
> > +	struct list_head	lv_chain;	/* lv chain ptrs */
> > +	int			lv_order_id;	/* chain ordering info */
> 
> uint32_t to match li_order_id?

*nod*

> > diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
> > index 3d43a5088154..6dcc23829bef 100644
> > --- a/fs/xfs/xfs_log_cil.c
> > +++ b/fs/xfs/xfs_log_cil.c
> > @@ -72,6 +72,7 @@ xlog_cil_ctx_alloc(void)
> >  	ctx = kmem_zalloc(sizeof(*ctx), KM_NOFS);
> >  	INIT_LIST_HEAD(&ctx->committing);
> >  	INIT_LIST_HEAD(&ctx->busy_extents);
> > +	INIT_LIST_HEAD(&ctx->lv_chain);
> >  	INIT_WORK(&ctx->push_work, xlog_cil_push_work);
> >  	return ctx;
> >  }
> > @@ -237,6 +238,7 @@ xlog_cil_alloc_shadow_bufs(
> >  			lv = kmem_alloc_large(buf_size, KM_NOFS);
> >  			memset(lv, 0, xlog_cil_iovec_space(niovecs));
> >  
> > +			INIT_LIST_HEAD(&lv->lv_chain);
> >  			lv->lv_item = lip;
> >  			lv->lv_size = buf_size;
> >  			if (ordered)
> > @@ -252,7 +254,6 @@ xlog_cil_alloc_shadow_bufs(
> >  			else
> >  				lv->lv_buf_len = 0;
> >  			lv->lv_bytes = 0;
> > -			lv->lv_next = NULL;
> >  		}
> >  
> >  		/* Ensure the lv is set up according to ->iop_size */
> > @@ -379,8 +380,6 @@ xlog_cil_insert_format_items(
> >  		if (lip->li_lv && shadow->lv_size <= lip->li_lv->lv_size) {
> >  			/* same or smaller, optimise common overwrite case */
> >  			lv = lip->li_lv;
> > -			lv->lv_next = NULL;
> 
> What /did/ these null assignments do?

IIRC, at one point they ensured that the lv chain was correctly
terminated when a lv was reused and added to the tail of an existing
chain. I think that became redundant when we added the shadow
buffers to allow allocation outside the CIL lock contexts...

> > -		list_del_init(&item->li_cil);
> > -		item->li_order_id = 0;
> > -		if (!ctx->lv_chain)
> > -			ctx->lv_chain = item->li_lv;
> > -		else
> > -			lv->lv_next = item->li_lv;
> > +
> >  		lv = item->li_lv;
> > -		item->li_lv = NULL;
> > +		lv->lv_order_id = item->li_order_id;
> >  		num_iovecs += lv->lv_niovecs;
> > -
> >  		/* we don't write ordered log vectors */
> >  		if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED)
> >  			num_bytes += lv->lv_bytes;
> > +		list_add_tail(&lv->lv_chain, &ctx->lv_chain);
> > +
> > +		list_del_init(&item->li_cil);
> 
> Do the list manipulations need moving, or could they have stayed further
> up in the loop body for a cleaner patch?

I moved them so the code was structured as:

		<transfer item state to log vec>
		<manipulate lists>
		<clear item state>

I did this because there was no clear separation between state and
list manipulations. This will clean up if I separate the list
manipulations into their own patch...

Cheers,

Dave.
diff mbox series

Patch

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 46a006d41184..fd58c3213ebf 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -846,6 +846,9 @@  xlog_write_unmount_record(
 		.lv_niovecs = 1,
 		.lv_iovecp = &reg,
 	};
+	LIST_HEAD(lv_chain);
+	INIT_LIST_HEAD(&vec.lv_chain);
+	list_add(&vec.lv_chain, &lv_chain);
 
 	/* account for space used by record data */
 	ticket->t_curr_res -= sizeof(unmount_rec);
@@ -857,8 +860,8 @@  xlog_write_unmount_record(
 	 */
 	if (log->l_targ != log->l_mp->m_ddev_targp)
 		blkdev_issue_flush(log->l_targ->bt_bdev);
-	return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS,
-				reg.i_len);
+	return xlog_write(log, &lv_chain, ticket, NULL, NULL,
+				XLOG_UNMOUNT_TRANS, reg.i_len);
 }
 
 /*
@@ -1571,14 +1574,17 @@  xlog_commit_record(
 		.lv_iovecp = &reg,
 	};
 	int	error;
+	LIST_HEAD(lv_chain);
+	INIT_LIST_HEAD(&vec.lv_chain);
+	list_add(&vec.lv_chain, &lv_chain);
 
 	if (XLOG_FORCED_SHUTDOWN(log))
 		return -EIO;
 
 	/* account for space used by record data */
 	ticket->t_curr_res -= reg.i_len;
-	error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS,
-				reg.i_len);
+	error = xlog_write(log, &lv_chain, ticket, lsn, iclog,
+				XLOG_COMMIT_TRANS, reg.i_len);
 	if (error)
 		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
 	return error;
@@ -2109,6 +2115,7 @@  xlog_print_trans(
  */
 static struct xfs_log_vec *
 xlog_write_single(
+	struct list_head	*lv_chain,
 	struct xfs_log_vec	*log_vector,
 	struct xlog_ticket	*ticket,
 	struct xlog_in_core	*iclog,
@@ -2117,7 +2124,7 @@  xlog_write_single(
 	uint32_t		*record_cnt,
 	uint32_t		*data_cnt)
 {
-	struct xfs_log_vec	*lv = log_vector;
+	struct xfs_log_vec	*lv;
 	void			*ptr;
 	int			index;
 
@@ -2125,10 +2132,13 @@  xlog_write_single(
 		iclog->ic_state == XLOG_STATE_WANT_SYNC);
 
 	ptr = iclog->ic_datap + *log_offset;
-	for (lv = log_vector; lv; lv = lv->lv_next) {
+	for (lv = log_vector;
+	     !list_entry_is_head(lv, lv_chain, lv_chain);
+	     lv = list_next_entry(lv, lv_chain)) {
 		/*
-		 * If the entire log vec does not fit in the iclog, punt it to
-		 * the partial copy loop which can handle this case.
+		 * If the log vec contains data that needs to be copied and does
+		 * not entirely fit in the iclog, punt it to the partial copy
+		 * loop which can handle this case.
 		 */
 		if (lv->lv_niovecs &&
 		    lv->lv_bytes > iclog->ic_size - *log_offset)
@@ -2154,6 +2164,8 @@  xlog_write_single(
 			*data_cnt += reg->i_len;
 		}
 	}
+	if (list_entry_is_head(lv, lv_chain, lv_chain))
+		lv = NULL;
 	ASSERT(*len == 0 || lv);
 	return lv;
 }
@@ -2199,6 +2211,7 @@  xlog_write_get_more_iclog_space(
 static struct xfs_log_vec *
 xlog_write_partial(
 	struct xlog		*log,
+	struct list_head	*lv_chain,
 	struct xfs_log_vec	*log_vector,
 	struct xlog_ticket	*ticket,
 	struct xlog_in_core	**iclogp,
@@ -2338,7 +2351,10 @@  xlog_write_partial(
 	 * the caller so it can go back to fast path copying.
 	 */
 	*iclogp = iclog;
-	return lv->lv_next;
+	lv = list_next_entry(lv, lv_chain);
+	if (list_entry_is_head(lv, lv_chain, lv_chain))
+		return NULL;
+	return lv;
 }
 
 /*
@@ -2384,7 +2400,7 @@  xlog_write_partial(
 int
 xlog_write(
 	struct xlog		*log,
-	struct xfs_log_vec	*log_vector,
+	struct list_head	*lv_chain,
 	struct xlog_ticket	*ticket,
 	xfs_lsn_t		*start_lsn,
 	struct xlog_in_core	**commit_iclog,
@@ -2392,7 +2408,7 @@  xlog_write(
 	uint32_t		len)
 {
 	struct xlog_in_core	*iclog = NULL;
-	struct xfs_log_vec	*lv = log_vector;
+	struct xfs_log_vec	*lv;
 	int			record_cnt = 0;
 	int			data_cnt = 0;
 	int			error = 0;
@@ -2424,15 +2440,17 @@  xlog_write(
 	if (optype & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
 		iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
 
+	lv = list_first_entry_or_null(lv_chain, struct xfs_log_vec, lv_chain);
 	while (lv) {
-		lv = xlog_write_single(lv, ticket, iclog, &log_offset,
+		lv = xlog_write_single(lv_chain, lv, ticket, iclog, &log_offset,
 					&len, &record_cnt, &data_cnt);
 		if (!lv)
 			break;
 
 		ASSERT(!(optype & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)));
-		lv = xlog_write_partial(log, lv, ticket, &iclog, &log_offset,
-					&len, &record_cnt, &data_cnt);
+		lv = xlog_write_partial(log, lv_chain, lv, ticket, &iclog,
+					&log_offset, &len, &record_cnt,
+					&data_cnt);
 		if (IS_ERR_OR_NULL(lv)) {
 			error = PTR_ERR_OR_ZERO(lv);
 			break;
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index af54ea3f8c90..0445dd6acbce 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -9,7 +9,8 @@ 
 struct xfs_cil_ctx;
 
 struct xfs_log_vec {
-	struct xfs_log_vec	*lv_next;	/* next lv in build list */
+	struct list_head	lv_chain;	/* lv chain ptrs */
+	int			lv_order_id;	/* chain ordering info */
 	int			lv_niovecs;	/* number of iovecs in lv */
 	struct xfs_log_iovec	*lv_iovecp;	/* iovec array */
 	struct xfs_log_item	*lv_item;	/* owner */
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 3d43a5088154..6dcc23829bef 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -72,6 +72,7 @@  xlog_cil_ctx_alloc(void)
 	ctx = kmem_zalloc(sizeof(*ctx), KM_NOFS);
 	INIT_LIST_HEAD(&ctx->committing);
 	INIT_LIST_HEAD(&ctx->busy_extents);
+	INIT_LIST_HEAD(&ctx->lv_chain);
 	INIT_WORK(&ctx->push_work, xlog_cil_push_work);
 	return ctx;
 }
@@ -237,6 +238,7 @@  xlog_cil_alloc_shadow_bufs(
 			lv = kmem_alloc_large(buf_size, KM_NOFS);
 			memset(lv, 0, xlog_cil_iovec_space(niovecs));
 
+			INIT_LIST_HEAD(&lv->lv_chain);
 			lv->lv_item = lip;
 			lv->lv_size = buf_size;
 			if (ordered)
@@ -252,7 +254,6 @@  xlog_cil_alloc_shadow_bufs(
 			else
 				lv->lv_buf_len = 0;
 			lv->lv_bytes = 0;
-			lv->lv_next = NULL;
 		}
 
 		/* Ensure the lv is set up according to ->iop_size */
@@ -379,8 +380,6 @@  xlog_cil_insert_format_items(
 		if (lip->li_lv && shadow->lv_size <= lip->li_lv->lv_size) {
 			/* same or smaller, optimise common overwrite case */
 			lv = lip->li_lv;
-			lv->lv_next = NULL;
-
 			if (ordered)
 				goto insert;
 
@@ -547,14 +546,14 @@  xlog_cil_insert_items(
 
 static void
 xlog_cil_free_logvec(
-	struct xfs_log_vec	*log_vector)
+	struct list_head	*lv_chain)
 {
 	struct xfs_log_vec	*lv;
 
-	for (lv = log_vector; lv; ) {
-		struct xfs_log_vec *next = lv->lv_next;
+	while(!list_empty(lv_chain)) {
+		lv = list_first_entry(lv_chain, struct xfs_log_vec, lv_chain);
+		list_del_init(&lv->lv_chain);
 		kmem_free(lv);
-		lv = next;
 	}
 }
 
@@ -653,7 +652,7 @@  xlog_cil_committed(
 		spin_unlock(&ctx->cil->xc_push_lock);
 	}
 
-	xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
+	xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, &ctx->lv_chain,
 					ctx->start_lsn, abort);
 
 	xfs_extent_busy_sort(&ctx->busy_extents);
@@ -664,7 +663,7 @@  xlog_cil_committed(
 	list_del(&ctx->committing);
 	spin_unlock(&ctx->cil->xc_push_lock);
 
-	xlog_cil_free_logvec(ctx->lv_chain);
+	xlog_cil_free_logvec(&ctx->lv_chain);
 
 	if (!list_empty(&ctx->busy_extents))
 		xlog_discard_busy_extents(mp, ctx);
@@ -744,7 +743,7 @@  xlog_cil_build_trans_hdr(
 	lvhdr->lv_niovecs = 2;
 	lvhdr->lv_iovecp = &hdr->lhdr[0];
 	lvhdr->lv_bytes = hdr->lhdr[0].i_len + hdr->lhdr[1].i_len;
-	lvhdr->lv_next = ctx->lv_chain;
+	list_add(&lvhdr->lv_chain, &ctx->lv_chain);
 
 	tic->t_curr_res -= lvhdr->lv_bytes;
 }
@@ -755,12 +754,14 @@  xlog_cil_order_cmp(
 	struct list_head	*a,
 	struct list_head	*b)
 {
-	struct xfs_log_item	*l1 = container_of(a, struct xfs_log_item, li_cil);
-	struct xfs_log_item	*l2 = container_of(b, struct xfs_log_item, li_cil);
+	struct xfs_log_vec	*l1 = container_of(a, struct xfs_log_vec,
+							lv_chain);
+	struct xfs_log_vec	*l2 = container_of(b, struct xfs_log_vec,
+							lv_chain);
 
-	if (l1->li_order_id > l2->li_order_id)
+	if (l1->lv_order_id > l2->lv_order_id)
 		return 1;
-	if (l1->li_order_id < l2->li_order_id)
+	if (l1->lv_order_id < l2->lv_order_id)
 		return -1;
 	return 0;
 }
@@ -907,26 +908,25 @@  xlog_cil_push_work(
 	 * needed on the transaction commit side which is currently locked out
 	 * by the flush lock.
 	 */
-	list_sort(NULL, &log_items, xlog_cil_order_cmp);
 	lv = NULL;
 	while (!list_empty(&log_items)) {
 		struct xfs_log_item	*item;
 
 		item = list_first_entry(&log_items,
 					struct xfs_log_item, li_cil);
-		list_del_init(&item->li_cil);
-		item->li_order_id = 0;
-		if (!ctx->lv_chain)
-			ctx->lv_chain = item->li_lv;
-		else
-			lv->lv_next = item->li_lv;
+
 		lv = item->li_lv;
-		item->li_lv = NULL;
+		lv->lv_order_id = item->li_order_id;
 		num_iovecs += lv->lv_niovecs;
-
 		/* we don't write ordered log vectors */
 		if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED)
 			num_bytes += lv->lv_bytes;
+		list_add_tail(&lv->lv_chain, &ctx->lv_chain);
+
+		list_del_init(&item->li_cil);
+		item->li_order_id = 0;
+		item->li_lv = NULL;
+
 	}
 
 	/*
@@ -959,6 +959,13 @@  xlog_cil_push_work(
 	spin_unlock(&cil->xc_push_lock);
 	up_write(&cil->xc_ctx_lock);
 
+	/*
+	 * Sort the log vector chain before we add the transaction headers.
+	 * This ensures we always have the transaction headers at the start
+	 * of the chain.
+	 */
+	list_sort(NULL, &ctx->lv_chain, xlog_cil_order_cmp);
+
 	/*
 	 * Build a checkpoint transaction header and write it to the log to
 	 * begin the transaction. We need to account for the space used by the
@@ -981,8 +988,14 @@  xlog_cil_push_work(
 	 * use the commit record lsn then we can move the tail beyond the grant
 	 * write head.
 	 */
-	error = xlog_write(log, &lvhdr, ctx->ticket, &ctx->start_lsn, NULL,
-				XLOG_START_TRANS, num_bytes);
+	error = xlog_write(log, &ctx->lv_chain, ctx->ticket, &ctx->start_lsn,
+				NULL, XLOG_START_TRANS, num_bytes);
+
+	/*
+	 * Take the lvhdr back off the lv_chain as it should not be passed
+	 * to log IO completion.
+	 */
+	list_del(&lvhdr.lv_chain);
 	if (error)
 		goto out_abort_free_ticket;
 
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 12a1a36eef7e..6a4160200417 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -224,7 +224,7 @@  struct xfs_cil_ctx {
 	int			nvecs;		/* number of regions */
 	atomic_t		space_used;	/* aggregate size of regions */
 	struct list_head	busy_extents;	/* busy extents in chkpt */
-	struct xfs_log_vec	*lv_chain;	/* logvecs being pushed */
+	struct list_head	lv_chain;	/* logvecs being pushed */
 	struct list_head	iclog_entry;
 	struct list_head	committing;	/* ctx committing list */
 	struct work_struct	discard_endio_work;
@@ -480,7 +480,7 @@  xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
 
 void	xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
 void	xlog_print_trans(struct xfs_trans *);
-int	xlog_write(struct xlog *log, struct xfs_log_vec *log_vector,
+int	xlog_write(struct xlog *log, struct list_head *lv_chain,
 		struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
 		struct xlog_in_core **commit_iclog, uint optype, uint32_t len);
 int	xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket,
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 83c2b7f22eb7..b20e68279808 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -747,7 +747,7 @@  xfs_log_item_batch_insert(
 void
 xfs_trans_committed_bulk(
 	struct xfs_ail		*ailp,
-	struct xfs_log_vec	*log_vector,
+	struct list_head	*lv_chain,
 	xfs_lsn_t		commit_lsn,
 	bool			aborted)
 {
@@ -762,7 +762,7 @@  xfs_trans_committed_bulk(
 	spin_unlock(&ailp->ail_lock);
 
 	/* unpin all the log items */
-	for (lv = log_vector; lv; lv = lv->lv_next ) {
+	list_for_each_entry(lv, lv_chain, lv_chain) {
 		struct xfs_log_item	*lip = lv->lv_item;
 		xfs_lsn_t		item_lsn;
 
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 3004aeac9110..b0bf78e6ff76 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -18,8 +18,8 @@  void	xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
 void	xfs_trans_del_item(struct xfs_log_item *);
 void	xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
 
-void	xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
-				xfs_lsn_t commit_lsn, bool aborted);
+void	xfs_trans_committed_bulk(struct xfs_ail *ailp,
+		struct list_head *lv_chain, xfs_lsn_t commit_lsn, bool aborted);
 /*
  * AIL traversal cursor.
  *