Message ID | 20210305051143.182133-42-david@fromorbit.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | xfs: consolidated log and optimisation changes | expand |
On Fri, Mar 05, 2021 at 04:11:39PM +1100, Dave Chinner wrote: > From: Dave Chinner <dchinner@redhat.com> > > Adding a list_sort() call to the CIL push work while the xc_ctx_lock > is held exclusively has resulted in fairly long lock hold times and > that stops all front end transaction commits from making progress. Heh, nice solution. :) > We can move the sorting out of the xc_ctx_lock if we can transfer > the ordering information to the log vectors as they are detached > from the log items and then we can sort the log vectors. This > requires log vectors to use a list_head rather than a single linked > list Ergh, could pull out the list conversion into a separate piece? Some of the lv_chain usage is ... not entirely textbook. > and to hold an order ID field. With these changes, we can move > the list_sort() call to just before we call xlog_write() when we > aren't holding any locks at all. > > Signed-off-by: Dave Chinner <dchinner@redhat.com> > --- > fs/xfs/xfs_log.c | 46 +++++++++++++++++++++--------- > fs/xfs/xfs_log.h | 3 +- > fs/xfs/xfs_log_cil.c | 63 +++++++++++++++++++++++++---------------- > fs/xfs/xfs_log_priv.h | 4 +-- > fs/xfs/xfs_trans.c | 4 +-- > fs/xfs/xfs_trans_priv.h | 4 +-- > 6 files changed, 78 insertions(+), 46 deletions(-) > > diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c > index 46a006d41184..fd58c3213ebf 100644 > --- a/fs/xfs/xfs_log.c > +++ b/fs/xfs/xfs_log.c > @@ -846,6 +846,9 @@ xlog_write_unmount_record( > .lv_niovecs = 1, > .lv_iovecp = ®, > }; > + LIST_HEAD(lv_chain); > + INIT_LIST_HEAD(&vec.lv_chain); > + list_add(&vec.lv_chain, &lv_chain); > > /* account for space used by record data */ > ticket->t_curr_res -= sizeof(unmount_rec); > @@ -857,8 +860,8 @@ xlog_write_unmount_record( > */ > if (log->l_targ != log->l_mp->m_ddev_targp) > blkdev_issue_flush(log->l_targ->bt_bdev); > - return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS, > - reg.i_len); > + return xlog_write(log, &lv_chain, ticket, NULL, NULL, > + XLOG_UNMOUNT_TRANS, reg.i_len); > } > > /* > @@ -1571,14 +1574,17 @@ xlog_commit_record( > .lv_iovecp = ®, > }; > int error; > + LIST_HEAD(lv_chain); > + INIT_LIST_HEAD(&vec.lv_chain); > + list_add(&vec.lv_chain, &lv_chain); > > if (XLOG_FORCED_SHUTDOWN(log)) > return -EIO; > > /* account for space used by record data */ > ticket->t_curr_res -= reg.i_len; > - error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS, > - reg.i_len); > + error = xlog_write(log, &lv_chain, ticket, lsn, iclog, > + XLOG_COMMIT_TRANS, reg.i_len); > if (error) > xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); > return error; > @@ -2109,6 +2115,7 @@ xlog_print_trans( > */ > static struct xfs_log_vec * > xlog_write_single( > + struct list_head *lv_chain, > struct xfs_log_vec *log_vector, > struct xlog_ticket *ticket, > struct xlog_in_core *iclog, > @@ -2117,7 +2124,7 @@ xlog_write_single( > uint32_t *record_cnt, > uint32_t *data_cnt) > { > - struct xfs_log_vec *lv = log_vector; > + struct xfs_log_vec *lv; > void *ptr; > int index; > > @@ -2125,10 +2132,13 @@ xlog_write_single( > iclog->ic_state == XLOG_STATE_WANT_SYNC); > > ptr = iclog->ic_datap + *log_offset; > - for (lv = log_vector; lv; lv = lv->lv_next) { > + for (lv = log_vector; > + !list_entry_is_head(lv, lv_chain, lv_chain); > + lv = list_next_entry(lv, lv_chain)) { > /* > - * If the entire log vec does not fit in the iclog, punt it to > - * the partial copy loop which can handle this case. > + * If the log vec contains data that needs to be copied and does > + * not entirely fit in the iclog, punt it to the partial copy > + * loop which can handle this case. > */ > if (lv->lv_niovecs && > lv->lv_bytes > iclog->ic_size - *log_offset) > @@ -2154,6 +2164,8 @@ xlog_write_single( > *data_cnt += reg->i_len; > } > } > + if (list_entry_is_head(lv, lv_chain, lv_chain)) > + lv = NULL; > ASSERT(*len == 0 || lv); > return lv; > } > @@ -2199,6 +2211,7 @@ xlog_write_get_more_iclog_space( > static struct xfs_log_vec * > xlog_write_partial( > struct xlog *log, > + struct list_head *lv_chain, > struct xfs_log_vec *log_vector, > struct xlog_ticket *ticket, > struct xlog_in_core **iclogp, > @@ -2338,7 +2351,10 @@ xlog_write_partial( > * the caller so it can go back to fast path copying. > */ > *iclogp = iclog; > - return lv->lv_next; > + lv = list_next_entry(lv, lv_chain); > + if (list_entry_is_head(lv, lv_chain, lv_chain)) > + return NULL; > + return lv; > } > > /* > @@ -2384,7 +2400,7 @@ xlog_write_partial( > int > xlog_write( > struct xlog *log, > - struct xfs_log_vec *log_vector, > + struct list_head *lv_chain, > struct xlog_ticket *ticket, > xfs_lsn_t *start_lsn, > struct xlog_in_core **commit_iclog, > @@ -2392,7 +2408,7 @@ xlog_write( > uint32_t len) > { > struct xlog_in_core *iclog = NULL; > - struct xfs_log_vec *lv = log_vector; > + struct xfs_log_vec *lv; > int record_cnt = 0; > int data_cnt = 0; > int error = 0; > @@ -2424,15 +2440,17 @@ xlog_write( > if (optype & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) > iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); > > + lv = list_first_entry_or_null(lv_chain, struct xfs_log_vec, lv_chain); > while (lv) { > - lv = xlog_write_single(lv, ticket, iclog, &log_offset, > + lv = xlog_write_single(lv_chain, lv, ticket, iclog, &log_offset, > &len, &record_cnt, &data_cnt); > if (!lv) > break; > > ASSERT(!(optype & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))); > - lv = xlog_write_partial(log, lv, ticket, &iclog, &log_offset, > - &len, &record_cnt, &data_cnt); > + lv = xlog_write_partial(log, lv_chain, lv, ticket, &iclog, > + &log_offset, &len, &record_cnt, > + &data_cnt); > if (IS_ERR_OR_NULL(lv)) { > error = PTR_ERR_OR_ZERO(lv); > break; > diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h > index af54ea3f8c90..0445dd6acbce 100644 > --- a/fs/xfs/xfs_log.h > +++ b/fs/xfs/xfs_log.h > @@ -9,7 +9,8 @@ > struct xfs_cil_ctx; > > struct xfs_log_vec { > - struct xfs_log_vec *lv_next; /* next lv in build list */ > + struct list_head lv_chain; /* lv chain ptrs */ > + int lv_order_id; /* chain ordering info */ uint32_t to match li_order_id? > int lv_niovecs; /* number of iovecs in lv */ > struct xfs_log_iovec *lv_iovecp; /* iovec array */ > struct xfs_log_item *lv_item; /* owner */ > diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c > index 3d43a5088154..6dcc23829bef 100644 > --- a/fs/xfs/xfs_log_cil.c > +++ b/fs/xfs/xfs_log_cil.c > @@ -72,6 +72,7 @@ xlog_cil_ctx_alloc(void) > ctx = kmem_zalloc(sizeof(*ctx), KM_NOFS); > INIT_LIST_HEAD(&ctx->committing); > INIT_LIST_HEAD(&ctx->busy_extents); > + INIT_LIST_HEAD(&ctx->lv_chain); > INIT_WORK(&ctx->push_work, xlog_cil_push_work); > return ctx; > } > @@ -237,6 +238,7 @@ xlog_cil_alloc_shadow_bufs( > lv = kmem_alloc_large(buf_size, KM_NOFS); > memset(lv, 0, xlog_cil_iovec_space(niovecs)); > > + INIT_LIST_HEAD(&lv->lv_chain); > lv->lv_item = lip; > lv->lv_size = buf_size; > if (ordered) > @@ -252,7 +254,6 @@ xlog_cil_alloc_shadow_bufs( > else > lv->lv_buf_len = 0; > lv->lv_bytes = 0; > - lv->lv_next = NULL; > } > > /* Ensure the lv is set up according to ->iop_size */ > @@ -379,8 +380,6 @@ xlog_cil_insert_format_items( > if (lip->li_lv && shadow->lv_size <= lip->li_lv->lv_size) { > /* same or smaller, optimise common overwrite case */ > lv = lip->li_lv; > - lv->lv_next = NULL; What /did/ these null assignments do? > - > if (ordered) > goto insert; > > @@ -547,14 +546,14 @@ xlog_cil_insert_items( > > static void > xlog_cil_free_logvec( > - struct xfs_log_vec *log_vector) > + struct list_head *lv_chain) > { > struct xfs_log_vec *lv; > > - for (lv = log_vector; lv; ) { > - struct xfs_log_vec *next = lv->lv_next; > + while(!list_empty(lv_chain)) { Nit: space after "while". > + lv = list_first_entry(lv_chain, struct xfs_log_vec, lv_chain); > + list_del_init(&lv->lv_chain); > kmem_free(lv); > - lv = next; > } > } > > @@ -653,7 +652,7 @@ xlog_cil_committed( > spin_unlock(&ctx->cil->xc_push_lock); > } > > - xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, > + xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, &ctx->lv_chain, > ctx->start_lsn, abort); > > xfs_extent_busy_sort(&ctx->busy_extents); > @@ -664,7 +663,7 @@ xlog_cil_committed( > list_del(&ctx->committing); > spin_unlock(&ctx->cil->xc_push_lock); > > - xlog_cil_free_logvec(ctx->lv_chain); > + xlog_cil_free_logvec(&ctx->lv_chain); > > if (!list_empty(&ctx->busy_extents)) > xlog_discard_busy_extents(mp, ctx); > @@ -744,7 +743,7 @@ xlog_cil_build_trans_hdr( > lvhdr->lv_niovecs = 2; > lvhdr->lv_iovecp = &hdr->lhdr[0]; > lvhdr->lv_bytes = hdr->lhdr[0].i_len + hdr->lhdr[1].i_len; > - lvhdr->lv_next = ctx->lv_chain; > + list_add(&lvhdr->lv_chain, &ctx->lv_chain); > > tic->t_curr_res -= lvhdr->lv_bytes; > } > @@ -755,12 +754,14 @@ xlog_cil_order_cmp( > struct list_head *a, > struct list_head *b) > { > - struct xfs_log_item *l1 = container_of(a, struct xfs_log_item, li_cil); > - struct xfs_log_item *l2 = container_of(b, struct xfs_log_item, li_cil); > + struct xfs_log_vec *l1 = container_of(a, struct xfs_log_vec, > + lv_chain); > + struct xfs_log_vec *l2 = container_of(b, struct xfs_log_vec, > + lv_chain); > > - if (l1->li_order_id > l2->li_order_id) > + if (l1->lv_order_id > l2->lv_order_id) > return 1; > - if (l1->li_order_id < l2->li_order_id) > + if (l1->lv_order_id < l2->lv_order_id) > return -1; > return 0; > } > @@ -907,26 +908,25 @@ xlog_cil_push_work( > * needed on the transaction commit side which is currently locked out > * by the flush lock. > */ > - list_sort(NULL, &log_items, xlog_cil_order_cmp); > lv = NULL; > while (!list_empty(&log_items)) { > struct xfs_log_item *item; > > item = list_first_entry(&log_items, > struct xfs_log_item, li_cil); > - list_del_init(&item->li_cil); > - item->li_order_id = 0; > - if (!ctx->lv_chain) > - ctx->lv_chain = item->li_lv; > - else > - lv->lv_next = item->li_lv; > + > lv = item->li_lv; > - item->li_lv = NULL; > + lv->lv_order_id = item->li_order_id; > num_iovecs += lv->lv_niovecs; > - > /* we don't write ordered log vectors */ > if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) > num_bytes += lv->lv_bytes; > + list_add_tail(&lv->lv_chain, &ctx->lv_chain); > + > + list_del_init(&item->li_cil); Do the list manipulations need moving, or could they have stayed further up in the loop body for a cleaner patch? > + item->li_order_id = 0; > + item->li_lv = NULL; > + > } > > /* > @@ -959,6 +959,13 @@ xlog_cil_push_work( > spin_unlock(&cil->xc_push_lock); > up_write(&cil->xc_ctx_lock); > > + /* > + * Sort the log vector chain before we add the transaction headers. > + * This ensures we always have the transaction headers at the start > + * of the chain. > + */ > + list_sort(NULL, &ctx->lv_chain, xlog_cil_order_cmp); > + > /* > * Build a checkpoint transaction header and write it to the log to > * begin the transaction. We need to account for the space used by the > @@ -981,8 +988,14 @@ xlog_cil_push_work( > * use the commit record lsn then we can move the tail beyond the grant > * write head. > */ > - error = xlog_write(log, &lvhdr, ctx->ticket, &ctx->start_lsn, NULL, > - XLOG_START_TRANS, num_bytes); > + error = xlog_write(log, &ctx->lv_chain, ctx->ticket, &ctx->start_lsn, > + NULL, XLOG_START_TRANS, num_bytes); > + > + /* > + * Take the lvhdr back off the lv_chain as it should not be passed > + * to log IO completion. > + */ > + list_del(&lvhdr.lv_chain); > if (error) > goto out_abort_free_ticket; > > diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h > index 12a1a36eef7e..6a4160200417 100644 > --- a/fs/xfs/xfs_log_priv.h > +++ b/fs/xfs/xfs_log_priv.h > @@ -224,7 +224,7 @@ struct xfs_cil_ctx { > int nvecs; /* number of regions */ > atomic_t space_used; /* aggregate size of regions */ > struct list_head busy_extents; /* busy extents in chkpt */ > - struct xfs_log_vec *lv_chain; /* logvecs being pushed */ > + struct list_head lv_chain; /* logvecs being pushed */ > struct list_head iclog_entry; > struct list_head committing; /* ctx committing list */ > struct work_struct discard_endio_work; > @@ -480,7 +480,7 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) > > void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); > void xlog_print_trans(struct xfs_trans *); > -int xlog_write(struct xlog *log, struct xfs_log_vec *log_vector, > +int xlog_write(struct xlog *log, struct list_head *lv_chain, > struct xlog_ticket *tic, xfs_lsn_t *start_lsn, > struct xlog_in_core **commit_iclog, uint optype, uint32_t len); > int xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket, > diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c > index 83c2b7f22eb7..b20e68279808 100644 > --- a/fs/xfs/xfs_trans.c > +++ b/fs/xfs/xfs_trans.c > @@ -747,7 +747,7 @@ xfs_log_item_batch_insert( > void > xfs_trans_committed_bulk( > struct xfs_ail *ailp, > - struct xfs_log_vec *log_vector, > + struct list_head *lv_chain, > xfs_lsn_t commit_lsn, > bool aborted) > { > @@ -762,7 +762,7 @@ xfs_trans_committed_bulk( > spin_unlock(&ailp->ail_lock); > > /* unpin all the log items */ > - for (lv = log_vector; lv; lv = lv->lv_next ) { > + list_for_each_entry(lv, lv_chain, lv_chain) { > struct xfs_log_item *lip = lv->lv_item; > xfs_lsn_t item_lsn; > > diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h > index 3004aeac9110..b0bf78e6ff76 100644 > --- a/fs/xfs/xfs_trans_priv.h > +++ b/fs/xfs/xfs_trans_priv.h > @@ -18,8 +18,8 @@ void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); > void xfs_trans_del_item(struct xfs_log_item *); > void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); > > -void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, > - xfs_lsn_t commit_lsn, bool aborted); > +void xfs_trans_committed_bulk(struct xfs_ail *ailp, > + struct list_head *lv_chain, xfs_lsn_t commit_lsn, bool aborted); > /* > * AIL traversal cursor. > * > -- > 2.28.0 >
On Wed, Mar 10, 2021 at 05:34:52PM -0800, Darrick J. Wong wrote: > On Fri, Mar 05, 2021 at 04:11:39PM +1100, Dave Chinner wrote: > > From: Dave Chinner <dchinner@redhat.com> > > > > Adding a list_sort() call to the CIL push work while the xc_ctx_lock > > is held exclusively has resulted in fairly long lock hold times and > > that stops all front end transaction commits from making progress. > > Heh, nice solution. :) > > > We can move the sorting out of the xc_ctx_lock if we can transfer > > the ordering information to the log vectors as they are detached > > from the log items and then we can sort the log vectors. This > > requires log vectors to use a list_head rather than a single linked > > list > > Ergh, could pull out the list conversion into a separate piece? > Some of the lv_chain usage is ... not entirely textbook. Yes, I can probably do that. > > diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h > > index af54ea3f8c90..0445dd6acbce 100644 > > --- a/fs/xfs/xfs_log.h > > +++ b/fs/xfs/xfs_log.h > > @@ -9,7 +9,8 @@ > > struct xfs_cil_ctx; > > > > struct xfs_log_vec { > > - struct xfs_log_vec *lv_next; /* next lv in build list */ > > + struct list_head lv_chain; /* lv chain ptrs */ > > + int lv_order_id; /* chain ordering info */ > > uint32_t to match li_order_id? *nod* > > diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c > > index 3d43a5088154..6dcc23829bef 100644 > > --- a/fs/xfs/xfs_log_cil.c > > +++ b/fs/xfs/xfs_log_cil.c > > @@ -72,6 +72,7 @@ xlog_cil_ctx_alloc(void) > > ctx = kmem_zalloc(sizeof(*ctx), KM_NOFS); > > INIT_LIST_HEAD(&ctx->committing); > > INIT_LIST_HEAD(&ctx->busy_extents); > > + INIT_LIST_HEAD(&ctx->lv_chain); > > INIT_WORK(&ctx->push_work, xlog_cil_push_work); > > return ctx; > > } > > @@ -237,6 +238,7 @@ xlog_cil_alloc_shadow_bufs( > > lv = kmem_alloc_large(buf_size, KM_NOFS); > > memset(lv, 0, xlog_cil_iovec_space(niovecs)); > > > > + INIT_LIST_HEAD(&lv->lv_chain); > > lv->lv_item = lip; > > lv->lv_size = buf_size; > > if (ordered) > > @@ -252,7 +254,6 @@ xlog_cil_alloc_shadow_bufs( > > else > > lv->lv_buf_len = 0; > > lv->lv_bytes = 0; > > - lv->lv_next = NULL; > > } > > > > /* Ensure the lv is set up according to ->iop_size */ > > @@ -379,8 +380,6 @@ xlog_cil_insert_format_items( > > if (lip->li_lv && shadow->lv_size <= lip->li_lv->lv_size) { > > /* same or smaller, optimise common overwrite case */ > > lv = lip->li_lv; > > - lv->lv_next = NULL; > > What /did/ these null assignments do? IIRC, at one point they ensured that the lv chain was correctly terminated when a lv was reused and added to the tail of an existing chain. I think that became redundant when we added the shadow buffers to allow allocation outside the CIL lock contexts... > > - list_del_init(&item->li_cil); > > - item->li_order_id = 0; > > - if (!ctx->lv_chain) > > - ctx->lv_chain = item->li_lv; > > - else > > - lv->lv_next = item->li_lv; > > + > > lv = item->li_lv; > > - item->li_lv = NULL; > > + lv->lv_order_id = item->li_order_id; > > num_iovecs += lv->lv_niovecs; > > - > > /* we don't write ordered log vectors */ > > if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) > > num_bytes += lv->lv_bytes; > > + list_add_tail(&lv->lv_chain, &ctx->lv_chain); > > + > > + list_del_init(&item->li_cil); > > Do the list manipulations need moving, or could they have stayed further > up in the loop body for a cleaner patch? I moved them so the code was structured as: <transfer item state to log vec> <manipulate lists> <clear item state> Because there was no clear separation between state and list manipulations. This will clean up if I separate the list manipulations into their own patch... Cheers, Dave.
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 46a006d41184..fd58c3213ebf 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -846,6 +846,9 @@ xlog_write_unmount_record( .lv_niovecs = 1, .lv_iovecp = ®, }; + LIST_HEAD(lv_chain); + INIT_LIST_HEAD(&vec.lv_chain); + list_add(&vec.lv_chain, &lv_chain); /* account for space used by record data */ ticket->t_curr_res -= sizeof(unmount_rec); @@ -857,8 +860,8 @@ xlog_write_unmount_record( */ if (log->l_targ != log->l_mp->m_ddev_targp) blkdev_issue_flush(log->l_targ->bt_bdev); - return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS, - reg.i_len); + return xlog_write(log, &lv_chain, ticket, NULL, NULL, + XLOG_UNMOUNT_TRANS, reg.i_len); } /* @@ -1571,14 +1574,17 @@ xlog_commit_record( .lv_iovecp = ®, }; int error; + LIST_HEAD(lv_chain); + INIT_LIST_HEAD(&vec.lv_chain); + list_add(&vec.lv_chain, &lv_chain); if (XLOG_FORCED_SHUTDOWN(log)) return -EIO; /* account for space used by record data */ ticket->t_curr_res -= reg.i_len; - error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS, - reg.i_len); + error = xlog_write(log, &lv_chain, ticket, lsn, iclog, + XLOG_COMMIT_TRANS, reg.i_len); if (error) xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); return error; @@ -2109,6 +2115,7 @@ xlog_print_trans( */ static struct xfs_log_vec * xlog_write_single( + struct list_head *lv_chain, struct xfs_log_vec *log_vector, struct xlog_ticket *ticket, struct xlog_in_core *iclog, @@ -2117,7 +2124,7 @@ xlog_write_single( uint32_t *record_cnt, uint32_t *data_cnt) { - struct xfs_log_vec *lv = log_vector; + struct xfs_log_vec *lv; void *ptr; int index; @@ -2125,10 +2132,13 @@ xlog_write_single( iclog->ic_state == XLOG_STATE_WANT_SYNC); ptr = iclog->ic_datap + *log_offset; - for (lv = log_vector; lv; lv = lv->lv_next) { + for (lv = log_vector; + !list_entry_is_head(lv, lv_chain, lv_chain); + lv = list_next_entry(lv, lv_chain)) { /* - * If the entire log vec does not fit in the iclog, punt it to - * the partial copy loop which can handle this case. + * If the log vec contains data that needs to be copied and does + * not entirely fit in the iclog, punt it to the partial copy + * loop which can handle this case. */ if (lv->lv_niovecs && lv->lv_bytes > iclog->ic_size - *log_offset) @@ -2154,6 +2164,8 @@ xlog_write_single( *data_cnt += reg->i_len; } } + if (list_entry_is_head(lv, lv_chain, lv_chain)) + lv = NULL; ASSERT(*len == 0 || lv); return lv; } @@ -2199,6 +2211,7 @@ xlog_write_get_more_iclog_space( static struct xfs_log_vec * xlog_write_partial( struct xlog *log, + struct list_head *lv_chain, struct xfs_log_vec *log_vector, struct xlog_ticket *ticket, struct xlog_in_core **iclogp, @@ -2338,7 +2351,10 @@ xlog_write_partial( * the caller so it can go back to fast path copying. */ *iclogp = iclog; - return lv->lv_next; + lv = list_next_entry(lv, lv_chain); + if (list_entry_is_head(lv, lv_chain, lv_chain)) + return NULL; + return lv; } /* @@ -2384,7 +2400,7 @@ xlog_write_partial( int xlog_write( struct xlog *log, - struct xfs_log_vec *log_vector, + struct list_head *lv_chain, struct xlog_ticket *ticket, xfs_lsn_t *start_lsn, struct xlog_in_core **commit_iclog, @@ -2392,7 +2408,7 @@ xlog_write( uint32_t len) { struct xlog_in_core *iclog = NULL; - struct xfs_log_vec *lv = log_vector; + struct xfs_log_vec *lv; int record_cnt = 0; int data_cnt = 0; int error = 0; @@ -2424,15 +2440,17 @@ xlog_write( if (optype & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); + lv = list_first_entry_or_null(lv_chain, struct xfs_log_vec, lv_chain); while (lv) { - lv = xlog_write_single(lv, ticket, iclog, &log_offset, + lv = xlog_write_single(lv_chain, lv, ticket, iclog, &log_offset, &len, &record_cnt, &data_cnt); if (!lv) break; ASSERT(!(optype & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))); - lv = xlog_write_partial(log, lv, ticket, &iclog, &log_offset, - &len, &record_cnt, &data_cnt); + lv = xlog_write_partial(log, lv_chain, lv, ticket, &iclog, + &log_offset, &len, &record_cnt, + &data_cnt); if (IS_ERR_OR_NULL(lv)) { error = PTR_ERR_OR_ZERO(lv); break; diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index af54ea3f8c90..0445dd6acbce 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -9,7 +9,8 @@ struct xfs_cil_ctx; struct xfs_log_vec { - struct xfs_log_vec *lv_next; /* next lv in build list */ + struct list_head lv_chain; /* lv chain ptrs */ + int lv_order_id; /* chain ordering info */ int lv_niovecs; /* number of iovecs in lv */ struct xfs_log_iovec *lv_iovecp; /* iovec array */ struct xfs_log_item *lv_item; /* owner */ diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 3d43a5088154..6dcc23829bef 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -72,6 +72,7 @@ xlog_cil_ctx_alloc(void) ctx = kmem_zalloc(sizeof(*ctx), KM_NOFS); INIT_LIST_HEAD(&ctx->committing); INIT_LIST_HEAD(&ctx->busy_extents); + INIT_LIST_HEAD(&ctx->lv_chain); INIT_WORK(&ctx->push_work, xlog_cil_push_work); return ctx; } @@ -237,6 +238,7 @@ xlog_cil_alloc_shadow_bufs( lv = kmem_alloc_large(buf_size, KM_NOFS); memset(lv, 0, xlog_cil_iovec_space(niovecs)); + INIT_LIST_HEAD(&lv->lv_chain); lv->lv_item = lip; lv->lv_size = buf_size; if (ordered) @@ -252,7 +254,6 @@ xlog_cil_alloc_shadow_bufs( else lv->lv_buf_len = 0; lv->lv_bytes = 0; - lv->lv_next = NULL; } /* Ensure the lv is set up according to ->iop_size */ @@ -379,8 +380,6 @@ xlog_cil_insert_format_items( if (lip->li_lv && shadow->lv_size <= lip->li_lv->lv_size) { /* same or smaller, optimise common overwrite case */ lv = lip->li_lv; - lv->lv_next = NULL; - if (ordered) goto insert; @@ -547,14 +546,14 @@ xlog_cil_insert_items( static void xlog_cil_free_logvec( - struct xfs_log_vec *log_vector) + struct list_head *lv_chain) { struct xfs_log_vec *lv; - for (lv = log_vector; lv; ) { - struct xfs_log_vec *next = lv->lv_next; + while(!list_empty(lv_chain)) { + lv = list_first_entry(lv_chain, struct xfs_log_vec, lv_chain); + list_del_init(&lv->lv_chain); kmem_free(lv); - lv = next; } } @@ -653,7 +652,7 @@ xlog_cil_committed( spin_unlock(&ctx->cil->xc_push_lock); } - xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, + xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, &ctx->lv_chain, ctx->start_lsn, abort); xfs_extent_busy_sort(&ctx->busy_extents); @@ -664,7 +663,7 @@ xlog_cil_committed( list_del(&ctx->committing); spin_unlock(&ctx->cil->xc_push_lock); - xlog_cil_free_logvec(ctx->lv_chain); + xlog_cil_free_logvec(&ctx->lv_chain); if (!list_empty(&ctx->busy_extents)) xlog_discard_busy_extents(mp, ctx); @@ -744,7 +743,7 @@ xlog_cil_build_trans_hdr( lvhdr->lv_niovecs = 2; lvhdr->lv_iovecp = &hdr->lhdr[0]; lvhdr->lv_bytes = hdr->lhdr[0].i_len + hdr->lhdr[1].i_len; - lvhdr->lv_next = ctx->lv_chain; + list_add(&lvhdr->lv_chain, &ctx->lv_chain); tic->t_curr_res -= lvhdr->lv_bytes; } @@ -755,12 +754,14 @@ xlog_cil_order_cmp( struct list_head *a, struct list_head *b) { - struct xfs_log_item *l1 = container_of(a, struct xfs_log_item, li_cil); - struct xfs_log_item *l2 = container_of(b, struct xfs_log_item, li_cil); + struct xfs_log_vec *l1 = container_of(a, struct xfs_log_vec, + lv_chain); + struct xfs_log_vec *l2 = container_of(b, struct xfs_log_vec, + lv_chain); - if (l1->li_order_id > l2->li_order_id) + if (l1->lv_order_id > l2->lv_order_id) return 1; - if (l1->li_order_id < l2->li_order_id) + if (l1->lv_order_id < l2->lv_order_id) return -1; return 0; } @@ -907,26 +908,25 @@ xlog_cil_push_work( * needed on the transaction commit side which is currently locked out * by the flush lock. */ - list_sort(NULL, &log_items, xlog_cil_order_cmp); lv = NULL; while (!list_empty(&log_items)) { struct xfs_log_item *item; item = list_first_entry(&log_items, struct xfs_log_item, li_cil); - list_del_init(&item->li_cil); - item->li_order_id = 0; - if (!ctx->lv_chain) - ctx->lv_chain = item->li_lv; - else - lv->lv_next = item->li_lv; + lv = item->li_lv; - item->li_lv = NULL; + lv->lv_order_id = item->li_order_id; num_iovecs += lv->lv_niovecs; - /* we don't write ordered log vectors */ if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) num_bytes += lv->lv_bytes; + list_add_tail(&lv->lv_chain, &ctx->lv_chain); + + list_del_init(&item->li_cil); + item->li_order_id = 0; + item->li_lv = NULL; + } /* @@ -959,6 +959,13 @@ xlog_cil_push_work( spin_unlock(&cil->xc_push_lock); up_write(&cil->xc_ctx_lock); + /* + * Sort the log vector chain before we add the transaction headers. + * This ensures we always have the transaction headers at the start + * of the chain. + */ + list_sort(NULL, &ctx->lv_chain, xlog_cil_order_cmp); + /* * Build a checkpoint transaction header and write it to the log to * begin the transaction. We need to account for the space used by the @@ -981,8 +988,14 @@ xlog_cil_push_work( * use the commit record lsn then we can move the tail beyond the grant * write head. */ - error = xlog_write(log, &lvhdr, ctx->ticket, &ctx->start_lsn, NULL, - XLOG_START_TRANS, num_bytes); + error = xlog_write(log, &ctx->lv_chain, ctx->ticket, &ctx->start_lsn, + NULL, XLOG_START_TRANS, num_bytes); + + /* + * Take the lvhdr back off the lv_chain as it should not be passed + * to log IO completion. + */ + list_del(&lvhdr.lv_chain); if (error) goto out_abort_free_ticket; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 12a1a36eef7e..6a4160200417 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -224,7 +224,7 @@ struct xfs_cil_ctx { int nvecs; /* number of regions */ atomic_t space_used; /* aggregate size of regions */ struct list_head busy_extents; /* busy extents in chkpt */ - struct xfs_log_vec *lv_chain; /* logvecs being pushed */ + struct list_head lv_chain; /* logvecs being pushed */ struct list_head iclog_entry; struct list_head committing; /* ctx committing list */ struct work_struct discard_endio_work; @@ -480,7 +480,7 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); void xlog_print_trans(struct xfs_trans *); -int xlog_write(struct xlog *log, struct xfs_log_vec *log_vector, +int xlog_write(struct xlog *log, struct list_head *lv_chain, struct xlog_ticket *tic, xfs_lsn_t *start_lsn, struct xlog_in_core **commit_iclog, uint optype, uint32_t len); int xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket, diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 83c2b7f22eb7..b20e68279808 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -747,7 +747,7 @@ xfs_log_item_batch_insert( void xfs_trans_committed_bulk( struct xfs_ail *ailp, - struct xfs_log_vec *log_vector, + struct list_head *lv_chain, xfs_lsn_t commit_lsn, bool aborted) { @@ -762,7 +762,7 @@ xfs_trans_committed_bulk( spin_unlock(&ailp->ail_lock); /* unpin all the log items */ - for (lv = log_vector; lv; lv = lv->lv_next ) { + list_for_each_entry(lv, lv_chain, lv_chain) { struct xfs_log_item *lip = lv->lv_item; xfs_lsn_t item_lsn; diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 3004aeac9110..b0bf78e6ff76 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -18,8 +18,8 @@ void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); void xfs_trans_del_item(struct xfs_log_item *); void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); -void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, - xfs_lsn_t commit_lsn, bool aborted); +void xfs_trans_committed_bulk(struct xfs_ail *ailp, + struct list_head *lv_chain, xfs_lsn_t commit_lsn, bool aborted); /* * AIL traversal cursor. *