@@ -1264,47 +1264,6 @@ xfs_log_cover(
return error;
}
-/*
- * We may be holding the log iclog lock upon entering this routine.
- */
-xfs_lsn_t
-xlog_assign_tail_lsn_locked(
- struct xfs_mount *mp)
-{
- struct xlog *log = mp->m_log;
- struct xfs_log_item *lip;
- xfs_lsn_t tail_lsn;
-
- assert_spin_locked(&mp->m_ail->ail_lock);
-
- /*
- * To make sure we always have a valid LSN for the log tail we keep
- * track of the last LSN which was committed in log->l_last_sync_lsn,
- * and use that when the AIL was empty.
- */
- lip = xfs_ail_min(mp->m_ail);
- if (lip)
- tail_lsn = lip->li_lsn;
- else
- tail_lsn = atomic64_read(&log->l_last_sync_lsn);
- trace_xfs_log_assign_tail_lsn(log, tail_lsn);
- atomic64_set(&log->l_tail_lsn, tail_lsn);
- return tail_lsn;
-}
-
-xfs_lsn_t
-xlog_assign_tail_lsn(
- struct xfs_mount *mp)
-{
- xfs_lsn_t tail_lsn;
-
- spin_lock(&mp->m_ail->ail_lock);
- tail_lsn = xlog_assign_tail_lsn_locked(mp);
- spin_unlock(&mp->m_ail->ail_lock);
-
- return tail_lsn;
-}
-
/*
* Return the space in the log between the tail and the head. The head
* is passed in the cycle/bytes formal parms. In the special case where
@@ -1538,7 +1497,6 @@ xlog_alloc_log(
log->l_prev_block = -1;
/* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
- xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
if (xfs_has_logv2(mp) && mp->m_sb.sb_logsunit > 1)
@@ -2586,44 +2544,23 @@ xlog_get_lowest_lsn(
return lowest_lsn;
}
-/*
- * Completion of a iclog IO does not imply that a transaction has completed, as
- * transactions can be large enough to span many iclogs. We cannot change the
- * tail of the log half way through a transaction as this may be the only
- * transaction in the log and moving the tail to point to the middle of it
- * will prevent recovery from finding the start of the transaction. Hence we
- * should only update the last_sync_lsn if this iclog contains transaction
- * completion callbacks on it.
- *
- * We have to do this before we drop the icloglock to ensure we are the only one
- * that can update it.
- *
- * If we are moving the last_sync_lsn forwards, we also need to ensure we kick
- * the reservation grant head pushing. This is due to the fact that the push
- * target is bound by the current last_sync_lsn value. Hence if we have a large
- * amount of log space bound up in this committing transaction then the
- * last_sync_lsn value may be the limiting factor preventing tail pushing from
- * freeing space in the log. Hence once we've updated the last_sync_lsn we
- * should push the AIL to ensure the push target (and hence the grant head) is
- * no longer bound by the old log head location and can move forwards and make
- * progress again.
- */
static void
xlog_state_set_callback(
struct xlog *log,
struct xlog_in_core *iclog,
xfs_lsn_t header_lsn)
{
+ /*
+ * If there are no callbacks on this iclog, we can mark it clean
+ * immediately and return. Otherwise we need to run the
+ * callbacks.
+ */
+ if (list_empty(&iclog->ic_callbacks)) {
+ xlog_state_clean_iclog(log, iclog);
+ return;
+ }
trace_xlog_iclog_callback(iclog, _RET_IP_);
iclog->ic_state = XLOG_STATE_CALLBACK;
-
- ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
- header_lsn) <= 0);
-
- if (list_empty_careful(&iclog->ic_callbacks))
- return;
-
- atomic64_set(&log->l_last_sync_lsn, header_lsn);
}
/*
@@ -710,6 +710,24 @@ xlog_cil_ail_insert_batch(
* items into the the AIL. This uses bulk insertion techniques to minimise AIL
* lock traffic.
*
+ * The AIL tracks log items via the start record LSN of the checkpoint,
+ * not the commit record LSN. This is because we can pipeline multiple
+ * checkpoints, and so the start record of checkpoint N+1 can be
+ * written before the commit record of checkpoint N. i.e:
+ *
+ * start N commit N
+ * +-------------+------------+----------------+
+ * start N+1 commit N+1
+ *
+ * The tail of the log cannot be moved to the LSN of commit N when all
+ * the items of that checkpoint are written back, because then the
+ * start record for N+1 is no longer in the active portion of the log
+ * and recovery will fail/corrupt the filesystem.
+ *
+ * Hence when all the log items in checkpoint N are written back, the
+ * tail of the log most now only move as far forwards as the start LSN
+ * of checkpoint N+1.
+ *
* If we are called with the aborted flag set, it is because a log write during
* a CIL checkpoint commit has failed. In this case, all the items in the
* checkpoint have already gone through iop_committed and iop_committing, which
@@ -727,24 +745,33 @@ xlog_cil_ail_insert_batch(
*/
static void
xlog_cil_ail_insert(
- struct xlog *log,
- struct list_head *lv_chain,
- xfs_lsn_t commit_lsn,
+ struct xfs_cil_ctx *ctx,
bool aborted)
{
#define LOG_ITEM_BATCH_SIZE 32
- struct xfs_ail *ailp = log->l_ailp;
+ struct xfs_ail *ailp = ctx->cil->xc_log->l_ailp;
struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
struct xfs_log_vec *lv;
struct xfs_ail_cursor cur;
int i = 0;
+ /*
+ * Update the AIL head LSN with the commit record LSN of this
+ * checkpoint. As iclogs are always completed in order, this should
+ * always be the same (as iclogs can contain multiple commit records) or
+ * higher LSN than the current head. We do this before insertion of the
+ * items so that log space checks during insertion will reflect the
+ * space that this checkpoint has already consumed.
+ */
+ ASSERT(XFS_LSN_CMP(ctx->commit_lsn, ailp->ail_head_lsn) >= 0 ||
+ aborted);
spin_lock(&ailp->ail_lock);
- xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
+ ailp->ail_head_lsn = ctx->commit_lsn;
+ xfs_trans_ail_cursor_last(ailp, &cur, ctx->start_lsn);
spin_unlock(&ailp->ail_lock);
/* unpin all the log items */
- list_for_each_entry(lv, lv_chain, lv_list) {
+ list_for_each_entry(lv, &ctx->lv_chain, lv_list) {
struct xfs_log_item *lip = lv->lv_item;
xfs_lsn_t item_lsn;
@@ -757,9 +784,10 @@ xlog_cil_ail_insert(
}
if (lip->li_ops->iop_committed)
- item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
+ item_lsn = lip->li_ops->iop_committed(lip,
+ ctx->start_lsn);
else
- item_lsn = commit_lsn;
+ item_lsn = ctx->start_lsn;
/* item_lsn of -1 means the item needs no further processing */
if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
@@ -776,7 +804,7 @@ xlog_cil_ail_insert(
continue;
}
- if (item_lsn != commit_lsn) {
+ if (item_lsn != ctx->start_lsn) {
/*
* Not a bulk update option due to unusual item_lsn.
@@ -799,14 +827,15 @@ xlog_cil_ail_insert(
log_items[i++] = lv->lv_item;
if (i >= LOG_ITEM_BATCH_SIZE) {
xlog_cil_ail_insert_batch(ailp, &cur, log_items,
- LOG_ITEM_BATCH_SIZE, commit_lsn);
+ LOG_ITEM_BATCH_SIZE, ctx->start_lsn);
i = 0;
}
}
/* make sure we insert the remainder! */
if (i)
- xlog_cil_ail_insert_batch(ailp, &cur, log_items, i, commit_lsn);
+ xlog_cil_ail_insert_batch(ailp, &cur, log_items, i,
+ ctx->start_lsn);
spin_lock(&ailp->ail_lock);
xfs_trans_ail_cursor_done(&cur);
@@ -922,8 +951,7 @@ xlog_cil_committed(
spin_unlock(&ctx->cil->xc_push_lock);
}
- xlog_cil_ail_insert(ctx->cil->xc_log, &ctx->lv_chain,
- ctx->start_lsn, abort);
+ xlog_cil_ail_insert(ctx, abort);
xfs_extent_busy_sort(&ctx->busy_extents);
xfs_extent_busy_clear(mp, &ctx->busy_extents,
@@ -426,13 +426,10 @@ struct xlog {
int l_prev_block; /* previous logical log block */
/*
- * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
- * read without needing to hold specific locks. To avoid operations
- * contending with other hot objects, place each of them on a separate
- * cacheline.
+ * l_tail_lsn is atomic so it can be set and read without needing to
+ * hold specific locks. To avoid operations contending with other hot
+ * objects, it on a separate cacheline.
*/
- /* lsn of last LR on disk */
- atomic64_t l_last_sync_lsn ____cacheline_aligned_in_smp;
/* lsn of 1st LR with unflushed * buffers */
atomic64_t l_tail_lsn ____cacheline_aligned_in_smp;
@@ -1177,8 +1177,8 @@ xlog_check_unmount_rec(
*/
xlog_assign_atomic_lsn(&log->l_tail_lsn,
log->l_curr_cycle, after_umount_blk);
- xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
- log->l_curr_cycle, after_umount_blk);
+ log->l_ailp->ail_head_lsn =
+ atomic64_read(&log->l_tail_lsn);
*tail_blk = after_umount_blk;
*clean = true;
@@ -1212,7 +1212,7 @@ xlog_set_state(
if (bump_cycle)
log->l_curr_cycle++;
atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
- atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+ log->l_ailp->ail_head_lsn = be64_to_cpu(rhead->h_lsn);
xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
BBTOB(log->l_curr_block));
xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
@@ -3300,14 +3300,13 @@ xlog_do_recover(
/*
* We now update the tail_lsn since much of the recovery has completed
- * and there may be space available to use. If there were no extent
- * or iunlinks, we can free up the entire log and set the tail_lsn to
- * be the last_sync_lsn. This was set in xlog_find_tail to be the
- * lsn of the last known good LR on disk. If there are extent frees
- * or iunlinks they will have some entries in the AIL; so we look at
- * the AIL to determine how to set the tail_lsn.
+ * and there may be space available to use. If there were no extent or
+ * iunlinks, we can free up the entire log. This was set in
+ * xlog_find_tail to be the lsn of the last known good LR on disk. If
+ * there are extent frees or iunlinks they will have some entries in the
+ * AIL; so we look at the AIL to determine how to set the tail_lsn.
*/
- xlog_assign_tail_lsn(mp);
+ xfs_ail_assign_tail_lsn(log->l_ailp);
/*
* Now that we've finished replaying all buffer and inode updates,
@@ -22,6 +22,7 @@
#include "xfs_trans.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
+#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_quota.h"
#include "xfs_dquot_item.h"
@@ -1386,19 +1386,19 @@ TRACE_EVENT(xfs_log_assign_tail_lsn,
__field(dev_t, dev)
__field(xfs_lsn_t, new_lsn)
__field(xfs_lsn_t, old_lsn)
- __field(xfs_lsn_t, last_sync_lsn)
+ __field(xfs_lsn_t, head_lsn)
),
TP_fast_assign(
__entry->dev = log->l_mp->m_super->s_dev;
__entry->new_lsn = new_lsn;
__entry->old_lsn = atomic64_read(&log->l_tail_lsn);
- __entry->last_sync_lsn = atomic64_read(&log->l_last_sync_lsn);
+ __entry->head_lsn = log->l_ailp->ail_head_lsn;
),
- TP_printk("dev %d:%d new tail lsn %d/%d, old lsn %d/%d, last sync %d/%d",
+ TP_printk("dev %d:%d new tail lsn %d/%d, old lsn %d/%d, head lsn %d/%d",
MAJOR(__entry->dev), MINOR(__entry->dev),
CYCLE_LSN(__entry->new_lsn), BLOCK_LSN(__entry->new_lsn),
CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn),
- CYCLE_LSN(__entry->last_sync_lsn), BLOCK_LSN(__entry->last_sync_lsn))
+ CYCLE_LSN(__entry->head_lsn), BLOCK_LSN(__entry->head_lsn))
)
DECLARE_EVENT_CLASS(xfs_file_class,
@@ -715,6 +715,26 @@ xfs_ail_push_all_sync(
finish_wait(&ailp->ail_empty, &wait);
}
+void
+__xfs_ail_assign_tail_lsn(
+ struct xfs_ail *ailp)
+{
+ struct xlog *log = ailp->ail_log;
+ xfs_lsn_t tail_lsn;
+
+ assert_spin_locked(&ailp->ail_lock);
+
+ if (xlog_is_shutdown(log))
+ return;
+
+ tail_lsn = __xfs_ail_min_lsn(ailp);
+ if (!tail_lsn)
+ tail_lsn = ailp->ail_head_lsn;
+
+ trace_xfs_log_assign_tail_lsn(log, tail_lsn);
+ atomic64_set(&log->l_tail_lsn, tail_lsn);
+}
+
/*
* Callers should pass the the original tail lsn so that we can detect if the
* tail has moved as a result of the operation that was performed. If the caller
@@ -729,15 +749,13 @@ xfs_ail_update_finish(
{
struct xlog *log = ailp->ail_log;
- /* if the tail lsn hasn't changed, don't do updates or wakeups. */
+ /* If the tail lsn hasn't changed, don't do updates or wakeups. */
if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) {
spin_unlock(&ailp->ail_lock);
return;
}
- if (!xlog_is_shutdown(log))
- xlog_assign_tail_lsn_locked(log->l_mp);
-
+ __xfs_ail_assign_tail_lsn(ailp);
if (list_empty(&ailp->ail_head))
wake_up_all(&ailp->ail_empty);
spin_unlock(&ailp->ail_lock);
@@ -55,6 +55,7 @@ struct xfs_ail {
struct list_head ail_cursors;
spinlock_t ail_lock;
xfs_lsn_t ail_last_pushed_lsn;
+ xfs_lsn_t ail_head_lsn;
int ail_log_flush;
unsigned long ail_opstate;
struct list_head ail_buf_list;
@@ -135,6 +136,18 @@ struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur);
void xfs_trans_ail_cursor_done(struct xfs_ail_cursor *cur);
+void __xfs_ail_assign_tail_lsn(struct xfs_ail *ailp);
+
+static inline void
+xfs_ail_assign_tail_lsn(
+ struct xfs_ail *ailp)
+{
+
+ spin_lock(&ailp->ail_lock);
+ __xfs_ail_assign_tail_lsn(ailp);
+ spin_unlock(&ailp->ail_lock);
+}
+
#if BITS_PER_LONG != 64
static inline void
xfs_trans_ail_copy_lsn(