From patchwork Tue Aug 10 05:21:16 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 12427945 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7E396C4338F for ; Tue, 10 Aug 2021 05:21:44 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 66B1A6101E for ; Tue, 10 Aug 2021 05:21:44 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237625AbhHJFWD (ORCPT ); Tue, 10 Aug 2021 01:22:03 -0400 Received: from mail108.syd.optusnet.com.au ([211.29.132.59]:53353 "EHLO mail108.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S237633AbhHJFVy (ORCPT ); Tue, 10 Aug 2021 01:21:54 -0400 Received: from dread.disaster.area (pa49-195-182-146.pa.nsw.optusnet.com.au [49.195.182.146]) by mail108.syd.optusnet.com.au (Postfix) with ESMTPS id CA6361B3DFA for ; Tue, 10 Aug 2021 15:21:25 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1mDKCY-00GZcE-Or for linux-xfs@vger.kernel.org; Tue, 10 Aug 2021 15:21:22 +1000 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1mDKCY-000Ags-H7 for linux-xfs@vger.kernel.org; Tue, 10 Aug 2021 15:21:22 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 1/5] xfs: move xlog_commit_record to xfs_log_cil.c Date: Tue, 10 Aug 2021 15:21:16 +1000 Message-Id: <20210810052120.41019-2-david@fromorbit.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20210810052120.41019-1-david@fromorbit.com> References: <20210810052120.41019-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Tu+Yewfh c=1 sm=1 tr=0 a=QpfB3wCSrn/dqEBSktpwZQ==:117 a=QpfB3wCSrn/dqEBSktpwZQ==:17 a=MhDmnRu9jo8A:10 a=20KFwNOVAAAA:8 a=VwQbUJbxAAAA:8 a=HOSJ-cswk-H18GUH334A:9 a=AjGcO6oz07-iQ99wixmX:22 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner It is only used by the CIL checkpoints, and is the counterpart to start record formatting and writing that is already local to xfs_log_cil.c. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 31 ------------------------------- fs/xfs/xfs_log_cil.c | 35 ++++++++++++++++++++++++++++++++++- fs/xfs/xfs_log_priv.h | 2 -- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a26c7909cbe7..fbcf70f7804b 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1658,37 +1658,6 @@ xlog_alloc_log( return ERR_PTR(error); } /* xlog_alloc_log */ -/* - * Write out the commit record of a transaction associated with the given - * ticket to close off a running log write. Return the lsn of the commit record. - */ -int -xlog_commit_record( - struct xlog *log, - struct xlog_ticket *ticket, - struct xlog_in_core **iclog, - xfs_lsn_t *lsn) -{ - struct xfs_log_iovec reg = { - .i_addr = NULL, - .i_len = 0, - .i_type = XLOG_REG_TYPE_COMMIT, - }; - struct xfs_log_vec vec = { - .lv_niovecs = 1, - .lv_iovecp = ®, - }; - int error; - - if (xlog_is_shutdown(log)) - return -EIO; - - error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS); - if (error) - xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); - return error; -} - /* * Compute the LSN that we'd need to push the log tail towards in order to have * (a) enough on-disk log space to log the number of bytes specified, (b) at diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index e18b539d26fb..5ebb5737d73f 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -631,6 +631,38 @@ xlog_cil_process_committed( } } +/* + * Write out the commit record of a checkpoint transaction associated with the + * given ticket to close off a running log write. Return the lsn of the commit + * record. + */ +static int +xlog_cil_write_commit_record( + struct xlog *log, + struct xlog_ticket *ticket, + struct xlog_in_core **iclog, + xfs_lsn_t *lsn) +{ + struct xfs_log_iovec reg = { + .i_addr = NULL, + .i_len = 0, + .i_type = XLOG_REG_TYPE_COMMIT, + }; + struct xfs_log_vec vec = { + .lv_niovecs = 1, + .lv_iovecp = ®, + }; + int error; + + if (xlog_is_shutdown(log)) + return -EIO; + + error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS); + if (error) + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); + return error; +} + /* * Push the Committed Item List to the log. * @@ -884,7 +916,8 @@ xlog_cil_push_work( } spin_unlock(&cil->xc_push_lock); - error = xlog_commit_record(log, tic, &commit_iclog, &commit_lsn); + error = xlog_cil_write_commit_record(log, tic, &commit_iclog, + &commit_lsn); if (error) goto out_abort_free_ticket; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 86ddd0f9cecf..951447fc0414 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -515,8 +515,6 @@ void xlog_print_trans(struct xfs_trans *); int xlog_write(struct xlog *log, struct xfs_log_vec *log_vector, struct xlog_ticket *tic, xfs_lsn_t *start_lsn, struct xlog_in_core **commit_iclog, uint optype); -int xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket, - struct xlog_in_core **iclog, xfs_lsn_t *lsn); void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); From patchwork Tue Aug 10 05:21:17 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 12427939 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,TVD_PH_BODY_ACCOUNTS_PRE, URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 77D32C432BE for ; Tue, 10 Aug 2021 05:21:36 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 5EA196101D for ; Tue, 10 Aug 2021 05:21:36 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237642AbhHJFVz (ORCPT ); Tue, 10 Aug 2021 01:21:55 -0400 Received: from mail109.syd.optusnet.com.au ([211.29.132.80]:44258 "EHLO mail109.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S237627AbhHJFVs (ORCPT ); Tue, 10 Aug 2021 01:21:48 -0400 Received: from dread.disaster.area (pa49-195-182-146.pa.nsw.optusnet.com.au [49.195.182.146]) by mail109.syd.optusnet.com.au (Postfix) with ESMTPS id 528D484C19 for ; Tue, 10 Aug 2021 15:21:23 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1mDKCY-00GZcF-Q1 for linux-xfs@vger.kernel.org; Tue, 10 Aug 2021 15:21:22 +1000 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1mDKCY-000Agv-IG for linux-xfs@vger.kernel.org; Tue, 10 Aug 2021 15:21:22 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 2/5] xfs: pass a CIL context to xlog_write() Date: Tue, 10 Aug 2021 15:21:17 +1000 Message-Id: <20210810052120.41019-3-david@fromorbit.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20210810052120.41019-1-david@fromorbit.com> References: <20210810052120.41019-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=F8MpiZpN c=1 sm=1 tr=0 a=QpfB3wCSrn/dqEBSktpwZQ==:117 a=QpfB3wCSrn/dqEBSktpwZQ==:17 a=MhDmnRu9jo8A:10 a=20KFwNOVAAAA:8 a=VwQbUJbxAAAA:8 a=mA0rlXmpgBq_sLbHoTEA:9 a=AjGcO6oz07-iQ99wixmX:22 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Pass the CIL context to xlog_write() rather than a pointer to a LSN variable. Only the CIL checkpoint calls to xlog_write() need to know about the start LSN of the writes, so rework xlog_write to directly write the LSNs into the CIL context structure. This removes the commit_lsn variable from xlog_cil_push_work(), so now we only have to issue the commit record ordering wakeup from there. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 18 +++++++++------ fs/xfs/xfs_log_cil.c | 52 ++++++++++++++++++++++++++++++------------- fs/xfs/xfs_log_priv.h | 7 ++++-- 3 files changed, 52 insertions(+), 25 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index fbcf70f7804b..6ac5d52f573d 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -933,7 +933,7 @@ xlog_write_unmount_record( /* account for space used by record data */ ticket->t_curr_res -= sizeof(ulf); - return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS); + return xlog_write(log, NULL, &vec, ticket, NULL, XLOG_UNMOUNT_TRANS); } /* @@ -2470,9 +2470,9 @@ xlog_write_copy_finish( int xlog_write( struct xlog *log, + struct xfs_cil_ctx *ctx, struct xfs_log_vec *log_vector, struct xlog_ticket *ticket, - xfs_lsn_t *start_lsn, struct xlog_in_core **commit_iclog, uint optype) { @@ -2503,8 +2503,6 @@ xlog_write( } len = xlog_write_calc_vec_length(ticket, log_vector, optype); - if (start_lsn) - *start_lsn = 0; while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { void *ptr; int log_offset; @@ -2517,9 +2515,15 @@ xlog_write( ASSERT(log_offset <= iclog->ic_size - 1); ptr = iclog->ic_datap + log_offset; - /* Start_lsn is the first lsn written to. */ - if (start_lsn && !*start_lsn) - *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn); + /* + * If we have a context pointer, pass it the first iclog we are + * writing to so it can record state needed for iclog write + * ordering. + */ + if (ctx) { + xlog_cil_set_ctx_write_state(ctx, iclog); + ctx = NULL; + } /* * This loop writes out as many regions as can fit in the amount diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 5ebb5737d73f..651118fbaa61 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -631,6 +631,30 @@ xlog_cil_process_committed( } } +/* +* Record the LSN of the iclog we were just granted space to start writing into. +* If the context doesn't have a start_lsn recorded, then this iclog will +* contain the start record for the checkpoint. Otherwise this write contains +* the commit record for the checkpoint. +*/ +void +xlog_cil_set_ctx_write_state( + struct xfs_cil_ctx *ctx, + struct xlog_in_core *iclog) +{ + struct xfs_cil *cil = ctx->cil; + xfs_lsn_t lsn = be64_to_cpu(iclog->ic_header.h_lsn); + + ASSERT(!ctx->commit_lsn); + spin_lock(&cil->xc_push_lock); + if (!ctx->start_lsn) + ctx->start_lsn = lsn; + else + ctx->commit_lsn = lsn; + spin_unlock(&cil->xc_push_lock); +} + + /* * Write out the commit record of a checkpoint transaction associated with the * given ticket to close off a running log write. Return the lsn of the commit @@ -638,26 +662,26 @@ xlog_cil_process_committed( */ static int xlog_cil_write_commit_record( - struct xlog *log, - struct xlog_ticket *ticket, - struct xlog_in_core **iclog, - xfs_lsn_t *lsn) + struct xfs_cil_ctx *ctx, + struct xlog_in_core **iclog) { - struct xfs_log_iovec reg = { + struct xlog *log = ctx->cil->xc_log; + struct xfs_log_iovec reg = { .i_addr = NULL, .i_len = 0, .i_type = XLOG_REG_TYPE_COMMIT, }; - struct xfs_log_vec vec = { + struct xfs_log_vec vec = { .lv_niovecs = 1, .lv_iovecp = ®, }; - int error; + int error; if (xlog_is_shutdown(log)) return -EIO; - error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS); + error = xlog_write(log, ctx, &vec, ctx->ticket, iclog, + XLOG_COMMIT_TRANS); if (error) xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); return error; @@ -695,7 +719,6 @@ xlog_cil_push_work( struct xfs_log_iovec lhdr; struct xfs_log_vec lvhdr = { NULL }; xfs_lsn_t preflush_tail_lsn; - xfs_lsn_t commit_lsn; xfs_csn_t push_seq; struct bio bio; DECLARE_COMPLETION_ONSTACK(bdev_flush); @@ -877,8 +900,7 @@ xlog_cil_push_work( */ wait_for_completion(&bdev_flush); - error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, - XLOG_START_TRANS); + error = xlog_write(log, ctx, &lvhdr, tic, NULL, XLOG_START_TRANS); if (error) goto out_abort_free_ticket; @@ -916,8 +938,7 @@ xlog_cil_push_work( } spin_unlock(&cil->xc_push_lock); - error = xlog_cil_write_commit_record(log, tic, &commit_iclog, - &commit_lsn); + error = xlog_cil_write_commit_record(ctx, &commit_iclog); if (error) goto out_abort_free_ticket; @@ -944,7 +965,6 @@ xlog_cil_push_work( * and wake up anyone who is waiting for the commit to complete. */ spin_lock(&cil->xc_push_lock); - ctx->commit_lsn = commit_lsn; wake_up_all(&cil->xc_commit_wait); spin_unlock(&cil->xc_push_lock); @@ -960,11 +980,11 @@ xlog_cil_push_work( * iclog header lsn and compare it to the commit lsn to determine if we * need to wait on iclogs or not. */ - if (ctx->start_lsn != commit_lsn) { + if (ctx->start_lsn != ctx->commit_lsn) { xfs_lsn_t plsn; plsn = be64_to_cpu(commit_iclog->ic_prev->ic_header.h_lsn); - if (plsn && XFS_LSN_CMP(plsn, commit_lsn) < 0) { + if (plsn && XFS_LSN_CMP(plsn, ctx->commit_lsn) < 0) { /* * Waiting on ic_force_wait orders the completion of * iclogs older than ic_prev. Hence we only need to wait diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 951447fc0414..09156399b46c 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -512,8 +512,8 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes) void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); void xlog_print_trans(struct xfs_trans *); -int xlog_write(struct xlog *log, struct xfs_log_vec *log_vector, - struct xlog_ticket *tic, xfs_lsn_t *start_lsn, +int xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx, + struct xfs_log_vec *log_vector, struct xlog_ticket *tic, struct xlog_in_core **commit_iclog, uint optype); void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); @@ -585,6 +585,9 @@ void xlog_cil_destroy(struct xlog *log); bool xlog_cil_empty(struct xlog *log); void xlog_cil_commit(struct xlog *log, struct xfs_trans *tp, xfs_csn_t *commit_seq, bool regrant); +void xlog_cil_set_ctx_write_state(struct xfs_cil_ctx *ctx, + struct xlog_in_core *iclog); + /* * CIL force routines From patchwork Tue Aug 10 05:21:18 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 12427943 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id EF2E9C4338F for ; Tue, 10 Aug 2021 05:21:41 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id D3A7060EC0 for ; Tue, 10 Aug 2021 05:21:41 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237074AbhHJFWA (ORCPT ); Tue, 10 Aug 2021 01:22:00 -0400 Received: from mail106.syd.optusnet.com.au ([211.29.132.42]:40899 "EHLO mail106.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S237625AbhHJFVw (ORCPT ); Tue, 10 Aug 2021 01:21:52 -0400 Received: from dread.disaster.area (pa49-195-182-146.pa.nsw.optusnet.com.au [49.195.182.146]) by mail106.syd.optusnet.com.au (Postfix) with ESMTPS id 59FC780BBD1 for ; Tue, 10 Aug 2021 15:21:23 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1mDKCY-00GZcH-Qm for linux-xfs@vger.kernel.org; Tue, 10 Aug 2021 15:21:22 +1000 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1mDKCY-000Agy-JJ for linux-xfs@vger.kernel.org; Tue, 10 Aug 2021 15:21:22 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 3/5] xfs: factor out log write ordering from xlog_cil_push_work() Date: Tue, 10 Aug 2021 15:21:18 +1000 Message-Id: <20210810052120.41019-4-david@fromorbit.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20210810052120.41019-1-david@fromorbit.com> References: <20210810052120.41019-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Tu+Yewfh c=1 sm=1 tr=0 a=QpfB3wCSrn/dqEBSktpwZQ==:117 a=QpfB3wCSrn/dqEBSktpwZQ==:17 a=MhDmnRu9jo8A:10 a=20KFwNOVAAAA:8 a=VwQbUJbxAAAA:8 a=k7W3vL2-wnp397sw1bIA:9 a=AjGcO6oz07-iQ99wixmX:22 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner So we can use it for start record ordering as well as commit record ordering in future. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_log_cil.c | 87 ++++++++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 36 deletions(-) diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 651118fbaa61..38917c97a582 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -656,9 +656,54 @@ xlog_cil_set_ctx_write_state( /* - * Write out the commit record of a checkpoint transaction associated with the - * given ticket to close off a running log write. Return the lsn of the commit - * record. + * Ensure that the order of log writes follows checkpoint sequence order. This + * relies on the context LSN being zero until the log write has guaranteed the + * LSN that the log write will start at via xlog_state_get_iclog_space(). + */ +static int +xlog_cil_order_write( + struct xfs_cil *cil, + xfs_csn_t sequence) +{ + struct xfs_cil_ctx *ctx; + +restart: + spin_lock(&cil->xc_push_lock); + list_for_each_entry(ctx, &cil->xc_committing, committing) { + /* + * Avoid getting stuck in this loop because we were woken by the + * shutdown, but then went back to sleep once already in the + * shutdown state. + */ + if (xlog_is_shutdown(cil->xc_log)) { + spin_unlock(&cil->xc_push_lock); + return -EIO; + } + + /* + * Higher sequences will wait for this one so skip them. + * Don't wait for our own sequence, either. + */ + if (ctx->sequence >= sequence) + continue; + if (!ctx->commit_lsn) { + /* + * It is still being pushed! Wait for the push to + * complete, then start again from the beginning. + */ + xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock); + goto restart; + } + } + spin_unlock(&cil->xc_push_lock); + return 0; +} + +/* + * Write out the commit record of a checkpoint transaction to close off a + * running log write. These commit records are strictly ordered in ascending CIL + * sequence order so that log recovery will always replay the checkpoints in the + * correct order. */ static int xlog_cil_write_commit_record( @@ -904,39 +949,9 @@ xlog_cil_push_work( if (error) goto out_abort_free_ticket; - /* - * now that we've written the checkpoint into the log, strictly - * order the commit records so replay will get them in the right order. - */ -restart: - spin_lock(&cil->xc_push_lock); - list_for_each_entry(new_ctx, &cil->xc_committing, committing) { - /* - * Avoid getting stuck in this loop because we were woken by the - * shutdown, but then went back to sleep once already in the - * shutdown state. - */ - if (xlog_is_shutdown(log)) { - spin_unlock(&cil->xc_push_lock); - goto out_abort_free_ticket; - } - - /* - * Higher sequences will wait for this one so skip them. - * Don't wait for our own sequence, either. - */ - if (new_ctx->sequence >= ctx->sequence) - continue; - if (!new_ctx->commit_lsn) { - /* - * It is still being pushed! Wait for the push to - * complete, then start again from the beginning. - */ - xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock); - goto restart; - } - } - spin_unlock(&cil->xc_push_lock); + error = xlog_cil_order_write(ctx->cil, ctx->sequence); + if (error) + goto out_abort_free_ticket; error = xlog_cil_write_commit_record(ctx, &commit_iclog); if (error) From patchwork Tue Aug 10 05:21:19 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 12427941 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,TVD_PH_BODY_ACCOUNTS_PRE, USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 06E70C4338F for ; Tue, 10 Aug 2021 05:21:38 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id D6DF160F35 for ; Tue, 10 Aug 2021 05:21:37 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237627AbhHJFV5 (ORCPT ); Tue, 10 Aug 2021 01:21:57 -0400 Received: from mail106.syd.optusnet.com.au ([211.29.132.42]:40893 "EHLO mail106.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S237620AbhHJFVs (ORCPT ); Tue, 10 Aug 2021 01:21:48 -0400 Received: from dread.disaster.area (pa49-195-182-146.pa.nsw.optusnet.com.au [49.195.182.146]) by mail106.syd.optusnet.com.au (Postfix) with ESMTPS id 66BDB80BC9D for ; Tue, 10 Aug 2021 15:21:23 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1mDKCY-00GZcP-S1 for linux-xfs@vger.kernel.org; Tue, 10 Aug 2021 15:21:22 +1000 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1mDKCY-000Ah1-KB for linux-xfs@vger.kernel.org; Tue, 10 Aug 2021 15:21:22 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 4/5] xfs: attach iclog callbacks in xlog_cil_set_ctx_write_state() Date: Tue, 10 Aug 2021 15:21:19 +1000 Message-Id: <20210810052120.41019-5-david@fromorbit.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20210810052120.41019-1-david@fromorbit.com> References: <20210810052120.41019-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Tu+Yewfh c=1 sm=1 tr=0 a=QpfB3wCSrn/dqEBSktpwZQ==:117 a=QpfB3wCSrn/dqEBSktpwZQ==:17 a=MhDmnRu9jo8A:10 a=20KFwNOVAAAA:8 a=VwQbUJbxAAAA:8 a=Yl4hcIhFekaH0rulqaYA:9 a=AjGcO6oz07-iQ99wixmX:22 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Now that we have a mechanism to guarantee that the callbacks attached to an iclog are owned by the context that attaches them until they drop their reference to the iclog via xlog_state_release_iclog(), we can attach callbacks to the iclog at any time we have an active reference to the iclog. xlog_state_get_iclog_space() always guarantees that the commit record will fit in the iclog it returns, so we can move this IO callback setting to xlog_cil_set_ctx_write_state(), record the commit iclog in the context and remove the need for the commit iclog to be returned by xlog_write() altogether. This, in turn, allows us to move the wakeup for ordered commit record writes up into xlog_cil_set_ctx_write_state(), too, because we have been guaranteed that this commit record will be physically located in the iclog before any waiting commit record at a higher sequence number will be granted iclog space. This further cleans up the post commit record write processing in the CIL push code, especially as xlog_state_release_iclog() will now clean up the context when shutdown errors occur. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 47 ++++++++-------------- fs/xfs/xfs_log_cil.c | 94 ++++++++++++++++++++++++------------------- fs/xfs/xfs_log_priv.h | 3 +- 3 files changed, 70 insertions(+), 74 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 6ac5d52f573d..cc65b3a13ad2 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -933,7 +933,7 @@ xlog_write_unmount_record( /* account for space used by record data */ ticket->t_curr_res -= sizeof(ulf); - return xlog_write(log, NULL, &vec, ticket, NULL, XLOG_UNMOUNT_TRANS); + return xlog_write(log, NULL, &vec, ticket, XLOG_UNMOUNT_TRANS); } /* @@ -2380,8 +2380,7 @@ xlog_write_copy_finish( int *data_cnt, int *partial_copy, int *partial_copy_len, - int log_offset, - struct xlog_in_core **commit_iclog) + int log_offset) { int error; @@ -2400,27 +2399,20 @@ xlog_write_copy_finish( *partial_copy = 0; *partial_copy_len = 0; - if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) { - /* no more space in this iclog - push it. */ - spin_lock(&log->l_icloglock); - xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); - *record_cnt = 0; - *data_cnt = 0; - - if (iclog->ic_state == XLOG_STATE_ACTIVE) - xlog_state_switch_iclogs(log, iclog, 0); - else - ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || - xlog_is_shutdown(log)); - if (!commit_iclog) - goto release_iclog; - spin_unlock(&log->l_icloglock); - ASSERT(flags & XLOG_COMMIT_TRANS); - *commit_iclog = iclog; - } + if (iclog->ic_size - log_offset > sizeof(xlog_op_header_t)) + return 0; - return 0; + /* no more space in this iclog - push it. */ + spin_lock(&log->l_icloglock); + xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); + *record_cnt = 0; + *data_cnt = 0; + if (iclog->ic_state == XLOG_STATE_ACTIVE) + xlog_state_switch_iclogs(log, iclog, 0); + else + ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || + xlog_is_shutdown(log)); release_iclog: error = xlog_state_release_iclog(log, iclog, 0); spin_unlock(&log->l_icloglock); @@ -2473,7 +2465,6 @@ xlog_write( struct xfs_cil_ctx *ctx, struct xfs_log_vec *log_vector, struct xlog_ticket *ticket, - struct xlog_in_core **commit_iclog, uint optype) { struct xlog_in_core *iclog = NULL; @@ -2602,8 +2593,7 @@ xlog_write( &record_cnt, &data_cnt, &partial_copy, &partial_copy_len, - log_offset, - commit_iclog); + log_offset); if (error) return error; @@ -2641,12 +2631,7 @@ xlog_write( spin_lock(&log->l_icloglock); xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); - if (commit_iclog) { - ASSERT(optype & XLOG_COMMIT_TRANS); - *commit_iclog = iclog; - } else { - error = xlog_state_release_iclog(log, iclog, 0); - } + error = xlog_state_release_iclog(log, iclog, 0); spin_unlock(&log->l_icloglock); return error; diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 38917c97a582..ae6449fd5cf2 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -646,11 +646,41 @@ xlog_cil_set_ctx_write_state( xfs_lsn_t lsn = be64_to_cpu(iclog->ic_header.h_lsn); ASSERT(!ctx->commit_lsn); - spin_lock(&cil->xc_push_lock); - if (!ctx->start_lsn) + if (!ctx->start_lsn) { + spin_lock(&cil->xc_push_lock); ctx->start_lsn = lsn; - else - ctx->commit_lsn = lsn; + spin_unlock(&cil->xc_push_lock); + return; + } + + /* + * Take a reference to the iclog for the context so that we still hold + * it when xlog_write is done and has released it. This means the + * context controls when the iclog is released for IO. + */ + atomic_inc(&iclog->ic_refcnt); + + /* + * xlog_state_get_iclog_space() guarantees there is enough space in the + * iclog for an entire commit record, so we can attach the context + * callbacks now. This needs to be done before we make the commit_lsn + * visible to waiters so that checkpoints with commit records in the + * same iclog order their IO completion callbacks in the same order that + * the commit records appear in the iclog. + */ + spin_lock(&cil->xc_log->l_icloglock); + list_add_tail(&ctx->iclog_entry, &iclog->ic_callbacks); + spin_unlock(&cil->xc_log->l_icloglock); + + /* + * Now we can record the commit LSN and wake anyone waiting for this + * sequence to have the ordered commit record assigned to a physical + * location in the log. + */ + spin_lock(&cil->xc_push_lock); + ctx->commit_iclog = iclog; + ctx->commit_lsn = lsn; + wake_up_all(&cil->xc_commit_wait); spin_unlock(&cil->xc_push_lock); } @@ -707,8 +737,7 @@ xlog_cil_order_write( */ static int xlog_cil_write_commit_record( - struct xfs_cil_ctx *ctx, - struct xlog_in_core **iclog) + struct xfs_cil_ctx *ctx) { struct xlog *log = ctx->cil->xc_log; struct xfs_log_iovec reg = { @@ -725,8 +754,7 @@ xlog_cil_write_commit_record( if (xlog_is_shutdown(log)) return -EIO; - error = xlog_write(log, ctx, &vec, ctx->ticket, iclog, - XLOG_COMMIT_TRANS); + error = xlog_write(log, ctx, &vec, ctx->ticket, XLOG_COMMIT_TRANS); if (error) xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); return error; @@ -756,7 +784,6 @@ xlog_cil_push_work( struct xfs_log_vec *lv; struct xfs_cil_ctx *ctx; struct xfs_cil_ctx *new_ctx; - struct xlog_in_core *commit_iclog; struct xlog_ticket *tic; int num_iovecs; int error = 0; @@ -945,7 +972,7 @@ xlog_cil_push_work( */ wait_for_completion(&bdev_flush); - error = xlog_write(log, ctx, &lvhdr, tic, NULL, XLOG_START_TRANS); + error = xlog_write(log, ctx, &lvhdr, tic, XLOG_START_TRANS); if (error) goto out_abort_free_ticket; @@ -953,36 +980,12 @@ xlog_cil_push_work( if (error) goto out_abort_free_ticket; - error = xlog_cil_write_commit_record(ctx, &commit_iclog); + error = xlog_cil_write_commit_record(ctx); if (error) goto out_abort_free_ticket; xfs_log_ticket_ungrant(log, tic); - /* - * Once we attach the ctx to the iclog, it is effectively owned by the - * iclog and we can only use it while we still have an active reference - * to the iclog. i.e. once we call xlog_state_release_iclog() we can no - * longer safely reference the ctx. - */ - spin_lock(&log->l_icloglock); - if (xlog_is_shutdown(log)) { - spin_unlock(&log->l_icloglock); - goto out_abort; - } - ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE || - commit_iclog->ic_state == XLOG_STATE_WANT_SYNC); - list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks); - - /* - * now the checkpoint commit is complete and we've attached the - * callbacks to the iclog we can assign the commit LSN to the context - * and wake up anyone who is waiting for the commit to complete. - */ - spin_lock(&cil->xc_push_lock); - wake_up_all(&cil->xc_commit_wait); - spin_unlock(&cil->xc_push_lock); - /* * If the checkpoint spans multiple iclogs, wait for all previous iclogs * to complete before we submit the commit_iclog. We can't use state @@ -995,17 +998,18 @@ xlog_cil_push_work( * iclog header lsn and compare it to the commit lsn to determine if we * need to wait on iclogs or not. */ + spin_lock(&log->l_icloglock); if (ctx->start_lsn != ctx->commit_lsn) { xfs_lsn_t plsn; - plsn = be64_to_cpu(commit_iclog->ic_prev->ic_header.h_lsn); + plsn = be64_to_cpu(ctx->commit_iclog->ic_prev->ic_header.h_lsn); if (plsn && XFS_LSN_CMP(plsn, ctx->commit_lsn) < 0) { /* * Waiting on ic_force_wait orders the completion of * iclogs older than ic_prev. Hence we only need to wait * on the most recent older iclog here. */ - xlog_wait_on_iclog(commit_iclog->ic_prev); + xlog_wait_on_iclog(ctx->commit_iclog->ic_prev); spin_lock(&log->l_icloglock); } @@ -1013,7 +1017,7 @@ xlog_cil_push_work( * We need to issue a pre-flush so that the ordering for this * checkpoint is correctly preserved down to stable storage. */ - commit_iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; + ctx->commit_iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; } /* @@ -1021,8 +1025,8 @@ xlog_cil_push_work( * journal IO vs metadata writeback IO is correctly ordered on stable * storage. */ - commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA; - xlog_state_release_iclog(log, commit_iclog, preflush_tail_lsn); + ctx->commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA; + xlog_state_release_iclog(log, ctx->commit_iclog, preflush_tail_lsn); /* Not safe to reference ctx now! */ @@ -1037,9 +1041,15 @@ xlog_cil_push_work( out_abort_free_ticket: xfs_log_ticket_ungrant(log, tic); -out_abort: ASSERT(xlog_is_shutdown(log)); - xlog_cil_committed(ctx); + if (!ctx->commit_iclog) { + xlog_cil_committed(ctx); + return; + } + spin_lock(&log->l_icloglock); + xlog_state_release_iclog(log, ctx->commit_iclog, 0); + /* Not safe to reference ctx now! */ + spin_unlock(&log->l_icloglock); } /* diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 09156399b46c..e0934e6aaf8a 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -240,6 +240,7 @@ struct xfs_cil_ctx { xfs_csn_t sequence; /* chkpt sequence # */ xfs_lsn_t start_lsn; /* first LSN of chkpt commit */ xfs_lsn_t commit_lsn; /* chkpt commit record lsn */ + struct xlog_in_core *commit_iclog; struct xlog_ticket *ticket; /* chkpt ticket */ int nvecs; /* number of regions */ int space_used; /* aggregate size of regions */ @@ -514,7 +515,7 @@ void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); void xlog_print_trans(struct xfs_trans *); int xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx, struct xfs_log_vec *log_vector, struct xlog_ticket *tic, - struct xlog_in_core **commit_iclog, uint optype); + uint optype); void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); From patchwork Tue Aug 10 05:21:20 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 12427947 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id E2CDCC432BE for ; Tue, 10 Aug 2021 05:21:45 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id C04F260EC0 for ; Tue, 10 Aug 2021 05:21:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237633AbhHJFWE (ORCPT ); Tue, 10 Aug 2021 01:22:04 -0400 Received: from mail110.syd.optusnet.com.au ([211.29.132.97]:47901 "EHLO mail110.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S237638AbhHJFVy (ORCPT ); Tue, 10 Aug 2021 01:21:54 -0400 Received: from dread.disaster.area (pa49-195-182-146.pa.nsw.optusnet.com.au [49.195.182.146]) by mail110.syd.optusnet.com.au (Postfix) with ESMTPS id B00F21047F7 for ; Tue, 10 Aug 2021 15:21:28 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1mDKCY-00GZcR-T6 for linux-xfs@vger.kernel.org; Tue, 10 Aug 2021 15:21:22 +1000 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1mDKCY-000Ah4-LK for linux-xfs@vger.kernel.org; Tue, 10 Aug 2021 15:21:22 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 5/5] xfs: order CIL checkpoint start records Date: Tue, 10 Aug 2021 15:21:20 +1000 Message-Id: <20210810052120.41019-6-david@fromorbit.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20210810052120.41019-1-david@fromorbit.com> References: <20210810052120.41019-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=YKPhNiOx c=1 sm=1 tr=0 a=QpfB3wCSrn/dqEBSktpwZQ==:117 a=QpfB3wCSrn/dqEBSktpwZQ==:17 a=MhDmnRu9jo8A:10 a=20KFwNOVAAAA:8 a=VwQbUJbxAAAA:8 a=0SoGSWBIiDR9d1GlltwA:9 a=AjGcO6oz07-iQ99wixmX:22 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Because log recovery depends on strictly ordered start records as well as strictly ordered commit records. This is a zero day bug in the way XFS writes pipelined transactions to the journal which is exposed by fixing the zero day bug that prevents the CIL from pipelining checkpoints. This re-introduces explicit concurrent commits back into the on-disk journal and hence out of order start records. The XFS journal commit code has never ordered start records and we have relied on strict commit record ordering for correct recovery ordering of concurrently written transactions. Unfortunately, root cause analysis uncovered the fact that log recovery uses the LSN of the start record for transaction commit processing. Hence, whilst the commits are processed in strict order by recovery, the LSNs associated with the commits can be out of order and so recovery may stamp incorrect LSNs into objects and/or misorder intents in the AIL for later processing. This can result in log recovery failures and/or on disk corruption, sometimes silent. Because this is a long standing log recovery issue, we can't just fix log recovery and call it good. This still leaves older kernels susceptible to recovery failures and corruption when replaying a log from a kernel that pipelines checkpoints. There is also the issue that in-memory ordering for AIL pushing and data integrity operations are based on checkpoint start LSNs, and if the start LSN is incorrect in the journal, it is also incorrect in memory. Hence there's really only one choice for fixing this zero-day bug: we need to strictly order checkpoint start records in ascending sequence order in the log, the same way we already strictly order commit records. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 1 + fs/xfs/xfs_log_cil.c | 69 +++++++++++++++++++++++++++++++++++-------- fs/xfs/xfs_log_priv.h | 1 + 3 files changed, 58 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index cc65b3a13ad2..098f5c8ceb29 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3894,6 +3894,7 @@ xlog_force_shutdown( * avoid races. */ spin_lock(&log->l_cilp->xc_push_lock); + wake_up_all(&log->l_cilp->xc_start_wait); wake_up_all(&log->l_cilp->xc_commit_wait); spin_unlock(&log->l_cilp->xc_push_lock); xlog_state_shutdown_callbacks(log); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index ae6449fd5cf2..f6c4e4e8f112 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -595,6 +595,7 @@ xlog_cil_committed( */ if (abort) { spin_lock(&ctx->cil->xc_push_lock); + wake_up_all(&ctx->cil->xc_start_wait); wake_up_all(&ctx->cil->xc_commit_wait); spin_unlock(&ctx->cil->xc_push_lock); } @@ -648,7 +649,14 @@ xlog_cil_set_ctx_write_state( ASSERT(!ctx->commit_lsn); if (!ctx->start_lsn) { spin_lock(&cil->xc_push_lock); + /* + * The LSN we need to pass to the log items on transaction + * commit is the LSN reported by the first log vector write, not + * the commit lsn. If we use the commit record lsn then we can + * move the tail beyond the grant write head. + */ ctx->start_lsn = lsn; + wake_up_all(&cil->xc_start_wait); spin_unlock(&cil->xc_push_lock); return; } @@ -690,10 +698,16 @@ xlog_cil_set_ctx_write_state( * relies on the context LSN being zero until the log write has guaranteed the * LSN that the log write will start at via xlog_state_get_iclog_space(). */ +enum _record_type { + _START_RECORD, + _COMMIT_RECORD, +}; + static int xlog_cil_order_write( struct xfs_cil *cil, - xfs_csn_t sequence) + xfs_csn_t sequence, + enum _record_type record) { struct xfs_cil_ctx *ctx; @@ -716,19 +730,47 @@ xlog_cil_order_write( */ if (ctx->sequence >= sequence) continue; - if (!ctx->commit_lsn) { - /* - * It is still being pushed! Wait for the push to - * complete, then start again from the beginning. - */ - xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock); - goto restart; + + /* Wait until the LSN for the record has been recorded. */ + switch (record) { + case _START_RECORD: + if (!ctx->start_lsn) { + xlog_wait(&cil->xc_start_wait, &cil->xc_push_lock); + goto restart; + } + break; + case _COMMIT_RECORD: + if (!ctx->commit_lsn) { + xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock); + goto restart; + } + break; } } spin_unlock(&cil->xc_push_lock); return 0; } +/* + * Write out the log vector change now attached to the CIL context. This will + * write a start record that needs to be strictly ordered in ascending CIL + * sequence order so that log recovery will always use in-order start LSNs when + * replaying checkpoints. + */ +static int +xlog_cil_write_chain( + struct xfs_cil_ctx *ctx, + struct xfs_log_vec *chain) +{ + struct xlog *log = ctx->cil->xc_log; + int error; + + error = xlog_cil_order_write(ctx->cil, ctx->sequence, _START_RECORD); + if (error) + return error; + return xlog_write(log, ctx, chain, ctx->ticket, XLOG_START_TRANS); +} + /* * Write out the commit record of a checkpoint transaction to close off a * running log write. These commit records are strictly ordered in ascending CIL @@ -754,6 +796,10 @@ xlog_cil_write_commit_record( if (xlog_is_shutdown(log)) return -EIO; + error = xlog_cil_order_write(ctx->cil, ctx->sequence, _COMMIT_RECORD); + if (error) + return error; + error = xlog_write(log, ctx, &vec, ctx->ticket, XLOG_COMMIT_TRANS); if (error) xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); @@ -972,11 +1018,7 @@ xlog_cil_push_work( */ wait_for_completion(&bdev_flush); - error = xlog_write(log, ctx, &lvhdr, tic, XLOG_START_TRANS); - if (error) - goto out_abort_free_ticket; - - error = xlog_cil_order_write(ctx->cil, ctx->sequence); + error = xlog_cil_write_chain(ctx, &lvhdr); if (error) goto out_abort_free_ticket; @@ -1381,6 +1423,7 @@ xlog_cil_init( spin_lock_init(&cil->xc_push_lock); init_waitqueue_head(&cil->xc_push_wait); init_rwsem(&cil->xc_ctx_lock); + init_waitqueue_head(&cil->xc_start_wait); init_waitqueue_head(&cil->xc_commit_wait); INIT_LIST_HEAD(&ctx->committing); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index e0934e6aaf8a..1ed299803904 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -279,6 +279,7 @@ struct xfs_cil { xfs_csn_t xc_push_seq; struct list_head xc_committing; wait_queue_head_t xc_commit_wait; + wait_queue_head_t xc_start_wait; xfs_csn_t xc_current_sequence; struct work_struct xc_push_work; wait_queue_head_t xc_push_wait; /* background push throttle */