From patchwork Wed Jul 14 03:19:50 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 12375875 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 855A6C07E95 for ; Wed, 14 Jul 2021 03:20:19 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 566E0613AC for ; Wed, 14 Jul 2021 03:20:19 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237559AbhGNDXJ (ORCPT ); Tue, 13 Jul 2021 23:23:09 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:41228 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S237729AbhGNDXI (ORCPT ); Tue, 13 Jul 2021 23:23:08 -0400 Received: from dread.disaster.area (pa49-181-34-10.pa.nsw.optusnet.com.au [49.181.34.10]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id D3B1986480D for ; Wed, 14 Jul 2021 13:20:01 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1m3VRJ-006IJ7-4i for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:01 +1000 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1m3VRI-00Ay8t-Sf for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:00 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 1/9] xfs: convert XLOG_FORCED_SHUTDOWN() to xlog_is_shutdown() Date: Wed, 14 Jul 2021 13:19:50 +1000 Message-Id: <20210714031958.2614411-2-david@fromorbit.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20210714031958.2614411-1-david@fromorbit.com> References: <20210714031958.2614411-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=F8MpiZpN c=1 sm=1 tr=0 a=hdaoRb6WoHYrV466vVKEyw==:117 a=hdaoRb6WoHYrV466vVKEyw==:17 a=e_q4qTt1xDgA:10 a=20KFwNOVAAAA:8 a=VwQbUJbxAAAA:8 a=-psOZVuEB3Rg4DWKb8kA:9 a=AjGcO6oz07-iQ99wixmX:22 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Make it less shouty and a static inline before adding more calls through the log code. Also convert internal log code that uses XFS_FORCED_SHUTDOWN(mount) to use xlog_is_shutdown(log) as well. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 32 ++++++++++++++++---------------- fs/xfs/xfs_log_cil.c | 10 +++++----- fs/xfs/xfs_log_priv.h | 7 +++++-- fs/xfs/xfs_log_recover.c | 9 +++------ fs/xfs/xfs_trans.c | 2 +- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 36fa2650b081..3596086d0e4d 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -247,7 +247,7 @@ xlog_grant_head_wait( list_add_tail(&tic->t_queue, &head->waiters); do { - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) goto shutdown; xlog_grant_push_ail(log, need_bytes); @@ -261,7 +261,7 @@ xlog_grant_head_wait( trace_xfs_log_grant_wake(log, tic); spin_lock(&head->lock); - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) goto shutdown; } while (xlog_space_left(log, &head->grant) < need_bytes); @@ -366,7 +366,7 @@ xfs_log_writable( return false; if (xfs_readonly_buftarg(mp->m_log->l_targ)) return false; - if (XFS_FORCED_SHUTDOWN(mp)) + if (xlog_is_shutdown(mp->m_log)) return false; return true; } @@ -383,7 +383,7 @@ xfs_log_regrant( int need_bytes; int error = 0; - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) return -EIO; XFS_STATS_INC(mp, xs_try_logspace); @@ -451,7 +451,7 @@ xfs_log_reserve( ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) return -EIO; XFS_STATS_INC(mp, xs_try_logspace); @@ -787,7 +787,7 @@ xlog_wait_on_iclog( struct xlog *log = iclog->ic_log; trace_xlog_iclog_wait_on(iclog, _RET_IP_); - if (!XLOG_FORCED_SHUTDOWN(log) && + if (!xlog_is_shutdown(log) && iclog->ic_state != XLOG_STATE_ACTIVE && iclog->ic_state != XLOG_STATE_DIRTY) { XFS_STATS_INC(log->l_mp, xs_log_force_sleep); @@ -796,7 +796,7 @@ xlog_wait_on_iclog( spin_unlock(&log->l_icloglock); } - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) return -EIO; return 0; } @@ -915,7 +915,7 @@ xfs_log_unmount_write( xfs_log_force(mp, XFS_LOG_SYNC); - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) return; /* @@ -1024,7 +1024,7 @@ xfs_log_space_wake( struct xlog *log = mp->m_log; int free_bytes; - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) return; if (!list_empty_careful(&log->l_write_head.waiters)) { @@ -1115,7 +1115,7 @@ xfs_log_cover( ASSERT((xlog_cil_empty(mp->m_log) && xlog_iclogs_empty(mp->m_log) && !xfs_ail_min_lsn(mp->m_log->l_ailp)) || - XFS_FORCED_SHUTDOWN(mp)); + xlog_is_shutdown(mp->m_log)); if (!xfs_log_writable(mp)) return 0; @@ -1547,7 +1547,7 @@ xlog_commit_record( }; int error; - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) return -EIO; error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS); @@ -1628,7 +1628,7 @@ xlog_grant_push_ail( xfs_lsn_t threshold_lsn; threshold_lsn = xlog_grant_push_threshold(log, need_bytes); - if (threshold_lsn == NULLCOMMITLSN || XLOG_FORCED_SHUTDOWN(log)) + if (threshold_lsn == NULLCOMMITLSN || xlog_is_shutdown(log)) return; /* @@ -2808,7 +2808,7 @@ xlog_state_do_callback( cycled_icloglock = true; spin_lock(&log->l_icloglock); - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) wake_up_all(&iclog->ic_force_wait); else xlog_state_clean_iclog(log, iclog); @@ -2860,7 +2860,7 @@ xlog_state_done_syncing( * split log writes, on the second, we shut down the file system and * no iclogs should ever be attempted to be written to disk again. */ - if (!XLOG_FORCED_SHUTDOWN(log)) { + if (!xlog_is_shutdown(log)) { ASSERT(iclog->ic_state == XLOG_STATE_SYNCING); iclog->ic_state = XLOG_STATE_DONE_SYNC; } @@ -2908,7 +2908,7 @@ xlog_state_get_iclog_space( restart: spin_lock(&log->l_icloglock); - if (XLOG_FORCED_SHUTDOWN(log)) { + if (xlog_is_shutdown(log)) { spin_unlock(&log->l_icloglock); return -EIO; } @@ -3756,7 +3756,7 @@ xfs_log_force_umount( * No need to get locks for this. */ if (logerror && log->l_iclog->ic_state == XLOG_STATE_IOERROR) { - ASSERT(XLOG_FORCED_SHUTDOWN(log)); + ASSERT(xlog_is_shutdown(log)); return 1; } diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index b128aaa9b870..8fab7ec1ceb1 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -576,7 +576,7 @@ xlog_cil_committed( struct xfs_cil_ctx *ctx) { struct xfs_mount *mp = ctx->cil->xc_log->l_mp; - bool abort = XLOG_FORCED_SHUTDOWN(ctx->cil->xc_log); + bool abort = xlog_is_shutdown(ctx->cil->xc_log); /* * If the I/O failed, we're aborting the commit and already shutdown. @@ -845,7 +845,7 @@ xlog_cil_push_work( * shutdown, but then went back to sleep once already in the * shutdown state. */ - if (XLOG_FORCED_SHUTDOWN(log)) { + if (xlog_is_shutdown(log)) { spin_unlock(&cil->xc_push_lock); goto out_abort_free_ticket; } @@ -954,7 +954,7 @@ xlog_cil_push_work( out_abort_free_ticket: xfs_log_ticket_ungrant(log, tic); out_abort: - ASSERT(XLOG_FORCED_SHUTDOWN(log)); + ASSERT(xlog_is_shutdown(log)); xlog_cil_committed(ctx); } @@ -1107,7 +1107,7 @@ xlog_cil_commit( xlog_cil_insert_items(log, tp); - if (regrant && !XLOG_FORCED_SHUTDOWN(log)) + if (regrant && !xlog_is_shutdown(log)) xfs_log_ticket_regrant(log, tp->t_ticket); else xfs_log_ticket_ungrant(log, tp->t_ticket); @@ -1180,7 +1180,7 @@ xlog_cil_force_seq( * shutdown, but then went back to sleep once already in the * shutdown state. */ - if (XLOG_FORCED_SHUTDOWN(log)) + if (xlog_is_shutdown(log)) goto out_shutdown; if (ctx->sequence > sequence) continue; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 4c41bbfa33b0..80d4e1325e1d 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -454,8 +454,11 @@ struct xlog { #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \ ((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE)) -#define XLOG_FORCED_SHUTDOWN(log) \ - (unlikely((log)->l_flags & XLOG_IO_ERROR)) +static inline bool +xlog_is_shutdown(struct xlog *log) +{ + return (log->l_flags & XLOG_IO_ERROR); +} /* common routines */ extern int diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 1721fce2ec94..37296f87a435 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -146,7 +146,7 @@ xlog_do_io( error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no, BBTOB(nbblks), data, op); - if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) { + if (error && !xlog_is_shutdown(log)) { xfs_alert(log->l_mp, "log recovery %s I/O error at daddr 0x%llx len %d error %d", op == REQ_OP_WRITE ? "write" : "read", @@ -3280,10 +3280,7 @@ xlog_do_recover( if (error) return error; - /* - * If IO errors happened during recovery, bail out. - */ - if (XFS_FORCED_SHUTDOWN(mp)) + if (xlog_is_shutdown(log)) return -EIO; /* @@ -3305,7 +3302,7 @@ xlog_do_recover( xfs_buf_hold(bp); error = _xfs_buf_read(bp, XBF_READ); if (error) { - if (!XFS_FORCED_SHUTDOWN(mp)) { + if (!xlog_is_shutdown(log)) { xfs_buf_ioerror_alert(bp, __this_address); ASSERT(0); } diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 87bffd12c20c..e26ade9fc630 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -908,7 +908,7 @@ __xfs_trans_commit( */ xfs_trans_unreserve_and_mod_dquots(tp); if (tp->t_ticket) { - if (regrant && !XLOG_FORCED_SHUTDOWN(mp->m_log)) + if (regrant && !xlog_is_shutdown(mp->m_log)) xfs_log_ticket_regrant(mp->m_log, tp->t_ticket); else xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); From patchwork Wed Jul 14 03:19:51 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 12375863 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 76878C11F69 for ; Wed, 14 Jul 2021 03:20:06 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 40485613AF for ; Wed, 14 Jul 2021 03:20:06 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237436AbhGNDW4 (ORCPT ); Tue, 13 Jul 2021 23:22:56 -0400 Received: from mail109.syd.optusnet.com.au ([211.29.132.80]:43615 "EHLO mail109.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S237486AbhGNDWz (ORCPT ); Tue, 13 Jul 2021 23:22:55 -0400 Received: from dread.disaster.area (pa49-181-34-10.pa.nsw.optusnet.com.au [49.181.34.10]) by mail109.syd.optusnet.com.au (Postfix) with ESMTPS id 988F26B4B1 for ; Wed, 14 Jul 2021 13:20:01 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1m3VRJ-006IJ9-6F for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:01 +1000 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1m3VRI-00Ay8w-UK for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:00 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 2/9] xfs: XLOG_STATE_IOERROR must die Date: Wed, 14 Jul 2021 13:19:51 +1000 Message-Id: <20210714031958.2614411-3-david@fromorbit.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20210714031958.2614411-1-david@fromorbit.com> References: <20210714031958.2614411-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Tu+Yewfh c=1 sm=1 tr=0 a=hdaoRb6WoHYrV466vVKEyw==:117 a=hdaoRb6WoHYrV466vVKEyw==:17 a=e_q4qTt1xDgA:10 a=20KFwNOVAAAA:8 a=p1n60LeDUcQraZ1cDp0A:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner We don't need an iclog state field to tell us the log has been shut down. We can just check the xlog_is_shutdown() instead. The avoids the need to shutdowns overwriting the current iclog state while being active used by the log code and so having to ensure that every iclog state check handles XLOG_STATE_IOERROR appropriately. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 110 ++++++++++++------------------------------ fs/xfs/xfs_log_cil.c | 2 +- fs/xfs/xfs_log_priv.h | 5 +- fs/xfs/xfs_trace.h | 1 - 4 files changed, 33 insertions(+), 85 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 3596086d0e4d..75cc487da578 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -522,7 +522,7 @@ xlog_state_release_iclog( lockdep_assert_held(&log->l_icloglock); trace_xlog_iclog_release(iclog, _RET_IP_); - if (iclog->ic_state == XLOG_STATE_IOERROR) + if (xlog_is_shutdown(log)) return -EIO; if (atomic_dec_and_test(&iclog->ic_refcnt) && @@ -857,7 +857,7 @@ xlog_unmount_write( error = xlog_write_unmount_record(log, tic); /* * At this point, we're umounting anyway, so there's no point in - * transitioning log state to IOERROR. Just continue... + * transitioning log state to shutdown. Just continue... */ out_err: if (error) @@ -870,7 +870,7 @@ xlog_unmount_write( xlog_state_switch_iclogs(log, iclog, 0); else ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || - iclog->ic_state == XLOG_STATE_IOERROR); + xlog_is_shutdown(log)); /* * Ensure the journal is fully flushed and on stable storage once the * iclog containing the unmount record is written. @@ -1770,7 +1770,7 @@ xlog_write_iclog( * across the log IO to archieve that. */ down(&iclog->ic_sema); - if (unlikely(iclog->ic_state == XLOG_STATE_IOERROR)) { + if (xlog_is_shutdown(log)) { /* * It would seem logical to return EIO here, but we rely on * the log state machine to propagate I/O errors instead of @@ -2299,7 +2299,7 @@ xlog_write_copy_finish( xlog_state_switch_iclogs(log, iclog, 0); else ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || - iclog->ic_state == XLOG_STATE_IOERROR); + xlog_is_shutdown(log)); if (!commit_iclog) goto release_iclog; spin_unlock(&log->l_icloglock); @@ -2715,8 +2715,7 @@ xlog_state_set_callback( static bool xlog_state_iodone_process_iclog( struct xlog *log, - struct xlog_in_core *iclog, - bool *ioerror) + struct xlog_in_core *iclog) { xfs_lsn_t lowest_lsn; xfs_lsn_t header_lsn; @@ -2728,15 +2727,6 @@ xlog_state_iodone_process_iclog( * Skip all iclogs in the ACTIVE & DIRTY states: */ return false; - case XLOG_STATE_IOERROR: - /* - * Between marking a filesystem SHUTDOWN and stopping the log, - * we do flush all iclogs to disk (if there wasn't a log I/O - * error). So, we do want things to go smoothly in case of just - * a SHUTDOWN w/o a LOG_IO_ERROR. - */ - *ioerror = true; - return false; case XLOG_STATE_DONE_SYNC: /* * Now that we have an iclog that is in the DONE_SYNC state, do @@ -2767,7 +2757,6 @@ xlog_state_do_callback( struct xlog_in_core *iclog; struct xlog_in_core *first_iclog; bool cycled_icloglock; - bool ioerror; int flushcnt = 0; int repeats = 0; @@ -2781,23 +2770,20 @@ xlog_state_do_callback( * Keep looping through iclogs until one full pass is made * without running any callbacks. */ - first_iclog = log->l_iclog; - iclog = log->l_iclog; cycled_icloglock = false; - ioerror = false; - repeats++; + first_iclog = log->l_iclog; + iclog = first_iclog; do { LIST_HEAD(cb_list); - if (xlog_state_iodone_process_iclog(log, iclog, - &ioerror)) - break; - - if (iclog->ic_state != XLOG_STATE_CALLBACK && - iclog->ic_state != XLOG_STATE_IOERROR) { - iclog = iclog->ic_next; - continue; + if (!xlog_is_shutdown(log)) { + if (xlog_state_iodone_process_iclog(log, iclog)) + break; + if (iclog->ic_state != XLOG_STATE_CALLBACK) { + iclog = iclog->ic_next; + continue; + } } list_splice_init(&iclog->ic_callbacks, &cb_list); spin_unlock(&log->l_icloglock); @@ -2813,19 +2799,19 @@ xlog_state_do_callback( else xlog_state_clean_iclog(log, iclog); iclog = iclog->ic_next; - } while (first_iclog != iclog); + } while (iclog != first_iclog); - if (repeats > 5000) { + if (++repeats > 5000) { flushcnt += repeats; repeats = 0; xfs_warn(log->l_mp, "%s: possible infinite loop (%d iterations)", __func__, flushcnt); } - } while (!ioerror && cycled_icloglock); + } while (!xlog_is_shutdown(log) && cycled_icloglock); if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE || - log->l_iclog->ic_state == XLOG_STATE_IOERROR) + xlog_is_shutdown(log)) wake_up_all(&log->l_flush_wait); spin_unlock(&log->l_icloglock); @@ -2835,13 +2821,6 @@ xlog_state_do_callback( /* * Finish transitioning this iclog to the dirty state. * - * Make sure that we completely execute this routine only when this is - * the last call to the iclog. There is a good chance that iclog flushes, - * when we reach the end of the physical log, get turned into 2 separate - * calls to bwrite. Hence, one iclog flush could generate two calls to this - * routine. By using the reference count bwritecnt, we guarantee that only - * the second completion goes through. - * * Callbacks could take time, so they are done outside the scope of the * global state machine log lock. */ @@ -3173,10 +3152,10 @@ xfs_log_force( xlog_cil_force(log); spin_lock(&log->l_icloglock); - iclog = log->l_iclog; - if (iclog->ic_state == XLOG_STATE_IOERROR) + if (xlog_is_shutdown(log)) goto out_error; + iclog = log->l_iclog; trace_xlog_iclog_force(iclog, _RET_IP_); if (iclog->ic_state == XLOG_STATE_DIRTY || @@ -3247,10 +3226,10 @@ xlog_force_lsn( struct xlog_in_core *iclog; spin_lock(&log->l_icloglock); - iclog = log->l_iclog; - if (iclog->ic_state == XLOG_STATE_IOERROR) + if (xlog_is_shutdown(log)) goto out_error; + iclog = log->l_iclog; while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { trace_xlog_iclog_force_lsn(iclog, _RET_IP_); iclog = iclog->ic_next; @@ -3685,34 +3664,6 @@ xlog_verify_iclog( } #endif -/* - * Mark all iclogs IOERROR. l_icloglock is held by the caller. - */ -STATIC int -xlog_state_ioerror( - struct xlog *log) -{ - xlog_in_core_t *iclog, *ic; - - iclog = log->l_iclog; - if (iclog->ic_state != XLOG_STATE_IOERROR) { - /* - * Mark all the incore logs IOERROR. - * From now on, no log flushes will result. - */ - ic = iclog; - do { - ic->ic_state = XLOG_STATE_IOERROR; - ic = ic->ic_next; - } while (ic != iclog); - return 0; - } - /* - * Return non-zero, if state transition has already happened. - */ - return 1; -} - /* * This is called from xfs_force_shutdown, when we're forcibly * shutting down the filesystem, typically because of an IO error. @@ -3728,6 +3679,8 @@ xlog_state_ioerror( * Note: for the !logerror case we need to flush the regions held in memory out * to disk first. This needs to be done before the log is marked as shutdown, * otherwise the iclog writes will fail. + * + * Return non-zero if log shutdown transition had already happened. */ int xfs_log_force_umount( @@ -3735,7 +3688,7 @@ xfs_log_force_umount( int logerror) { struct xlog *log; - int retval; + int retval = 0; log = mp->m_log; @@ -3755,10 +3708,8 @@ xfs_log_force_umount( * Somebody could've already done the hard work for us. * No need to get locks for this. */ - if (logerror && log->l_iclog->ic_state == XLOG_STATE_IOERROR) { - ASSERT(xlog_is_shutdown(log)); + if (logerror && xlog_is_shutdown(log)) return 1; - } /* * Flush all the completed transactions to disk before marking the log @@ -3783,8 +3734,10 @@ xfs_log_force_umount( * Mark the log and the iclogs with IO error flags to prevent any * further log IO from being issued or completed. */ - log->l_flags |= XLOG_IO_ERROR; - retval = xlog_state_ioerror(log); + if (!(log->l_flags & XLOG_IO_ERROR)) { + log->l_flags |= XLOG_IO_ERROR; + retval = 1; + } spin_unlock(&log->l_icloglock); /* @@ -3808,7 +3761,6 @@ xfs_log_force_umount( spin_unlock(&log->l_cilp->xc_push_lock); xlog_state_do_callback(log); - /* return non-zero if log IOERROR transition had already happened */ return retval; } diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 8fab7ec1ceb1..2c9d9bcd25cb 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -881,7 +881,7 @@ xlog_cil_push_work( * callbacks and dropped the icloglock. */ spin_lock(&log->l_icloglock); - if (commit_iclog->ic_state == XLOG_STATE_IOERROR) { + if (xlog_is_shutdown(log)) { spin_unlock(&log->l_icloglock); goto out_abort; } diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 80d4e1325e1d..bf05763ba8df 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -47,7 +47,6 @@ enum xlog_iclog_state { XLOG_STATE_DONE_SYNC, /* Done syncing to disk */ XLOG_STATE_CALLBACK, /* Callback functions now */ XLOG_STATE_DIRTY, /* Dirty IC log, not ready for ACTIVE status */ - XLOG_STATE_IOERROR, /* IO error happened in sync'ing log */ }; #define XLOG_STATE_STRINGS \ @@ -56,9 +55,7 @@ enum xlog_iclog_state { { XLOG_STATE_SYNCING, "XLOG_STATE_SYNCING" }, \ { XLOG_STATE_DONE_SYNC, "XLOG_STATE_DONE_SYNC" }, \ { XLOG_STATE_CALLBACK, "XLOG_STATE_CALLBACK" }, \ - { XLOG_STATE_DIRTY, "XLOG_STATE_DIRTY" }, \ - { XLOG_STATE_IOERROR, "XLOG_STATE_IOERROR" } - + { XLOG_STATE_DIRTY, "XLOG_STATE_DIRTY" } /* * Log ticket flags diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index f9d8d605f9b1..46be04167cf3 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3934,7 +3934,6 @@ TRACE_DEFINE_ENUM(XLOG_STATE_SYNCING); TRACE_DEFINE_ENUM(XLOG_STATE_DONE_SYNC); TRACE_DEFINE_ENUM(XLOG_STATE_CALLBACK); TRACE_DEFINE_ENUM(XLOG_STATE_DIRTY); -TRACE_DEFINE_ENUM(XLOG_STATE_IOERROR); DECLARE_EVENT_CLASS(xlog_iclog_class, TP_PROTO(struct xlog_in_core *iclog, unsigned long caller_ip), From patchwork Wed Jul 14 03:19:52 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 12375861 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 01E2CC11F66 for ; Wed, 14 Jul 2021 03:20:06 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id DB0A6613AB for ; Wed, 14 Jul 2021 03:20:05 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237722AbhGNDWz (ORCPT ); Tue, 13 Jul 2021 23:22:55 -0400 Received: from mail109.syd.optusnet.com.au ([211.29.132.80]:43647 "EHLO mail109.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S237503AbhGNDWz (ORCPT ); Tue, 13 Jul 2021 23:22:55 -0400 Received: from dread.disaster.area (pa49-181-34-10.pa.nsw.optusnet.com.au [49.181.34.10]) by mail109.syd.optusnet.com.au (Postfix) with ESMTPS id D0F266BAB5 for ; Wed, 14 Jul 2021 13:20:01 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1m3VRJ-006IJp-7F for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:01 +1000 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1m3VRI-00Ay8z-Vl for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:00 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 3/9] xfs: move recovery needed state updates to xfs_log_mount_finish Date: Wed, 14 Jul 2021 13:19:52 +1000 Message-Id: <20210714031958.2614411-4-david@fromorbit.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20210714031958.2614411-1-david@fromorbit.com> References: <20210714031958.2614411-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Tu+Yewfh c=1 sm=1 tr=0 a=hdaoRb6WoHYrV466vVKEyw==:117 a=hdaoRb6WoHYrV466vVKEyw==:17 a=e_q4qTt1xDgA:10 a=20KFwNOVAAAA:8 a=MU6oyeIEaMzVx_n5NM0A:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner xfs_log_mount_finish() needs to know if recovery is needed or not to make descisions on whether to flush the log and AIL. Move the handling of the NEED_RECOVERY state out to this function rather than needing a temporary variable to store this state over the call to xlog_recover_finish(). Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 24 ++++++++----- fs/xfs/xfs_log_recover.c | 73 +++++++++++++++------------------------- 2 files changed, 43 insertions(+), 54 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 75cc487da578..6760608642cc 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -698,9 +698,9 @@ int xfs_log_mount_finish( struct xfs_mount *mp) { - int error = 0; - bool readonly = (mp->m_flags & XFS_MOUNT_RDONLY); - bool recovered = mp->m_log->l_flags & XLOG_RECOVERY_NEEDED; + struct xlog *log = mp->m_log; + bool readonly = (mp->m_flags & XFS_MOUNT_RDONLY); + int error = 0; if (mp->m_flags & XFS_MOUNT_NORECOVERY) { ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); @@ -731,7 +731,8 @@ xfs_log_mount_finish( * mount failure occurs. */ mp->m_super->s_flags |= SB_ACTIVE; - error = xlog_recover_finish(mp->m_log); + if (log->l_flags & XLOG_RECOVERY_NEEDED) + error = xlog_recover_finish(log); if (!error) xfs_log_work_queue(mp); mp->m_super->s_flags &= ~SB_ACTIVE; @@ -746,17 +747,24 @@ xfs_log_mount_finish( * Don't push in the error case because the AIL may have pending intents * that aren't removed until recovery is cancelled. */ - if (!error && recovered) { - xfs_log_force(mp, XFS_LOG_SYNC); - xfs_ail_push_all_sync(mp->m_ail); + if (log->l_flags & XLOG_RECOVERY_NEEDED) { + if (!error) { + xfs_log_force(mp, XFS_LOG_SYNC); + xfs_ail_push_all_sync(mp->m_ail); + } + xfs_notice(mp, "Ending recovery (logdev: %s)", + mp->m_logname ? mp->m_logname : "internal"); + } else { + xfs_info(mp, "Ending clean mount"); } xfs_buftarg_drain(mp->m_ddev_targp); + log->l_flags &= ~XLOG_RECOVERY_NEEDED; if (readonly) mp->m_flags |= XFS_MOUNT_RDONLY; /* Make sure the log is dead if we're returning failure. */ - ASSERT(!error || (mp->m_log->l_flags & XLOG_IO_ERROR)); + ASSERT(!error || xlog_is_shutdown(log)); return error; } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 37296f87a435..c384ecdd6389 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3416,62 +3416,43 @@ xlog_recover( } /* - * In the first part of recovery we replay inodes and buffers and build - * up the list of extent free items which need to be processed. Here - * we process the extent free items and clean up the on disk unlinked - * inode lists. This is separated from the first part of recovery so - * that the root and real-time bitmap inodes can be read in from disk in - * between the two stages. This is necessary so that we can free space - * in the real-time portion of the file system. + * In the first part of recovery we replay inodes and buffers and build up the + * list of intents which need to be processed. Here we process the intents and + * clean up the on disk unlinked inode lists. This is separated from the first + * part of recovery so that the root and real-time bitmap inodes can be read in + * from disk in between the two stages. This is necessary so that we can free + * space in the real-time portion of the file system. */ int xlog_recover_finish( struct xlog *log) { - /* - * Now we're ready to do the transactions needed for the - * rest of recovery. Start with completing all the extent - * free intent records and then process the unlinked inode - * lists. At this point, we essentially run in normal mode - * except that we're still performing recovery actions - * rather than accepting new requests. - */ - if (log->l_flags & XLOG_RECOVERY_NEEDED) { - int error; - error = xlog_recover_process_intents(log); - if (error) { - /* - * Cancel all the unprocessed intent items now so that - * we don't leave them pinned in the AIL. This can - * cause the AIL to livelock on the pinned item if - * anyone tries to push the AIL (inode reclaim does - * this) before we get around to xfs_log_mount_cancel. - */ - xlog_recover_cancel_intents(log); - xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); - xfs_alert(log->l_mp, "Failed to recover intents"); - return error; - } + int error; + error = xlog_recover_process_intents(log); + if (error) { /* - * Sync the log to get all the intents out of the AIL. - * This isn't absolutely necessary, but it helps in - * case the unlink transactions would have problems - * pushing the intents out of the way. + * Cancel all the unprocessed intent items now so that we don't + * leave them pinned in the AIL. This can cause the AIL to + * livelock on the pinned item if anyone tries to push the AIL + * (inode reclaim does this) before we get around to + * xfs_log_mount_cancel. */ - xfs_log_force(log->l_mp, XFS_LOG_SYNC); - - xlog_recover_process_iunlinks(log); + xlog_recover_cancel_intents(log); + xfs_alert(log->l_mp, "Failed to recover intents"); + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); + return error; + } - xlog_recover_check_summary(log); + /* + * Sync the log to get all the intents out of the AIL. This isn't + * absolutely necessary, but it helps in case the unlink transactions + * would have problems pushing the intents out of the way. + */ + xfs_log_force(log->l_mp, XFS_LOG_SYNC); + xlog_recover_process_iunlinks(log); - xfs_notice(log->l_mp, "Ending recovery (logdev: %s)", - log->l_mp->m_logname ? log->l_mp->m_logname - : "internal"); - log->l_flags &= ~XLOG_RECOVERY_NEEDED; - } else { - xfs_info(log->l_mp, "Ending clean mount"); - } + xlog_recover_check_summary(log); return 0; } From patchwork Wed Jul 14 03:19:53 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 12375865 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 84F07C11F68 for ; Wed, 14 Jul 2021 03:20:06 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 5608C613B0 for ; Wed, 14 Jul 2021 03:20:06 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237503AbhGNDW4 (ORCPT ); Tue, 13 Jul 2021 23:22:56 -0400 Received: from mail105.syd.optusnet.com.au ([211.29.132.249]:56840 "EHLO mail105.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S237536AbhGNDWz (ORCPT ); Tue, 13 Jul 2021 23:22:55 -0400 Received: from dread.disaster.area (pa49-181-34-10.pa.nsw.optusnet.com.au [49.181.34.10]) by mail105.syd.optusnet.com.au (Postfix) with ESMTPS id D2A1C1045433 for ; Wed, 14 Jul 2021 13:20:01 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1m3VRJ-006IJr-8T for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:01 +1000 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1m3VRJ-00Ay92-0X for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:01 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 4/9] xfs: convert log flags to an operational state field Date: Wed, 14 Jul 2021 13:19:53 +1000 Message-Id: <20210714031958.2614411-5-david@fromorbit.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20210714031958.2614411-1-david@fromorbit.com> References: <20210714031958.2614411-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=YKPhNiOx c=1 sm=1 tr=0 a=hdaoRb6WoHYrV466vVKEyw==:117 a=hdaoRb6WoHYrV466vVKEyw==:17 a=e_q4qTt1xDgA:10 a=20KFwNOVAAAA:8 a=VwQbUJbxAAAA:8 a=Klpj2nW2cWQEV4EXp2IA:9 a=AjGcO6oz07-iQ99wixmX:22 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner log->l_flags doesn't actually contain "flags" as such, it contains operational state information that can change at runtime. For the shutdown state, this at least should be an atomic bit because it is read without holding locks in many places and so using atomic bitops for the state field modifications makes sense. This allows us to use things like test_and_set_bit() on state changes (e.g. setting XLOG_TAIL_WARN) to avoid races in setting the state when we aren't holding locks. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 58 ++++++++++++++++------------------------ fs/xfs/xfs_log.h | 1 - fs/xfs/xfs_log_priv.h | 34 +++++++++++++++-------- fs/xfs/xfs_log_recover.c | 6 ++--- fs/xfs/xfs_super.c | 2 +- 5 files changed, 50 insertions(+), 51 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 6760608642cc..54fff0d41fd0 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -299,7 +299,7 @@ xlog_grant_head_check( int free_bytes; int error = 0; - ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); + ASSERT(!xlog_in_recovery(log)); /* * If there are other waiters on the queue then give them a chance at @@ -552,6 +552,7 @@ xfs_log_mount( xfs_daddr_t blk_offset, int num_bblks) { + struct xlog *log; bool fatal = xfs_sb_version_hascrc(&mp->m_sb); int error = 0; int min_logfsbs; @@ -566,11 +567,12 @@ xfs_log_mount( ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); } - mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); - if (IS_ERR(mp->m_log)) { - error = PTR_ERR(mp->m_log); + log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); + if (IS_ERR(log)) { + error = PTR_ERR(log); goto out; } + mp->m_log = log; /* * Validate the given log space and drop a critical message via syslog @@ -635,7 +637,7 @@ xfs_log_mount( xfs_warn(mp, "AIL initialisation failed: error %d", error); goto out_free_log; } - mp->m_log->l_ailp = mp->m_ail; + log->l_ailp = mp->m_ail; /* * skip log recovery on a norecovery mount. pretend it all @@ -647,39 +649,39 @@ xfs_log_mount( if (readonly) mp->m_flags &= ~XFS_MOUNT_RDONLY; - error = xlog_recover(mp->m_log); + error = xlog_recover(log); if (readonly) mp->m_flags |= XFS_MOUNT_RDONLY; if (error) { xfs_warn(mp, "log mount/recovery failed: error %d", error); - xlog_recover_cancel(mp->m_log); + xlog_recover_cancel(log); goto out_destroy_ail; } } - error = xfs_sysfs_init(&mp->m_log->l_kobj, &xfs_log_ktype, &mp->m_kobj, + error = xfs_sysfs_init(&log->l_kobj, &xfs_log_ktype, &mp->m_kobj, "log"); if (error) goto out_destroy_ail; /* Normal transactions can now occur */ - mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; + clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); /* * Now the log has been fully initialised and we know were our * space grant counters are, we can initialise the permanent ticket * needed for delayed logging to work. */ - xlog_cil_init_post_recovery(mp->m_log); + xlog_cil_init_post_recovery(log); return 0; out_destroy_ail: xfs_trans_ail_destroy(mp); out_free_log: - xlog_dealloc_log(mp->m_log); + xlog_dealloc_log(log); out: return error; } @@ -731,7 +733,7 @@ xfs_log_mount_finish( * mount failure occurs. */ mp->m_super->s_flags |= SB_ACTIVE; - if (log->l_flags & XLOG_RECOVERY_NEEDED) + if (xlog_recovery_needed(log)) error = xlog_recover_finish(log); if (!error) xfs_log_work_queue(mp); @@ -747,7 +749,7 @@ xfs_log_mount_finish( * Don't push in the error case because the AIL may have pending intents * that aren't removed until recovery is cancelled. */ - if (log->l_flags & XLOG_RECOVERY_NEEDED) { + if (xlog_recovery_needed(log)) { if (!error) { xfs_log_force(mp, XFS_LOG_SYNC); xfs_ail_push_all_sync(mp->m_ail); @@ -759,7 +761,7 @@ xfs_log_mount_finish( } xfs_buftarg_drain(mp->m_ddev_targp); - log->l_flags &= ~XLOG_RECOVERY_NEEDED; + clear_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate); if (readonly) mp->m_flags |= XFS_MOUNT_RDONLY; @@ -1036,7 +1038,7 @@ xfs_log_space_wake( return; if (!list_empty_careful(&log->l_write_head.waiters)) { - ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); + ASSERT(!xlog_in_recovery(log)); spin_lock(&log->l_write_head.lock); free_bytes = xlog_space_left(log, &log->l_write_head.grant); @@ -1045,7 +1047,7 @@ xfs_log_space_wake( } if (!list_empty_careful(&log->l_reserve_head.waiters)) { - ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); + ASSERT(!xlog_in_recovery(log)); spin_lock(&log->l_reserve_head.lock); free_bytes = xlog_space_left(log, &log->l_reserve_head.grant); @@ -1400,7 +1402,7 @@ xlog_alloc_log( log->l_logBBstart = blk_offset; log->l_logBBsize = num_bblks; log->l_covered_state = XLOG_STATE_COVER_IDLE; - log->l_flags |= XLOG_ACTIVE_RECOVERY; + set_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); INIT_DELAYED_WORK(&log->l_work, xfs_log_worker); log->l_prev_block = -1; @@ -3527,17 +3529,15 @@ xlog_verify_grant_tail( xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); if (tail_cycle != cycle) { if (cycle - 1 != tail_cycle && - !(log->l_flags & XLOG_TAIL_WARN)) { + !test_and_set_bit(XLOG_TAIL_WARN, &log->l_opstate)) { xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, "%s: cycle - 1 != tail_cycle", __func__); - log->l_flags |= XLOG_TAIL_WARN; } if (space > BBTOB(tail_blocks) && - !(log->l_flags & XLOG_TAIL_WARN)) { + !test_and_set_bit(XLOG_TAIL_WARN, &log->l_opstate)) { xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, "%s: space > BBTOB(tail_blocks)", __func__); - log->l_flags |= XLOG_TAIL_WARN; } } } @@ -3704,8 +3704,7 @@ xfs_log_force_umount( * If this happens during log recovery, don't worry about * locking; the log isn't open for business yet. */ - if (!log || - log->l_flags & XLOG_ACTIVE_RECOVERY) { + if (!log || xlog_in_recovery(log)) { mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; if (mp->m_sb_bp) mp->m_sb_bp->b_flags |= XBF_DONE; @@ -3742,10 +3741,8 @@ xfs_log_force_umount( * Mark the log and the iclogs with IO error flags to prevent any * further log IO from being issued or completed. */ - if (!(log->l_flags & XLOG_IO_ERROR)) { - log->l_flags |= XLOG_IO_ERROR; + if (!test_and_set_bit(XLOG_IO_ERROR, &log->l_opstate)) retval = 1; - } spin_unlock(&log->l_icloglock); /* @@ -3832,12 +3829,3 @@ xfs_log_check_lsn( return valid; } - -bool -xfs_log_in_recovery( - struct xfs_mount *mp) -{ - struct xlog *log = mp->m_log; - - return log->l_flags & XLOG_ACTIVE_RECOVERY; -} diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 813b972e9788..4c5c8a7db1d9 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -138,7 +138,6 @@ void xfs_log_work_queue(struct xfs_mount *mp); int xfs_log_quiesce(struct xfs_mount *mp); void xfs_log_clean(struct xfs_mount *mp); bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t); -bool xfs_log_in_recovery(struct xfs_mount *); xfs_lsn_t xlog_grant_push_threshold(struct xlog *log, int need_bytes); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index bf05763ba8df..6c2f88e06ac3 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -11,15 +11,6 @@ struct xlog; struct xlog_ticket; struct xfs_mount; -/* - * Flags for log structure - */ -#define XLOG_ACTIVE_RECOVERY 0x2 /* in the middle of recovery */ -#define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ -#define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being - shutdown */ -#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ - /* * get client id from packed copy. * @@ -397,7 +388,7 @@ struct xlog { struct xfs_buftarg *l_targ; /* buftarg of log */ struct workqueue_struct *l_ioend_workqueue; /* for I/O completions */ struct delayed_work l_work; /* background flush work */ - uint l_flags; + long l_opstate; /* operational state */ uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ struct list_head *l_buf_cancel_table; int l_iclog_hsize; /* size of iclog header */ @@ -451,10 +442,31 @@ struct xlog { #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \ ((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE)) +/* + * Bits for operational state + */ +#define XLOG_ACTIVE_RECOVERY 0 /* in the middle of recovery */ +#define XLOG_RECOVERY_NEEDED 1 /* log was recovered */ +#define XLOG_IO_ERROR 2 /* log hit an I/O error, and being + shutdown */ +#define XLOG_TAIL_WARN 3 /* log tail verify warning issued */ + +static inline bool +xlog_recovery_needed(struct xlog *log) +{ + return test_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate); +} + +static inline bool +xlog_in_recovery(struct xlog *log) +{ + return test_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); +} + static inline bool xlog_is_shutdown(struct xlog *log) { - return (log->l_flags & XLOG_IO_ERROR); + return test_bit(XLOG_IO_ERROR, &log->l_opstate); } /* common routines */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index c384ecdd6389..5db3fb184fbe 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3326,7 +3326,7 @@ xlog_do_recover( xlog_recover_check_summary(log); /* Normal transactions can now occur */ - log->l_flags &= ~XLOG_ACTIVE_RECOVERY; + clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); return 0; } @@ -3410,7 +3410,7 @@ xlog_recover( : "internal"); error = xlog_do_recover(log, head_blk, tail_blk); - log->l_flags |= XLOG_RECOVERY_NEEDED; + set_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate); } return error; } @@ -3460,7 +3460,7 @@ void xlog_recover_cancel( struct xlog *log) { - if (log->l_flags & XLOG_RECOVERY_NEEDED) + if (xlog_recovery_needed(log)) xlog_recover_cancel_intents(log); } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 2c9e26a44546..29bec1f6476e 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -734,7 +734,7 @@ xfs_fs_drop_inode( * that. See the comment for this inode flag. */ if (ip->i_flags & XFS_IRECOVERY) { - ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED); + ASSERT(xlog_recovery_needed(ip->i_mount->m_log)); return 0; } From patchwork Wed Jul 14 03:19:54 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 12375867 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id B5C87C11F6A for ; Wed, 14 Jul 2021 03:20:06 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 9D866613B0 for ; Wed, 14 Jul 2021 03:20:06 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237536AbhGNDW4 (ORCPT ); Tue, 13 Jul 2021 23:22:56 -0400 Received: from mail106.syd.optusnet.com.au ([211.29.132.42]:50895 "EHLO mail106.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S237513AbhGNDWz (ORCPT ); Tue, 13 Jul 2021 23:22:55 -0400 Received: from dread.disaster.area (pa49-181-34-10.pa.nsw.optusnet.com.au [49.181.34.10]) by mail106.syd.optusnet.com.au (Postfix) with ESMTPS id E95CD80C3E1 for ; Wed, 14 Jul 2021 13:20:01 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1m3VRJ-006IJu-9c for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:01 +1000 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1m3VRJ-00Ay95-1k for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:01 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 5/9] xfs: make forced shutdown processing atomic Date: Wed, 14 Jul 2021 13:19:54 +1000 Message-Id: <20210714031958.2614411-6-david@fromorbit.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20210714031958.2614411-1-david@fromorbit.com> References: <20210714031958.2614411-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=YKPhNiOx c=1 sm=1 tr=0 a=hdaoRb6WoHYrV466vVKEyw==:117 a=hdaoRb6WoHYrV466vVKEyw==:17 a=e_q4qTt1xDgA:10 a=20KFwNOVAAAA:8 a=50gOBdlLeu0FhYWuWkEA:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner The running of a forced shutdown is a bit of a mess. It does racy checks for XFS_MOUNT_SHUTDOWN in xfs_do_force_shutdown(), then does more racy checks in xfs_log_force_unmount() before finally setting XFS_MOUNT_SHUTDOWN and XLOG_IO_ERROR under the log->icloglock. Move the checking and setting of XFS_MOUNT_SHUTDOWN into xfs_do_force_shutdown() so we only process a shutdown once and once only. Serialise this with the mp->m_sb_lock spinlock so that the state change is atomic and won't race. Move all the mount specific shutdown state changes from xfs_log_force_unmount() to xfs_do_force_shutdown() so they are done atomically with setting XFS_MOUNT_SHUTDOWN. Then get rid of the racy xlog_is_shutdown() check from xlog_force_shutdown(), and gate the log shutdown on the test_and_set_bit(XLOG_IO_ERROR) test under the icloglock. This means that the log is shutdown once and once only, and code that needs to prevent races with shutdown can do so by holding the icloglock and checking the return value of xlog_is_shutdown(). This results in a predicable shutdown execution process - we set the shutdown flags once and process the shutdown once rather than the current "as many concurrent shutdowns as can race to the flag setting" situation we have now. Also, now that shutdown is atomic, alway emit a stack trace when the error level for the filesystem is high enough. This means that we always get a stack trace when trying to diagnose the cause of shutdowns in the field, rather than just for SHUTDOWN_CORRUPT_INCORE cases. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_fsops.c | 63 ++++++++++++++-------------- fs/xfs/xfs_log.c | 100 ++++++++++++++++++++------------------------- fs/xfs/xfs_log.h | 2 +- 3 files changed, 76 insertions(+), 89 deletions(-) diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 6ed29b158312..7a2f4feacc35 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -511,6 +511,11 @@ xfs_fs_goingdown( * consistent. We don't do an unmount here; just shutdown the shop, make sure * that absolutely nothing persistent happens to this filesystem after this * point. + * + * The shutdown state change is atomic, resulting in the first and only the + * first shutdown call processing the shutdown. This means we only shutdown the + * log once as it requires, and we don't spam the logs when multiple concurrent + * shutdowns race to set the shutdown flags. */ void xfs_do_force_shutdown( @@ -519,48 +524,40 @@ xfs_do_force_shutdown( char *fname, int lnnum) { - bool logerror = flags & SHUTDOWN_LOG_IO_ERROR; + int tag; + const char *why; - /* - * No need to duplicate efforts. - */ - if (XFS_FORCED_SHUTDOWN(mp) && !logerror) - return; - - /* - * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't - * queue up anybody new on the log reservations, and wakes up - * everybody who's sleeping on log reservations to tell them - * the bad news. - */ - if (xfs_log_force_umount(mp, logerror)) - return; - - if (flags & SHUTDOWN_FORCE_UMOUNT) { - xfs_alert(mp, -"User initiated shutdown (0x%x) received. Shutting down filesystem", - flags); + spin_lock(&mp->m_sb_lock); + if (XFS_FORCED_SHUTDOWN(mp)) { + spin_unlock(&mp->m_sb_lock); return; } + mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; + if (mp->m_sb_bp) + mp->m_sb_bp->b_flags |= XBF_DONE; + spin_unlock(&mp->m_sb_lock); + + if (flags & SHUTDOWN_FORCE_UMOUNT) + xfs_alert(mp, "User initiated shutdown received."); - if (flags & SHUTDOWN_CORRUPT_INCORE) { - xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT, -"Corruption of in-memory data (0x%x) detected at %pS (%s:%d). Shutting down filesystem", - flags, __return_address, fname, lnnum); - if (XFS_ERRLEVEL_HIGH <= xfs_error_level) - xfs_stack_trace(); - } else if (logerror) { - xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR, -"Log I/O error (0x%x) detected at %pS (%s:%d). Shutting down filesystem", - flags, __return_address, fname, lnnum); + if (xlog_force_shutdown(mp->m_log, flags)) { + tag = XFS_PTAG_SHUTDOWN_LOGERROR; + why = "Log I/O Error"; + } else if (flags & SHUTDOWN_CORRUPT_INCORE) { + tag = XFS_PTAG_SHUTDOWN_CORRUPT; + why = "Corruption of in-memory data"; } else { - xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, -"I/O error (0x%x) detected at %pS (%s:%d). Shutting down filesystem", - flags, __return_address, fname, lnnum); + tag = XFS_PTAG_SHUTDOWN_IOERROR; + why = "Metadata I/O Error"; } + xfs_alert_tag(mp, tag, +"%s (0x%x) detected at %pS (%s:%d). Shutting down filesystem.", + why, flags, __return_address, fname, lnnum); xfs_alert(mp, "Please unmount the filesystem and rectify the problem(s)"); + if (xfs_error_level >= XFS_ERRLEVEL_HIGH) + xfs_stack_trace(); } /* diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 54fff0d41fd0..f996f51c6cee 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3673,76 +3673,66 @@ xlog_verify_iclog( #endif /* - * This is called from xfs_force_shutdown, when we're forcibly - * shutting down the filesystem, typically because of an IO error. - * Our main objectives here are to make sure that: - * a. if !logerror, flush the logs to disk. Anything modified - * after this is ignored. - * b. the filesystem gets marked 'SHUTDOWN' for all interested - * parties to find out, 'atomically'. - * c. those who're sleeping on log reservations, pinned objects and - * other resources get woken up, and be told the bad news. - * d. nothing new gets queued up after (b) and (c) are done. + * Perform a forced shutdown on the log. This should be called once and once + * only by the high level filesystem shutdown code to shut the log subsystem + * down cleanly. * - * Note: for the !logerror case we need to flush the regions held in memory out - * to disk first. This needs to be done before the log is marked as shutdown, - * otherwise the iclog writes will fail. + * Our main objectives here are to make sure that: + * a. if the shutdown was not due to a log IO error, flush the logs to + * disk. Anything modified after this is ignored. + * b. the log gets atomically marked 'XLOG_IO_ERROR' for all interested + * parties to find out. Nothing new gets queued after this is done. + * c. Tasks sleeping on log reservations, pinned objects and + * other resources get woken up. * - * Return non-zero if log shutdown transition had already happened. + * Return true if the shutdown cause was a log IO error and we actually shut the + * log down. */ -int -xfs_log_force_umount( - struct xfs_mount *mp, - int logerror) +bool +xlog_force_shutdown( + struct xlog *log, + int shutdown_flags) { - struct xlog *log; - int retval = 0; - - log = mp->m_log; + bool log_error = (shutdown_flags & SHUTDOWN_LOG_IO_ERROR); /* - * If this happens during log recovery, don't worry about - * locking; the log isn't open for business yet. + * If this happens during log recovery then we aren't using the runtime + * log mechanisms yet so there's nothing to shut down. */ - if (!log || xlog_in_recovery(log)) { - mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; - if (mp->m_sb_bp) - mp->m_sb_bp->b_flags |= XBF_DONE; - return 0; - } + if (!log || xlog_in_recovery(log)) + return false; - /* - * Somebody could've already done the hard work for us. - * No need to get locks for this. - */ - if (logerror && xlog_is_shutdown(log)) - return 1; + ASSERT(!xlog_is_shutdown(log)); /* * Flush all the completed transactions to disk before marking the log - * being shut down. We need to do it in this order to ensure that - * completed operations are safely on disk before we shut down, and that - * we don't have to issue any buffer IO after the shutdown flags are set - * to guarantee this. + * being shut down. We need to do this first as shutting down the log + * before the force will prevent the log force from flushing the iclogs + * to disk. + * + * Re-entry due to a log IO error shutdown during the log force is + * prevented by the atomicity of higher level shutdown code. */ - if (!logerror) - xfs_log_force(mp, XFS_LOG_SYNC); + if (!log_error) + xfs_log_force(log->l_mp, XFS_LOG_SYNC); /* - * mark the filesystem and the as in a shutdown state and wake - * everybody up to tell them the bad news. + * Atomically set the shutdown state. If the shutdown state is already + * set, there someone else is performing the shutdown and so we are done + * here. This should never happen because we should only ever get called + * once by the first shutdown caller. + * + * Much of the log state machine transitions assume that shutdown state + * cannot change once they hold the log->l_icloglock. Hence we need to + * hold that lock here, even though we use the atomic test_and_set_bit() + * operation to set the shutdown state. */ spin_lock(&log->l_icloglock); - mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; - if (mp->m_sb_bp) - mp->m_sb_bp->b_flags |= XBF_DONE; - - /* - * Mark the log and the iclogs with IO error flags to prevent any - * further log IO from being issued or completed. - */ - if (!test_and_set_bit(XLOG_IO_ERROR, &log->l_opstate)) - retval = 1; + if (test_and_set_bit(XLOG_IO_ERROR, &log->l_opstate)) { + spin_unlock(&log->l_icloglock); + ASSERT(0); + return false; + } spin_unlock(&log->l_icloglock); /* @@ -3766,7 +3756,7 @@ xfs_log_force_umount( spin_unlock(&log->l_cilp->xc_push_lock); xlog_state_do_callback(log); - return retval; + return log_error; } STATIC int diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 4c5c8a7db1d9..3f680f0c9744 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -125,7 +125,6 @@ int xfs_log_reserve(struct xfs_mount *mp, bool permanent); int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic); void xfs_log_unmount(struct xfs_mount *mp); -int xfs_log_force_umount(struct xfs_mount *mp, int logerror); bool xfs_log_writable(struct xfs_mount *mp); struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); @@ -140,5 +139,6 @@ void xfs_log_clean(struct xfs_mount *mp); bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t); xfs_lsn_t xlog_grant_push_threshold(struct xlog *log, int need_bytes); +bool xlog_force_shutdown(struct xlog *log, int shutdown_flags); #endif /* __XFS_LOG_H__ */ From patchwork Wed Jul 14 03:19:55 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 12375877 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id DC476C11F66 for ; Wed, 14 Jul 2021 03:20:19 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id BD9AF60720 for ; Wed, 14 Jul 2021 03:20:19 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237729AbhGNDXJ (ORCPT ); Tue, 13 Jul 2021 23:23:09 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:41252 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S237718AbhGNDXI (ORCPT ); Tue, 13 Jul 2021 23:23:08 -0400 Received: from dread.disaster.area (pa49-181-34-10.pa.nsw.optusnet.com.au [49.181.34.10]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id E7F9D86481A for ; Wed, 14 Jul 2021 13:20:01 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1m3VRJ-006IJx-Am for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:01 +1000 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1m3VRJ-00Ay98-2p for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:01 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 6/9] xfs: rework xlog_state_do_callback() Date: Wed, 14 Jul 2021 13:19:55 +1000 Message-Id: <20210714031958.2614411-7-david@fromorbit.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20210714031958.2614411-1-david@fromorbit.com> References: <20210714031958.2614411-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=F8MpiZpN c=1 sm=1 tr=0 a=hdaoRb6WoHYrV466vVKEyw==:117 a=hdaoRb6WoHYrV466vVKEyw==:17 a=e_q4qTt1xDgA:10 a=20KFwNOVAAAA:8 a=VwQbUJbxAAAA:8 a=p1iSVuLfdZg6vMKyjxMA:9 a=AjGcO6oz07-iQ99wixmX:22 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Clean it up a bit by factoring and rearranging some of the code. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 96 ++++++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 43 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f996f51c6cee..302c1ce27974 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2760,56 +2760,66 @@ xlog_state_iodone_process_iclog( } } +/* + * Loop over all the iclogs, running attached callbacks on them. Return true if + * we ran any callbacks, indicating that we dropped the icloglock. + */ +static bool +xlog_state_do_iclog_callbacks( + struct xlog *log) + __releases(&log->l_icloglock) + __acquires(&log->l_icloglock) +{ + struct xlog_in_core *first_iclog = log->l_iclog; + struct xlog_in_core *iclog = first_iclog; + bool ran_callback = false; + + do { + LIST_HEAD(cb_list); + + if (!xlog_is_shutdown(log)) { + if (xlog_state_iodone_process_iclog(log, iclog)) + break; + if (iclog->ic_state != XLOG_STATE_CALLBACK) { + iclog = iclog->ic_next; + continue; + } + } + list_splice_init(&iclog->ic_callbacks, &cb_list); + spin_unlock(&log->l_icloglock); + + trace_xlog_iclog_callbacks_start(iclog, _RET_IP_); + xlog_cil_process_committed(&cb_list); + trace_xlog_iclog_callbacks_done(iclog, _RET_IP_); + ran_callback = true; + + spin_lock(&log->l_icloglock); + if (xlog_is_shutdown(log)) + wake_up_all(&iclog->ic_force_wait); + else + xlog_state_clean_iclog(log, iclog); + iclog = iclog->ic_next; + } while (iclog != first_iclog); + + return ran_callback; +} + + +/* + * Loop running iclog completion callbacks until there are no more iclogs in a + * state that can run callbacks. + */ STATIC void xlog_state_do_callback( struct xlog *log) { - struct xlog_in_core *iclog; - struct xlog_in_core *first_iclog; - bool cycled_icloglock; int flushcnt = 0; int repeats = 0; spin_lock(&log->l_icloglock); - do { - /* - * Scan all iclogs starting with the one pointed to by the - * log. Reset this starting point each time the log is - * unlocked (during callbacks). - * - * Keep looping through iclogs until one full pass is made - * without running any callbacks. - */ - cycled_icloglock = false; - first_iclog = log->l_iclog; - iclog = first_iclog; - - do { - LIST_HEAD(cb_list); - - if (!xlog_is_shutdown(log)) { - if (xlog_state_iodone_process_iclog(log, iclog)) - break; - if (iclog->ic_state != XLOG_STATE_CALLBACK) { - iclog = iclog->ic_next; - continue; - } - } - list_splice_init(&iclog->ic_callbacks, &cb_list); - spin_unlock(&log->l_icloglock); - - trace_xlog_iclog_callbacks_start(iclog, _RET_IP_); - xlog_cil_process_committed(&cb_list); - trace_xlog_iclog_callbacks_done(iclog, _RET_IP_); - cycled_icloglock = true; - - spin_lock(&log->l_icloglock); - if (xlog_is_shutdown(log)) - wake_up_all(&iclog->ic_force_wait); - else - xlog_state_clean_iclog(log, iclog); - iclog = iclog->ic_next; - } while (iclog != first_iclog); + while (xlog_state_do_iclog_callbacks(log)) { + if (xlog_is_shutdown(log)) + break; if (++repeats > 5000) { flushcnt += repeats; @@ -2818,7 +2828,7 @@ xlog_state_do_callback( "%s: possible infinite loop (%d iterations)", __func__, flushcnt); } - } while (!xlog_is_shutdown(log) && cycled_icloglock); + } if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE || xlog_is_shutdown(log)) From patchwork Wed Jul 14 03:19:56 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 12375869 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0152CC11F6B for ; Wed, 14 Jul 2021 03:20:07 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id E4179613AF for ; Wed, 14 Jul 2021 03:20:06 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237513AbhGNDW4 (ORCPT ); Tue, 13 Jul 2021 23:22:56 -0400 Received: from mail105.syd.optusnet.com.au ([211.29.132.249]:56874 "EHLO mail105.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S237559AbhGNDWz (ORCPT ); Tue, 13 Jul 2021 23:22:55 -0400 Received: from dread.disaster.area (pa49-181-34-10.pa.nsw.optusnet.com.au [49.181.34.10]) by mail105.syd.optusnet.com.au (Postfix) with ESMTPS id E76C3104537C for ; Wed, 14 Jul 2021 13:20:01 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1m3VRJ-006IK0-Bx for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:01 +1000 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1m3VRJ-00Ay9B-47 for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:01 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 7/9] xfs: separate out log shutdown callback processing Date: Wed, 14 Jul 2021 13:19:56 +1000 Message-Id: <20210714031958.2614411-8-david@fromorbit.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20210714031958.2614411-1-david@fromorbit.com> References: <20210714031958.2614411-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=Tu+Yewfh c=1 sm=1 tr=0 a=hdaoRb6WoHYrV466vVKEyw==:117 a=hdaoRb6WoHYrV466vVKEyw==:17 a=e_q4qTt1xDgA:10 a=20KFwNOVAAAA:8 a=eKOPN3Rt75fyAB7gLQ4A:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner The iclog callback processing done during a forced log shutdown has different logic to normal runtime IO completion callback processing. Separate out eh shutdown callbacks into their own function and call that from the shutdown code instead. We don't need this shutdown specific logic in the normal runtime completion code - we'll always run the shutdown version on shutdown, and it will do what shutdown needs regardless of whether there are racing IO completion callbacks scheduled or in progress. Hence we can also simplify the normal IO completion callpath and only abort if shutdown occurred while we actively were processing callbacks. Further, separating out the IO completion logic from the shutdown logic avoids callback race conditions from being triggered by log IO completion after a shutdown. IO completion will now only run callbacks on iclogs that are in the correct state for a callback to be run, avoiding the possibility of running callbacks on a referenced iclog that hasn't yet been submitted for IO. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 53 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 302c1ce27974..4d72d9efed7c 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -487,6 +487,32 @@ xfs_log_reserve( return error; } +/* + * Run all the pending iclog callbacks and wake log force waiters and iclog + * space waiters so they can process the newly set shutdown state. We really + * don't care what order we process callbacks here because the log is shut down + * and so state cannot change on disk anymore. + */ +static void +xlog_state_shutdown_callbacks( + struct xlog *log) +{ + struct xlog_in_core *iclog; + LIST_HEAD(cb_list); + + spin_lock(&log->l_icloglock); + iclog = log->l_iclog; + do { + list_splice_init(&iclog->ic_callbacks, &cb_list); + wake_up_all(&iclog->ic_force_wait); + } while ((iclog = iclog->ic_next) != log->l_iclog); + + wake_up_all(&log->l_flush_wait); + spin_unlock(&log->l_icloglock); + + xlog_cil_process_committed(&cb_list); +} + static bool __xlog_state_release_iclog( struct xlog *log, @@ -2762,7 +2788,10 @@ xlog_state_iodone_process_iclog( /* * Loop over all the iclogs, running attached callbacks on them. Return true if - * we ran any callbacks, indicating that we dropped the icloglock. + * we ran any callbacks, indicating that we dropped the icloglock. We don't need + * to handle transient shutdown state here at all because + * xlog_state_shutdown_callbacks() will be run to do the necessary shutdown + * cleanup of the callbacks. */ static bool xlog_state_do_iclog_callbacks( @@ -2777,13 +2806,11 @@ xlog_state_do_iclog_callbacks( do { LIST_HEAD(cb_list); - if (!xlog_is_shutdown(log)) { - if (xlog_state_iodone_process_iclog(log, iclog)) - break; - if (iclog->ic_state != XLOG_STATE_CALLBACK) { - iclog = iclog->ic_next; - continue; - } + if (xlog_state_iodone_process_iclog(log, iclog)) + break; + if (iclog->ic_state != XLOG_STATE_CALLBACK) { + iclog = iclog->ic_next; + continue; } list_splice_init(&iclog->ic_callbacks, &cb_list); spin_unlock(&log->l_icloglock); @@ -2794,10 +2821,7 @@ xlog_state_do_iclog_callbacks( ran_callback = true; spin_lock(&log->l_icloglock); - if (xlog_is_shutdown(log)) - wake_up_all(&iclog->ic_force_wait); - else - xlog_state_clean_iclog(log, iclog); + xlog_state_clean_iclog(log, iclog); iclog = iclog->ic_next; } while (iclog != first_iclog); @@ -2830,8 +2854,7 @@ xlog_state_do_callback( } } - if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE || - xlog_is_shutdown(log)) + if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE) wake_up_all(&log->l_flush_wait); spin_unlock(&log->l_icloglock); @@ -3764,7 +3787,7 @@ xlog_force_shutdown( spin_lock(&log->l_cilp->xc_push_lock); wake_up_all(&log->l_cilp->xc_commit_wait); spin_unlock(&log->l_cilp->xc_push_lock); - xlog_state_do_callback(log); + xlog_state_shutdown_callbacks(log); return log_error; } From patchwork Wed Jul 14 03:19:57 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 12375871 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 1442AC11F66 for ; Wed, 14 Jul 2021 03:20:09 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id F0F7C613AC for ; Wed, 14 Jul 2021 03:20:08 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237728AbhGNDW7 (ORCPT ); Tue, 13 Jul 2021 23:22:59 -0400 Received: from mail110.syd.optusnet.com.au ([211.29.132.97]:56546 "EHLO mail110.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S237559AbhGNDW6 (ORCPT ); Tue, 13 Jul 2021 23:22:58 -0400 Received: from dread.disaster.area (pa49-181-34-10.pa.nsw.optusnet.com.au [49.181.34.10]) by mail110.syd.optusnet.com.au (Postfix) with ESMTPS id E9C34105F88 for ; Wed, 14 Jul 2021 13:20:01 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1m3VRJ-006IK3-Cu for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:01 +1000 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1m3VRJ-00Ay9E-5B for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:01 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 8/9] xfs: don't run shutdown callbacks on active iclogs Date: Wed, 14 Jul 2021 13:19:57 +1000 Message-Id: <20210714031958.2614411-9-david@fromorbit.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20210714031958.2614411-1-david@fromorbit.com> References: <20210714031958.2614411-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=YKPhNiOx c=1 sm=1 tr=0 a=hdaoRb6WoHYrV466vVKEyw==:117 a=hdaoRb6WoHYrV466vVKEyw==:17 a=e_q4qTt1xDgA:10 a=20KFwNOVAAAA:8 a=TwccxBiVExJpV-d7L8IA:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner When the log is shutdown, it currently walks all the iclogs and runs callbacks that are attached to the iclogs, regardless of whether the iclog is queued for IO completion or not. This creates a problem for contexts attaching callbacks to iclogs in that a racing shutdown can run the callbacks even before the attaching context has finished processing the iclog and releasing it for IO submission. If the callback processing of the iclog frees the structure that is attached to the iclog, then this leads to an UAF scenario that can only be protected against by holding the icloglock from the point callbacks are attached through to the release of the iclog. While we currently do this, it is not practical or sustainable. Hence we need to make shutdown processing the responsibility of the context that holds active references to the iclog. We know that the contexts attaching callbacks to the iclog must have active references to the iclog, and that means they must be in either ACTIVE or WANT_SYNC states. xlog_state_do_callback() will skip over iclogs in these states -except- when the log is shut down. xlog_state_do_callback() checks the state of the iclogs while holding the icloglock, therefore the reference count/state change that occurs in xlog_state_release_iclog() after the callbacks are atomic w.r.t. shutdown processing. We can't push the responsibility of callback cleanup onto the CIL context because we can have ACTIVE iclogs that have callbacks attached that have already been released. Hence we really need to internalise the cleanup of callbacks into xlog_state_release_iclog() processing. Indeed, we already have that internalisation via: xlog_state_release_iclog drop last reference ->SYNCING xlog_sync xlog_write_iclog if (log_is_shutdown) xlog_state_done_syncing() xlog_state_do_callback() The problem is that xlog_state_release_iclog() aborts before doing anything if the log is already shut down. It assumes that the callbacks have already been cleaned up, and it doesn't need to do any cleanup. Hence the fix is to remove the xlog_is_shutdown() check from xlog_state_release_iclog() so that reference counts are correctly released from the iclogs, and when the reference count is zero we always transition to SYNCING if the log is shut down. Hence we'll always enter the xlog_sync() path in a shutdown and eventually end up erroring out the iclog IO and running xlog_state_do_callback() to process the callbacks attached to the iclog. This allows us to stop processing referenced ACTIVE/WANT_SYNC iclogs directly in the shutdown code, and in doing so gets rid of the UAF vector that currently exists. This then decouples the adding of callbacks to the iclogs from xlog_state_release_iclog() as we guarantee that xlog_state_release_iclog() will process the callbacks if the log has been shut down before xlog_state_release_iclog() has been called. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 43 +++++++++++++++++++++++++++++++++++++------ fs/xfs/xfs_log_cil.c | 15 +++++++-------- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 4d72d9efed7c..01c20b42b2fc 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -41,6 +41,8 @@ xlog_dealloc_log( /* local state machine functions */ STATIC void xlog_state_done_syncing( struct xlog_in_core *iclog); +STATIC void xlog_state_do_callback( + struct xlog *log); STATIC int xlog_state_get_iclog_space( struct xlog *log, @@ -492,6 +494,11 @@ xfs_log_reserve( * space waiters so they can process the newly set shutdown state. We really * don't care what order we process callbacks here because the log is shut down * and so state cannot change on disk anymore. + * + * We avoid processing actively referenced iclogs so that we don't run callbacks + * while the iclog owner might still be preparing the iclog for IO submssion. + * These will be caught by xlog_state_iclog_release() and call this function + * again to process any callbacks that may have been added to that iclog. */ static void xlog_state_shutdown_callbacks( @@ -503,7 +510,12 @@ xlog_state_shutdown_callbacks( spin_lock(&log->l_icloglock); iclog = log->l_iclog; do { + if (atomic_read(&iclog->ic_refcnt)) { + /* Reference holder will re-run iclog callbacks. */ + continue; + } list_splice_init(&iclog->ic_callbacks, &cb_list); + wake_up_all(&iclog->ic_write_wait); wake_up_all(&iclog->ic_force_wait); } while ((iclog = iclog->ic_next) != log->l_iclog); @@ -514,7 +526,7 @@ xlog_state_shutdown_callbacks( } static bool -__xlog_state_release_iclog( +xlog_state_want_sync( struct xlog *log, struct xlog_in_core *iclog) { @@ -537,27 +549,46 @@ __xlog_state_release_iclog( } /* - * Flush iclog to disk if this is the last reference to the given iclog and the - * it is in the WANT_SYNC state. + * Release the active reference to the iclog. If this is the last reference to + * the iclog being dropped, check if the caller wants to be synced to disk and + * initiate IO submission. If the log has been shut down, then we need to run + * callback processing on this iclog as shutdown callback processing skips + * actively referenced iclogs. */ int xlog_state_release_iclog( struct xlog *log, struct xlog_in_core *iclog) + __releases(&log->l_icloglock) + __acquires(&log->l_icloglock) { lockdep_assert_held(&log->l_icloglock); trace_xlog_iclog_release(iclog, _RET_IP_); - if (xlog_is_shutdown(log)) + if (!atomic_dec_and_test(&iclog->ic_refcnt)) + goto out_check_shutdown; + + if (xlog_is_shutdown(log)) { + /* + * No more references to this iclog, so process the pending + * iclog callbacks that were waiting on the release of this + * iclog. + */ + spin_unlock(&log->l_icloglock); + xlog_state_shutdown_callbacks(log); + spin_lock(&log->l_icloglock); return -EIO; + } - if (atomic_dec_and_test(&iclog->ic_refcnt) && - __xlog_state_release_iclog(log, iclog)) { + if (xlog_state_want_sync(log, iclog)) { spin_unlock(&log->l_icloglock); xlog_sync(log, iclog); spin_lock(&log->l_icloglock); } +out_check_shutdown: + if (xlog_is_shutdown(log)) + return -EIO; return 0; } diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 2c9d9bcd25cb..62967449fe8c 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -874,11 +874,10 @@ xlog_cil_push_work( xfs_log_ticket_ungrant(log, tic); /* - * Once we attach the ctx to the iclog, a shutdown can process the - * iclog, run the callbacks and free the ctx. The only thing preventing - * this potential UAF situation here is that we are holding the - * icloglock. Hence we cannot access the ctx once we have attached the - * callbacks and dropped the icloglock. + * Once we attach the ctx to the iclog, it is effectively owned by the + * iclog and we can only use it while we still have an active reference + * to the iclog. i.e. once we call xlog_state_release_iclog() we can no + * longer safely reference the ctx. */ spin_lock(&log->l_icloglock); if (xlog_is_shutdown(log)) { @@ -910,9 +909,6 @@ xlog_cil_push_work( * wakeup until this commit_iclog is written to disk. Hence we use the * iclog header lsn and compare it to the commit lsn to determine if we * need to wait on iclogs or not. - * - * NOTE: It is not safe to reference the ctx after this check as we drop - * the icloglock if we have to wait for completion of other iclogs. */ if (ctx->start_lsn != commit_lsn) { xfs_lsn_t plsn; @@ -942,6 +938,9 @@ xlog_cil_push_work( */ commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA; xlog_state_release_iclog(log, commit_iclog); + + /* Not safe to reference ctx now! */ + spin_unlock(&log->l_icloglock); return; From patchwork Wed Jul 14 03:19:58 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 12375879 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id B9130C07E95 for ; Wed, 14 Jul 2021 03:20:21 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 9DD3C613AC for ; Wed, 14 Jul 2021 03:20:21 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S237718AbhGNDXL (ORCPT ); Tue, 13 Jul 2021 23:23:11 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:44288 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S237733AbhGNDXL (ORCPT ); Tue, 13 Jul 2021 23:23:11 -0400 Received: from dread.disaster.area (pa49-181-34-10.pa.nsw.optusnet.com.au [49.181.34.10]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 11C27864827 for ; Wed, 14 Jul 2021 13:20:02 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92.3) (envelope-from ) id 1m3VRJ-006IK7-EH for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:01 +1000 Received: from dave by discord.disaster.area with local (Exim 4.94) (envelope-from ) id 1m3VRJ-00Ay9H-6D for linux-xfs@vger.kernel.org; Wed, 14 Jul 2021 13:20:01 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 9/9] xfs: log head and tail aren't reliable during shutdown Date: Wed, 14 Jul 2021 13:19:58 +1000 Message-Id: <20210714031958.2614411-10-david@fromorbit.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20210714031958.2614411-1-david@fromorbit.com> References: <20210714031958.2614411-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.3 cv=F8MpiZpN c=1 sm=1 tr=0 a=hdaoRb6WoHYrV466vVKEyw==:117 a=hdaoRb6WoHYrV466vVKEyw==:17 a=e_q4qTt1xDgA:10 a=20KFwNOVAAAA:8 a=pixFsiyS0xW5W-CbQLwA:9 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner I'm seeing assert failures from xlog_space_left() after a shutdown has begun that look like: XFS (dm-0): log I/O error -5 XFS (dm-0): xfs_do_force_shutdown(0x2) called from line 1338 of file fs/xfs/xfs_log.c. Return address = xlog_ioend_work+0x64/0xc0 XFS (dm-0): Log I/O Error Detected. XFS (dm-0): Shutting down filesystem. Please unmount the filesystem and rectify the problem(s) XFS (dm-0): xlog_space_left: head behind tail XFS (dm-0): tail_cycle = 6, tail_bytes = 2706944 XFS (dm-0): GH cycle = 6, GH bytes = 1633867 XFS: Assertion failed: 0, file: fs/xfs/xfs_log.c, line: 1310 ------------[ cut here ]------------ Call Trace: xlog_space_left+0xc3/0x110 xlog_grant_push_threshold+0x3f/0xf0 xlog_grant_push_ail+0x12/0x40 xfs_log_reserve+0xd2/0x270 ? __might_sleep+0x4b/0x80 xfs_trans_reserve+0x18b/0x260 ..... There are two things here. Firstly, after a shutdown, the log head and tail can be out of whack as things abort and release (or don't release) resources, so checking them for sanity doesn't make much sense. Secondly, xfs_log_reserve() can race with shutdown and so it can still fail like this even though it has already checked for a log shutdown before calling xlog_grant_push_ail(). So, before ASSERT failing in xlog_space_left(), make sure we haven't already shut down.... Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 51 +++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 01c20b42b2fc..6617cdccaf00 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1272,16 +1272,18 @@ xlog_assign_tail_lsn( * wrap the tail, we should blow up. Rather than catch this case here, * we depend on other ASSERTions in other parts of the code. XXXmiken * - * This code also handles the case where the reservation head is behind - * the tail. The details of this case are described below, but the end - * result is that we return the size of the log as the amount of space left. + * If reservation head is behind the tail, we have a problem. Warn about it, + * but then treat it as if the log is empty. + * + * If the log is shut down, the head and tail may be invalid or out of whack, so + * shortcut invalidity asserts in this case so that we don't trigger them + * falsely. */ STATIC int xlog_space_left( struct xlog *log, atomic64_t *head) { - int free_bytes; int tail_bytes; int tail_cycle; int head_cycle; @@ -1291,29 +1293,30 @@ xlog_space_left( xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes); tail_bytes = BBTOB(tail_bytes); if (tail_cycle == head_cycle && head_bytes >= tail_bytes) - free_bytes = log->l_logsize - (head_bytes - tail_bytes); - else if (tail_cycle + 1 < head_cycle) + return log->l_logsize - (head_bytes - tail_bytes); + if (tail_cycle + 1 < head_cycle) return 0; - else if (tail_cycle < head_cycle) { + + /* Ignore potential inconsistency when shutdown. */ + if (xlog_is_shutdown(log)) + return log->l_logsize; + + if (tail_cycle < head_cycle) { ASSERT(tail_cycle == (head_cycle - 1)); - free_bytes = tail_bytes - head_bytes; - } else { - /* - * The reservation head is behind the tail. - * In this case we just want to return the size of the - * log as the amount of space left. - */ - xfs_alert(log->l_mp, "xlog_space_left: head behind tail"); - xfs_alert(log->l_mp, - " tail_cycle = %d, tail_bytes = %d", - tail_cycle, tail_bytes); - xfs_alert(log->l_mp, - " GH cycle = %d, GH bytes = %d", - head_cycle, head_bytes); - ASSERT(0); - free_bytes = log->l_logsize; + return tail_bytes - head_bytes; } - return free_bytes; + + /* + * The reservation head is behind the tail. In this case we just want to + * return the size of the log as the amount of space left. + */ + xfs_alert(log->l_mp, "xlog_space_left: head behind tail"); + xfs_alert(log->l_mp, " tail_cycle = %d, tail_bytes = %d", + tail_cycle, tail_bytes); + xfs_alert(log->l_mp, " GH cycle = %d, GH bytes = %d", + head_cycle, head_bytes); + ASSERT(0); + return log->l_logsize; }