From patchwork Sun Dec 31 22:00:59 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507776 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CCC50C127 for ; Sun, 31 Dec 2023 22:01:00 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="kqJ0m8o6" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 65AC7C433C7; Sun, 31 Dec 2023 22:01:00 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704060060; bh=HzvgwWrjyOecBSnPXSeGyCcn8JxFlNGUQaAmYh+kc+o=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=kqJ0m8o60JSGw01Q54/Fr2H9BVyIIkDv4ZC4fokK2wCXEOc0/VwG1oFpsCEDicI0x yHkYy82UebrkIMlWxvoKB6BiGQLaJQFxp53a8SCH3vEhHU46qu3Uzqj0nQqAY0Zm4G PUN90twJV0ZU9c9pOv2tEHLZy9jt1XZuoextpUhV1gxtZhRa0TK/C9yBft77mN5wrs 5UZ/kiYhh2KRwmUj0RSJbpoiPF2XhMa4mGtb5ADQkHaVKrifBa07Z9nGurFDmirelh Mt9GZYK/x9OqJE5neecS0aokFn8/B9C5w7iycdlsq7gqhnw8T16nEiZzHxCiz9UdPA 8NrF9FysvhIlg== Date: Sun, 31 Dec 2023 14:00:59 -0800 Subject: [PATCH 1/5] xfs: track deferred ops statistics From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170404854737.1769671.8541528862677264303.stgit@frogsfrogsfrogs> In-Reply-To: <170404854709.1769671.12231107418026207335.stgit@frogsfrogsfrogs> References: <170404854709.1769671.12231107418026207335.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Track some basic statistics on how hard we're pushing the defer ops. Signed-off-by: Darrick J. 
Wong --- fs/xfs/libxfs/xfs_defer.c | 14 ++++++++++++++ fs/xfs/xfs_trace.h | 19 +++++++++++++++++++ fs/xfs/xfs_trans.c | 3 +++ fs/xfs/xfs_trans.h | 7 +++++++ 4 files changed, 43 insertions(+) diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index cd28b96b49ea9..bf83508edf822 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -617,6 +617,8 @@ xfs_defer_finish_one( /* Done with the dfp, free it. */ list_del(&dfp->dfp_list); kmem_cache_free(xfs_defer_pending_cache, dfp); + tp->t_dfops_nr--; + tp->t_dfops_finished++; out: if (ops->finish_cleanup) ops->finish_cleanup(tp, state, error); @@ -679,6 +681,9 @@ xfs_defer_finish_noroll( list_splice_init(&(*tp)->t_dfops, &dop_pending); + (*tp)->t_dfops_nr_max = max((*tp)->t_dfops_nr, + (*tp)->t_dfops_nr_max); + if (has_intents < 0) { error = has_intents; goto out_shutdown; @@ -720,6 +725,7 @@ xfs_defer_finish_noroll( xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE); trace_xfs_defer_finish_error(*tp, error); xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending); + (*tp)->t_dfops_nr = 0; xfs_defer_cancel(*tp); return error; } @@ -767,6 +773,7 @@ xfs_defer_cancel( trace_xfs_defer_cancel(tp, _RET_IP_); xfs_defer_trans_abort(tp, &tp->t_dfops); xfs_defer_cancel_list(mp, &tp->t_dfops); + tp->t_dfops_nr = 0; } /* @@ -830,6 +837,7 @@ xfs_defer_alloc( dfp->dfp_ops = ops; INIT_LIST_HEAD(&dfp->dfp_work); list_add_tail(&dfp->dfp_list, &tp->t_dfops); + tp->t_dfops_nr++; return dfp; } @@ -942,6 +950,12 @@ xfs_defer_move( struct xfs_trans *stp) { list_splice_init(&stp->t_dfops, &dtp->t_dfops); + dtp->t_dfops_nr += stp->t_dfops_nr; + dtp->t_dfops_nr_max = stp->t_dfops_nr_max; + dtp->t_dfops_finished = stp->t_dfops_finished; + stp->t_dfops_nr = 0; + stp->t_dfops_nr_max = 0; + stp->t_dfops_finished = 0; /* * Low free space mode was historically controlled by a dfops field. 
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 906e35eef223d..6c99bf56184b0 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2711,6 +2711,25 @@ TRACE_EVENT(xfs_btree_free_block, /* deferred ops */ struct xfs_defer_pending; +TRACE_EVENT(xfs_defer_stats, + TP_PROTO(struct xfs_trans *tp), + TP_ARGS(tp), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, max) + __field(unsigned int, finished) + ), + TP_fast_assign( + __entry->dev = tp->t_mountp->m_super->s_dev; + __entry->max = tp->t_dfops_nr_max; + __entry->finished = tp->t_dfops_finished; + ), + TP_printk("dev %d:%d max %u finished %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->max, + __entry->finished) +) + DECLARE_EVENT_CLASS(xfs_defer_class, TP_PROTO(struct xfs_trans *tp, unsigned long caller_ip), TP_ARGS(tp, caller_ip), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 008380482777b..eb7d4272aef28 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -70,6 +70,9 @@ xfs_trans_free( xfs_extent_busy_sort(&tp->t_busy); xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); + if (tp->t_dfops_finished > 0) + trace_xfs_defer_stats(tp); + trace_xfs_trans_free(tp, _RET_IP_); xfs_trans_clear_context(tp); if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 2046ee06fe88f..6017efe354adc 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -151,6 +151,13 @@ typedef struct xfs_trans { struct list_head t_busy; /* list of busy extents */ struct list_head t_dfops; /* deferred operations */ unsigned long t_pflags; /* saved process flags state */ + + /* Count of deferred ops attached to transaction. */ + unsigned int t_dfops_nr; + /* Maximum t_dfops_nr seen in a loop. */ + unsigned int t_dfops_nr_max; + /* Number of dfops finished. 
*/ + unsigned int t_dfops_finished; } xfs_trans_t; /* From patchwork Sun Dec 31 22:01:15 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507777 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8B0B1C127 for ; Sun, 31 Dec 2023 22:01:16 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="rOiY5pzG" Received: by smtp.kernel.org (Postfix) with ESMTPSA id F194CC433C7; Sun, 31 Dec 2023 22:01:15 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704060076; bh=YgmmpQbX5hop6oq1XSXF8U7+XI2Mc4waI+U94unp/A4=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=rOiY5pzGIYDS9Q9RLM11RxeIWqGjYzUuCerqrXyFkZvsfRzu33i+CXYaIWF/x+Tm9 aIse6J+yhtqGCR2wqzHTkqY9MEW72cG0CmN4rScIwVrqvYz3n2qvufUivfXUGsr+wn CWdfF6IBpe7/Zryl/i25WO9IRmNu4nfcjLp2Ta19s4LmgSBTOYVFnYioYpyBo5tCFk 56Y0ImUfUx0CxWP+7dXh/fVomY4bQRXYSZrInelhl5ONk6HUQ6C081dsngyZRK9DxK jVt34BYz33AaCHYLru+ZCtts7ldIa/CxYQ12uEv8jU2L7xV26j/yjrUdrsJiZWR2Lk a6Hoh1Lb7OX+Q== Date: Sun, 31 Dec 2023 14:01:15 -0800 Subject: [PATCH 2/5] xfs: whine to dmesg when we encounter errors From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170404854753.1769671.12991524814359250998.stgit@frogsfrogsfrogs> In-Reply-To: <170404854709.1769671.12231107418026207335.stgit@frogsfrogsfrogs> References: <170404854709.1769671.12231107418026207335.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Forward everything scrub whines about to dmesg. 
Signed-off-by: Darrick J. Wong --- fs/xfs/Kconfig | 13 ++++++ fs/xfs/scrub/btree.c | 88 +++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/common.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/common.h | 1 fs/xfs/scrub/dabtree.c | 24 +++++++++++ fs/xfs/scrub/inode.c | 4 ++ fs/xfs/scrub/scrub.c | 40 ++++++++++++++++++ fs/xfs/scrub/trace.c | 22 ++++++++++ fs/xfs/scrub/trace.h | 2 + fs/xfs/xfs_globals.c | 5 ++ fs/xfs/xfs_sysctl.h | 1 fs/xfs/xfs_sysfs.c | 32 ++++++++++++++ 12 files changed, 339 insertions(+) diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 0ed89b2381936..be17dbeb0eb43 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -172,6 +172,19 @@ config XFS_ONLINE_SCRUB_STATS If unsure, say N. +config XFS_ONLINE_SCRUB_WHINE + bool "XFS online metadata verbose logging by default" + default y + depends on XFS_ONLINE_SCRUB + help + If you say Y here, the kernel will by default log the outcomes of all + scrub and repair operations, as well as any corruptions found. This + may slow down scrub due to printk logging overhead. + + This value can be changed by editing /sys/fs/xfs/debug/scrub_whine. + + If unsure, say N. + choice prompt "XFS hook implementation" depends on XFS_FS && XFS_LIVE_HOOKS && XFS_ONLINE_SCRUB diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 1935b9ce1885c..e1b22ac074d34 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -11,6 +11,8 @@ #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_log_format.h" +#include "xfs_ag.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" @@ -18,6 +20,62 @@ /* btree scrubbing */ +/* Figure out which block the btree cursor was pointing to. 
*/ +static inline xfs_fsblock_t +xchk_btree_cur_fsbno( + struct xfs_btree_cur *cur, + int level) +{ + if (level < cur->bc_nlevels && cur->bc_levels[level].bp) + return XFS_DADDR_TO_FSB(cur->bc_mp, + xfs_buf_daddr(cur->bc_levels[level].bp)); + else if (level == cur->bc_nlevels - 1 && + (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)) + return XFS_INO_TO_FSB(cur->bc_mp, cur->bc_ino.ip->i_ino); + else if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) + return XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, 0); + return NULLFSBLOCK; +} + +static inline void +process_error_whine( + struct xfs_scrub *sc, + struct xfs_btree_cur *cur, + int level, + int *error, + __u32 errflag, + void *ret_ip) +{ + xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level); + + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { + xchk_whine(sc->mp, "ino 0x%llx fork %d type %s btnum %d level %d ptr %d agno 0x%x agbno 0x%x error %d errflag 0x%x ret_ip %pS", + cur->bc_ino.ip->i_ino, + cur->bc_ino.whichfork, + xchk_type_string(sc->sm->sm_type), + cur->bc_btnum, + level, + cur->bc_levels[level].ptr, + XFS_FSB_TO_AGNO(cur->bc_mp, fsbno), + XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), + *error, + errflag, + ret_ip); + return; + } + + xchk_whine(sc->mp, "type %s btnum %d level %d ptr %d agno 0x%x agbno 0x%x error %d errflag 0x%x ret_ip %pS", + xchk_type_string(sc->sm->sm_type), + cur->bc_btnum, + level, + cur->bc_levels[level].ptr, + XFS_FSB_TO_AGNO(cur->bc_mp, fsbno), + XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), + *error, + errflag, + ret_ip); +} + /* * Check for btree operation errors. See the section about handling * operational errors in common.c. @@ -44,9 +102,13 @@ __xchk_btree_process_error( case -EFSCORRUPTED: /* Note the badness but don't abort. 
*/ sc->sm->sm_flags |= errflag; + process_error_whine(sc, cur, level, error, errflag, ret_ip); *error = 0; fallthrough; default: + if (*error) + process_error_whine(sc, cur, level, error, errflag, + ret_ip); if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) trace_xchk_ifork_btree_op_error(sc, cur, level, *error, ret_ip); @@ -92,11 +154,37 @@ __xchk_btree_set_corrupt( sc->sm->sm_flags |= errflag; if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) + { + xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level); + xchk_whine(sc->mp, "ino 0x%llx fork %d type %s btnum %d level %d ptr %d agno 0x%x agbno 0x%x errflag 0x%x ret_ip %pS", + cur->bc_ino.ip->i_ino, + cur->bc_ino.whichfork, + xchk_type_string(sc->sm->sm_type), + cur->bc_btnum, + level, + cur->bc_levels[level].ptr, + XFS_FSB_TO_AGNO(cur->bc_mp, fsbno), + XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), + errflag, + ret_ip); trace_xchk_ifork_btree_error(sc, cur, level, ret_ip); + } else + { + xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level); + xchk_whine(sc->mp, "type %s btnum %d level %d ptr %d agno 0x%x agbno 0x%x errflag 0x%x ret_ip %pS", + xchk_type_string(sc->sm->sm_type), + cur->bc_btnum, + level, + cur->bc_levels[level].ptr, + XFS_FSB_TO_AGNO(cur->bc_mp, fsbno), + XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), + errflag, + ret_ip); trace_xchk_btree_error(sc, cur, level, ret_ip); + } } void diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index a9801a5bb0383..2a9741235d310 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -105,9 +105,23 @@ __xchk_process_error( case -EFSCORRUPTED: /* Note the badness but don't abort. 
*/ sc->sm->sm_flags |= errflag; + xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x error %d errflag 0x%x ret_ip %pS", + xchk_type_string(sc->sm->sm_type), + agno, + bno, + *error, + errflag, + ret_ip); *error = 0; fallthrough; default: + if (*error) + xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x error %d ret_ip %pS", + xchk_type_string(sc->sm->sm_type), + agno, + bno, + *error, + ret_ip); trace_xchk_op_error(sc, agno, bno, *error, ret_ip); break; } @@ -190,9 +204,25 @@ __xchk_fblock_process_error( case -EFSCORRUPTED: /* Note the badness but don't abort. */ sc->sm->sm_flags |= errflag; + xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu error %d errflag 0x%x ret_ip %pS", + sc->ip->i_ino, + whichfork, + xchk_type_string(sc->sm->sm_type), + offset, + *error, + errflag, + ret_ip); *error = 0; fallthrough; default: + if (*error) + xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu error %d ret_ip %pS", + sc->ip->i_ino, + whichfork, + xchk_type_string(sc->sm->sm_type), + offset, + *error, + ret_ip); trace_xchk_file_op_error(sc, whichfork, offset, *error, ret_ip); break; @@ -264,6 +294,8 @@ xchk_set_corrupt( struct xfs_scrub *sc) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + xchk_whine(sc->mp, "type %s ret_ip %pS", xchk_type_string(sc->sm->sm_type), + __return_address); trace_xchk_fs_error(sc, 0, __return_address); } @@ -275,6 +307,11 @@ xchk_block_set_corrupt( { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address); + xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x ret_ip %pS", + xchk_type_string(sc->sm->sm_type), + xfs_daddr_to_agno(sc->mp, xfs_buf_daddr(bp)), + xfs_daddr_to_agbno(sc->mp, xfs_buf_daddr(bp)), + __return_address); } #ifdef CONFIG_XFS_QUOTA @@ -286,6 +323,8 @@ xchk_qcheck_set_corrupt( xfs_dqid_t id) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + xchk_whine(sc->mp, "type %s dqtype %u id %u ret_ip %pS", + xchk_type_string(sc->sm->sm_type), dqtype, id, __return_address); 
trace_xchk_qcheck_error(sc, dqtype, id, __return_address); } #endif /* CONFIG_XFS_QUOTA */ @@ -298,6 +337,11 @@ xchk_block_xref_set_corrupt( { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address); + xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x ret_ip %pS", + xchk_type_string(sc->sm->sm_type), + xfs_daddr_to_agno(sc->mp, xfs_buf_daddr(bp)), + xfs_daddr_to_agbno(sc->mp, xfs_buf_daddr(bp)), + __return_address); } /* @@ -311,6 +355,8 @@ xchk_ino_set_corrupt( xfs_ino_t ino) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + xchk_whine(sc->mp, "ino 0x%llx type %s ret_ip %pS", + ino, xchk_type_string(sc->sm->sm_type), __return_address); trace_xchk_ino_error(sc, ino, __return_address); } @@ -321,6 +367,8 @@ xchk_ino_xref_set_corrupt( xfs_ino_t ino) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; + xchk_whine(sc->mp, "ino 0x%llx type %s ret_ip %pS", + ino, xchk_type_string(sc->sm->sm_type), __return_address); trace_xchk_ino_error(sc, ino, __return_address); } @@ -332,6 +380,12 @@ xchk_fblock_set_corrupt( xfs_fileoff_t offset) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu ret_ip %pS", + sc->ip->i_ino, + whichfork, + xchk_type_string(sc->sm->sm_type), + offset, + __return_address); trace_xchk_fblock_error(sc, whichfork, offset, __return_address); } @@ -343,6 +397,12 @@ xchk_fblock_xref_set_corrupt( xfs_fileoff_t offset) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; + xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu ret_ip %pS", + sc->ip->i_ino, + whichfork, + xchk_type_string(sc->sm->sm_type), + offset, + __return_address); trace_xchk_fblock_error(sc, whichfork, offset, __return_address); } @@ -356,6 +416,8 @@ xchk_ino_set_warning( xfs_ino_t ino) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING; + xchk_whine(sc->mp, "ino 0x%llx type %s ret_ip %pS", + ino, xchk_type_string(sc->sm->sm_type), __return_address); trace_xchk_ino_warning(sc, ino, 
__return_address); } @@ -367,6 +429,12 @@ xchk_fblock_set_warning( xfs_fileoff_t offset) { sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING; + xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu ret_ip %pS", + sc->ip->i_ino, + whichfork, + xchk_type_string(sc->sm->sm_type), + offset, + __return_address); trace_xchk_fblock_warning(sc, whichfork, offset, __return_address); } @@ -1312,6 +1380,10 @@ xchk_iget_for_scrubbing( out_cancel: xchk_trans_cancel(sc); out_error: + xchk_whine(mp, "type %s agno 0x%x agbno 0x%x error %d ret_ip %pS", + xchk_type_string(sc->sm->sm_type), agno, + XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), error, + __return_address); trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), error, __return_address); return error; @@ -1453,6 +1525,10 @@ xchk_should_check_xref( } sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL; + xchk_whine(sc->mp, "type %s xref error %d ret_ip %pS", + xchk_type_string(sc->sm->sm_type), + *error, + __return_address); trace_xchk_xref_error(sc, *error, __return_address); /* @@ -1484,6 +1560,11 @@ xchk_buffer_recheck( return; sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; trace_xchk_block_error(sc, xfs_buf_daddr(bp), fa); + xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x ret_ip %pS", + xchk_type_string(sc->sm->sm_type), + xfs_daddr_to_agno(sc->mp, xfs_buf_daddr(bp)), + xfs_daddr_to_agbno(sc->mp, xfs_buf_daddr(bp)), + fa); } static inline int @@ -1793,3 +1874,29 @@ xchk_inode_count_blocks( *count = btblocks - 1; return 0; } + +/* Complain about failures... */ +void +xchk_whine( + const struct xfs_mount *mp, + const char *fmt, + ...) 
+{ + struct va_format vaf; + va_list args; + + if (!xfs_globals.scrub_whine) + return; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_INFO "XFS (%s) %pS: %pV\n", mp->m_super->s_id, + __return_address, &vaf); + va_end(args); + + if (xfs_error_level >= XFS_ERRLEVEL_HIGH) + xfs_stack_trace(); +} diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index ed22e1403d0f0..53f4de067369a 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -188,6 +188,7 @@ bool xchk_ilock_nowait(struct xfs_scrub *sc, unsigned int ilock_flags); void xchk_iunlock(struct xfs_scrub *sc, unsigned int ilock_flags); void xchk_buffer_recheck(struct xfs_scrub *sc, struct xfs_buf *bp); +void xchk_whine(const struct xfs_mount *mp, const char *fmt, ...); /* * Grab the inode at @inum. The caller must have created a scrub transaction diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index 056de4819f866..ae64db9f0bba2 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -47,9 +47,26 @@ xchk_da_process_error( case -EFSCORRUPTED: /* Note the badness but don't abort. 
*/ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + xchk_whine(sc->mp, "ino 0x%llx fork %d type %s dablk 0x%llx error %d ret_ip %pS", + sc->ip->i_ino, + ds->dargs.whichfork, + xchk_type_string(sc->sm->sm_type), + xfs_dir2_da_to_db(ds->dargs.geo, + ds->state->path.blk[level].blkno), + *error, + __return_address); *error = 0; fallthrough; default: + if (*error) + xchk_whine(sc->mp, "ino 0x%llx fork %d type %s dablk 0x%llx error %d ret_ip %pS", + sc->ip->i_ino, + ds->dargs.whichfork, + xchk_type_string(sc->sm->sm_type), + xfs_dir2_da_to_db(ds->dargs.geo, + ds->state->path.blk[level].blkno), + *error, + __return_address); trace_xchk_file_op_error(sc, ds->dargs.whichfork, xfs_dir2_da_to_db(ds->dargs.geo, ds->state->path.blk[level].blkno), @@ -72,6 +89,13 @@ xchk_da_set_corrupt( sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + xchk_whine(sc->mp, "ino 0x%llx fork %d type %s dablk 0x%llx ret_ip %pS", + sc->ip->i_ino, + ds->dargs.whichfork, + xchk_type_string(sc->sm->sm_type), + xfs_dir2_da_to_db(ds->dargs.geo, + ds->state->path.blk[level].blkno), + __return_address); trace_xchk_fblock_error(sc, ds->dargs.whichfork, xfs_dir2_da_to_db(ds->dargs.geo, ds->state->path.blk[level].blkno), diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index e52e12e9a1b4b..cb2530a93a001 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -217,6 +217,10 @@ xchk_setup_inode( out_cancel: xchk_trans_cancel(sc); out_error: + xchk_whine(mp, "type %s agno 0x%x agbno 0x%x error %d ret_ip %pS", + xchk_type_string(sc->sm->sm_type), agno, + XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), error, + __return_address); trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), error, __return_address); return error; diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 94a733975879a..1a098a8913925 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -632,6 +632,45 @@ xchk_scrub_create_subord( return sub; } +static inline void +repair_outcomes(struct xfs_scrub *sc, int error) 
+{ + struct xfs_scrub_metadata *sm = sc->sm; + const char *wut = NULL; + + if (!xfs_globals.scrub_whine) + return; + + if (sc->flags & XREP_ALREADY_FIXED) { + wut = "*** REPAIR SUCCESS"; + error = 0; + } else if (error == -EBUSY) { + wut = "??? FILESYSTEM BUSY"; + } else if (error == -EAGAIN) { + wut = "??? REPAIR DEFERRED"; + } else if (error == -ECANCELED) { + wut = "??? REPAIR CANCELLED"; + } else if (error == -EINTR) { + wut = "??? REPAIR INTERRUPTED"; + } else if (error != -EOPNOTSUPP && error != -ENOENT) { + wut = "!!! REPAIR FAILED"; + xfs_info(sc->mp, +"%s ino 0x%llx type %s agno 0x%x inum 0x%llx gen 0x%x flags 0x%x error %d", + wut, XFS_I(file_inode(sc->file))->i_ino, + xchk_type_string(sm->sm_type), sm->sm_agno, + sm->sm_ino, sm->sm_gen, sm->sm_flags, error); + return; + } else { + return; + } + + xfs_info_ratelimited(sc->mp, +"%s ino 0x%llx type %s agno 0x%x inum 0x%llx gen 0x%x flags 0x%x error %d", + wut, XFS_I(file_inode(sc->file))->i_ino, + xchk_type_string(sm->sm_type), sm->sm_agno, sm->sm_ino, + sm->sm_gen, sm->sm_flags, error); +} + /* Dispatch metadata scrubbing. */ int xfs_scrub_metadata( @@ -729,6 +768,7 @@ xfs_scrub_metadata( * already tried to fix it, then attempt a repair. 
*/ error = xrep_attempt(sc, &run); + repair_outcomes(sc, error); if (error == -EAGAIN) { /* * Either the repair function succeeded or it couldn't diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 4d0a6dceaa6c6..24a75e9e1a821 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -69,3 +69,25 @@ xfbtree_ino( */ #define CREATE_TRACE_POINTS #include "scrub/trace.h" + +/* xchk_whine stuff */ +struct xchk_tstr { + unsigned int type; + const char *tag; +}; + +static const struct xchk_tstr xchk_tstr_tags[] = { XFS_SCRUB_TYPE_STRINGS }; + +const char * +xchk_type_string( + unsigned int type) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(xchk_tstr_tags); i++) { + if (xchk_tstr_tags[i].type == type) + return xchk_tstr_tags[i].tag; + } + + return "???"; +} diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 2c6f7e3b7578d..cfd882edb2937 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -127,6 +127,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RTREFCBT); { XFS_SCRUB_TYPE_RTRMAPBT, "rtrmapbt" }, \ { XFS_SCRUB_TYPE_RTREFCBT, "rtrefcountbt" } +const char *xchk_type_string(unsigned int type); + #define XFS_SCRUB_FLAG_STRINGS \ { XFS_SCRUB_IFLAG_REPAIR, "repair" }, \ { XFS_SCRUB_OFLAG_CORRUPT, "corrupt" }, \ diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c index f18fec0adf666..f5fe896b9a8ec 100644 --- a/fs/xfs/xfs_globals.c +++ b/fs/xfs/xfs_globals.c @@ -44,6 +44,11 @@ struct xfs_globals xfs_globals = { .pwork_threads = -1, /* automatic thread detection */ .larp = false, /* log attribute replay */ #endif +#ifdef CONFIG_XFS_ONLINE_SCRUB_WHINE + .scrub_whine = true, +#else + .scrub_whine = false, +#endif /* * Leave this many record slots empty when bulk loading btrees. 
By diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h index 276696a07040c..b0939ac370fba 100644 --- a/fs/xfs/xfs_sysctl.h +++ b/fs/xfs/xfs_sysctl.h @@ -91,6 +91,7 @@ struct xfs_globals { int mount_delay; /* mount setup delay (secs) */ bool bug_on_assert; /* BUG() the kernel on assert failure */ bool always_cow; /* use COW fork for all overwrites */ + bool scrub_whine; /* noisier output from scrub */ }; extern struct xfs_globals xfs_globals; diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 17485666b6723..b9ba47dcee8c1 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -262,6 +262,37 @@ larp_show( XFS_SYSFS_ATTR_RW(larp); #endif /* DEBUG */ +/* Logging of the outcomes of everything that scrub does; boolean knob, write 0 to disable or 1 to enable */ +STATIC ssize_t +scrub_whine_store( + struct kobject *kobject, + const char *buf, + size_t count) +{ + int ret; + int val; + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; + + if (val < 0 || val > 1) + return -EINVAL; + + xfs_globals.scrub_whine = val; + + return count; +} + +STATIC ssize_t +scrub_whine_show( + struct kobject *kobject, + char *buf) +{ + return sysfs_emit(buf, "%d\n", xfs_globals.scrub_whine); +} +XFS_SYSFS_ATTR_RW(scrub_whine); + STATIC ssize_t bload_leaf_slack_store( struct kobject *kobject, @@ -323,6 +354,7 @@ static struct attribute *xfs_dbg_attrs[] = { ATTR_LIST(pwork_threads), ATTR_LIST(larp), #endif + ATTR_LIST(scrub_whine), ATTR_LIST(bload_leaf_slack), ATTR_LIST(bload_node_slack), NULL, From patchwork Sun Dec 31 22:01:31 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13507778 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3678DC126 for ; Sun, 31 Dec 2023 22:01:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="IeLHqfPW" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 91BD6C433C7; Sun, 31 Dec 2023 22:01:31 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704060091; bh=+1CROSQl3zCxMsEtCS7OPRNWXnnZwLVf1mOkY8wfaog=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=IeLHqfPWnDAC7Q2iQA1VATFiG9sDM4V3IXBmiqKrhmwOcmzN4WkyEywOU4YLyqbUX XiGQuH7TZULM+hOqHVMWDkohtov2QyGVXBIscFzO08lWquOl0vokq03/c6zcePFqK0 HW7MyiP7VDxe40SsK2Q6hOexcWMdxgDDKIFyUXpOEJFn6F69GUd404e7TGuvbS14+3 kgy1UxAEhhm4of2C4iJf9tjYunvkDB676i0el1qOlNwF4DR5xYYfv94FbyYQEtEzWA 5t534ebjoIYA++GMnGbhDkjaMYjBUISfexTewPUr4w9T7/1xsB8oTD6vO8/MNiIPMu os8OcND8Alu1Q== Date: Sun, 31 Dec 2023 14:01:31 -0800 Subject: [PATCH 3/5] xfs: create a noalloc mode for allocation groups From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170404854771.1769671.617940952368075702.stgit@frogsfrogsfrogs> In-Reply-To: <170404854709.1769671.12231107418026207335.stgit@frogsfrogsfrogs> References: <170404854709.1769671.12231107418026207335.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Create a new noalloc state for the per-AG structure that will disable block allocation in this AG. 
We accomplish this by subtracting from fdblocks all the free blocks in this AG, hiding those blocks from the allocator, and preventing freed blocks from updating fdblocks until we're ready to lift noalloc mode. Note that we reduce the free block count of the filesystem so that we can prevent transactions from entering the allocator looking for "free" space that we've turned off incore. Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_ag.c | 60 +++++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_ag.h | 8 ++++++ fs/xfs/libxfs/xfs_ag_resv.c | 27 +++++++++++++++++-- fs/xfs/scrub/fscounters.c | 3 +- fs/xfs/xfs_fsops.c | 10 ++++++- fs/xfs/xfs_super.c | 1 + fs/xfs/xfs_trace.h | 46 +++++++++++++++++++++++++++++++++ 7 files changed, 150 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 35deb474a7cf0..a855f943cfad9 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -1119,3 +1119,63 @@ xfs_ag_get_geometry( xfs_buf_relse(agi_bp); return error; } + +/* How many blocks does this AG contribute to fdblocks? */ +xfs_extlen_t +xfs_ag_fdblocks( + struct xfs_perag *pag) +{ + xfs_extlen_t ret; + + ASSERT(xfs_perag_initialised_agf(pag)); + + ret = pag->pagf_freeblks + pag->pagf_flcount + pag->pagf_btreeblks; + ret -= pag->pag_meta_resv.ar_reserved; + ret -= pag->pag_rmapbt_resv.ar_orig_reserved; + return ret; +} + +/* + * Hide all the free space in this AG. Caller must hold both the AGI and the + * AGF buffers or have otherwise prevented concurrent access. 
+ */ +int +xfs_ag_set_noalloc( + struct xfs_perag *pag) +{ + struct xfs_mount *mp = pag->pag_mount; + int error; + + ASSERT(xfs_perag_initialised_agf(pag)); + ASSERT(xfs_perag_initialised_agi(pag)); + + if (xfs_perag_prohibits_alloc(pag)) + return 0; + + error = xfs_mod_fdblocks(mp, -(int64_t)xfs_ag_fdblocks(pag), false); + if (error) + return error; + + trace_xfs_ag_set_noalloc(pag); + set_bit(XFS_AGSTATE_NOALLOC, &pag->pag_opstate); + return 0; +} + +/* + * Unhide all the free space in this AG. Caller must hold both the AGI and + * the AGF buffers or have otherwise prevented concurrent access. + */ +void +xfs_ag_clear_noalloc( + struct xfs_perag *pag) +{ + struct xfs_mount *mp = pag->pag_mount; + + if (!xfs_perag_prohibits_alloc(pag)) + return; + + xfs_mod_fdblocks(mp, xfs_ag_fdblocks(pag), false); + + trace_xfs_ag_clear_noalloc(pag); + clear_bit(XFS_AGSTATE_NOALLOC, &pag->pag_opstate); +} diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 79017fcd3df58..c21cc30ebc73f 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -131,6 +131,7 @@ struct xfs_perag { #define XFS_AGSTATE_PREFERS_METADATA 2 #define XFS_AGSTATE_ALLOWS_INODES 3 #define XFS_AGSTATE_AGFL_NEEDS_RESET 4 +#define XFS_AGSTATE_NOALLOC 5 #define __XFS_AG_OPSTATE(name, NAME) \ static inline bool xfs_perag_ ## name (struct xfs_perag *pag) \ @@ -143,6 +144,7 @@ __XFS_AG_OPSTATE(initialised_agi, AGI_INIT) __XFS_AG_OPSTATE(prefers_metadata, PREFERS_METADATA) __XFS_AG_OPSTATE(allows_inodes, ALLOWS_INODES) __XFS_AG_OPSTATE(agfl_needs_reset, AGFL_NEEDS_RESET) +__XFS_AG_OPSTATE(prohibits_alloc, NOALLOC) int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount, xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi); @@ -156,12 +158,18 @@ struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno, struct xfs_perag *xfs_perag_hold(struct xfs_perag *pag); void xfs_perag_put(struct xfs_perag *pag); + /* Active AG references */ struct xfs_perag 
*xfs_perag_grab(struct xfs_mount *, xfs_agnumber_t); struct xfs_perag *xfs_perag_grab_tag(struct xfs_mount *, xfs_agnumber_t, int tag); void xfs_perag_rele(struct xfs_perag *pag); +/* Enable or disable allocation from an AG */ +xfs_extlen_t xfs_ag_fdblocks(struct xfs_perag *pag); +int xfs_ag_set_noalloc(struct xfs_perag *pag); +void xfs_ag_clear_noalloc(struct xfs_perag *pag); + /* * Per-ag geometry infomation and validation */ diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index f775b92b4aacd..7142eda1c2501 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -73,6 +73,13 @@ xfs_ag_resv_critical( xfs_extlen_t avail; xfs_extlen_t orig; + /* + * Pretend we're critically low on reservations in this AG to scare + * everyone else away. + */ + if (xfs_perag_prohibits_alloc(pag)) + return true; + switch (type) { case XFS_AG_RESV_METADATA: avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved; @@ -115,7 +122,12 @@ xfs_ag_resv_needed( break; case XFS_AG_RESV_IMETA: case XFS_AG_RESV_NONE: - /* empty */ + /* + * In noalloc mode, we pretend that all the free blocks in this + * AG have been allocated. Make this AG look full. + */ + if (xfs_perag_prohibits_alloc(pag)) + len += xfs_ag_fdblocks(pag); break; default: ASSERT(0); @@ -344,6 +356,8 @@ xfs_ag_resv_alloc_extent( xfs_extlen_t len; uint field; + ASSERT(type != XFS_AG_RESV_NONE || !xfs_perag_prohibits_alloc(pag)); + trace_xfs_ag_resv_alloc_extent(pag, type, args->len); switch (type) { @@ -401,7 +415,14 @@ xfs_ag_resv_free_extent( ASSERT(0); fallthrough; case XFS_AG_RESV_NONE: - xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len); + /* + * Normally we put freed blocks back into fdblocks. In noalloc + * mode, however, we pretend that there are no fdblocks in the + * AG, so don't put them back. 
+ */ + if (!xfs_perag_prohibits_alloc(pag)) + xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, + (int64_t)len); fallthrough; case XFS_AG_RESV_IGNORE: return; @@ -414,6 +435,6 @@ xfs_ag_resv_free_extent( /* Freeing into the reserved pool only requires on-disk update... */ xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len); /* ...but freeing beyond that requires in-core and on-disk update. */ - if (len > leftover) + if (len > leftover && !xfs_perag_prohibits_alloc(pag)) xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover); } diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index 2b4bd2eb71b57..220b27e79497b 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -338,7 +338,8 @@ xchk_fscount_aggregate_agcounts( */ fsc->fdblocks -= pag->pag_meta_resv.ar_reserved; fsc->fdblocks -= pag->pag_rmapbt_resv.ar_orig_reserved; - + if (xfs_perag_prohibits_alloc(pag)) + fsc->fdblocks -= xfs_ag_fdblocks(pag); } if (pag) xfs_perag_rele(pag); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 9584c08480f75..639b46c617be7 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -638,6 +638,14 @@ xfs_fs_unreserve_ag_blocks( if (xfs_has_realtime(mp)) xfs_rt_resv_free(mp); - for_each_perag(mp, agno, pag) + for_each_perag(mp, agno, pag) { + /* + * Bring the AG back online because our AG hiding only exists + * in-core and we need the superblock to be written out with + * the super fdblocks reflecting the AGF freeblks. Do this + * before adding the per-AG reservations back to fdblocks. 
+ */ + xfs_ag_clear_noalloc(pag); xfs_ag_resv_free(pag); + } } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 2aa91f9be05a6..8f06716dd0169 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -324,6 +324,7 @@ xfs_set_inode_alloc( pag = xfs_perag_get(mp, index); if (xfs_set_inode_alloc_perag(pag, ino, max_metadata)) maxagi++; + clear_bit(XFS_AGSTATE_NOALLOC, &pag->pag_opstate); xfs_perag_put(pag); } diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 6c99bf56184b0..531a522ac0f7a 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -4527,6 +4527,52 @@ DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick); DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy); DEFINE_INODE_CORRUPT_EVENT(xfs_inode_unfixed_corruption); +DECLARE_EVENT_CLASS(xfs_ag_noalloc_class, + TP_PROTO(struct xfs_perag *pag), + TP_ARGS(pag), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_extlen_t, freeblks) + __field(xfs_extlen_t, flcount) + __field(xfs_extlen_t, btreeblks) + __field(xfs_extlen_t, meta_resv) + __field(xfs_extlen_t, rmap_resv) + + __field(unsigned long long, resblks) + __field(unsigned long long, resblks_avail) + ), + TP_fast_assign( + __entry->dev = pag->pag_mount->m_super->s_dev; + __entry->agno = pag->pag_agno; + __entry->freeblks = pag->pagf_freeblks; + __entry->flcount = pag->pagf_flcount; + __entry->btreeblks = pag->pagf_btreeblks; + __entry->meta_resv = pag->pag_meta_resv.ar_reserved; + __entry->rmap_resv = pag->pag_rmapbt_resv.ar_orig_reserved; + + __entry->resblks = pag->pag_mount->m_resblks; + __entry->resblks_avail = pag->pag_mount->m_resblks_avail; + ), + TP_printk("dev %d:%d agno 0x%x freeblks %u flcount %u btreeblks %u metaresv %u rmapresv %u resblks %llu resblks_avail %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->freeblks, + __entry->flcount, + __entry->btreeblks, + __entry->meta_resv, + __entry->rmap_resv, + __entry->resblks, + __entry->resblks_avail) +); +#define 
DEFINE_AG_NOALLOC_EVENT(name) \ +DEFINE_EVENT(xfs_ag_noalloc_class, name, \ + TP_PROTO(struct xfs_perag *pag), \ + TP_ARGS(pag)) + +DEFINE_AG_NOALLOC_EVENT(xfs_ag_set_noalloc); +DEFINE_AG_NOALLOC_EVENT(xfs_ag_clear_noalloc); + TRACE_EVENT(xfs_iwalk_ag, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t startino), From patchwork Sun Dec 31 22:01:46 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507779 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CAE60C129 for ; Sun, 31 Dec 2023 22:01:47 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="boavxlZ5" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 4239FC433C7; Sun, 31 Dec 2023 22:01:47 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704060107; bh=fo/3UPIT5NSa1VBdubce+qAawWsOg9L0Lk1kNKPicNI=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=boavxlZ5ISHlpG8ZT32aL5qsKXB30QgueF8PVYTn/4t211W/GIV3GvZeO1pHdSoEc meOLlY+c2kGu+y6XIYqU2WYgji5Uk8NhCKh1u6syZp8qAXK2ZLziFf41iIQj6BWfhc OgkJfNUm5UpLY/Y0Sbuhq2+ynANkXbJU3xR9tHyhQAHjrQ/effIF9hUTUwC4U9XE6u +edZpl8BJMOG7/ljpVaLFm1FkntkIvCAIysGJU5wehdQo6AbUQjLv+11d9KN2smAUx UzmgP3jtaFCt3yygI6+iA51MG2sumOmsEWbYY+w9Ng5EKtTqmbaf4jS+o2r2RANhbm YIc7BrBaxHeCQ== Date: Sun, 31 Dec 2023 14:01:46 -0800 Subject: [PATCH 4/5] xfs: enable userspace to hide an AG from allocation From: "Darrick J. 
Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170404854787.1769671.8982987186849794848.stgit@frogsfrogsfrogs> In-Reply-To: <170404854709.1769671.12231107418026207335.stgit@frogsfrogsfrogs> References: <170404854709.1769671.12231107418026207335.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Add an administrative interface so that userspace can hide an allocation group from block allocation. Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_ag.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_fs.h | 5 ++++ fs/xfs/xfs_ioctl.c | 4 +++- 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index a855f943cfad9..183f46edb5d15 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -1075,6 +1075,54 @@ xfs_ag_extend_space( return 0; } +/* Compute the AG geometry flags. */ +static inline uint32_t +xfs_ag_calc_geoflags( + struct xfs_perag *pag) +{ + uint32_t ret = 0; + + if (xfs_perag_prohibits_alloc(pag)) + ret |= XFS_AG_FLAG_NOALLOC; + + return ret; +} + +/* + * Compare the current AG geometry flags against the flags in the AG geometry + * structure and update the AG state to reflect any changes, then update the + * struct to reflect the current status. 
+ */ +static inline int +xfs_ag_update_geoflags( + struct xfs_perag *pag, + struct xfs_ag_geometry *ageo, + uint32_t new_flags) +{ + uint32_t old_flags = xfs_ag_calc_geoflags(pag); + int error; + + if (!(new_flags & XFS_AG_FLAG_UPDATE)) { + ageo->ag_flags = old_flags; + return 0; + } + + if ((old_flags & XFS_AG_FLAG_NOALLOC) && + !(new_flags & XFS_AG_FLAG_NOALLOC)) { + xfs_ag_clear_noalloc(pag); + } + + if (!(old_flags & XFS_AG_FLAG_NOALLOC) && + (new_flags & XFS_AG_FLAG_NOALLOC)) { + error = xfs_ag_set_noalloc(pag); + if (error) + return error; + } + + ageo->ag_flags = xfs_ag_calc_geoflags(pag); + return 0; +} + /* Retrieve AG geometry. */ int xfs_ag_get_geometry( @@ -1086,6 +1134,7 @@ xfs_ag_get_geometry( struct xfs_agi *agi; struct xfs_agf *agf; unsigned int freeblks; + uint32_t inflags = ageo->ag_flags; int error; /* Lock the AG headers. */ @@ -1096,6 +1145,10 @@ xfs_ag_get_geometry( if (error) goto out_agi; /* Fill out form. */ memset(ageo, 0, sizeof(*ageo)); + /* Apply flag changes only after zeroing so ag_flags is reported. */ + error = xfs_ag_update_geoflags(pag, ageo, inflags); + if (error) + goto out; ageo->ag_number = pag->pag_agno; @@ -1113,6 +1166,7 @@ xfs_ag_get_geometry( ageo->ag_freeblks = freeblks; xfs_ag_geom_health(pag, ageo); +out: /* Release resources. 
*/ xfs_buf_relse(agf_bp); out_agi: diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 4159e96d01ae6..96688f9301e08 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -305,6 +305,11 @@ struct xfs_ag_geometry { #define XFS_AG_GEOM_SICK_REFCNTBT (1 << 9) /* reference counts */ #define XFS_AG_GEOM_SICK_INODES (1 << 10) /* bad inodes were seen */ +#define XFS_AG_FLAG_UPDATE (1 << 0) /* update flags */ +#define XFS_AG_FLAG_NOALLOC (1 << 1) /* do not allocate from this AG */ +#define XFS_AG_FLAG_ALL (XFS_AG_FLAG_UPDATE | \ + XFS_AG_FLAG_NOALLOC) + /* * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index f85d5f142d180..d17b17e7eea1d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -976,10 +976,12 @@ xfs_ioc_ag_geometry( if (copy_from_user(&ageo, arg, sizeof(ageo))) return -EFAULT; - if (ageo.ag_flags) + if (ageo.ag_flags & ~XFS_AG_FLAG_ALL) return -EINVAL; if (memchr_inv(&ageo.ag_reserved, 0, sizeof(ageo.ag_reserved))) return -EINVAL; + if ((ageo.ag_flags & XFS_AG_FLAG_UPDATE) && !capable(CAP_SYS_ADMIN)) + return -EPERM; pag = xfs_perag_get(mp, ageo.ag_number); if (!pag) From patchwork Sun Dec 31 22:02:02 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13507780 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1D5EAC126 for ; Sun, 31 Dec 2023 22:02:03 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="XJEeQ/YO" Received: by smtp.kernel.org (Postfix) with ESMTPSA id DCE3DC433C7; Sun, 31 Dec 2023 22:02:02 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704060122; bh=MV8rfkGzWmusWy8QHJKGZa4Z7oXRFHcJK40+OLZmK+E=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=XJEeQ/YOGJhi5o1RfcTMqwIizZPp10OtUFFbzyXYS/epOzwyvR1/BZp2t/jwP+QHe s1xZgFt/NnHoYf1VScd8yvdWiAo9uJMsRiUt8X2YcOockfYHCNa9vB/8xziC/nnMER DMMdYIx1gi958xgrzuDrui/JR7RTIUPwd8kBtjAsd7uEgzbg38z32vyI66gFWktxQN getFpTm2wvpaEbwtiNKHHVxKiq1L4OHl/Q9VmXFrMa8NFDetNSp/euvuI9C1hKlN6q 7d2aOyBfkIQvKZy1NEvIHU8kxQ4aewdzUGg7Jqdq8zo0iOpijPXldMhhAtIuaNHh4D vorJshM1HVsGg== Date: Sun, 31 Dec 2023 14:02:02 -0800 Subject: [PATCH 5/5] xfs: apply noalloc mode to inode allocations too From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170404854804.1769671.2859585470367782301.stgit@frogsfrogsfrogs> In-Reply-To: <170404854709.1769671.12231107418026207335.stgit@frogsfrogsfrogs> References: <170404854709.1769671.12231107418026207335.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Don't allow inode allocations from this group if it's marked noalloc. Signed-off-by: Darrick J. 
Wong --- fs/xfs/libxfs/xfs_ialloc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 46c0bb67e4c47..e22f02722d19f 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1064,6 +1064,7 @@ xfs_dialloc_ag_inobt( ASSERT(xfs_perag_initialised_agi(pag)); ASSERT(xfs_perag_allows_inodes(pag)); + ASSERT(!xfs_perag_prohibits_alloc(pag)); ASSERT(pag->pagi_freecount > 0); restart_pagno: @@ -1692,6 +1693,8 @@ xfs_dialloc_good_ag( return false; if (!xfs_perag_allows_inodes(pag)) return false; + if (xfs_perag_prohibits_alloc(pag)) + return false; if (!xfs_perag_initialised_agi(pag)) { error = xfs_ialloc_read_agi(pag, tp, NULL);