From patchwork Sun Dec 31 22:48:41 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507960 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 726BCC127 for ; Sun, 31 Dec 2023 22:48:42 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="lmwYf0Ce" Received: by smtp.kernel.org (Postfix) with ESMTPSA id DB149C433C8; Sun, 31 Dec 2023 22:48:41 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704062921; bh=ku0P7X0noBc30net+fP+VftmSqEYDyjo283keHplStk=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=lmwYf0CeDOZtmXoLmiDJ8NQPZzIv1Ra+VGZajeQiy9+eeDwMJB/C2A0DUbwe81SqA 80Ii/yzbnhlvuKQ1k5qZUdLzDFKpu8+E9vgCXnu4+ZZVIBOgF1HLlswJ0UTxRgTRkd EJOApmpdiosCz7zR2pg9JZHE5x2fGIO2VySz6byPMhF8USF1KIqhAaAvRof6qfpIMt FUQBWTX7ve7VRRxEIK/g/bzX7Pq5DbB2I655gZmKxspR5G//cFsphfP6j1O0qRGAHr c8sqzgRV1v9lbz4M3ny3uaytuiLzg2bcg25BWaLHVxTU5+8epT0l3D7JfPS9tS9Zsz SDOwTOttLqKug== Date: Sun, 31 Dec 2023 14:48:41 -0800 Subject: [PATCH 1/8] xfs_scrub: move FITRIM to phase 8 From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170405001063.1798752.11685058643451631355.stgit@frogsfrogsfrogs> In-Reply-To: <170405001045.1798752.4380751003208751209.stgit@frogsfrogsfrogs> References: <170405001045.1798752.4380751003208751209.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Issuing discards against the filesystem should be the *last* thing that xfs_scrub does, after everything else has been checked, repaired, and found to be clean. If we can't satisfy all those conditions, we have no business telling the storage to discard itself. Signed-off-by: Darrick J. Wong --- scrub/Makefile | 1 + scrub/phase4.c | 30 ++---------------------- scrub/phase8.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++ scrub/xfs_scrub.h | 3 ++ 4 files changed, 73 insertions(+), 27 deletions(-) create mode 100644 scrub/phase8.c diff --git a/scrub/Makefile b/scrub/Makefile index 24af9716120..af94cf0d684 100644 --- a/scrub/Makefile +++ b/scrub/Makefile @@ -65,6 +65,7 @@ phase4.c \ phase5.c \ phase6.c \ phase7.c \ +phase8.c \ progress.c \ read_verify.c \ repair.c \ diff --git a/scrub/phase4.c b/scrub/phase4.c index 9080d38818f..451101811c9 100644 --- a/scrub/phase4.c +++ b/scrub/phase4.c @@ -227,16 +227,6 @@ repair_everything( return action_list_process(ctx, ctx->fs_repair_list, XRM_FINAL_WARNING); } -/* Trim the unused areas of the filesystem if the caller asked us to. */ -static void -trim_filesystem( - struct scrub_ctx *ctx) -{ - if (want_fstrim) - fstrim(ctx); - progress_add(1); -} - /* Fix everything that needs fixing. */ int phase4_func( @@ -248,7 +238,7 @@ phase4_func( if (action_list_empty(ctx->fs_repair_list) && action_list_empty(ctx->file_repair_list)) - goto maybe_trim; + return 0; /* * Check the resource usage counters early. Normally we do this during @@ -281,20 +271,7 @@ phase4_func( if (ret) return ret; - ret = repair_everything(ctx); - if (ret) - return ret; - - /* - * If errors remain on the filesystem, do not trim anything. We don't - * have any threads running, so it's ok to skip the ctx lock here. - */ - if (ctx->corruptions_found || ctx->unfixable_errors != 0) - return 0; - -maybe_trim: - trim_filesystem(ctx); - return 0; + return repair_everything(ctx); } /* Estimate how much work we're going to do. */ @@ -307,10 +284,9 @@ phase4_estimate( { unsigned long long need_fixing; - /* Everything on the repair list plus FSTRIM. */ + /* Everything on the repair lis. */ need_fixing = action_list_length(ctx->fs_repair_list) + action_list_length(ctx->file_repair_list); - need_fixing++; *items = need_fixing; *nr_threads = scrub_nproc(ctx) + 1; diff --git a/scrub/phase8.c b/scrub/phase8.c new file mode 100644 index 00000000000..07726b5b869 --- /dev/null +++ b/scrub/phase8.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include +#include +#include +#include +#include "list.h" +#include "libfrog/paths.h" +#include "libfrog/workqueue.h" +#include "xfs_scrub.h" +#include "common.h" +#include "progress.h" +#include "scrub.h" +#include "repair.h" +#include "vfs.h" +#include "atomic.h" + +/* Phase 8: Trim filesystem. */ + +/* Trim the unused areas of the filesystem if the caller asked us to. */ +static void +trim_filesystem( + struct scrub_ctx *ctx) +{ + fstrim(ctx); + progress_add(1); +} + +/* Trim the filesystem, if desired. */ +int +phase8_func( + struct scrub_ctx *ctx) +{ + if (action_list_empty(ctx->fs_repair_list) && + action_list_empty(ctx->file_repair_list)) + goto maybe_trim; + + /* + * If errors remain on the filesystem, do not trim anything. We don't + * have any threads running, so it's ok to skip the ctx lock here. + */ + if (ctx->corruptions_found || ctx->unfixable_errors != 0) + return 0; + +maybe_trim: + trim_filesystem(ctx); + return 0; +} + +/* Estimate how much work we're going to do. */ +int +phase8_estimate( + struct scrub_ctx *ctx, + uint64_t *items, + unsigned int *nr_threads, + int *rshift) +{ + *items = 1; + *nr_threads = 1; + *rshift = 0; + return 0; +} diff --git a/scrub/xfs_scrub.h b/scrub/xfs_scrub.h index ed86d0093db..6272a36879e 100644 --- a/scrub/xfs_scrub.h +++ b/scrub/xfs_scrub.h @@ -98,6 +98,7 @@ int phase4_func(struct scrub_ctx *ctx); int phase5_func(struct scrub_ctx *ctx); int phase6_func(struct scrub_ctx *ctx); int phase7_func(struct scrub_ctx *ctx); +int phase8_func(struct scrub_ctx *ctx); /* Progress estimator functions */ unsigned int scrub_estimate_ag_work(struct scrub_ctx *ctx); @@ -112,5 +113,7 @@ int phase5_estimate(struct scrub_ctx *ctx, uint64_t *items, unsigned int *nr_threads, int *rshift); int phase6_estimate(struct scrub_ctx *ctx, uint64_t *items, unsigned int *nr_threads, int *rshift); +int phase8_estimate(struct scrub_ctx *ctx, uint64_t *items, + unsigned int *nr_threads, int *rshift); #endif /* XFS_SCRUB_XFS_SCRUB_H_ */ From patchwork Sun Dec 31 22:48:57 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507961 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CB842C127 for ; Sun, 31 Dec 2023 22:48:57 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Wlpv6+sn" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 9E8D8C433C7; Sun, 31 Dec 2023 22:48:57 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704062937; bh=Afe50GMs9/MQc0iJ1IJai2Fzl91majUcxKGbTjeOjzs=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=Wlpv6+snzmus0kdfuC2drtJNTe7KiNfxtP9JzlHXcjjL0rhhhcLA9Rv5M5FmoEahH OeFOM8rPJBOwnIZHFhCq5ku8mw5PngC11LbFvJs/5DyZeKMnVp+t9q/NIf1hRxS0tP Hbw6MZDsGZkEfrHWn2YAYkngk+SVob5mtBni3begTlc6yGNjurHWD+N+48v4PvoUQl cSQCCDxMEF7FSt4R1QR0bJ5ESIHqpiMj1aLG6h52GS2zfCj2hQKPPLCHyB+Bxdt67g 2LYxt4p0j5t47vT5SUDhKodfFvPBrjunuHAev08mzuQl7oN/FvBiEtSHqcxVJQ3Z5Y rA8j2qwGubkTg== Date: Sun, 31 Dec 2023 14:48:57 -0800 Subject: [PATCH 2/8] xfs_scrub: ignore phase 8 if the user disabled fstrim From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170405001076.1798752.12614566262144331017.stgit@frogsfrogsfrogs> In-Reply-To: <170405001045.1798752.4380751003208751209.stgit@frogsfrogsfrogs> References: <170405001045.1798752.4380751003208751209.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong If the user told us to skip trimming the filesystem, don't run the phase at all. Signed-off-by: Darrick J. Wong --- scrub/xfs_scrub.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/scrub/xfs_scrub.c b/scrub/xfs_scrub.c index aa68c23c62e..23b2e03865b 100644 --- a/scrub/xfs_scrub.c +++ b/scrub/xfs_scrub.c @@ -249,6 +249,7 @@ struct phase_rusage { /* Operations for each phase. */ #define DATASCAN_DUMMY_FN ((void *)1) #define REPAIR_DUMMY_FN ((void *)2) +#define FSTRIM_DUMMY_FN ((void *)3) struct phase_ops { char *descr; int (*fn)(struct scrub_ctx *ctx); @@ -429,6 +430,11 @@ run_scrub_phases( .fn = phase7_func, .must_run = true, }, + { + .descr = _("Trim filesystem storage."), + .fn = FSTRIM_DUMMY_FN, + .estimate_work = phase8_estimate, + }, { NULL }, @@ -449,6 +455,8 @@ run_scrub_phases( /* Turn on certain phases if user said to. */ if (sp->fn == DATASCAN_DUMMY_FN && scrub_data) { sp->fn = phase6_func; + } else if (sp->fn == FSTRIM_DUMMY_FN && want_fstrim) { + sp->fn = phase8_func; } else if (sp->fn == REPAIR_DUMMY_FN && ctx->mode == SCRUB_MODE_REPAIR) { sp->descr = _("Repair filesystem."); @@ -458,7 +466,8 @@ run_scrub_phases( /* Skip certain phases unless they're turned on. */ if (sp->fn == REPAIR_DUMMY_FN || - sp->fn == DATASCAN_DUMMY_FN) + sp->fn == DATASCAN_DUMMY_FN || + sp->fn == FSTRIM_DUMMY_FN) continue; /* Allow debug users to force a particular phase. */ From patchwork Sun Dec 31 22:49:12 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507962 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 5E559C127 for ; Sun, 31 Dec 2023 22:49:13 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="RyO/wh0M" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 2F661C433C7; Sun, 31 Dec 2023 22:49:13 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704062953; bh=wRGLf2dxL4EG0pyxBCmep/T8ot6o68RlT5IIMRrX2VI=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=RyO/wh0MWpgP0ehdiY0Uryzkmq0hZldx1UOkshhjUX4u1zLkRJzVsBKCH8ZPhlo0y caQwfru73DJVaeduiGQEbrn6JpEwkz5N0/hBlxFNxL6s9PLFEGqESzy2TXWvofi2J4 ZAMmUcYtdcnLi0i0sQ+5d3vrb/5BZGuD6vYYGNcml6iNXcWUC9u7MwwjqIKAY5UMcV JV3Umu32e1HxRN2tb/T4UmpZBGcNAUN9K75GmP3Pxck1UMJvduCbG0lfXUvP377MpM BkSze0Wh2wkjVIrz2HhGW360u6o6RXxxdPnrxgIo+G27NlP+BuMfMnc2AOODv/+UHB jH3inQVRs4eoQ== Date: Sun, 31 Dec 2023 14:49:12 -0800 Subject: [PATCH 3/8] xfs_scrub: collapse trim_filesystem From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170405001089.1798752.7539616554424103381.stgit@frogsfrogsfrogs> In-Reply-To: <170405001045.1798752.4380751003208751209.stgit@frogsfrogsfrogs> References: <170405001045.1798752.4380751003208751209.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Collapse this two-line helper into the main function since it's trivial. Signed-off-by: Darrick J. Wong --- scrub/phase8.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/scrub/phase8.c b/scrub/phase8.c index 07726b5b869..e577260a93d 100644 --- a/scrub/phase8.c +++ b/scrub/phase8.c @@ -21,15 +21,6 @@ /* Phase 8: Trim filesystem. */ -/* Trim the unused areas of the filesystem if the caller asked us to. */ -static void -trim_filesystem( - struct scrub_ctx *ctx) -{ - fstrim(ctx); - progress_add(1); -} - /* Trim the filesystem, if desired. */ int phase8_func( @@ -47,7 +38,8 @@ phase8_func( return 0; maybe_trim: - trim_filesystem(ctx); + fstrim(ctx); + progress_add(1); return 0; } From patchwork Sun Dec 31 22:49:28 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507963 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3E735C140 for ; Sun, 31 Dec 2023 22:49:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="YMrn6yVr" Received: by smtp.kernel.org (Postfix) with ESMTPSA id BAF59C433C7; Sun, 31 Dec 2023 22:49:28 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704062968; bh=jrFosdQWgW4PQM65ZOpOf1p4y0J3lPuPqfI5Es8bdbs=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=YMrn6yVrAaLjBGskP55EQcI2kiEH2a/6YDG2muHuaA8J/J2W8OpTCbgJPoJVhw5w1 woyMeG9MXzEsu9AeM1VTXfX7R04aRKCYtxwcbXTv2/x/zA/NEp1xQs5fWFe47SAWHP 1sPp+5iOrKO6VGGP9a9bTW1k4rqKTGjczmPbA+GIbFCWyVAxszd0bkh1Mwjct7c4Lu zofMDhMN9FOC0RMjj7CqVXgUfCguMomaDpiAAIDpZEII4NTdWi+VXKSSOxOK/u/J04 Uy2BKfHTa828vgUYdoQRJGrvxnIQmBEFQjw7zzMKJhjYMvEo4e4GhC5T/djRrZPP3k +zw7+85hkG7FA== Date: Sun, 31 Dec 2023 14:49:28 -0800 Subject: [PATCH 4/8] xfs_scrub: fix the work estimation for phase 8 From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170405001102.1798752.9143123897906517770.stgit@frogsfrogsfrogs> In-Reply-To: <170405001045.1798752.4380751003208751209.stgit@frogsfrogsfrogs> References: <170405001045.1798752.4380751003208751209.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong If there are latent errors on the filesystem, we aren't going to do any work during phase 8 and it makes no sense to add that into the work estimate for the progress bar. Signed-off-by: Darrick J. Wong --- scrub/phase8.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/scrub/phase8.c b/scrub/phase8.c index e577260a93d..dfe62e8d97b 100644 --- a/scrub/phase8.c +++ b/scrub/phase8.c @@ -21,23 +21,35 @@ /* Phase 8: Trim filesystem. */ -/* Trim the filesystem, if desired. */ -int -phase8_func( +static inline bool +fstrim_ok( struct scrub_ctx *ctx) { - if (action_list_empty(ctx->fs_repair_list) && - action_list_empty(ctx->file_repair_list)) - goto maybe_trim; - /* * If errors remain on the filesystem, do not trim anything. We don't * have any threads running, so it's ok to skip the ctx lock here. */ - if (ctx->corruptions_found || ctx->unfixable_errors != 0) + if (!action_list_empty(ctx->fs_repair_list)) + return false; + if (!action_list_empty(ctx->file_repair_list)) + return false; + + if (ctx->corruptions_found != 0) + return false; + if (ctx->unfixable_errors != 0) + return false; + + return true; +} + +/* Trim the filesystem, if desired. */ +int +phase8_func( + struct scrub_ctx *ctx) +{ + if (!fstrim_ok(ctx)) return 0; -maybe_trim: fstrim(ctx); progress_add(1); return 0; @@ -51,7 +63,11 @@ phase8_estimate( unsigned int *nr_threads, int *rshift) { - *items = 1; + *items = 0; + + if (fstrim_ok(ctx)) + *items = 1; + *nr_threads = 1; *rshift = 0; return 0; From patchwork Sun Dec 31 22:49:43 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507964 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E92AFC140 for ; Sun, 31 Dec 2023 22:49:44 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="BRdfxGVC" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5FBF5C433C7; Sun, 31 Dec 2023 22:49:44 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704062984; bh=ZRnDknJbezFFW/Hut6MV1aSUfthgI2IsQECqVn7bYPc=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=BRdfxGVCCLI+h2WjwMeWfoZVLPSpJhwS6Mi0LX0Iruhz/827cI5Zfd+Vx+Qp9+xXW 5Rc3l9uj5p3QtyUPQ4wpV7Y3Fql3GgH15kVtEhiOt4lz4R/DVVsHoY72ODAZj+83hw S+An8ttsGX5qXTnMBx9Zk2UVfaPJfGd3Ue2L2Ea/HwRgjsc1GxnGLWWZFGzaxjYKzK muE+sbl7aRyXkJxda8X8IRNFMpKISQ2rfboPASazBNODj+u2QjyEdkxyeHmcibhw3m ZGtzvLiq9atBUz2EmWZnUfw4RFwK7OnbHBsoJxw4YQ3x6atMjdUiJ153DcIj7SmD/v ykEqiDYUZcjBQ== Date: Sun, 31 Dec 2023 14:49:43 -0800 Subject: [PATCH 5/8] xfs_scrub: report FITRIM errors properly From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170405001115.1798752.4682959777961980500.stgit@frogsfrogsfrogs> In-Reply-To: <170405001045.1798752.4380751003208751209.stgit@frogsfrogsfrogs> References: <170405001045.1798752.4380751003208751209.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Move the error reporting for the FITRIM ioctl out of vfs.c and into phase8.c. This makes it so that IO errors encountered during trim are counted as runtime errors instead of being dropped silently. Signed-off-by: Darrick J. Wong --- scrub/phase8.c | 12 +++++++++++- scrub/vfs.c | 12 +++++++----- scrub/vfs.h | 2 +- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/scrub/phase8.c b/scrub/phase8.c index dfe62e8d97b..288800a76cf 100644 --- a/scrub/phase8.c +++ b/scrub/phase8.c @@ -47,10 +47,20 @@ int phase8_func( struct scrub_ctx *ctx) { + int error; + if (!fstrim_ok(ctx)) return 0; - fstrim(ctx); + error = fstrim(ctx); + if (error == EOPNOTSUPP) + return 0; + + if (error) { + str_liberror(ctx, error, _("fstrim")); + return error; + } + progress_add(1); return 0; } diff --git a/scrub/vfs.c b/scrub/vfs.c index 9e459d6243f..bcfd4f42ca8 100644 --- a/scrub/vfs.c +++ b/scrub/vfs.c @@ -296,15 +296,17 @@ struct fstrim_range { #endif /* Call FITRIM to trim all the unused space in a filesystem. */ -void +int fstrim( struct scrub_ctx *ctx) { struct fstrim_range range = {0}; - int error; range.len = ULLONG_MAX; - error = ioctl(ctx->mnt.fd, FITRIM, &range); - if (error && errno != EOPNOTSUPP && errno != ENOTTY) - perror(_("fstrim")); + if (ioctl(ctx->mnt.fd, FITRIM, &range) == 0) + return 0; + if (errno == EOPNOTSUPP || errno == ENOTTY) + return EOPNOTSUPP; + + return errno; } diff --git a/scrub/vfs.h b/scrub/vfs.h index 1ac41e5aac0..a8a4d72e290 100644 --- a/scrub/vfs.h +++ b/scrub/vfs.h @@ -24,6 +24,6 @@ typedef int (*scan_fs_tree_dirent_fn)(struct scrub_ctx *, const char *, int scan_fs_tree(struct scrub_ctx *ctx, scan_fs_tree_dir_fn dir_fn, scan_fs_tree_dirent_fn dirent_fn, void *arg); -void fstrim(struct scrub_ctx *ctx); +int fstrim(struct scrub_ctx *ctx); #endif /* XFS_SCRUB_VFS_H_ */ From patchwork Sun Dec 31 22:49:59 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507965 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 934F8C127 for ; Sun, 31 Dec 2023 22:50:00 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="qXKT3lKj" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 0A5F4C433C7; Sun, 31 Dec 2023 22:50:00 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704063000; bh=YEJeS1h2eZAOK8aADV2kYoZFWYBkRfFGe4tb11paMrs=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=qXKT3lKj/2Dmgc7LrU2DBKKtezTxlj5Ief5DzyUiMP/2Hb8MjJWGgr9WTF0p0noNm i3g+lDPEbm2ZQVntR5tUe/afDgV4RkZNsldmGgpiNZyPIBwMIDJ8KuG/O++ETmwUBU R8feFy4pzffRxzEqFF+Y4h4iT8pgH5udpmSK2isryWjeOicxlUIcOW3PI3TyWqOEaW sr7VElLGr+J+3N8fS11ceMIzv8O7LvkXJbyXHeVOq0CAE/G7MPZhw/YC8FwdRozpQW TLuUKcz5nW0S8xdo2oOtSnHQi3KzDPcKlZHmuutbJNwOaLCvonvOGvkcI9cuH2H/wL tVu1LV0SubfoQ== Date: Sun, 31 Dec 2023 14:49:59 -0800 Subject: [PATCH 6/8] xfs_scrub: don't call FITRIM after runtime errors From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170405001128.1798752.1623838175857601607.stgit@frogsfrogsfrogs> In-Reply-To: <170405001045.1798752.4380751003208751209.stgit@frogsfrogsfrogs> References: <170405001045.1798752.4380751003208751209.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Don't call FITRIM if there have been runtime errors -- we don't want to touch anything after any kind of unfixable problem. Signed-off-by: Darrick J. Wong --- scrub/phase8.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scrub/phase8.c b/scrub/phase8.c index 288800a76cf..75400c96859 100644 --- a/scrub/phase8.c +++ b/scrub/phase8.c @@ -39,6 +39,9 @@ fstrim_ok( if (ctx->unfixable_errors != 0) return false; + if (ctx->runtime_errors != 0) + return false; + return true; } From patchwork Sun Dec 31 22:50:15 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507966 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CA1C0C13B for ; Sun, 31 Dec 2023 22:50:15 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Ip4dtN4L" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 9AF75C433C7; Sun, 31 Dec 2023 22:50:15 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704063015; bh=0ht5OmFX3QmyILDP8MSE0UGuR7FaSBNu08HIuiCyZUU=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=Ip4dtN4LVcfd4sWpMfpcqYP4l0Jr9pWE0zkya+VGvPwZtAxapcdMODGoYvydzD5So H/9KfJXM7OJbAqueyhMPpD0NQ+ksIm/WHEScqsfptxw9YnFKvXGtZkxsaWgcxUaQ8O VKur5SMpVBtrE2ouOQIj5ftwZOoDk56Kb0Jk9ijmydozV7K6QL+Ci1Oc1DxdIhVNBl NiT7cCD5/aUzB2RMCTnOAaOkGXhDvc6Qcv+QmhlDM4JoO/G4MENxwjSa2pHSftYMFh lyy0xf9fqTEkb5vN1zn9EEaytJxXON2Yr6dRK3OyXAI19AbfuSvB2BxejB4CX4NPy/ cJl8ZaDjjXiOQ== Date: Sun, 31 Dec 2023 14:50:15 -0800 Subject: [PATCH 7/8] xfs_scrub: don't trim the first agbno of each AG for better performance From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170405001142.1798752.5565677389390007777.stgit@frogsfrogsfrogs> In-Reply-To: <170405001045.1798752.4380751003208751209.stgit@frogsfrogsfrogs> References: <170405001045.1798752.4380751003208751209.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong XFS issues discard IOs while holding the free space btree and the AGF buffers locked. If the discard IOs are slow or the free space is extremely fragmented, this can lead to long stalls for every other thread trying to access that AG. On a 10TB high performance flash storage device with a severely fragmented free space btree in every AG, this results in many threads tripping the hangcheck warnings while waiting for the AGF. This happens even after we've run fstrim a few times and waited for the nvme namespace utilization counters to stabilize. Strace for the entire 10TB looks like: ioctl(3, FITRIM, {start=0x0, len=10995116277760, minlen=0}) = 0 <686.209839> Reducing the size of the FITRIM requests to a single AG at a time produces lower times for each individual call, but even this isn't quite acceptable, because the lock hold times are still high enough to cause stall warnings: Strace for the first 4x 1TB AGs looks like (2): ioctl(3, FITRIM, {start=0x0, len=1099511627776, minlen=0}) = 0 <68.352033> ioctl(3, FITRIM, {start=0x10000000000, len=1099511627776, minlen=0}) = 0 <68.760323> ioctl(3, FITRIM, {start=0x20000000000, len=1099511627776, minlen=0}) = 0 <67.235226> ioctl(3, FITRIM, {start=0x30000000000, len=1099511627776, minlen=0}) = 0 <69.465744> The fstrim code has to synchronize discards with block allocations, so we must hold the AGF lock while issuing discard IOs. Breaking up the calls into smaller start/len segments ought to reduce the lock hold time and allow other threads a chance to make progress. Unfortunately, the current fstrim implementation handles this poorly because it walks the entire free space by length index (cntbt) and it's not clear if we can cycle the AGF periodically to reduce latency because there's no less-than btree lookup. The first solution I thought of was to limit latency by scanning parts of an AG at a time, but this doesn't solve the stalling problem when the free space is heavily fragmented because each sub-AG scan has to walk the entire cntbt to find free space that fits within the given range. In fact, this dramatically increases the runtime! Ultimately, I forked the kernel implementations -- for full AG fstrims, it still trims by length. However, for a sub-AG scan, it will walk the bnobt and perform the trims in block number order. Since the cursor has an obviously monotonically increasing value, it is easy to cycle the AGF periodically to allow other threads to do work. This implementation avoids the worst problems of the original code, though it lacks the desirable attribute of freeing the biggest chunks first. This second algorithm is what we want for xfs_scrub, which generally runs as a background service. Skip the first block of each AG to ensure that we get the sub-AG algorithm, Signed-off-by: Darrick J. Wong --- scrub/phase8.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++--------- scrub/vfs.c | 10 ++++++--- scrub/vfs.h | 2 +- 3 files changed, 61 insertions(+), 14 deletions(-) diff --git a/scrub/phase8.c b/scrub/phase8.c index 75400c96859..570083be9d8 100644 --- a/scrub/phase8.c +++ b/scrub/phase8.c @@ -45,29 +45,72 @@ fstrim_ok( return true; } -/* Trim the filesystem, if desired. */ -int -phase8_func( - struct scrub_ctx *ctx) +/* Trim a certain range of the filesystem. */ +static int +fstrim_fsblocks( + struct scrub_ctx *ctx, + uint64_t start_fsb, + uint64_t fsbcount) { + uint64_t start = cvt_off_fsb_to_b(&ctx->mnt, start_fsb); + uint64_t len = cvt_off_fsb_to_b(&ctx->mnt, fsbcount); int error; - if (!fstrim_ok(ctx)) - return 0; - - error = fstrim(ctx); + error = fstrim(ctx, start, len); if (error == EOPNOTSUPP) return 0; - if (error) { - str_liberror(ctx, error, _("fstrim")); + char descr[DESCR_BUFSZ]; + + snprintf(descr, sizeof(descr) - 1, + _("fstrim start 0x%llx len 0x%llx"), + (unsigned long long)start, + (unsigned long long)len); + str_liberror(ctx, error, descr); return error; } + return 0; +} + +/* Trim each AG on the data device. */ +static int +fstrim_datadev( + struct scrub_ctx *ctx) +{ + struct xfs_fsop_geom *geo = &ctx->mnt.fsgeom; + uint64_t fsbno; + int error; + + for (fsbno = 0; fsbno < geo->datablocks; fsbno += geo->agblocks) { + uint64_t fsbcount; + + /* + * Skip the first block of each AG to ensure that we get the + * partial-AG discard implementation, which cycles the AGF lock + * to prevent foreground threads from stalling. + */ + fsbcount = min(geo->datablocks - fsbno + 1, geo->agblocks); + error = fstrim_fsblocks(ctx, fsbno + 1, fsbcount); + if (error) + return error; + } + progress_add(1); return 0; } +/* Trim the filesystem, if desired. */ +int +phase8_func( + struct scrub_ctx *ctx) +{ + if (!fstrim_ok(ctx)) + return 0; + + return fstrim_datadev(ctx); +} + /* Estimate how much work we're going to do. */ int phase8_estimate( diff --git a/scrub/vfs.c b/scrub/vfs.c index bcfd4f42ca8..cc958ba9438 100644 --- a/scrub/vfs.c +++ b/scrub/vfs.c @@ -298,11 +298,15 @@ struct fstrim_range { /* Call FITRIM to trim all the unused space in a filesystem. */ int fstrim( - struct scrub_ctx *ctx) + struct scrub_ctx *ctx, + uint64_t start, + uint64_t len) { - struct fstrim_range range = {0}; + struct fstrim_range range = { + .start = start, + .len = len, + }; - range.len = ULLONG_MAX; if (ioctl(ctx->mnt.fd, FITRIM, &range) == 0) return 0; if (errno == EOPNOTSUPP || errno == ENOTTY) diff --git a/scrub/vfs.h b/scrub/vfs.h index a8a4d72e290..1af8d80d1de 100644 --- a/scrub/vfs.h +++ b/scrub/vfs.h @@ -24,6 +24,6 @@ typedef int (*scan_fs_tree_dirent_fn)(struct scrub_ctx *, const char *, int scan_fs_tree(struct scrub_ctx *ctx, scan_fs_tree_dir_fn dir_fn, scan_fs_tree_dirent_fn dirent_fn, void *arg); -int fstrim(struct scrub_ctx *ctx); +int fstrim(struct scrub_ctx *ctx, uint64_t start, uint64_t len); #endif /* XFS_SCRUB_VFS_H_ */ From patchwork Sun Dec 31 22:50:30 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507967 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BDE07C13B for ; Sun, 31 Dec 2023 22:50:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="VqkMWEEc" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 3B12BC433C8; Sun, 31 Dec 2023 22:50:31 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704063031; bh=kNOJPAybs4rduIY4cRV6niX6HFC1S42BbrEyJTtlhZo=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=VqkMWEEcxqsUQ29Pbpi79RcCl/ACkR5WG7SgjWGmIeIjFOurbtIWeOAg1R8NuRifi Xfujm0FG++u+yIKMm4cYSGqFCuQYSLyRQ8uIjcGsvtp+1czZbn+Fj0+KILZTNI+YOx eKUynYfGVr8W0oHHrPCVJvWligda7HuLEOUjX1/x4eZ1F+uhGRJis+L+hTY3VtQZ35 j8dDTzi0NJlC82fuzG3zciD/gJ0TSGb42zO7PbDNtYI3sBxfMx03xpTyZCp+5ONLx7 L8cHWtDxGLzIZ+yAltY96H0eiPl+e/ffxETDLXJX8X0Lv66xRP+5tFPmpIJW/nh2iY WJUCphyNLrxpw== Date: Sun, 31 Dec 2023 14:50:30 -0800 Subject: [PATCH 8/8] xfs_scrub: improve progress meter for phase 8 fstrimming From: "Darrick J. Wong" To: djwong@kernel.org, cem@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170405001155.1798752.5966040935670328891.stgit@frogsfrogsfrogs> In-Reply-To: <170405001045.1798752.4380751003208751209.stgit@frogsfrogsfrogs> References: <170405001045.1798752.4380751003208751209.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Currently, progress reporting in phase 8 is awful, because we stall at 0% until jumping to 100%. Since we're now performing sub-AG fstrim calls to limit the latency impacts to the rest of the system, we might as well limit the FSTRIM scan size so that we can report status updates to the user more regularly. Doing so also facilitates CPU usage control during phase 8. Signed-off-by: Darrick J. Wong --- scrub/phase8.c | 59 ++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/scrub/phase8.c b/scrub/phase8.c index 570083be9d8..f1a854a3e56 100644 --- a/scrub/phase8.c +++ b/scrub/phase8.c @@ -45,6 +45,13 @@ fstrim_ok( return true; } +/* + * Limit the amount of fstrim scanning that we let the kernel do in a single + * call so that we can implement decent progress reporting and CPU resource + * control. Pick a prime number of gigabytes for interest. + */ +#define FSTRIM_MAX_BYTES (11ULL << 30) + /* Trim a certain range of the filesystem. */ static int fstrim_fsblocks( @@ -56,18 +63,31 @@ fstrim_fsblocks( uint64_t len = cvt_off_fsb_to_b(&ctx->mnt, fsbcount); int error; - error = fstrim(ctx, start, len); - if (error == EOPNOTSUPP) - return 0; - if (error) { - char descr[DESCR_BUFSZ]; - - snprintf(descr, sizeof(descr) - 1, - _("fstrim start 0x%llx len 0x%llx"), - (unsigned long long)start, - (unsigned long long)len); - str_liberror(ctx, error, descr); - return error; + while (len > 0) { + uint64_t run; + + run = min(len, FSTRIM_MAX_BYTES); + + error = fstrim(ctx, start, run); + if (error == EOPNOTSUPP) { + /* Pretend we finished all the work. */ + progress_add(len); + return 0; + } + if (error) { + char descr[DESCR_BUFSZ]; + + snprintf(descr, sizeof(descr) - 1, + _("fstrim start 0x%llx run 0x%llx"), + (unsigned long long)start, + (unsigned long long)run); + str_liberror(ctx, error, descr); + return error; + } + + progress_add(run); + len -= run; + start += run; } return 0; @@ -90,13 +110,13 @@ fstrim_datadev( * partial-AG discard implementation, which cycles the AGF lock * to prevent foreground threads from stalling. */ + progress_add(geo->blocksize); fsbcount = min(geo->datablocks - fsbno + 1, geo->agblocks); error = fstrim_fsblocks(ctx, fsbno + 1, fsbcount); if (error) return error; } - progress_add(1); return 0; } @@ -119,12 +139,13 @@ phase8_estimate( unsigned int *nr_threads, int *rshift) { - *items = 0; - - if (fstrim_ok(ctx)) - *items = 1; - + if (fstrim_ok(ctx)) { + *items = cvt_off_fsb_to_b(&ctx->mnt, + ctx->mnt.fsgeom.datablocks); + } else { + *items = 0; + } *nr_threads = 1; - *rshift = 0; + *rshift = 30; /* GiB */ return 0; }