From patchwork Sun Dec 31 20:35:11 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507438 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B777ABA2B for ; Sun, 31 Dec 2023 20:35:12 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="lBMZHtYl" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 285A6C433C7; Sun, 31 Dec 2023 20:35:12 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704054912; bh=k7dBTG01g9t36WAJmN2IrVgGxemPZ8ZKDyxDjyYLjBE=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=lBMZHtYlaG3gNP5eZN+kTdZ0jdV+nlQ5fFcHfPJ/NZtlfVoB5V50P3NNTTEccIUFi zS1DsgU2/Mm0q21UWoKYY8jffhMC1vXJbikS11uy6A26b4o1pzteBTssSLImx3J2Ls TXKiQlcnpQSgxSV+QqUmFcW1L/4dgKYpfZAtbLs/3x8T2cn0vchEwJqEbl+fkAWUSG 7SM8A0tXCyiKlHmRzdSSpOw1V6SPW34gpGenEDgPtUd3VZf9jcRZDsBcNNV1nWX7lb UG0827qRi8NjXcmJrzqkOq8giDfgrVKwBEbSbQdA3a6EirAeKZ057Nz9gNV2bUdPAU ZLwz8lioWXSDg== Date: Sun, 31 Dec 2023 12:35:11 -0800 Subject: [PATCH 1/6] xfs: create a blob array data structure From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170404835229.1753315.13978723246161515244.stgit@frogsfrogsfrogs> In-Reply-To: <170404835198.1753315.999170762222938046.stgit@frogsfrogsfrogs> References: <170404835198.1753315.999170762222938046.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Create a simple 'blob array' data structure for storage of arbitrarily sized metadata objects that will be used to reconstruct metadata. For the intended usage (temporarily storing extended attribute names and values) we only have to support storing objects and retrieving them. Use the xfile abstraction to store the attribute information in memory that can be swapped out. Signed-off-by: Darrick J. Wong --- fs/xfs/Makefile | 1 fs/xfs/scrub/xfblob.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/xfblob.h | 24 ++++++++ 3 files changed, 176 insertions(+) create mode 100644 fs/xfs/scrub/xfblob.c create mode 100644 fs/xfs/scrub/xfblob.h diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 62e38f70c304b..72df9890edcf6 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -208,6 +208,7 @@ xfs-y += $(addprefix scrub/, \ repair.o \ rmap_repair.o \ tempfile.o \ + xfblob.o \ xfbtree.o \ ) diff --git a/fs/xfs/scrub/xfblob.c b/fs/xfs/scrub/xfblob.c new file mode 100644 index 0000000000000..216f9cb2965a7 --- /dev/null +++ b/fs/xfs/scrub/xfblob.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2021-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "scrub/scrub.h" +#include "scrub/xfile.h" +#include "scrub/xfarray.h" +#include "scrub/xfblob.h" + +/* + * XFS Blob Storage + * ================ + * Stores and retrieves blobs using an xfile. Objects are appended to the file + * and the offset is returned as a magic cookie for retrieval. + */ + +#define XB_KEY_MAGIC 0xABAADDAD +struct xb_key { + uint32_t xb_magic; /* XB_KEY_MAGIC */ + uint32_t xb_size; /* size of the blob, in bytes */ + loff_t xb_offset; /* byte offset of this key */ + /* blob comes after here */ +} __packed; + +/* Initialize a blob storage object. */ +int +xfblob_create( + const char *description, + struct xfblob **blobp) +{ + struct xfblob *blob; + struct xfile *xfile; + int error; + + error = xfile_create(description, 0, &xfile); + if (error) + return error; + + blob = kmalloc(sizeof(struct xfblob), XCHK_GFP_FLAGS); + if (!blob) { + error = -ENOMEM; + goto out_xfile; + } + + blob->xfile = xfile; + blob->last_offset = PAGE_SIZE; + + *blobp = blob; + return 0; + +out_xfile: + xfile_destroy(xfile); + return error; +} + +/* Destroy a blob storage object. */ +void +xfblob_destroy( + struct xfblob *blob) +{ + xfile_destroy(blob->xfile); + kfree(blob); +} + +/* Retrieve a blob. */ +int +xfblob_load( + struct xfblob *blob, + xfblob_cookie cookie, + void *ptr, + uint32_t size) +{ + struct xb_key key; + int error; + + error = xfile_obj_load(blob->xfile, &key, sizeof(key), cookie); + if (error) + return error; + + if (key.xb_magic != XB_KEY_MAGIC || key.xb_offset != cookie) { + ASSERT(0); + return -ENODATA; + } + if (size < key.xb_size) { + ASSERT(0); + return -EFBIG; + } + + return xfile_obj_load(blob->xfile, ptr, key.xb_size, + cookie + sizeof(key)); +} + +/* Store a blob. */ +int +xfblob_store( + struct xfblob *blob, + xfblob_cookie *cookie, + const void *ptr, + uint32_t size) +{ + struct xb_key key = { + .xb_offset = blob->last_offset, + .xb_magic = XB_KEY_MAGIC, + .xb_size = size, + }; + loff_t pos = blob->last_offset; + int error; + + error = xfile_obj_store(blob->xfile, &key, sizeof(key), pos); + if (error) + return error; + + pos += sizeof(key); + error = xfile_obj_store(blob->xfile, ptr, size, pos); + if (error) + goto out_err; + + *cookie = blob->last_offset; + blob->last_offset += sizeof(key) + size; + return 0; +out_err: + xfile_discard(blob->xfile, blob->last_offset, sizeof(key)); + return error; +} + +/* Free a blob. */ +int +xfblob_free( + struct xfblob *blob, + xfblob_cookie cookie) +{ + struct xb_key key; + int error; + + error = xfile_obj_load(blob->xfile, &key, sizeof(key), cookie); + if (error) + return error; + + if (key.xb_magic != XB_KEY_MAGIC || key.xb_offset != cookie) { + ASSERT(0); + return -ENODATA; + } + + xfile_discard(blob->xfile, cookie, sizeof(key) + key.xb_size); + return 0; +} diff --git a/fs/xfs/scrub/xfblob.h b/fs/xfs/scrub/xfblob.h new file mode 100644 index 0000000000000..bd98647407f1d --- /dev/null +++ b/fs/xfs/scrub/xfblob.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_SCRUB_XFBLOB_H__ +#define __XFS_SCRUB_XFBLOB_H__ + +struct xfblob { + struct xfile *xfile; + loff_t last_offset; +}; + +typedef loff_t xfblob_cookie; + +int xfblob_create(const char *descr, struct xfblob **blobp); +void xfblob_destroy(struct xfblob *blob); +int xfblob_load(struct xfblob *blob, xfblob_cookie cookie, void *ptr, + uint32_t size); +int xfblob_store(struct xfblob *blob, xfblob_cookie *cookie, const void *ptr, + uint32_t size); +int xfblob_free(struct xfblob *blob, xfblob_cookie cookie); + +#endif /* __XFS_SCRUB_XFBLOB_H__ */ From patchwork Sun Dec 31 20:35:27 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507439 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id EAFA7BA2E for ; Sun, 31 Dec 2023 20:35:27 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="hMHbweIN" Received: by smtp.kernel.org (Postfix) with ESMTPSA id B9B44C433C8; Sun, 31 Dec 2023 20:35:27 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704054927; bh=VdZDDDq+tgGs7GYXCjxVC+0FT8uaAnEEc55RtoaQP88=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=hMHbweINTMCIKHMFQqbZGubDHT6ZxCkpK+WDsLOHIxw+pRrsIyVNARxQJPrNZaqyu xgM/ibawr46+mxwtP/2twU0gA2iijs1LjWS5szRbocyU+ABg9oG5fXt9FjUUo9dRvX HInIiShNYO3hgM33CmO7AOJtbo8BQhghqDmZ+5r6HlcdqDRzDhBR3I6RdXo8Ym8WgV R/wfnIAABaajMcozgAwbDw+98bPvg4UWmfQ4yhaEpllAFZooMaAsyFXIBn/iYvuml3 CNSs5s1hObLrETpgt47ekdBaAworrSGofKsoHLwwKpNTZUjm20LRYY6FVLZdMQ8EWO oW6hQ8A88wkXw== Date: Sun, 31 Dec 2023 12:35:27 -0800 Subject: [PATCH 2/6] xfs: use atomic extent swapping to fix user file fork data From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170404835245.1753315.12607896932356278287.stgit@frogsfrogsfrogs> In-Reply-To: <170404835198.1753315.999170762222938046.stgit@frogsfrogsfrogs> References: <170404835198.1753315.999170762222938046.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Build on the code that was recently added to the temporary repair file code so that we can atomically switch the contents of any file fork, even if the fork is in local format. The upcoming functions to repair xattrs, directories, and symlinks will need that capability. Repair can lock out access to these user files by holding IOLOCK_EXCL on these user files. Therefore, it is safe to drop the ILOCK of both the file being repaired and the tempfile being used for staging, and cancel the scrub transaction. We do this so that we can reuse the resource estimation and transaction allocation functions used by a regular file exchange operation. Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_swapext.c | 2 fs/xfs/libxfs/xfs_swapext.h | 1 fs/xfs/scrub/tempfile.c | 203 +++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/tempfile.h | 3 + fs/xfs/scrub/tempswap.h | 2 5 files changed, 210 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_swapext.c b/fs/xfs/libxfs/xfs_swapext.c index 554da23a575eb..244ef3d8431fd 100644 --- a/fs/xfs/libxfs/xfs_swapext.c +++ b/fs/xfs/libxfs/xfs_swapext.c @@ -769,7 +769,7 @@ xfs_swapext_rmapbt_blocks( } /* Estimate the bmbt and rmapbt overhead required to exchange extents. */ -static int +int xfs_swapext_estimate_overhead( struct xfs_swapext_req *req) { diff --git a/fs/xfs/libxfs/xfs_swapext.h b/fs/xfs/libxfs/xfs_swapext.h index 37842a4ee9a6d..a4768eddc9c8c 100644 --- a/fs/xfs/libxfs/xfs_swapext.h +++ b/fs/xfs/libxfs/xfs_swapext.h @@ -200,6 +200,7 @@ unsigned int xfs_swapext_reflink_prep(const struct xfs_swapext_req *req); void xfs_swapext_reflink_finish(struct xfs_trans *tp, const struct xfs_swapext_req *req, unsigned int reflink_state); +int xfs_swapext_estimate_overhead(struct xfs_swapext_req *req); int xfs_swapext_estimate(struct xfs_swapext_req *req); extern struct kmem_cache *xfs_swapext_intent_cache; diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index a1736a3556a7d..f1726822e18f7 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -239,6 +239,28 @@ xrep_tempfile_iunlock( sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL; } +/* + * Begin the process of making changes to both the file being scrubbed and + * the temporary file by taking ILOCK_EXCL on both. + */ +void +xrep_tempfile_ilock_both( + struct xfs_scrub *sc) +{ + xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL); + sc->ilock_flags |= XFS_ILOCK_EXCL; + sc->temp_ilock_flags |= XFS_ILOCK_EXCL; +} + +/* Unlock ILOCK_EXCL on both files. */ +void +xrep_tempfile_iunlock_both( + struct xfs_scrub *sc) +{ + xrep_tempfile_iunlock(sc); + xchk_iunlock(sc, XFS_ILOCK_EXCL); +} + /* Release the temporary file. */ void xrep_tempfile_rele( @@ -526,6 +548,88 @@ xrep_tempswap_prep_request( return 0; } +/* + * Fill out the swapext resource estimation structures in preparation for + * swapping the contents of a metadata file that we've rebuilt in the temp + * file. Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files. + */ +STATIC int +xrep_tempswap_estimate( + struct xfs_scrub *sc, + struct xrep_tempswap *tx) +{ + struct xfs_swapext_req *req = &tx->req; + struct xfs_ifork *ifp; + struct xfs_ifork *tifp; + int state = 0; + + /* + * Deal with either fork being in local format. The swapext code only + * knows how to exchange block mappings for regular files, so we only + * have to know about local format for xattrs and directories. + */ + ifp = xfs_ifork_ptr(sc->ip, req->whichfork); + if (ifp->if_format == XFS_DINODE_FMT_LOCAL) + state |= 1; + + tifp = xfs_ifork_ptr(sc->tempip, req->whichfork); + if (tifp->if_format == XFS_DINODE_FMT_LOCAL) + state |= 2; + + switch (state) { + case 0: + /* Both files have mapped extents; use the regular estimate. */ + return xfs_xchg_range_estimate(req); + case 1: + /* + * The file being repaired is in local format, but the temp + * file has mapped extents. To perform the swap, the file + * being repaired must have its shorform data converted to a + * fsblock, and the fork changed to extents format. We need + * one resblk for the conversion; the number of exchanges is + * (worst case) the temporary file's extent count plus the + * block we converted. + */ + req->ip1_bcount = sc->tempip->i_nblocks; + req->ip2_bcount = 1; + req->nr_exchanges = 1 + tifp->if_nextents; + req->resblks = 1; + break; + case 2: + /* + * The temporary file is in local format, but the file being + * repaired has mapped extents. To perform the swap, the temp + * file must have its shortform data converted to an fsblock, + * and the fork changed to extents format. We need one resblk + * for the conversion; the number of exchanges is (worst case) + * the extent count of the file being repaired plus the block + * we converted. + */ + req->ip1_bcount = 1; + req->ip2_bcount = sc->ip->i_nblocks; + req->nr_exchanges = 1 + ifp->if_nextents; + req->resblks = 1; + break; + case 3: + /* + * Both forks are in local format. To perform the swap, both + * files must have their shortform data converted to fsblocks, + * and both forks must be converted to extents format. We + * need two resblks for the two conversions, and the number of + * exchanges is 1 since there's only one block at fileoff 0. + * Presumably, the caller could not exchange the two inode fork + * areas directly. + */ + req->ip1_bcount = 1; + req->ip2_bcount = 1; + req->nr_exchanges = 1; + req->resblks = 2; + break; + } + + return xfs_swapext_estimate_overhead(req); +} + /* * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip * this if quota enforcement is disabled or if both inodes' dquots are the @@ -616,6 +720,55 @@ xrep_tempswap_trans_reserve( return xrep_tempswap_reserve_quota(sc, tx); } +/* + * Create a new transaction for a swap. + * + * This function fills out the swapext request and resource estimation + * structures in preparation for swapping the contents of a metadata file that + * has been rebuilt in the temp file. Next, it reserves space, takes + * ILOCK_EXCL of both inodes, joins them to the transaction and reserves quota + * for the transaction. + * + * The caller is responsible for dropping both ILOCKs when appropriate. + */ +int +xrep_tempswap_trans_alloc( + struct xfs_scrub *sc, + int whichfork, + struct xrep_tempswap *tx) +{ + unsigned int flags = 0; + int error; + + ASSERT(sc->tp == NULL); + + error = xrep_tempswap_prep_request(sc, whichfork, tx); + if (error) + return error; + + error = xrep_tempswap_estimate(sc, tx); + if (error) + return error; + + if (xfs_has_lazysbcount(sc->mp)) + flags |= XFS_TRANS_RES_FDBLKS; + + error = xrep_tempswap_grab_log_assist(sc); + if (error) + return error; + + error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate, + tx->req.resblks, 0, flags, &sc->tp); + if (error) + return error; + + sc->temp_ilock_flags |= XFS_ILOCK_EXCL; + sc->ilock_flags |= XFS_ILOCK_EXCL; + xfs_xchg_range_ilock(sc->tp, sc->ip, sc->tempip); + + return xrep_tempswap_reserve_quota(sc, tx); +} + /* * Swap forks between the file being repaired and the temporary file. Returns * with both inodes locked and joined to a clean scrub transaction. @@ -650,3 +803,53 @@ xrep_tempswap_contents( return 0; } + +/* + * Write local format data from one of the temporary file's forks into the same + * fork of file being repaired, and swap the file sizes, if appropriate. + * Caller must ensure that the file being repaired has enough fork space to + * hold all the bytes. + */ +void +xrep_tempfile_copyout_local( + struct xfs_scrub *sc, + int whichfork) +{ + struct xfs_ifork *temp_ifp; + struct xfs_ifork *ifp; + unsigned int ilog_flags = XFS_ILOG_CORE; + + temp_ifp = xfs_ifork_ptr(sc->tempip, whichfork); + ifp = xfs_ifork_ptr(sc->ip, whichfork); + + ASSERT(temp_ifp != NULL); + ASSERT(ifp != NULL); + ASSERT(temp_ifp->if_format == XFS_DINODE_FMT_LOCAL); + ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); + + switch (whichfork) { + case XFS_DATA_FORK: + ASSERT(sc->tempip->i_disk_size <= + xfs_inode_data_fork_size(sc->ip)); + break; + case XFS_ATTR_FORK: + ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff); + break; + default: + ASSERT(0); + return; + } + + /* Recreate @sc->ip's incore fork (ifp) with data from temp_ifp. */ + xfs_idestroy_fork(ifp); + xfs_init_local_fork(sc->ip, whichfork, temp_ifp->if_u1.if_data, + temp_ifp->if_bytes); + + if (whichfork == XFS_DATA_FORK) { + i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip))); + sc->ip->i_disk_size = sc->tempip->i_disk_size; + } + + ilog_flags |= xfs_ilog_fdata(whichfork); + xfs_trans_log_inode(sc->tp, sc->ip, ilog_flags); +} diff --git a/fs/xfs/scrub/tempfile.h b/fs/xfs/scrub/tempfile.h index 7980f9c4de552..d57e4f145a7c8 100644 --- a/fs/xfs/scrub/tempfile.h +++ b/fs/xfs/scrub/tempfile.h @@ -17,6 +17,8 @@ void xrep_tempfile_iounlock(struct xfs_scrub *sc); void xrep_tempfile_ilock(struct xfs_scrub *sc); bool xrep_tempfile_ilock_nowait(struct xfs_scrub *sc); void xrep_tempfile_iunlock(struct xfs_scrub *sc); +void xrep_tempfile_iunlock_both(struct xfs_scrub *sc); +void xrep_tempfile_ilock_both(struct xfs_scrub *sc); int xrep_tempfile_prealloc(struct xfs_scrub *sc, xfs_fileoff_t off, xfs_filblks_t len); @@ -32,6 +34,7 @@ int xrep_tempfile_copyin(struct xfs_scrub *sc, xfs_fileoff_t off, int xrep_tempfile_set_isize(struct xfs_scrub *sc, unsigned long long isize); int xrep_tempfile_roll_trans(struct xfs_scrub *sc); +void xrep_tempfile_copyout_local(struct xfs_scrub *sc, int whichfork); #else static inline void xrep_tempfile_iolock_both(struct xfs_scrub *sc) { diff --git a/fs/xfs/scrub/tempswap.h b/fs/xfs/scrub/tempswap.h index e8f8a6e3c8861..83900eef8cfc5 100644 --- a/fs/xfs/scrub/tempswap.h +++ b/fs/xfs/scrub/tempswap.h @@ -14,6 +14,8 @@ struct xrep_tempswap { int xrep_tempswap_grab_log_assist(struct xfs_scrub *sc); int xrep_tempswap_trans_reserve(struct xfs_scrub *sc, int whichfork, struct xrep_tempswap *ti); +int xrep_tempswap_trans_alloc(struct xfs_scrub *sc, int whichfork, + struct xrep_tempswap *ti); int xrep_tempswap_contents(struct xfs_scrub *sc, struct xrep_tempswap *ti); #endif /* CONFIG_XFS_ONLINE_REPAIR */ From patchwork Sun Dec 31 20:35:42 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507440 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id AEC7FBA2B for ; Sun, 31 Dec 2023 20:35:43 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="BIDW5f+Q" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6A9E0C433C7; Sun, 31 Dec 2023 20:35:43 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704054943; bh=sP53tHMcPuNOoSrYMb8rDmrmP7g+h9ovqRxSqiubDRE=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=BIDW5f+Q1xbwluSxHltkTRlCxhxjg4TK5IsG3FWZmTfON7NsmlBnrJxGZAw28YL3c F9kZgX5aJ5sqqEdGQBT3Whzr9z3KMzNbBCPzFMpV7RjaP3XdmelH0wsAihnkDTQtr4 2KTUxJM93txKxcQn8EzGVDZdvXG6xfp23awPRMAbbgB9RyZU/a3uyPVq/M6/v5B+d3 wAQywfIqd8WLxQxG7hM/LgLdLe1yvGFkYAHyxUHzPoayvd5eShMyIdt9lfyfU1tvdn B2CDd/wW3sQlOClxGfNOyGmObXZ1qXdX2q5Ryf5y0eZ/nKxqg6feJl5lFo+0JC/cPo A2ByddVohbRvg== Date: Sun, 31 Dec 2023 12:35:42 -0800 Subject: [PATCH 3/6] xfs: repair extended attributes From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170404835262.1753315.12576056577179918983.stgit@frogsfrogsfrogs> In-Reply-To: <170404835198.1753315.999170762222938046.stgit@frogsfrogsfrogs> References: <170404835198.1753315.999170762222938046.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong If the extended attributes look bad, try to sift through the rubble to find whatever keys/values we can, stage a new attribute structure in a temporary file and use the atomic extent swapping mechanism to commit the results in bulk. Signed-off-by: Darrick J. Wong --- fs/xfs/Makefile | 1 fs/xfs/libxfs/xfs_attr.c | 2 fs/xfs/libxfs/xfs_attr.h | 2 fs/xfs/libxfs/xfs_da_format.h | 5 fs/xfs/scrub/attr.c | 20 + fs/xfs/scrub/attr.h | 7 fs/xfs/scrub/attr_repair.c | 1203 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/attr_repair.h | 11 fs/xfs/scrub/repair.c | 46 ++ fs/xfs/scrub/repair.h | 6 fs/xfs/scrub/scrub.c | 2 fs/xfs/scrub/trace.h | 83 +++ fs/xfs/scrub/xfarray.c | 17 + fs/xfs/scrub/xfarray.h | 2 fs/xfs/scrub/xfblob.c | 17 + fs/xfs/scrub/xfblob.h | 2 fs/xfs/scrub/xfile.h | 12 fs/xfs/xfs_buf.c | 3 fs/xfs/xfs_trace.h | 2 19 files changed, 1439 insertions(+), 4 deletions(-) create mode 100644 fs/xfs/scrub/attr_repair.c create mode 100644 fs/xfs/scrub/attr_repair.h diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 72df9890edcf6..9b227be3e28b6 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -194,6 +194,7 @@ ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y) xfs-y += $(addprefix scrub/, \ agheader_repair.o \ alloc_repair.o \ + attr_repair.o \ bmap_repair.o \ cow_repair.o \ fscounters_repair.o \ diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 1e94e933d7682..b002ddd5f05a2 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -1047,7 +1047,7 @@ xfs_attr_set( * External routines when attribute list is inside the inode *========================================================================*/ -static inline int xfs_attr_sf_totsize(struct xfs_inode *dp) +int xfs_attr_sf_totsize(struct xfs_inode *dp) { struct xfs_attr_shortform *sf; diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 81be9b3e40047..e4f55008552b4 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -618,4 +618,6 @@ extern struct kmem_cache *xfs_attr_intent_cache; int __init xfs_attr_intent_init_cache(void); void xfs_attr_intent_destroy_cache(void); +int xfs_attr_sf_totsize(struct xfs_inode *dp); + #endif /* __XFS_ATTR_H__ */ diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 44748f1640e53..0e1ada44f21ba 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -716,6 +716,11 @@ struct xfs_attr3_leafblock { #define XFS_ATTR_INCOMPLETE (1u << XFS_ATTR_INCOMPLETE_BIT) #define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE) +#define XFS_ATTR_NAMESPACE_STR \ + { XFS_ATTR_LOCAL, "local" }, \ + { XFS_ATTR_ROOT, "root" }, \ + { XFS_ATTR_SECURE, "secure" } + /* * Alignment for namelist and valuelist entries (since they are mixed * there can be only one alignment value) diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 40a59b24c209f..692e1b2837bbb 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -10,6 +10,7 @@ #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_log_format.h" +#include "xfs_trans.h" #include "xfs_inode.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" @@ -20,6 +21,7 @@ #include "scrub/common.h" #include "scrub/dabtree.h" #include "scrub/attr.h" +#include "scrub/repair.h" /* Free the buffers linked from the xattr buffer. */ static void @@ -35,6 +37,8 @@ xchk_xattr_buf_cleanup( kvfree(ab->value); ab->value = NULL; ab->value_sz = 0; + kvfree(ab->name); + ab->name = NULL; } /* @@ -65,7 +69,7 @@ xchk_xattr_want_freemap( * reallocating the buffer if necessary. Buffer contents are not preserved * across a reallocation. */ -static int +int xchk_setup_xattr_buf( struct xfs_scrub *sc, size_t value_size) @@ -95,6 +99,12 @@ xchk_setup_xattr_buf( return -ENOMEM; } + if (xchk_could_repair(sc)) { + ab->name = kvmalloc(XATTR_NAME_MAX + 1, XCHK_GFP_FLAGS); + if (!ab->name) + return -ENOMEM; + } + resize_value: if (ab->value_sz >= value_size) return 0; @@ -121,6 +131,12 @@ xchk_setup_xattr( { int error; + if (xchk_could_repair(sc)) { + error = xrep_setup_xattr(sc); + if (error) + return error; + } + /* * We failed to get memory while checking attrs, so this time try to * get all the memory we're ever going to need. Allocate the buffer @@ -247,7 +263,7 @@ xchk_xattr_listent( * Within a char, the lowest bit of the char represents the byte with * the smallest address */ -STATIC bool +bool xchk_xattr_set_map( struct xfs_scrub *sc, unsigned long *map, diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h index 48fd9402c4328..7db58af56646b 100644 --- a/fs/xfs/scrub/attr.h +++ b/fs/xfs/scrub/attr.h @@ -16,9 +16,16 @@ struct xchk_xattr_buf { /* Bitmap of free space in xattr leaf blocks. */ unsigned long *freemap; + /* Memory buffer used to hold salvaged xattr names. */ + unsigned char *name; + /* Memory buffer used to extract xattr values. */ void *value; size_t value_sz; }; +bool xchk_xattr_set_map(struct xfs_scrub *sc, unsigned long *map, + unsigned int start, unsigned int len); +int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size); + #endif /* __XFS_SCRUB_ATTR_H__ */ diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c new file mode 100644 index 0000000000000..9a88d46392626 --- /dev/null +++ b/fs/xfs/scrub/attr_repair.c @@ -0,0 +1,1203 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "xfs_attr_sf.h" +#include "xfs_attr_remote.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" +#include "xfs_swapext.h" +#include "xfs_xchgrange.h" +#include "xfs_acl.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/repair.h" +#include "scrub/tempfile.h" +#include "scrub/tempswap.h" +#include "scrub/xfile.h" +#include "scrub/xfarray.h" +#include "scrub/xfblob.h" +#include "scrub/attr.h" +#include "scrub/reap.h" +#include "scrub/attr_repair.h" + +/* + * Extended Attribute Repair + * ========================= + * + * We repair extended attributes by reading the attr leaf blocks looking for + * attributes entries that look salvageable (name passes verifiers, value can + * be retrieved, etc). Each extended attribute worth salvaging is stashed in + * memory, and the stashed entries are periodically replayed into a temporary + * file to constrain memory use. Batching the construction of the temporary + * extended attribute structure in this fashion reduces lock cycling of the + * file being repaired and the temporary file. + * + * When salvaging completes, the remaining stashed attributes are replayed to + * the temporary file. An atomic extent swap is used to commit the new xattr + * blocks to the file being repaired. This will disrupt attrmulti cursors. + */ + +struct xrep_xattr_key { + /* Cookie for retrieval of the xattr name. */ + xfblob_cookie name_cookie; + + /* Cookie for retrieval of the xattr value. */ + xfblob_cookie value_cookie; + + /* XFS_ATTR_* flags */ + int flags; + + /* Length of the value and name. */ + uint32_t valuelen; + uint16_t namelen; +}; + +/* + * Stash up to 8 pages of attrs in xattr_records/xattr_blobs before we write + * them to the temp file. + */ +#define XREP_XATTR_MAX_STASH_BYTES (PAGE_SIZE * 8) + +struct xrep_xattr { + struct xfs_scrub *sc; + + /* Information for swapping attr forks at the end. */ + struct xrep_tempswap tx; + + /* xattr keys */ + struct xfarray *xattr_records; + + /* xattr values */ + struct xfblob *xattr_blobs; + + /* Number of attributes that we are salvaging. */ + unsigned long long attrs_found; +}; + +/* Set up to recreate the extended attributes. */ +int +xrep_setup_xattr( + struct xfs_scrub *sc) +{ + return xrep_tempfile_create(sc, S_IFREG); +} + +/* + * Decide if we want to salvage this attribute. We don't bother with + * incomplete or oversized keys or values. The @value parameter can be null + * for remote attrs. + */ +STATIC int +xrep_xattr_want_salvage( + struct xrep_xattr *rx, + unsigned int attr_flags, + const void *name, + int namelen, + const void *value, + int valuelen) +{ + if (attr_flags & XFS_ATTR_INCOMPLETE) + return false; + if (namelen > XATTR_NAME_MAX || namelen <= 0) + return false; + if (valuelen > XATTR_SIZE_MAX || valuelen < 0) + return false; + return true; +} + +/* Allocate an in-core record to hold xattrs while we rebuild the xattr data. */ +STATIC int +xrep_xattr_salvage_key( + struct xrep_xattr *rx, + int flags, + unsigned char *name, + int namelen, + unsigned char *value, + int valuelen) +{ + struct xrep_xattr_key key = { + .valuelen = valuelen, + .flags = flags & XFS_ATTR_NSP_ONDISK_MASK, + }; + unsigned int i = 0; + int error = 0; + + if (xchk_should_terminate(rx->sc, &error)) + return error; + + /* + * Truncate the name to the first character that would trip namecheck. + * If we no longer have a name after that, ignore this attribute. + */ + while (i < namelen && name[i] != 0) + i++; + if (i == 0) + return 0; + key.namelen = i; + + trace_xrep_xattr_salvage_rec(rx->sc->ip, flags, name, key.namelen, + valuelen); + + error = xfblob_store(rx->xattr_blobs, &key.name_cookie, name, + key.namelen); + if (error) + return error; + + error = xfblob_store(rx->xattr_blobs, &key.value_cookie, value, + key.valuelen); + if (error) + return error; + + error = xfarray_append(rx->xattr_records, &key); + if (error) + return error; + + rx->attrs_found++; + return 0; +} + +/* + * Record a shortform extended attribute key & value for later reinsertion + * into the inode. + */ +STATIC int +xrep_xattr_salvage_sf_attr( + struct xrep_xattr *rx, + struct xfs_attr_shortform *sf, + struct xfs_attr_sf_entry *sfe) +{ + struct xfs_scrub *sc = rx->sc; + struct xchk_xattr_buf *ab = sc->buf; + unsigned char *name = sfe->nameval; + unsigned char *value = &sfe->nameval[sfe->namelen]; + + if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)name - (char *)sf, + sfe->namelen)) + return 0; + + if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)value - (char *)sf, + sfe->valuelen)) + return 0; + + if (!xrep_xattr_want_salvage(rx, sfe->flags, sfe->nameval, + sfe->namelen, value, sfe->valuelen)) + return 0; + + return xrep_xattr_salvage_key(rx, sfe->flags, sfe->nameval, + sfe->namelen, value, sfe->valuelen); +} + +/* + * Record a local format extended attribute key & value for later reinsertion + * into the inode. + */ +STATIC int +xrep_xattr_salvage_local_attr( + struct xrep_xattr *rx, + struct xfs_attr_leaf_entry *ent, + unsigned int nameidx, + const char *buf_end, + struct xfs_attr_leaf_name_local *lentry) +{ + struct xchk_xattr_buf *ab = rx->sc->buf; + unsigned char *value; + unsigned int valuelen; + unsigned int namesize; + + /* + * Decode the leaf local entry format. If something seems wrong, we + * junk the attribute. + */ + value = &lentry->nameval[lentry->namelen]; + valuelen = be16_to_cpu(lentry->valuelen); + namesize = xfs_attr_leaf_entsize_local(lentry->namelen, valuelen); + if ((char *)lentry + namesize > buf_end) + return 0; + if (!xrep_xattr_want_salvage(rx, ent->flags, lentry->nameval, + lentry->namelen, value, valuelen)) + return 0; + if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize)) + return 0; + + /* Try to save this attribute. */ + return xrep_xattr_salvage_key(rx, ent->flags, lentry->nameval, + lentry->namelen, value, valuelen); +} + +/* + * Record a remote format extended attribute key & value for later reinsertion + * into the inode. + */ +STATIC int +xrep_xattr_salvage_remote_attr( + struct xrep_xattr *rx, + struct xfs_attr_leaf_entry *ent, + unsigned int nameidx, + const char *buf_end, + struct xfs_attr_leaf_name_remote *rentry, + unsigned int ent_idx, + struct xfs_buf *leaf_bp) +{ + struct xfs_da_args args = { + .trans = rx->sc->tp, + .dp = rx->sc->ip, + .index = ent_idx, + .geo = rx->sc->mp->m_attr_geo, + .owner = rx->sc->ip->i_ino, + }; + struct xchk_xattr_buf *ab = rx->sc->buf; + unsigned int valuelen; + unsigned int namesize; + int error; + + /* + * Decode the leaf remote entry format. If something seems wrong, we + * junk the attribute. Note that we should never find a zero-length + * remote attribute value. + */ + valuelen = be32_to_cpu(rentry->valuelen); + namesize = xfs_attr_leaf_entsize_remote(rentry->namelen); + if ((char *)rentry + namesize > buf_end) + return 0; + if (valuelen == 0 || + !xrep_xattr_want_salvage(rx, ent->flags, rentry->name, + rentry->namelen, NULL, valuelen)) + return 0; + if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize)) + return 0; + + /* + * Enlarge the buffer (if needed) to hold the value that we're trying + * to salvage from the old extended attribute data. + */ + error = xchk_setup_xattr_buf(rx->sc, valuelen); + if (error == -ENOMEM) + error = -EDEADLOCK; + if (error) + return error; + + /* Look up the remote value and stash it for reconstruction. */ + args.valuelen = valuelen; + args.namelen = rentry->namelen; + args.name = rentry->name; + args.value = ab->value; + error = xfs_attr3_leaf_getvalue(leaf_bp, &args); + if (error || args.rmtblkno == 0) + goto err_free; + + error = xfs_attr_rmtval_get(&args); + if (error) + goto err_free; + + /* Try to save this attribute. */ + error = xrep_xattr_salvage_key(rx, ent->flags, rentry->name, + rentry->namelen, ab->value, valuelen); +err_free: + /* remote value was garbage, junk it */ + if (error == -EFSBADCRC || error == -EFSCORRUPTED) + error = 0; + return error; +} + +/* Extract every xattr key that we can from this attr fork block. */ +STATIC int +xrep_xattr_recover_leaf( + struct xrep_xattr *rx, + struct xfs_buf *bp) +{ + struct xfs_attr3_icleaf_hdr leafhdr; + struct xfs_scrub *sc = rx->sc; + struct xfs_mount *mp = sc->mp; + struct xfs_attr_leafblock *leaf; + struct xfs_attr_leaf_name_local *lentry; + struct xfs_attr_leaf_name_remote *rentry; + struct xfs_attr_leaf_entry *ent; + struct xfs_attr_leaf_entry *entries; + struct xchk_xattr_buf *ab = rx->sc->buf; + char *buf_end; + size_t off; + unsigned int nameidx; + unsigned int hdrsize; + int i; + int error = 0; + + bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize); + + /* Check the leaf header */ + leaf = bp->b_addr; + xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); + hdrsize = xfs_attr3_leaf_hdr_size(leaf); + xchk_xattr_set_map(sc, ab->usedmap, 0, hdrsize); + entries = xfs_attr3_leaf_entryp(leaf); + + buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize; + for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) { + if (xchk_should_terminate(sc, &error)) + return error; + + /* Skip key if it conflicts with something else? */ + off = (char *)ent - (char *)leaf; + if (!xchk_xattr_set_map(sc, ab->usedmap, off, + sizeof(xfs_attr_leaf_entry_t))) + continue; + + /* Check the name information. */ + nameidx = be16_to_cpu(ent->nameidx); + if (nameidx < leafhdr.firstused || + nameidx >= mp->m_attr_geo->blksize) + continue; + + if (ent->flags & XFS_ATTR_LOCAL) { + lentry = xfs_attr3_leaf_name_local(leaf, i); + error = xrep_xattr_salvage_local_attr(rx, ent, nameidx, + buf_end, lentry); + } else { + rentry = xfs_attr3_leaf_name_remote(leaf, i); + error = xrep_xattr_salvage_remote_attr(rx, ent, nameidx, + buf_end, rentry, i, bp); + } + if (error) + return error; + } + + return 0; +} + +/* Try to recover shortform attrs. */ +STATIC int +xrep_xattr_recover_sf( + struct xrep_xattr *rx) +{ + struct xfs_scrub *sc = rx->sc; + struct xchk_xattr_buf *ab = sc->buf; + struct xfs_attr_shortform *sf; + struct xfs_attr_sf_entry *sfe; + struct xfs_attr_sf_entry *next; + struct xfs_ifork *ifp; + unsigned char *end; + int i; + int error = 0; + + ifp = xfs_ifork_ptr(rx->sc->ip, XFS_ATTR_FORK); + + bitmap_zero(ab->usedmap, ifp->if_bytes); + sf = (struct xfs_attr_shortform *)rx->sc->ip->i_af.if_u1.if_data; + end = (unsigned char *)ifp->if_u1.if_data + ifp->if_bytes; + xchk_xattr_set_map(sc, ab->usedmap, 0, sizeof(sf->hdr)); + + sfe = &sf->list[0]; + if ((unsigned char *)sfe > end) + return 0; + + for (i = 0; i < sf->hdr.count; i++) { + if (xchk_should_terminate(sc, &error)) + return error; + + next = xfs_attr_sf_nextentry(sfe); + if ((unsigned char *)next > end) + break; + + if (xchk_xattr_set_map(sc, ab->usedmap, + (char *)sfe - (char *)sf, + sizeof(struct xfs_attr_sf_entry))) { + /* + * No conflicts with the sf entry; let's save this + * attribute. + */ + error = xrep_xattr_salvage_sf_attr(rx, sf, sfe); + if (error) + return error; + } + + sfe = next; + } + + return 0; +} + +/* + * Try to return a buffer of xattr data for a given physical extent. + * + * Because the buffer cache get function complains if it finds a buffer + * matching the block number but not matching the length, we must be careful to + * look for incore buffers (up to the maximum length of a remote value) that + * could be hiding anywhere in the physical range. If we find an incore + * buffer, we can pass that to the caller. Optionally, read a single block and + * pass that back. + * + * Note the subtlety that remote attr value blocks for which there is no incore + * buffer will be passed to the callback one block at a time. These buffers + * will not have any ops attached and must be staled to prevent aliasing with + * multiblock buffers once we drop the ILOCK. + */ +STATIC int +xrep_xattr_find_buf( + struct xfs_mount *mp, + xfs_fsblock_t fsbno, + xfs_extlen_t max_len, + bool can_read, + struct xfs_buf **bpp) +{ + struct xrep_bufscan scan = { + .daddr = XFS_FSB_TO_DADDR(mp, fsbno), + .max_sectors = xrep_bufscan_max_sectors(mp, max_len), + .daddr_step = XFS_FSB_TO_BB(mp, 1), + }; + struct xfs_buf *bp; + + while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) { + *bpp = bp; + return 0; + } + + if (!can_read) { + *bpp = NULL; + return 0; + } + + return xfs_buf_read(mp->m_ddev_targp, scan.daddr, XFS_FSB_TO_BB(mp, 1), + XBF_TRYLOCK, bpp, NULL); +} + +/* + * Deal with a buffer that we found during our walk of the attr fork. + * + * Attribute leaf and node blocks are simple -- they're a single block, so we + * can walk them one at a time and we never have to worry about discontiguous + * multiblock buffers like we do for directories. + * + * Unfortunately, remote attr blocks add a lot of complexity here. Each disk + * block is totally self contained, in the sense that the v5 header provides no + * indication that there could be more data in the next block. The incore + * buffers can span multiple blocks, though they never cross extent records. + * However, they don't necessarily start or end on an extent record boundary. + * Therefore, we need a special buffer find function to walk the buffer cache + * for us. + * + * The caller must hold the ILOCK on the file being repaired. We use + * XBF_TRYLOCK here to skip any locked buffer on the assumption that we don't + * own the block and don't want to hang the system on a potentially garbage + * buffer. + */ +STATIC int +xrep_xattr_recover_block( + struct xrep_xattr *rx, + xfs_dablk_t dabno, + xfs_fsblock_t fsbno, + xfs_extlen_t max_len, + xfs_extlen_t *actual_len) +{ + struct xfs_da_blkinfo *info; + struct xfs_buf *bp; + int error; + + error = xrep_xattr_find_buf(rx->sc->mp, fsbno, max_len, true, &bp); + if (error) + return error; + info = bp->b_addr; + *actual_len = XFS_BB_TO_FSB(rx->sc->mp, bp->b_length); + + trace_xrep_xattr_recover_leafblock(rx->sc->ip, dabno, + be16_to_cpu(info->magic)); + + /* + * If the buffer has the right magic number for an attr leaf block and + * passes a structure check (we don't care about checksums), salvage + * as much as we can from the block. */ + if (info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC) && + xrep_buf_verify_struct(bp, &xfs_attr3_leaf_buf_ops) && + xfs_attr3_leaf_header_check(bp, rx->sc->ip->i_ino) == NULL) + error = xrep_xattr_recover_leaf(rx, bp); + + /* + * If the buffer didn't already have buffer ops set, it was read in by + * the _find_buf function and could very well be /part/ of a multiblock + * remote block. Mark it stale so that it doesn't hang around in + * memory to cause problems. + */ + if (bp->b_ops == NULL) + xfs_buf_stale(bp); + + xfs_buf_relse(bp); + return error; +} + +/* Insert one xattr key/value. */ +STATIC int +xrep_xattr_insert_rec( + struct xrep_xattr *rx, + const struct xrep_xattr_key *key) +{ + struct xfs_da_args args = { + .dp = rx->sc->tempip, + .attr_filter = key->flags, + .attr_flags = XATTR_CREATE, + .namelen = key->namelen, + .valuelen = key->valuelen, + .op_flags = XFS_DA_OP_NOTIME, + .owner = rx->sc->ip->i_ino, + }; + struct xchk_xattr_buf *ab = rx->sc->buf; + int error; + + /* + * Grab pointers to the scrub buffer so that we can use them to insert + * attrs into the temp file. + */ + args.name = ab->name; + args.value = ab->value; + + /* + * The attribute name is stored near the end of the in-core buffer, + * though we reserve one more byte to ensure null termination. + */ + ab->name[XATTR_NAME_MAX] = 0; + + error = xfblob_load(rx->xattr_blobs, key->name_cookie, ab->name, + key->namelen); + if (error) + return error; + + error = xfblob_free(rx->xattr_blobs, key->name_cookie); + if (error) + return error; + + error = xfblob_load(rx->xattr_blobs, key->value_cookie, args.value, + key->valuelen); + if (error) + return error; + + error = xfblob_free(rx->xattr_blobs, key->value_cookie); + if (error) + return error; + + ab->name[key->namelen] = 0; + + trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags, ab->name, + key->namelen, key->valuelen); + + /* + * xfs_attr_set creates and commits its own transaction. If the attr + * already exists, we'll just drop it during the rebuild. + */ + error = xfs_attr_set(&args); + if (error == -EEXIST) + error = 0; + + return error; +} + +/* + * Periodically flush salvaged attributes to the temporary file. This is done + * to reduce the memory requirements of the xattr rebuild because files can + * contain millions of attributes. + */ +STATIC int +xrep_xattr_flush_stashed( + struct xrep_xattr *rx) +{ + xfarray_idx_t array_cur; + int error; + + /* + * Entering this function, the scrub context has a reference to the + * inode being repaired, the temporary file, and a scrub transaction + * that we use during xattr salvaging to avoid livelocking if there + * are cycles in the xattr structures. We hold ILOCK_EXCL on both + * the inode being repaired, though it is not ijoined to the scrub + * transaction. + * + * To constrain kernel memory use, we occasionally flush salvaged + * xattrs from the xfarray and xfblob structures into the temporary + * file in preparation for swapping the xattr structures at the end. + * Updating the temporary file requires a transaction, so we commit the + * scrub transaction and drop the two ILOCKs so that xfs_attr_set can + * allocate whatever transaction it wants. + * + * We still hold IOLOCK_EXCL on the inode being repaired, which + * prevents anyone from modifying the damaged xattr data while we + * repair it. + */ + error = xrep_trans_commit(rx->sc); + if (error) + return error; + xchk_iunlock(rx->sc, XFS_ILOCK_EXCL); + + /* + * Take the IOLOCK of the temporary file while we modify xattrs. This + * isn't strictly required because the temporary file is never revealed + * to userspace, but we follow the same locking rules. We still hold + * sc->ip's IOLOCK. + */ + error = xrep_tempfile_iolock_polled(rx->sc); + if (error) + return error; + + /* Add all the salvaged attrs to the temporary file. */ + foreach_xfarray_idx(rx->xattr_records, array_cur) { + struct xrep_xattr_key key; + + error = xfarray_load(rx->xattr_records, array_cur, &key); + if (error) + return error; + + error = xrep_xattr_insert_rec(rx, &key); + if (error) + return error; + } + + /* Empty out both arrays now that we've added the entries. */ + xfarray_truncate(rx->xattr_records); + xfblob_truncate(rx->xattr_blobs); + + xrep_tempfile_iounlock(rx->sc); + + /* Recreate the salvage transaction and relock the inode. */ + error = xchk_trans_alloc(rx->sc, 0); + if (error) + return error; + xchk_ilock(rx->sc, XFS_ILOCK_EXCL); + return 0; +} + +/* Decide if we've stashed too much xattr data in memory. */ +static inline bool +xrep_xattr_want_flush_stashed( + struct xrep_xattr *rx) +{ + unsigned long long bytes; + + bytes = xfarray_bytes(rx->xattr_records) + + xfblob_bytes(rx->xattr_blobs); + return bytes > XREP_XATTR_MAX_STASH_BYTES; +} + +/* Extract as many attribute keys and values as we can. */ +STATIC int +xrep_xattr_recover( + struct xrep_xattr *rx) +{ + struct xfs_bmbt_irec got; + struct xfs_scrub *sc = rx->sc; + struct xfs_da_geometry *geo = sc->mp->m_attr_geo; + xfs_fileoff_t offset; + xfs_extlen_t len; + xfs_dablk_t dabno; + int nmap; + int error; + + /* + * Iterate each xattr leaf block in the attr fork to scan them for any + * attributes that we might salvage. + */ + for (offset = 0; + offset < XFS_MAX_FILEOFF; + offset = got.br_startoff + got.br_blockcount) { + nmap = 1; + error = xfs_bmapi_read(sc->ip, offset, XFS_MAX_FILEOFF - offset, + &got, &nmap, XFS_BMAPI_ATTRFORK); + if (error) + return error; + if (nmap != 1) + return -EFSCORRUPTED; + if (!xfs_bmap_is_written_extent(&got)) + continue; + + for (dabno = round_up(got.br_startoff, geo->fsbcount); + dabno < got.br_startoff + got.br_blockcount; + dabno += len) { + xfs_fileoff_t curr_offset = dabno - got.br_startoff; + xfs_extlen_t maxlen; + + if (xchk_should_terminate(rx->sc, &error)) + return error; + + maxlen = min_t(xfs_filblks_t, INT_MAX, + got.br_blockcount - curr_offset); + error = xrep_xattr_recover_block(rx, dabno, + curr_offset + got.br_startblock, + maxlen, &len); + if (error) + return error; + + if (xrep_xattr_want_flush_stashed(rx)) { + error = xrep_xattr_flush_stashed(rx); + if (error) + return error; + } + } + } + + return 0; +} + +/* + * Reset the extended attribute fork to a state where we can start re-adding + * the salvaged attributes. + */ +STATIC int +xrep_xattr_fork_remove( + struct xfs_scrub *sc, + struct xfs_inode *ip) +{ + struct xfs_attr_sf_hdr *hdr; + struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK); + + /* + * If the data fork is in btree format, we can't change di_forkoff + * because we could run afoul of the rule that the data fork isn't + * supposed to be in btree format if there's enough space in the fork + * that it could have used extents format. Instead, reinitialize the + * attr fork to have a shortform structure with zero attributes. + */ + if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) { + ifp->if_format = XFS_DINODE_FMT_LOCAL; + xfs_idata_realloc(ip, (int)sizeof(*hdr) - ifp->if_bytes, + XFS_ATTR_FORK); + hdr = (struct xfs_attr_sf_hdr *)ifp->if_u1.if_data; + hdr->count = 0; + hdr->totsize = cpu_to_be16(sizeof(*hdr)); + xfs_trans_log_inode(sc->tp, ip, + XFS_ILOG_CORE | XFS_ILOG_ADATA); + return 0; + } + + /* If we still have attr fork extents, something's wrong. */ + if (ifp->if_nextents != 0) { + struct xfs_iext_cursor icur; + struct xfs_bmbt_irec irec; + unsigned int i = 0; + + xfs_emerg(sc->mp, + "inode 0x%llx attr fork still has %llu attr extents, format %d?!", + ip->i_ino, ifp->if_nextents, ifp->if_format); + for_each_xfs_iext(ifp, &icur, &irec) { + xfs_err(sc->mp, + "[%u]: startoff %llu startblock %llu blockcount %llu state %u", + i++, irec.br_startoff, + irec.br_startblock, irec.br_blockcount, + irec.br_state); + } + ASSERT(0); + return -EFSCORRUPTED; + } + + xfs_attr_fork_remove(ip, sc->tp); + return 0; +} + +/* + * Free all the attribute fork blocks of the file being repaired and delete the + * fork. The caller must ILOCK the scrub file and join it to the transaction. + * This function returns with the inode joined to a clean transaction. + */ +int +xrep_xattr_reset_fork( + struct xfs_scrub *sc) +{ + int error; + + trace_xrep_xattr_reset_fork(sc->ip, sc->ip); + + /* Unmap all the attr blocks. */ + if (xfs_ifork_has_extents(&sc->ip->i_af)) { + error = xrep_reap_ifork(sc, sc->ip, XFS_ATTR_FORK); + if (error) + return error; + } + + error = xrep_xattr_fork_remove(sc, sc->ip); + if (error) + return error; + + return xfs_trans_roll_inode(&sc->tp, sc->ip); +} + +/* + * Free all the attribute fork blocks of the temporary file and delete the attr + * fork. The caller must ILOCK the tempfile and join it to the transaction. + * This function returns with the inode joined to a clean scrub transaction. + */ +STATIC int +xrep_xattr_reset_tempfile_fork( + struct xfs_scrub *sc) +{ + int error; + + trace_xrep_xattr_reset_fork(sc->ip, sc->tempip); + + /* + * Wipe out the attr fork of the temp file so that regular inode + * inactivation won't trip over the corrupt attr fork. + */ + if (xfs_ifork_has_extents(&sc->tempip->i_af)) { + error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK); + if (error) + return error; + } + + return xrep_xattr_fork_remove(sc, sc->tempip); +} + +/* + * Find all the extended attributes for this inode by scraping them out of the + * attribute key blocks by hand, and flushing them into the temp file. + * When we're done, free the staging memory before swapping the xattr + * structures to reduce memory usage. + */ +STATIC int +xrep_xattr_salvage_attributes( + struct xrep_xattr *rx) +{ + struct xfs_inode *ip = rx->sc->ip; + int error; + + /* Short format xattrs are easy! */ + if (rx->sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL) { + error = xrep_xattr_recover_sf(rx); + if (error) + return error; + + return xrep_xattr_flush_stashed(rx); + } + + /* + * For non-inline xattr structures, the salvage function scans the + * buffer cache looking for potential attr leaf blocks. The scan + * requires the ability to lock any buffer found and runs independently + * of any transaction <-> buffer item <-> buffer linkage. Therefore, + * roll the transaction to ensure there are no buffers joined. We hold + * the ILOCK independently of the transaction. + */ + error = xfs_trans_roll(&rx->sc->tp); + if (error) + return error; + + error = xfs_iread_extents(rx->sc->tp, ip, XFS_ATTR_FORK); + if (error) + return error; + + error = xrep_xattr_recover(rx); + if (error) + return error; + + return xrep_xattr_flush_stashed(rx); +} + +/* + * Prepare both inodes' attribute forks for extent swapping. Promote the + * tempfile from short format to leaf format, and if the file being repaired + * has a short format attr fork, turn it into an empty extent list. + */ +STATIC int +xrep_xattr_swap_prep( + struct xfs_scrub *sc, + bool temp_local, + bool ip_local) +{ + int error; + + /* + * If the tempfile's attributes are in shortform format, convert that + * to a single leaf extent so that we can use the atomic extent swap. + */ + if (temp_local) { + struct xfs_da_args args = { + .dp = sc->tempip, + .geo = sc->mp->m_attr_geo, + .whichfork = XFS_ATTR_FORK, + .trans = sc->tp, + .total = 1, + .owner = sc->ip->i_ino, + }; + + error = xfs_attr_shortform_to_leaf(&args); + if (error) + return error; + + /* + * Roll the deferred log items to get us back to a clean + * transaction. + */ + error = xfs_defer_finish(&sc->tp); + if (error) + return error; + } + + /* + * If the file being repaired had a shortform attribute fork, convert + * that to an empty extent list in preparation for the atomic extent + * swap. + */ + if (ip_local) { + struct xfs_ifork *ifp; + + ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK); + + xfs_idestroy_fork(ifp); + ifp->if_format = XFS_DINODE_FMT_EXTENTS; + ifp->if_nextents = 0; + ifp->if_bytes = 0; + ifp->if_u1.if_root = NULL; + ifp->if_height = 0; + + xfs_trans_log_inode(sc->tp, sc->ip, + XFS_ILOG_CORE | XFS_ILOG_ADATA); + } + + return 0; +} + +/* Swap the temporary file's attribute fork with the one being repaired. */ +STATIC int +xrep_xattr_swap( + struct xfs_scrub *sc, + struct xrep_tempswap *tx) +{ + bool ip_local, temp_local; + int error = 0; + + ip_local = sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL; + temp_local = sc->tempip->i_af.if_format == XFS_DINODE_FMT_LOCAL; + + /* + * If the both files have a local format attr fork and the rebuilt + * xattr data would fit in the repaired file's attr fork, just copy + * the contents from the tempfile and declare ourselves done. + */ + if (ip_local && temp_local) { + int forkoff; + int newsize; + + newsize = xfs_attr_sf_totsize(sc->tempip); + forkoff = xfs_attr_shortform_bytesfit(sc->ip, newsize); + if (forkoff > 0) { + sc->ip->i_forkoff = forkoff; + xrep_tempfile_copyout_local(sc, XFS_ATTR_FORK); + return 0; + } + } + + /* Otherwise, make sure both attr forks are in block-mapping mode. */ + error = xrep_xattr_swap_prep(sc, temp_local, ip_local); + if (error) + return error; + + return xrep_tempswap_contents(sc, tx); +} + +/* + * Swap the new extended attribute data (which we created in the tempfile) into + * the file being repaired. + */ +STATIC int +xrep_xattr_rebuild_tree( + struct xrep_xattr *rx) +{ + struct xfs_scrub *sc = rx->sc; + int error; + + /* + * If we didn't find any attributes to salvage, repair the file by + * zapping its attr fork. + */ + if (rx->attrs_found == 0) { + xfs_trans_ijoin(sc->tp, sc->ip, 0); + error = xrep_xattr_reset_fork(sc); + if (error) + return error; + + goto forget_acls; + } + + trace_xrep_xattr_rebuild_tree(sc->ip, sc->tempip); + + /* + * Commit the repair transaction and drop the ILOCKs so that we can use + * the atomic extent swap helper functions to compute the correct + * resource reservations. + * + * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent xattr + * modifications, but there's nothing to prevent userspace from reading + * the attributes until we're ready for the swap operation. Reads will + * return -EIO without shutting down the fs, so we're ok with that. + */ + error = xrep_trans_commit(sc); + if (error) + return error; + + xchk_iunlock(sc, XFS_ILOCK_EXCL); + + /* + * Take the IOLOCK on the temporary file so that we can run xattr + * operations with the same locks held as we would for a normal file. + * We still hold sc->ip's IOLOCK. + */ + error = xrep_tempfile_iolock_polled(rx->sc); + if (error) + return error; + + /* Allocate swapext transaction and lock both inodes. */ + error = xrep_tempswap_trans_alloc(rx->sc, XFS_ATTR_FORK, &rx->tx); + if (error) + return error; + + /* + * Exchange the blocks mapped by the tempfile's attr fork with the file + * being repaired. The old attr blocks will then be attached to the + * tempfile, so reap its attr fork. + */ + error = xrep_xattr_swap(sc, &rx->tx); + if (error) + return error; + + error = xrep_xattr_reset_tempfile_fork(sc); + if (error) + return error; + + /* + * Roll to get a transaction without any inodes joined to it. Then we + * can drop the tempfile's ILOCK and IOLOCK before doing more work on + * the scrub target file. + */ + error = xfs_trans_roll(&sc->tp); + if (error) + return error; + + xrep_tempfile_iunlock(sc); + xrep_tempfile_iounlock(sc); + +forget_acls: + /* Invalidate cached ACLs now that we've reloaded all the xattrs. */ + xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_FILE); + xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_DEFAULT); + return 0; +} + +/* Tear down all the incore scan stuff we created. */ +STATIC void +xrep_xattr_teardown( + struct xrep_xattr *rx) +{ + xfblob_destroy(rx->xattr_blobs); + xfarray_destroy(rx->xattr_records); + kfree(rx); +} + +/* Set up the filesystem scan so we can regenerate extended attributes. */ +STATIC int +xrep_xattr_setup_scan( + struct xfs_scrub *sc, + struct xrep_xattr **rxp) +{ + struct xrep_xattr *rx; + char *descr; + int max_len; + int error; + + rx = kzalloc(sizeof(struct xrep_xattr), XCHK_GFP_FLAGS); + if (!rx) + return -ENOMEM; + rx->sc = sc; + + /* + * Allocate enough memory to handle loading local attr values from the + * xfblob data while flushing stashed attrs to the temporary file. + * We only realloc the buffer when salvaging remote attr values. + */ + max_len = xfs_attr_leaf_entsize_local_max(sc->mp->m_attr_geo->blksize); + error = xchk_setup_xattr_buf(rx->sc, max_len); + if (error == -ENOMEM) + error = -EDEADLOCK; + if (error) + goto out_rx; + + /* Set up some staging for salvaged attribute keys and values */ + descr = xchk_xfile_ino_descr(sc, "xattr keys"); + error = xfarray_create(descr, 0, sizeof(struct xrep_xattr_key), + &rx->xattr_records); + kfree(descr); + if (error) + goto out_rx; + + descr = xchk_xfile_ino_descr(sc, "xattr names"); + error = xfblob_create(descr, &rx->xattr_blobs); + kfree(descr); + if (error) + goto out_keys; + + *rxp = rx; + return 0; +out_keys: + xfarray_destroy(rx->xattr_records); +out_rx: + kfree(rx); + return error; +} + +/* + * Repair the extended attribute metadata. + * + * XXX: Remote attribute value buffers encompass the entire (up to 64k) buffer. + * The buffer cache in XFS can't handle aliased multiblock buffers, so this + * might misbehave if the attr fork is crosslinked with other filesystem + * metadata. + */ +int +xrep_xattr( + struct xfs_scrub *sc) +{ + struct xrep_xattr *rx = NULL; + int error; + + if (!xfs_inode_hasattr(sc->ip)) + return -ENOENT; + + /* The rmapbt is required to reap the old attr fork. */ + if (!xfs_has_rmapbt(sc->mp)) + return -EOPNOTSUPP; + + error = xrep_xattr_setup_scan(sc, &rx); + if (error) + return error; + + ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL); + + error = xrep_xattr_salvage_attributes(rx); + if (error) + goto out_scan; + + /* Last chance to abort before we start committing fixes. */ + if (xchk_should_terminate(sc, &error)) + goto out_scan; + + error = xrep_xattr_rebuild_tree(rx); + if (error) + goto out_scan; + +out_scan: + xrep_xattr_teardown(rx); + return error; +} diff --git a/fs/xfs/scrub/attr_repair.h b/fs/xfs/scrub/attr_repair.h new file mode 100644 index 0000000000000..0a9ffa7cfa906 --- /dev/null +++ b/fs/xfs/scrub/attr_repair.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_SCRUB_ATTR_REPAIR_H__ +#define __XFS_SCRUB_ATTR_REPAIR_H__ + +int xrep_xattr_reset_fork(struct xfs_scrub *sc); + +#endif /* __XFS_SCRUB_ATTR_REPAIR_H__ */ diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 6490f064e091f..83b7aa48dec19 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -32,6 +32,9 @@ #include "xfs_reflink.h" #include "xfs_health.h" #include "xfs_buf_xfile.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_attr.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -39,6 +42,7 @@ #include "scrub/bitmap.h" #include "scrub/stats.h" #include "scrub/xfile.h" +#include "scrub/attr_repair.h" /* * Attempt to repair some metadata, if the metadata is corrupt and userspace @@ -1136,6 +1140,17 @@ xrep_metadata_inode_forks( return error; } + /* Clear the attr forks since metadata shouldn't have that. */ + if (xfs_inode_hasattr(sc->ip)) { + if (!dirty) { + dirty = true; + xfs_trans_ijoin(sc->tp, sc->ip, 0); + } + error = xrep_xattr_reset_fork(sc); + if (error) + return error; + } + /* * If we modified the inode, roll the transaction but don't rejoin the * inode to the new transaction because xrep_bmap_data can do that. @@ -1201,3 +1216,34 @@ xrep_trans_cancel_hook_dummy( current->journal_info = *cookiep; *cookiep = NULL; } + +/* + * See if this buffer can pass the given ->verify_struct() function. + * + * If the buffer already has ops attached and they're not the ones that were + * passed in, we reject the buffer. Otherwise, we perform the structure test + * (note that we do not check CRCs) and return the outcome of the test. The + * buffer ops and error state are left unchanged. + */ +bool +xrep_buf_verify_struct( + struct xfs_buf *bp, + const struct xfs_buf_ops *ops) +{ + const struct xfs_buf_ops *old_ops = bp->b_ops; + xfs_failaddr_t fa; + int old_error; + + if (old_ops) { + if (old_ops != ops) + return false; + } + + old_error = bp->b_error; + bp->b_ops = ops; + fa = bp->b_ops->verify_struct(bp); + bp->b_ops = old_ops; + bp->b_error = old_error; + + return fa == NULL; +} diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 06125d0a2c602..2bc5dd18d46f4 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -90,6 +90,7 @@ int xrep_bmap(struct xfs_scrub *sc, int whichfork, bool allow_unwritten); int xrep_metadata_inode_forks(struct xfs_scrub *sc); int xrep_setup_ag_rmapbt(struct xfs_scrub *sc); int xrep_setup_ag_refcountbt(struct xfs_scrub *sc); +int xrep_setup_xattr(struct xfs_scrub *sc); /* Repair setup functions */ int xrep_setup_ag_allocbt(struct xfs_scrub *sc); @@ -123,6 +124,7 @@ int xrep_bmap_attr(struct xfs_scrub *sc); int xrep_bmap_cow(struct xfs_scrub *sc); int xrep_nlinks(struct xfs_scrub *sc); int xrep_fscounters(struct xfs_scrub *sc); +int xrep_xattr(struct xfs_scrub *sc); #ifdef CONFIG_XFS_RT int xrep_rtbitmap(struct xfs_scrub *sc); @@ -147,6 +149,8 @@ int xrep_trans_alloc_hook_dummy(struct xfs_mount *mp, void **cookiep, struct xfs_trans **tpp); void xrep_trans_cancel_hook_dummy(void **cookiep, struct xfs_trans *tp); +bool xrep_buf_verify_struct(struct xfs_buf *bp, const struct xfs_buf_ops *ops); + #else #define xrep_ino_dqattach(sc) (0) @@ -190,6 +194,7 @@ xrep_setup_nothing( #define xrep_setup_ag_allocbt xrep_setup_nothing #define xrep_setup_ag_rmapbt xrep_setup_nothing #define xrep_setup_ag_refcountbt xrep_setup_nothing +#define xrep_setup_xattr xrep_setup_nothing #define xrep_setup_inode(sc, imap) ((void)0) @@ -215,6 +220,7 @@ xrep_setup_nothing( #define xrep_nlinks xrep_notsupported #define xrep_fscounters xrep_notsupported #define xrep_rtsummary xrep_notsupported +#define xrep_xattr xrep_notsupported #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 9af91874e58b9..0b8fdd62055fe 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -334,7 +334,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .type = ST_INODE, .setup = xchk_setup_xattr, .scrub = xchk_xattr, - .repair = xrep_notsupported, + .repair = xrep_xattr, }, [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */ .type = ST_INODE, diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 1f06c1ace5902..ba75a56b2e2de 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -2497,6 +2497,89 @@ TRACE_EVENT(xreap_bmapi_binval_scan, __entry->scan_blocks) ); +TRACE_EVENT(xrep_xattr_recover_leafblock, + TP_PROTO(struct xfs_inode *ip, xfs_dablk_t dabno, uint16_t magic), + TP_ARGS(ip, dabno, magic), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_dablk_t, dabno) + __field(uint16_t, magic) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->dabno = dabno; + __entry->magic = magic; + ), + TP_printk("dev %d:%d ino 0x%llx dablk 0x%x magic 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->dabno, + __entry->magic) +); + +DECLARE_EVENT_CLASS(xrep_xattr_salvage_class, + TP_PROTO(struct xfs_inode *ip, unsigned int flags, char *name, + unsigned int namelen, unsigned int valuelen), + TP_ARGS(ip, flags, name, namelen, valuelen), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned int, flags) + __field(unsigned int, namelen) + __dynamic_array(char, name, namelen) + __field(unsigned int, valuelen) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->flags = flags; + __entry->namelen = namelen; + memcpy(__get_str(name), name, namelen); + __entry->valuelen = valuelen; + ), + TP_printk("dev %d:%d ino 0x%llx flags %s name '%.*s' valuelen 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->flags, "|", XFS_ATTR_NAMESPACE_STR), + __entry->namelen, + __get_str(name), + __entry->valuelen) +); +#define DEFINE_XREP_XATTR_SALVAGE_EVENT(name) \ +DEFINE_EVENT(xrep_xattr_salvage_class, name, \ + TP_PROTO(struct xfs_inode *ip, unsigned int flags, char *name, \ + unsigned int namelen, unsigned int valuelen), \ + TP_ARGS(ip, flags, name, namelen, valuelen)) +DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_salvage_rec); +DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_insert_rec); + +TRACE_EVENT(xrep_xattr_class, + TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip), + TP_ARGS(ip, arg_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_ino_t, src_ino) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->src_ino = arg_ip->i_ino; + ), + TP_printk("dev %d:%d ino 0x%llx src 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->src_ino) +) +#define DEFINE_XREP_XATTR_EVENT(name) \ +DEFINE_EVENT(xrep_xattr_class, name, \ + TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip), \ + TP_ARGS(ip, arg_ip)) +DEFINE_XREP_XATTR_EVENT(xrep_xattr_rebuild_tree); +DEFINE_XREP_XATTR_EVENT(xrep_xattr_reset_fork); + #endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */ diff --git a/fs/xfs/scrub/xfarray.c b/fs/xfs/scrub/xfarray.c index f0f532c10a5ac..1bd690ac5d368 100644 --- a/fs/xfs/scrub/xfarray.c +++ b/fs/xfs/scrub/xfarray.c @@ -1081,3 +1081,20 @@ xfarray_sort( kvfree(si); return error; } + +/* How many bytes is this array consuming? */ +unsigned long long +xfarray_bytes( + struct xfarray *array) +{ + return xfile_bytes(array->xfile); +} + +/* Empty the entire array. */ +void +xfarray_truncate( + struct xfarray *array) +{ + xfile_discard(array->xfile, 0, MAX_LFS_FILESIZE); + array->nr = 0; +} diff --git a/fs/xfs/scrub/xfarray.h b/fs/xfs/scrub/xfarray.h index 0f1dac3aa1916..f06af7eb484ec 100644 --- a/fs/xfs/scrub/xfarray.h +++ b/fs/xfs/scrub/xfarray.h @@ -44,6 +44,8 @@ int xfarray_unset(struct xfarray *array, xfarray_idx_t idx); int xfarray_store(struct xfarray *array, xfarray_idx_t idx, const void *ptr); int xfarray_store_anywhere(struct xfarray *array, const void *ptr); bool xfarray_element_is_null(struct xfarray *array, const void *ptr); +void xfarray_truncate(struct xfarray *array); +unsigned long long xfarray_bytes(struct xfarray *array); /* * Load an array element, but zero the buffer if there's no data because we diff --git a/fs/xfs/scrub/xfblob.c b/fs/xfs/scrub/xfblob.c index 216f9cb2965a7..9c9ec90c69dbc 100644 --- a/fs/xfs/scrub/xfblob.c +++ b/fs/xfs/scrub/xfblob.c @@ -149,3 +149,20 @@ xfblob_free( xfile_discard(blob->xfile, cookie, sizeof(key) + key.xb_size); return 0; } + +/* How many bytes is this blob storage object consuming? */ +unsigned long long +xfblob_bytes( + struct xfblob *blob) +{ + return xfile_bytes(blob->xfile); +} + +/* Drop all the blobs. */ +void +xfblob_truncate( + struct xfblob *blob) +{ + xfile_discard(blob->xfile, PAGE_SIZE, MAX_LFS_FILESIZE - PAGE_SIZE); + blob->last_offset = PAGE_SIZE; +} diff --git a/fs/xfs/scrub/xfblob.h b/fs/xfs/scrub/xfblob.h index bd98647407f1d..78a67a06408f8 100644 --- a/fs/xfs/scrub/xfblob.h +++ b/fs/xfs/scrub/xfblob.h @@ -20,5 +20,7 @@ int xfblob_load(struct xfblob *blob, xfblob_cookie cookie, void *ptr, int xfblob_store(struct xfblob *blob, xfblob_cookie *cookie, const void *ptr, uint32_t size); int xfblob_free(struct xfblob *blob, xfblob_cookie cookie); +unsigned long long xfblob_bytes(struct xfblob *blob); +void xfblob_truncate(struct xfblob *blob); #endif /* __XFS_SCRUB_XFBLOB_H__ */ diff --git a/fs/xfs/scrub/xfile.h b/fs/xfs/scrub/xfile.h index 36061af2c1352..849f59da6a184 100644 --- a/fs/xfs/scrub/xfile.h +++ b/fs/xfs/scrub/xfile.h @@ -86,6 +86,18 @@ static inline loff_t xfile_size(struct xfile *xf) return i_size_read(file_inode(xf->file)); } +static inline unsigned long long xfile_bytes(struct xfile *xf) +{ + struct xfile_stat xs; + int ret; + + ret = xfile_stat(xf, &xs); + if (ret) + return 0; + + return xs.bytes; +} + /* file block (aka system page size) to basic block conversions. */ typedef unsigned long long xfileoff_t; #define XFB_BLOCKSIZE (PAGE_SIZE) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index a61ad61cb9136..b62518968e784 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -481,6 +481,9 @@ _xfs_buf_obj_cmp( * it stale has not yet committed. i.e. we are * reallocating a busy extent. Skip this buffer and * continue searching for an exact match. + * + * Note: If we're scanning for incore buffers to stale, don't + * complain if we find non-stale buffers. */ if (!(map->bm_flags & XBM_LIVESCAN)) ASSERT(bp->b_flags & XBF_STALE); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ee6f569c2f3d9..2c838b7471191 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -31,6 +31,8 @@ * pos: file offset, in bytes * bytecount: number of bytes * + * dablk: directory or xattr block offset, in filesystem blocks + * * disize: ondisk file size, in bytes * isize: incore file size, in bytes * From patchwork Sun Dec 31 20:35:58 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507441 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 48BA1BA2B for ; Sun, 31 Dec 2023 20:35:59 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="o7QDXkXL" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 15A60C433C7; Sun, 31 Dec 2023 20:35:59 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704054959; bh=AIweQzI7PaaFXT46tlCFXTl4wegxqlKajV19A7st1Sc=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=o7QDXkXL6vXQJUNffbG+otr4wTaL2a5S2+fsQt1F//cDJqtvsGMuDXlGeo06Q9D45 AAlEGD8SWtJ33JZKBNlP8V4WicQxXpSD9JECIUWanO3AFPxIfgVZZ6sfi2sm80aX9M qgvrdsTFih1RTXuJlJXhf0j3lMOGMm77+YKhqvtyjuZHLQ8+qXC6+TOmSFzwMHIC1O 52acUSTiwaaDqVwCmjkVz+b0juuiWkhODXtvfY4bz/7pFwskCnnVnKZ4VqDDOlQoph J237TDlYQ02q7svFR0D9O860TpLhTQGS92cHTbkpejzfU6hd0uo8HMxne3dKZQfMXH zItPZoUDSb9UQ== Date: Sun, 31 Dec 2023 12:35:58 -0800 Subject: [PATCH 4/6] xfs: scrub should set preen if attr leaf has holes From: "Darrick J. Wong" To: djwong@kernel.org Cc: Dave Chinner , linux-xfs@vger.kernel.org Message-ID: <170404835280.1753315.9611243551728505316.stgit@frogsfrogsfrogs> In-Reply-To: <170404835198.1753315.999170762222938046.stgit@frogsfrogsfrogs> References: <170404835198.1753315.999170762222938046.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong If an attr block indicates that it could use compaction, set the preen flag to have the attr fork rebuilt, since the attr fork rebuilder can take care of that for us. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/attr.c | 2 ++ fs/xfs/scrub/dabtree.c | 16 ++++++++++++++++ fs/xfs/scrub/dabtree.h | 1 + fs/xfs/scrub/trace.h | 1 + 4 files changed, 20 insertions(+) diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 692e1b2837bbb..7dccbc849b19b 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -428,6 +428,8 @@ xchk_xattr_block( xchk_da_set_corrupt(ds, level); if (!xchk_xattr_set_map(ds->sc, ab->usedmap, 0, hdrsize)) xchk_da_set_corrupt(ds, level); + if (leafhdr.holes) + xchk_da_set_preen(ds, level); if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) goto out; diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index c71254088dffe..056de4819f866 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -78,6 +78,22 @@ xchk_da_set_corrupt( __return_address); } +/* Flag a da btree node in need of optimization. */ +void +xchk_da_set_preen( + struct xchk_da_btree *ds, + int level) +{ + struct xfs_scrub *sc = ds->sc; + + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; + trace_xchk_fblock_preen(sc, ds->dargs.whichfork, + xfs_dir2_da_to_db(ds->dargs.geo, + ds->state->path.blk[level].blkno), + __return_address); +} + +/* Find an entry at a certain level in a da btree. */ static struct xfs_da_node_entry * xchk_da_btree_node_entry( struct xchk_da_btree *ds, diff --git a/fs/xfs/scrub/dabtree.h b/fs/xfs/scrub/dabtree.h index 4f8c2138a1ec6..d654c125feb4d 100644 --- a/fs/xfs/scrub/dabtree.h +++ b/fs/xfs/scrub/dabtree.h @@ -35,6 +35,7 @@ bool xchk_da_process_error(struct xchk_da_btree *ds, int level, int *error); /* Check for da btree corruption. */ void xchk_da_set_corrupt(struct xchk_da_btree *ds, int level); +void xchk_da_set_preen(struct xchk_da_btree *ds, int level); int xchk_da_btree_hash(struct xchk_da_btree *ds, int level, __be32 *hashp); int xchk_da_btree(struct xfs_scrub *sc, int whichfork, diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index ba75a56b2e2de..87a68aee16bf5 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -367,6 +367,7 @@ DEFINE_EVENT(xchk_fblock_error_class, name, \ DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_error); DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_warning); +DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_preen); #ifdef CONFIG_XFS_QUOTA DECLARE_EVENT_CLASS(xchk_dqiter_class, From patchwork Sun Dec 31 20:36:14 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507442 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 4DAA7BA2B for ; Sun, 31 Dec 2023 20:36:14 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="F0XvZpRQ" Received: by smtp.kernel.org (Postfix) with ESMTPSA id BCF7DC433C7; Sun, 31 Dec 2023 20:36:14 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704054974; bh=FxJ1I10REOdyl51tTkwuLVKMcd7d4WilvrDcrazj0tQ=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=F0XvZpRQOD7kcj4QTAhPJKYEPrf0t054MA+p4G1Nuuf/qW2WB8MI8MRs56UgMJuSJ 9k8Jmwqx68OtwDHzl4atkr0tghdTpXP6fzoUUapdCFgqrM0buzVFKiW13+FgeDISf2 DPr485U4fFqPvy82dyx4h+FV2D8jMjt4qI+LpmIGg6he3vW3wim7uJEOGxD4cFbtl4 h/1iLKRS8qqChiadd+3hgobr3+1e+mtm2dGsEz18ZYLZlHINexthQOclr+jxCx+pw3 zybbAtHaZiE6nimyyWurLD8PFZuXKvR/CNFuXU/CF/19wL0FOE/rsOuDNxZuqFSsau N1yUUb8Llm1tw== Date: Sun, 31 Dec 2023 12:36:14 -0800 Subject: [PATCH 5/6] xfs: flag empty xattr leaf blocks for optimization From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170404835296.1753315.1515475809477956883.stgit@frogsfrogsfrogs> In-Reply-To: <170404835198.1753315.999170762222938046.stgit@frogsfrogsfrogs> References: <170404835198.1753315.999170762222938046.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Empty xattr leaf blocks at offset zero are a waste of space but otherwise harmless. If we encounter one, flag it as an opportunity for optimization. If we encounter empty attr leaf blocks anywhere else in the attr fork, that's corruption. Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/attr.c | 11 +++++++++++ fs/xfs/scrub/dabtree.h | 2 ++ 2 files changed, 13 insertions(+) diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 7dccbc849b19b..07e8ca840745c 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -420,6 +420,17 @@ xchk_xattr_block( xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); hdrsize = xfs_attr3_leaf_hdr_size(leaf); + /* + * Empty xattr leaf blocks mapped at block 0 are probably a byproduct + * of a race between setxattr and a log shutdown. Anywhere else in the + * attr fork is a corruption. + */ + if (leafhdr.count == 0) { + if (blk->blkno == 0) + xchk_da_set_preen(ds, level); + else + xchk_da_set_corrupt(ds, level); + } if (leafhdr.usedbytes > mp->m_attr_geo->blksize) xchk_da_set_corrupt(ds, level); if (leafhdr.firstused > mp->m_attr_geo->blksize) diff --git a/fs/xfs/scrub/dabtree.h b/fs/xfs/scrub/dabtree.h index d654c125feb4d..de291e3b77dd8 100644 --- a/fs/xfs/scrub/dabtree.h +++ b/fs/xfs/scrub/dabtree.h @@ -37,6 +37,8 @@ bool xchk_da_process_error(struct xchk_da_btree *ds, int level, int *error); void xchk_da_set_corrupt(struct xchk_da_btree *ds, int level); void xchk_da_set_preen(struct xchk_da_btree *ds, int level); +void xchk_da_set_preen(struct xchk_da_btree *ds, int level); + int xchk_da_btree_hash(struct xchk_da_btree *ds, int level, __be32 *hashp); int xchk_da_btree(struct xfs_scrub *sc, int whichfork, xchk_da_btree_rec_fn scrub_fn, void *private); From patchwork Sun Dec 31 20:36:29 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13507443 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A1E4FBA2B for ; Sun, 31 Dec 2023 20:36:30 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="nhjivVEj" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 73073C433C8; Sun, 31 Dec 2023 20:36:30 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1704054990; bh=d01V8c7PYyupAw8cML2w+GmsuWwXcbdfVJ7xGBw0naY=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=nhjivVEjhszCrUNjIJxu5tXR2BMmpGoVFkwCASfLuYvzVRGPaAjYqAECDFTrvl7Fg ftxeA8Ay45pxbcNHCsLdAGlUxKo4xutqbhnfAWx0I01ddSWwI6w9RBS2WLs3ulduE+ Ay00YI0tqBlTeSUgQ4MYQL87sjQgE82Fmb7WFTNq9sZIVXESY17nDlB2Oxchwsd4Hb hsfQTqHt+6wPNScvCHZI5+F3eyi0AAxjvYHgdKm7/Ej/z/2KNET43/1ucFm+Zy5Ned ZyJ/RyiuEh53kw43FEfSeph3YaDT7EmBrhiuarlNoYS55VSSfzHGhJeBskgoOHBDLI exdUwI9hMcPyg== Date: Sun, 31 Dec 2023 12:36:29 -0800 Subject: [PATCH 6/6] xfs: create an xattr iteration function for scrub From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org Message-ID: <170404835312.1753315.14584428165250628339.stgit@frogsfrogsfrogs> In-Reply-To: <170404835198.1753315.999170762222938046.stgit@frogsfrogsfrogs> References: <170404835198.1753315.999170762222938046.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Darrick J. Wong Create a streamlined function to walk a file's xattrs, without all the cursor management stuff in the regular listxattr. Signed-off-by: Darrick J. Wong --- fs/xfs/Makefile | 1 fs/xfs/scrub/attr.c | 125 +++++++----------- fs/xfs/scrub/dab_bitmap.h | 37 +++++ fs/xfs/scrub/listxattr.c | 310 +++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/listxattr.h | 17 ++ 5 files changed, 412 insertions(+), 78 deletions(-) create mode 100644 fs/xfs/scrub/dab_bitmap.h create mode 100644 fs/xfs/scrub/listxattr.c create mode 100644 fs/xfs/scrub/listxattr.h diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 9b227be3e28b6..be3dceb85e0f9 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -165,6 +165,7 @@ xfs-y += $(addprefix scrub/, \ ialloc.o \ inode.o \ iscan.o \ + listxattr.o \ nlinks.o \ parent.o \ readdir.o \ diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 07e8ca840745c..ff83051c79818 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -21,6 +21,7 @@ #include "scrub/common.h" #include "scrub/dabtree.h" #include "scrub/attr.h" +#include "scrub/listxattr.h" #include "scrub/repair.h" /* Free the buffers linked from the xattr buffer. */ @@ -153,90 +154,81 @@ xchk_setup_xattr( /* Extended Attributes */ -struct xchk_xattr { - struct xfs_attr_list_context context; - struct xfs_scrub *sc; -}; - /* * Check that an extended attribute key can be looked up by hash. * - * We use the XFS attribute list iterator (i.e. xfs_attr_list_ilocked) - * to call this function for every attribute key in an inode. Once - * we're here, we load the attribute value to see if any errors happen, - * or if we get more or less data than we expected. + * We use the extended attribute walk helper to call this function for every + * attribute key in an inode. Once we're here, we load the attribute value to + * see if any errors happen, or if we get more or less data than we expected. */ -static void -xchk_xattr_listent( - struct xfs_attr_list_context *context, - int flags, - unsigned char *name, - int namelen, - int valuelen) +static int +xchk_xattr_actor( + struct xfs_scrub *sc, + struct xfs_inode *ip, + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen, + const void *value, + unsigned int valuelen, + void *priv) { struct xfs_da_args args = { .op_flags = XFS_DA_OP_NOTIME, - .attr_filter = flags & XFS_ATTR_NSP_ONDISK_MASK, - .geo = context->dp->i_mount->m_attr_geo, + .attr_filter = attr_flags & XFS_ATTR_NSP_ONDISK_MASK, + .geo = sc->mp->m_attr_geo, .whichfork = XFS_ATTR_FORK, - .dp = context->dp, + .dp = ip, .name = name, .namelen = namelen, .hashval = xfs_da_hashname(name, namelen), - .trans = context->tp, + .trans = sc->tp, .valuelen = valuelen, - .owner = context->dp->i_ino, + .owner = ip->i_ino, }; struct xchk_xattr_buf *ab; - struct xchk_xattr *sx; int error = 0; - sx = container_of(context, struct xchk_xattr, context); - ab = sx->sc->buf; + ab = sc->buf; - if (xchk_should_terminate(sx->sc, &error)) { - context->seen_enough = error; - return; - } + if (xchk_should_terminate(sc, &error)) + return error; - if (flags & XFS_ATTR_INCOMPLETE) { + if (attr_flags & XFS_ATTR_INCOMPLETE) { /* Incomplete attr key, just mark the inode for preening. */ - xchk_ino_set_preen(sx->sc, context->dp->i_ino); - return; + xchk_ino_set_preen(sc, ip->i_ino); + return 0; } /* Only one namespace bit allowed. */ - if (hweight32(flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) { - xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, args.blkno); - goto fail_xref; + if (hweight32(attr_flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) { + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, args.blkno); + return -ECANCELED; } /* Does this name make sense? */ if (!xfs_attr_namecheck(name, namelen)) { - xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, args.blkno); - goto fail_xref; + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, args.blkno); + return -ECANCELED; } /* - * Local xattr values are stored in the attr leaf block, so we don't - * need to retrieve the value from a remote block to detect corruption - * problems. + * Local and shortform xattr values are stored in the attr leaf block, + * so we don't need to retrieve the value from a remote block to detect + * corruption problems. */ - if (flags & XFS_ATTR_LOCAL) - goto fail_xref; + if (value) + return 0; /* - * Try to allocate enough memory to extrat the attr value. If that - * doesn't work, we overload the seen_enough variable to convey - * the error message back to the main scrub function. + * Try to allocate enough memory to extract the attr value. If that + * doesn't work, return -EDEADLOCK as a signal to try again with a + * maximally sized buffer. */ - error = xchk_setup_xattr_buf(sx->sc, valuelen); + error = xchk_setup_xattr_buf(sc, valuelen); if (error == -ENOMEM) error = -EDEADLOCK; - if (error) { - context->seen_enough = error; - return; - } + if (error) + return error; args.value = ab->value; @@ -244,16 +236,13 @@ xchk_xattr_listent( /* ENODATA means the hash lookup failed and the attr is bad */ if (error == -ENODATA) error = -EFSCORRUPTED; - if (!xchk_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno, + if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, args.blkno, &error)) - goto fail_xref; + return error; if (args.valuelen != valuelen) - xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, - args.blkno); -fail_xref: - if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - context->seen_enough = 1; - return; + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, args.blkno); + + return 0; } /* @@ -623,16 +612,6 @@ int xchk_xattr( struct xfs_scrub *sc) { - struct xchk_xattr sx = { - .sc = sc, - .context = { - .dp = sc->ip, - .tp = sc->tp, - .resynch = 1, - .put_listent = xchk_xattr_listent, - .allow_incomplete = true, - }, - }; xfs_dablk_t last_checked = -1U; int error = 0; @@ -661,12 +640,6 @@ xchk_xattr( /* * Look up every xattr in this file by name and hash. * - * Use the backend implementation of xfs_attr_list to call - * xchk_xattr_listent on every attribute key in this inode. - * In other words, we use the same iterator/callback mechanism - * that listattr uses to scrub extended attributes, though in our - * _listent function, we check the value of the attribute. - * * The VFS only locks i_rwsem when modifying attrs, so keep all * three locks held because that's the only way to ensure we're * the only thread poking into the da btree. We traverse the da @@ -674,13 +647,9 @@ xchk_xattr( * iteration, which doesn't really follow the usual buffer * locking order. */ - error = xfs_attr_list_ilocked(&sx.context); + error = xchk_xattr_walk(sc, sc->ip, xchk_xattr_actor, NULL); if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error)) return error; - /* Did our listent function try to return any errors? */ - if (sx.context.seen_enough < 0) - return sx.context.seen_enough; - return 0; } diff --git a/fs/xfs/scrub/dab_bitmap.h b/fs/xfs/scrub/dab_bitmap.h new file mode 100644 index 0000000000000..0c6e3aad43954 --- /dev/null +++ b/fs/xfs/scrub/dab_bitmap.h @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_SCRUB_DAB_BITMAP_H__ +#define __XFS_SCRUB_DAB_BITMAP_H__ + +/* Bitmaps, but for type-checked for xfs_dablk_t */ + +struct xdab_bitmap { + struct xbitmap32 dabitmap; +}; + +static inline void xdab_bitmap_init(struct xdab_bitmap *bitmap) +{ + xbitmap32_init(&bitmap->dabitmap); +} + +static inline void xdab_bitmap_destroy(struct xdab_bitmap *bitmap) +{ + xbitmap32_destroy(&bitmap->dabitmap); +} + +static inline int xdab_bitmap_set(struct xdab_bitmap *bitmap, + xfs_dablk_t dabno, xfs_extlen_t len) +{ + return xbitmap32_set(&bitmap->dabitmap, dabno, len); +} + +static inline bool xdab_bitmap_test(struct xdab_bitmap *bitmap, + xfs_dablk_t dabno, xfs_extlen_t *len) +{ + return xbitmap32_test(&bitmap->dabitmap, dabno, len); +} + +#endif /* __XFS_SCRUB_DAB_BITMAP_H__ */ diff --git a/fs/xfs/scrub/listxattr.c b/fs/xfs/scrub/listxattr.c new file mode 100644 index 0000000000000..c8d7d7d723177 --- /dev/null +++ b/fs/xfs/scrub/listxattr.c @@ -0,0 +1,310 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "xfs_attr_sf.h" +#include "xfs_trans.h" +#include "scrub/scrub.h" +#include "scrub/bitmap.h" +#include "scrub/dab_bitmap.h" +#include "scrub/listxattr.h" + +/* Call a function for every entry in a shortform xattr structure. */ +STATIC int +xchk_xattr_walk_sf( + struct xfs_scrub *sc, + struct xfs_inode *ip, + xchk_xattr_fn attr_fn, + void *priv) +{ + struct xfs_attr_shortform *sf; + struct xfs_attr_sf_entry *sfe; + unsigned int i; + int error; + + sf = (struct xfs_attr_shortform *)ip->i_af.if_u1.if_data; + for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { + error = attr_fn(sc, ip, sfe->flags, sfe->nameval, sfe->namelen, + &sfe->nameval[sfe->namelen], sfe->valuelen, + priv); + if (error) + return error; + + sfe = xfs_attr_sf_nextentry(sfe); + } + + return 0; +} + +/* Call a function for every entry in this xattr leaf block. */ +STATIC int +xchk_xattr_walk_leaf_entries( + struct xfs_scrub *sc, + struct xfs_inode *ip, + xchk_xattr_fn attr_fn, + struct xfs_buf *bp, + void *priv) +{ + struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_mount *mp = sc->mp; + struct xfs_attr_leafblock *leaf = bp->b_addr; + struct xfs_attr_leaf_entry *entry; + unsigned int i; + int error; + + xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); + entry = xfs_attr3_leaf_entryp(leaf); + + for (i = 0; i < ichdr.count; entry++, i++) { + void *value; + unsigned char *name; + unsigned int namelen, valuelen; + + if (entry->flags & XFS_ATTR_LOCAL) { + struct xfs_attr_leaf_name_local *name_loc; + + name_loc = xfs_attr3_leaf_name_local(leaf, i); + name = name_loc->nameval; + namelen = name_loc->namelen; + value = &name_loc->nameval[name_loc->namelen]; + valuelen = be16_to_cpu(name_loc->valuelen); + } else { + struct xfs_attr_leaf_name_remote *name_rmt; + + name_rmt = xfs_attr3_leaf_name_remote(leaf, i); + name = name_rmt->name; + namelen = name_rmt->namelen; + value = NULL; + valuelen = be32_to_cpu(name_rmt->valuelen); + } + + error = attr_fn(sc, ip, entry->flags, name, namelen, value, + valuelen, priv); + if (error) + return error; + + } + + return 0; +} + +/* + * Call a function for every entry in a leaf-format xattr structure. Avoid + * memory allocations for the loop detector since there's only one block. + */ +STATIC int +xchk_xattr_walk_leaf( + struct xfs_scrub *sc, + struct xfs_inode *ip, + xchk_xattr_fn attr_fn, + void *priv) +{ + struct xfs_buf *leaf_bp; + int error; + + error = xfs_attr3_leaf_read(sc->tp, ip, ip->i_ino, 0, &leaf_bp); + if (error) + return error; + + error = xchk_xattr_walk_leaf_entries(sc, ip, attr_fn, leaf_bp, priv); + xfs_trans_brelse(sc->tp, leaf_bp); + return error; +} + +/* Find the leftmost leaf in the xattr dabtree. */ +STATIC int +xchk_xattr_find_leftmost_leaf( + struct xfs_scrub *sc, + struct xfs_inode *ip, + struct xdab_bitmap *seen_dablks, + struct xfs_buf **leaf_bpp) +{ + struct xfs_da3_icnode_hdr nodehdr; + struct xfs_mount *mp = sc->mp; + struct xfs_trans *tp = sc->tp; + struct xfs_da_intnode *node; + struct xfs_da_node_entry *btree; + struct xfs_buf *bp; + xfs_failaddr_t fa; + xfs_dablk_t blkno = 0; + unsigned int expected_level = 0; + int error; + + for (;;) { + xfs_extlen_t len = 1; + uint16_t magic; + + /* Make sure we haven't seen this new block already. */ + if (xdab_bitmap_test(seen_dablks, blkno, &len)) + return -EFSCORRUPTED; + + error = xfs_da3_node_read(tp, ip, blkno, &bp, XFS_ATTR_FORK); + if (error) + return error; + + node = bp->b_addr; + magic = be16_to_cpu(node->hdr.info.magic); + if (magic == XFS_ATTR_LEAF_MAGIC || + magic == XFS_ATTR3_LEAF_MAGIC) + break; + + error = -EFSCORRUPTED; + if (magic != XFS_DA_NODE_MAGIC && + magic != XFS_DA3_NODE_MAGIC) + goto out_buf; + + fa = xfs_da3_node_header_check(bp, ip->i_ino); + if (fa) + goto out_buf; + + xfs_da3_node_hdr_from_disk(mp, &nodehdr, node); + + if (nodehdr.count == 0 || nodehdr.level >= XFS_DA_NODE_MAXDEPTH) + goto out_buf; + + /* Check the level from the root node. */ + if (blkno == 0) + expected_level = nodehdr.level - 1; + else if (expected_level != nodehdr.level) + goto out_buf; + else + expected_level--; + + /* Remember that we've seen this node. */ + error = xdab_bitmap_set(seen_dablks, blkno, 1); + if (error) + goto out_buf; + + /* Find the next level towards the leaves of the dabtree. */ + btree = nodehdr.btree; + blkno = be32_to_cpu(btree->before); + xfs_trans_brelse(tp, bp); + } + + error = -EFSCORRUPTED; + fa = xfs_attr3_leaf_header_check(bp, ip->i_ino); + if (fa) + goto out_buf; + + if (expected_level != 0) + goto out_buf; + + /* Remember that we've seen this leaf. */ + error = xdab_bitmap_set(seen_dablks, blkno, 1); + if (error) + goto out_buf; + + *leaf_bpp = bp; + return 0; + +out_buf: + xfs_trans_brelse(tp, bp); + return error; +} + +/* Call a function for every entry in a node-format xattr structure. */ +STATIC int +xchk_xattr_walk_node( + struct xfs_scrub *sc, + struct xfs_inode *ip, + xchk_xattr_fn attr_fn, + void *priv) +{ + struct xfs_attr3_icleaf_hdr leafhdr; + struct xdab_bitmap seen_dablks; + struct xfs_mount *mp = sc->mp; + struct xfs_attr_leafblock *leaf; + struct xfs_buf *leaf_bp; + int error; + + xdab_bitmap_init(&seen_dablks); + + error = xchk_xattr_find_leftmost_leaf(sc, ip, &seen_dablks, &leaf_bp); + if (error) + goto out_bitmap; + + for (;;) { + xfs_extlen_t len; + + error = xchk_xattr_walk_leaf_entries(sc, ip, attr_fn, leaf_bp, + priv); + if (error) + goto out_leaf; + + /* Find the right sibling of this leaf block. */ + leaf = leaf_bp->b_addr; + xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); + if (leafhdr.forw == 0) + goto out_leaf; + + xfs_trans_brelse(sc->tp, leaf_bp); + + /* Make sure we haven't seen this new leaf already. */ + len = 1; + if (xdab_bitmap_test(&seen_dablks, leafhdr.forw, &len)) + goto out_bitmap; + + error = xfs_attr3_leaf_read(sc->tp, ip, ip->i_ino, + leafhdr.forw, &leaf_bp); + if (error) + goto out_bitmap; + + /* Remember that we've seen this new leaf. */ + error = xdab_bitmap_set(&seen_dablks, leafhdr.forw, 1); + if (error) + goto out_leaf; + } + +out_leaf: + xfs_trans_brelse(sc->tp, leaf_bp); +out_bitmap: + xdab_bitmap_destroy(&seen_dablks); + return error; +} + +/* + * Call a function for every extended attribute in a file. + * + * Callers must hold the ILOCK. No validation or cursor restarts allowed. + * Returns -EFSCORRUPTED on any problem, including loops in the dabtree. + */ +int +xchk_xattr_walk( + struct xfs_scrub *sc, + struct xfs_inode *ip, + xchk_xattr_fn attr_fn, + void *priv) +{ + int error; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); + + if (!xfs_inode_hasattr(ip)) + return 0; + + if (ip->i_af.if_format == XFS_DINODE_FMT_LOCAL) + return xchk_xattr_walk_sf(sc, ip, attr_fn, priv); + + /* attr functions require that the attr fork is loaded */ + error = xfs_iread_extents(sc->tp, ip, XFS_ATTR_FORK); + if (error) + return error; + + if (xfs_attr_is_leaf(ip)) + return xchk_xattr_walk_leaf(sc, ip, attr_fn, priv); + + return xchk_xattr_walk_node(sc, ip, attr_fn, priv); +} diff --git a/fs/xfs/scrub/listxattr.h b/fs/xfs/scrub/listxattr.h new file mode 100644 index 0000000000000..48fe89d05946b --- /dev/null +++ b/fs/xfs/scrub/listxattr.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_SCRUB_LISTXATTR_H__ +#define __XFS_SCRUB_LISTXATTR_H__ + +typedef int (*xchk_xattr_fn)(struct xfs_scrub *sc, struct xfs_inode *ip, + unsigned int attr_flags, const unsigned char *name, + unsigned int namelen, const void *value, unsigned int valuelen, + void *priv); + +int xchk_xattr_walk(struct xfs_scrub *sc, struct xfs_inode *ip, + xchk_xattr_fn attr_fn, void *priv); + +#endif /* __XFS_SCRUB_LISTXATTR_H__ */