From patchwork Fri May 26 02:14:50 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13256274 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id DFA72C77B7A for ; Fri, 26 May 2023 02:14:56 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229711AbjEZCO4 (ORCPT ); Thu, 25 May 2023 22:14:56 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48792 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S233494AbjEZCOx (ORCPT ); Thu, 25 May 2023 22:14:53 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 92D1E13A for ; Thu, 25 May 2023 19:14:51 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 2EE496122B for ; Fri, 26 May 2023 02:14:51 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 90193C433D2; Fri, 26 May 2023 02:14:50 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685067290; bh=SK1ENvKfdlRpEC8dzixDnI97n4fEsNiwD6QNCDOEI7I=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=TVz313Zvcp1KpSjxB7wYLCWl3+QqXwew04oR3nlGOmkvWMuaBw7rfeeMhOyuEsLmd 1vIFSHiB9WN/bcWjyGtkx8AoQJs+EziIIsmcH8dO/wu0Rl7jgNOh/VPSqgXBZvnZib dTxZUlq96gPc8MZc1QaSuuofcZ1P8a2JGeB9AW0bg/m488DUX1r8EdOLKaD3nLYf/h clgkd5sIom+JyZZB65fnpJSc3iUg4ka7ky/GUi6ij0TEnfPfIrPoI34zHrpt2p7gXw oub21r8sU9E/i0WrUpg6BYS0vij7qfUWqh8rT03wNpLfnM0TJ1elaDbksXv0qHj3ZK FT03h68HfKweg== Date: Thu, 25 May 2023 19:14:50 -0700 Subject: [PATCH 
01/17] xfs: check dirents have parent pointers From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073310.3745075.9633704251345668611.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong If the fs has parent pointers, we need to check that each child dirent points to a file that has a parent pointer pointing back at us. Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_parent.c | 42 ++++++++++++++++ fs/xfs/libxfs/xfs_parent.h | 10 ++++ fs/xfs/scrub/dir.c | 117 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c index c8ff6316c59b..a0ffff5db76d 100644 --- a/fs/xfs/libxfs/xfs_parent.c +++ b/fs/xfs/libxfs/xfs_parent.c @@ -322,3 +322,45 @@ xfs_parent_irec_hashname( irec->p_namehash = xfs_dir2_hashname(mp, &dname); } + +static inline void +xfs_parent_scratch_init( + struct xfs_trans *tp, + struct xfs_inode *ip, + const struct xfs_parent_name_irec *pptr, + struct xfs_parent_scratch *scr) +{ + memset(&scr->args, 0, sizeof(struct xfs_da_args)); + scr->args.attr_filter = XFS_ATTR_PARENT; + scr->args.dp = ip; + scr->args.geo = ip->i_mount->m_attr_geo; + scr->args.name = (const unsigned char *)&scr->rec; + scr->args.namelen = sizeof(struct xfs_parent_name_rec); + scr->args.op_flags = XFS_DA_OP_NVLOOKUP; + scr->args.trans = tp; + scr->args.value = (void *)pptr->p_name; + scr->args.valuelen = pptr->p_namelen; + scr->args.whichfork = XFS_ATTR_FORK; + scr->args.hashval = xfs_da_hashname((const void *)&scr->rec, + sizeof(struct xfs_parent_name_rec)); +} + +/* + * Look up the @name associated with the parent pointer 
(@pptr) of @ip. + * Caller must hold at least ILOCK_SHARED. Returns 0 if the pointer is found, + * -ENOATTR if there is no match, or a negative errno. The scratchpad need not + * be initialized. + */ +int +xfs_parent_lookup( + struct xfs_trans *tp, + struct xfs_inode *ip, + const struct xfs_parent_name_irec *pptr, + struct xfs_parent_scratch *scr) +{ + xfs_parent_irec_to_disk(&scr->rec, pptr); + xfs_parent_scratch_init(tp, ip, pptr, scr); + scr->args.op_flags |= XFS_DA_OP_OKNOENT; + + return xfs_attr_get_ilocked(&scr->args); +} diff --git a/fs/xfs/libxfs/xfs_parent.h b/fs/xfs/libxfs/xfs_parent.h index 0f4808990ce6..25bbb62fce5f 100644 --- a/fs/xfs/libxfs/xfs_parent.h +++ b/fs/xfs/libxfs/xfs_parent.h @@ -98,4 +98,14 @@ void xfs_parent_irec_to_disk(struct xfs_parent_name_rec *rec, void xfs_parent_irec_hashname(struct xfs_mount *mp, struct xfs_parent_name_irec *irec); +/* Scratchpad memory so that raw parent operations don't burn stack space. */ +struct xfs_parent_scratch { + struct xfs_parent_name_rec rec; + struct xfs_da_args args; +}; + +int xfs_parent_lookup(struct xfs_trans *tp, struct xfs_inode *ip, + const struct xfs_parent_name_irec *pptr, + struct xfs_parent_scratch *scratch); + #endif /* __XFS_PARENT_H__ */ diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index 7bcac0b0ed6e..a0e16ab3419a 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -15,6 +15,8 @@ #include "xfs_icache.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" +#include "xfs_attr.h" +#include "xfs_parent.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/dabtree.h" @@ -39,6 +41,20 @@ xchk_setup_directory( /* Directories */ +struct xchk_dir { + struct xfs_scrub *sc; + + /* Scratch buffer for scanning pptr xattrs */ + struct xfs_parent_name_irec pptr; + + /* xattr key and da args for parent pointer validation. */ + struct xfs_parent_scratch pptr_scratch; + + /* Name buffer for dirent revalidation. 
*/ + uint8_t namebuf[MAXNAMELEN]; + +}; + /* Scrub a directory entry. */ /* Check that an inode's mode matches a given XFS_DIR3_FT_* type. */ @@ -61,6 +77,88 @@ xchk_dir_check_ftype( xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); } +/* + * Try to lock a child file for checking parent pointers. Returns the inode + * flags for the locks we now hold, or zero if we failed. + */ +STATIC unsigned int +xchk_dir_lock_child( + struct xfs_scrub *sc, + struct xfs_inode *ip) +{ + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) + return 0; + + if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + return 0; + } + + if (!xfs_inode_has_attr_fork(ip) || !xfs_need_iread_extents(&ip->i_af)) + return XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED; + + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + return 0; + } + + return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL; +} + +/* Check the backwards link (parent pointer) associated with this dirent. */ +STATIC int +xchk_dir_parent_pointer( + struct xchk_dir *sd, + const struct xfs_name *name, + struct xfs_inode *ip) +{ + struct xfs_scrub *sc = sd->sc; + int error; + + sd->pptr.p_ino = sc->ip->i_ino; + sd->pptr.p_gen = VFS_I(sc->ip)->i_generation; + sd->pptr.p_namelen = name->len; + memcpy(sd->pptr.p_name, name->name, name->len); + xfs_parent_irec_hashname(sc->mp, &sd->pptr); + + error = xfs_parent_lookup(sc->tp, ip, &sd->pptr, &sd->pptr_scratch); + if (error == -ENOATTR) + xchk_fblock_xref_set_corrupt(sc, XFS_DATA_FORK, 0); + + return 0; +} + +/* Look for a parent pointer matching this dirent, if the child isn't busy. 
*/ +STATIC int +xchk_dir_check_pptr_fast( + struct xchk_dir *sd, + xfs_dir2_dataptr_t dapos, + const struct xfs_name *name, + struct xfs_inode *ip) +{ + struct xfs_scrub *sc = sd->sc; + unsigned int lockmode; + int error; + + /* dot and dotdot entries do not have parent pointers */ + if (xfs_dir2_samename(name, &xfs_name_dot) || + xfs_dir2_samename(name, &xfs_name_dotdot)) + return 0; + + /* Try to lock the inode. */ + lockmode = xchk_dir_lock_child(sc, ip); + if (!lockmode) { + xchk_set_incomplete(sc); + return -ECANCELED; + } + + error = xchk_dir_parent_pointer(sd, name, ip); + xfs_iunlock(ip, lockmode); + return error; +} + /* * Scrub a single directory entry. * @@ -78,6 +176,7 @@ xchk_dir_actor( { struct xfs_mount *mp = dp->i_mount; struct xfs_inode *ip; + struct xchk_dir *sd = priv; xfs_ino_t lookup_ino; xfs_dablk_t offset; int error = 0; @@ -144,6 +243,14 @@ xchk_dir_actor( goto out; xchk_dir_check_ftype(sc, offset, ip, name->type); + + if (xfs_has_parent(mp)) { + error = xchk_dir_check_pptr_fast(sd, dapos, name, ip); + if (error) + goto out_rele; + } + +out_rele: xchk_irele(sc, ip); out: if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) @@ -765,6 +872,7 @@ int xchk_directory( struct xfs_scrub *sc) { + struct xchk_dir *sd; int error; if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) @@ -792,10 +900,17 @@ xchk_directory( if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return 0; + sd = kvzalloc(sizeof(struct xchk_dir), XCHK_GFP_FLAGS); + if (!sd) + return -ENOMEM; + sd->sc = sc; + /* Look up every name in this directory by hash. */ - error = xchk_dir_walk(sc, sc->ip, xchk_dir_actor, NULL); + error = xchk_dir_walk(sc, sc->ip, xchk_dir_actor, sd); if (error == -ECANCELED) error = 0; + + kvfree(sd); return error; } From patchwork Fri May 26 02:15:05 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13256275 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 89522C77B7A for ; Fri, 26 May 2023 02:15:10 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229631AbjEZCPJ (ORCPT ); Thu, 25 May 2023 22:15:09 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48846 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229567AbjEZCPI (ORCPT ); Thu, 25 May 2023 22:15:08 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2A9DA135 for ; Thu, 25 May 2023 19:15:07 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id BA524614A2 for ; Fri, 26 May 2023 02:15:06 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 22CFFC4339B; Fri, 26 May 2023 02:15:06 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685067306; bh=/JXNaE8t4FCqldPDakHp3nlXrZZ8U72FV9MqADCxOok=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=ZuiIf5EF1KKUIJxhKeEkr6s8f/y56R/tCNmcXyMpxj84bX+M44rD3R81YN2cC/Fhw vc4ArGh4lelVQTDr1nAQwwSbtSU8M8dxkRbZV8ijgz3wPw9XPfILQB40RV+GcrMzB2 MLqbOeuh2HkQg3x2w3cBHDVSsZ5LBqcSVSpYnhdCLKBxZK9P8/omA67OlfaDBom9t2 yPU5AxSYxbzrbtwwux9abyrIbdZ8vVX29MefMPL+63c1bypO03VAbZ5sSWcV0/fw6d /qR+LbvsgWEXEa+Lo+FfcPqTW4laZzGxapy4dRFYfZrX+TFnTBMWvQtG+uyIK2F8tv +lU+B/EwdqizQ== Date: Thu, 25 May 2023 19:15:05 -0700 Subject: [PATCH 02/17] xfs: deferred scrub of dirents From: "Darrick J. 
Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073324.3745075.11598495890056456087.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong If the trylock-based parent pointer check fails, retain those dirents and check them at the end. This may involve dropping the locks on the file being scanned, so yay. Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/dir.c | 228 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/trace.h | 33 +++++++ 2 files changed, 259 insertions(+), 2 deletions(-) diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index a0e16ab3419a..0d4aaea271a2 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -22,6 +22,10 @@ #include "scrub/dabtree.h" #include "scrub/readdir.h" #include "scrub/repair.h" +#include "scrub/trace.h" +#include "scrub/xfile.h" +#include "scrub/xfarray.h" +#include "scrub/xfblob.h" /* Set us up to scrub directories. */ int @@ -41,6 +45,18 @@ xchk_setup_directory( /* Directories */ +/* Deferred directory entry that we saved for later. */ +struct xchk_dirent { + /* Cookie for retrieval of the dirent name. */ + xfblob_cookie name_cookie; + + /* Child inode number. */ + xfs_ino_t ino; + + /* Length of the pptr name. */ + uint8_t namelen; +}; + struct xchk_dir { struct xfs_scrub *sc; @@ -50,6 +66,15 @@ struct xchk_dir { /* xattr key and da args for parent pointer validation. */ struct xfs_parent_scratch pptr_scratch; + /* Fixed-size array of xchk_dirent structures. */ + struct xfarray *dir_entries; + + /* Blobs containing dirent names. */ + struct xfblob *dir_names; + + /* If we've cycled the ILOCK, we must revalidate deferred dirents. 
*/ + bool need_revalidate; + /* Name buffer for dirent revalidation. */ uint8_t namebuf[MAXNAMELEN]; @@ -150,8 +175,26 @@ xchk_dir_check_pptr_fast( /* Try to lock the inode. */ lockmode = xchk_dir_lock_child(sc, ip); if (!lockmode) { - xchk_set_incomplete(sc); - return -ECANCELED; + struct xchk_dirent save_de = { + .namelen = name->len, + .ino = ip->i_ino, + }; + + /* Couldn't lock the inode, so save the dirent for later. */ + trace_xchk_dir_defer(sc->ip, name->name, name->len, ip->i_ino); + + error = xfblob_store(sd->dir_names, &save_de.name_cookie, + name->name, name->len); + if (xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, + &error)) + return error; + + error = xfarray_append(sd->dir_entries, &save_de); + if (xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, + &error)) + return error; + + return 0; } error = xchk_dir_parent_pointer(sd, name, ip); @@ -867,6 +910,156 @@ xchk_directory_blocks( return error; } +/* + * Revalidate a dirent that we collected in the past but couldn't check because + * of lock contention. Returns 0 if the dirent is still valid, -ENOENT if it + * has gone away on us, or a negative errno. + */ +STATIC int +xchk_dir_revalidate_dirent( + struct xchk_dir *sd, + const struct xfs_name *xname, + xfs_ino_t ino) +{ + struct xfs_scrub *sc = sd->sc; + xfs_ino_t child_ino; + int error; + + /* + * Look up the directory entry. If we get -ENOENT, the directory entry + * went away and there's nothing to revalidate. Return any other + * error. + */ + error = xchk_dir_lookup(sc, sc->ip, xname, &child_ino); + if (error) + return error; + + /* The inode number changed, nothing to revalidate. */ + if (ino != child_ino) + return -ENOENT; + + return 0; +} + +/* + * Check a directory entry's parent pointers the slow way, which means we cycle + * locks a bunch and put up with revalidation until we get it done. 
+ */ +STATIC int +xchk_dir_slow_dirent( + struct xchk_dir *sd, + struct xchk_dirent *dirent) +{ + struct xfs_name xname = { + .name = sd->namebuf, + .len = dirent->namelen, + }; + struct xfs_scrub *sc = sd->sc; + struct xfs_inode *ip; + unsigned int lockmode; + int error; + + /* Check that the deferred dirent still exists. */ + if (sd->need_revalidate) { + error = xchk_dir_revalidate_dirent(sd, &xname, dirent->ino); + if (error == -ENOENT) + return 0; + if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, + &error)) + return error; + } + + error = xchk_iget(sc, dirent->ino, &ip); + if (error == -EINVAL || error == -ENOENT) { + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + return 0; + } + if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) + return error; + + /* + * If we can grab both IOLOCK and ILOCK of the alleged child, we can + * proceed with the validation. + */ + lockmode = xchk_dir_lock_child(sc, ip); + if (lockmode) + goto check_pptr; + + /* + * We couldn't lock the child file. Drop all the locks and try to + * get them again, one at a time. + */ + xchk_iunlock(sc, sc->ilock_flags); + sd->need_revalidate = true; + + trace_xchk_dir_slowpath(sc->ip, xname.name, xname.len, ip->i_ino); + + while (true) { + xchk_ilock(sc, XFS_IOLOCK_EXCL); + if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { + xchk_ilock(sc, XFS_ILOCK_EXCL); + if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { + break; + } + xchk_iunlock(sc, XFS_ILOCK_EXCL); + } + xchk_iunlock(sc, XFS_IOLOCK_EXCL); + + if (xchk_should_terminate(sc, &error)) + goto out_rele; + + delay(1); + } + lockmode = XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL; + + /* Revalidate, since we just cycled the locks. 
*/ + error = xchk_dir_revalidate_dirent(sd, &xname, dirent->ino); + if (error == -ENOENT) + goto out_unlock; + if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) + goto out_unlock; + +check_pptr: + error = xchk_dir_parent_pointer(sd, &xname, ip); +out_unlock: + xfs_iunlock(ip, lockmode); +out_rele: + xchk_irele(sc, ip); + return error; +} + +/* Check all the dirents that we deferred the first time around. */ +STATIC int +xchk_dir_finish_slow_dirents( + struct xchk_dir *sd) +{ + xfarray_idx_t array_cur; + int error; + + foreach_xfarray_idx(sd->dir_entries, array_cur) { + struct xchk_dirent dirent; + + if (sd->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return 0; + + error = xfarray_load(sd->dir_entries, array_cur, &dirent); + if (error) + return error; + + error = xfblob_load(sd->dir_names, dirent.name_cookie, + sd->namebuf, dirent.namelen); + if (error) + return error; + sd->namebuf[MAXNAMELEN - 1] = 0; + + error = xchk_dir_slow_dirent(sd, &dirent); + if (error) + return error; + } + + return 0; +} + /* Scrub a whole directory. */ int xchk_directory( @@ -905,11 +1098,42 @@ xchk_directory( return -ENOMEM; sd->sc = sc; + if (xfs_has_parent(sc->mp)) { + /* + * Set up some staging memory for dirents that we can't check + * due to locking contention. + */ + error = xfarray_create(sc->mp, "slow directory entries", 0, + sizeof(struct xchk_dirent), &sd->dir_entries); + if (error) + goto out_sd; + + error = xfblob_create(sc->mp, "slow directory entry names", + &sd->dir_names); + if (error) + goto out_entries; + } + /* Look up every name in this directory by hash. 
*/ error = xchk_dir_walk(sc, sc->ip, xchk_dir_actor, sd); if (error == -ECANCELED) error = 0; + if (error) + goto out_names; + if (xfs_has_parent(sc->mp)) { + error = xchk_dir_finish_slow_dirents(sd); + if (error) + goto out_names; + } + +out_names: + if (sd->dir_names) + xfblob_destroy(sd->dir_names); +out_entries: + if (sd->dir_entries) + xfarray_destroy(sd->dir_entries); +out_sd: kvfree(sd); return error; } diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index e1544c044a60..539c51545bcd 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -1417,6 +1417,39 @@ DEFINE_EVENT(xchk_nlinks_diff_class, name, \ TP_ARGS(mp, ip, live)) DEFINE_SCRUB_NLINKS_DIFF_EVENT(xchk_nlinks_compare_inode); +DECLARE_EVENT_CLASS(xchk_pptr_class, + TP_PROTO(struct xfs_inode *ip, const unsigned char *name, + unsigned int namelen, xfs_ino_t parent_ino), + TP_ARGS(ip, name, namelen, parent_ino), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned int, namelen) + __dynamic_array(char, name, namelen) + __field(xfs_ino_t, parent_ino) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->namelen = namelen; + memcpy(__get_str(name), name, namelen); + __entry->parent_ino = parent_ino; + ), + TP_printk("dev %d:%d ino 0x%llx name '%.*s' parent_ino 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->namelen, + __get_str(name), + __entry->parent_ino) +) +#define DEFINE_XCHK_PPTR_CLASS(name) \ +DEFINE_EVENT(xchk_pptr_class, name, \ + TP_PROTO(struct xfs_inode *ip, const unsigned char *name, \ + unsigned int namelen, xfs_ino_t parent_ino), \ + TP_ARGS(ip, name, namelen, parent_ino)) +DEFINE_XCHK_PPTR_CLASS(xchk_dir_defer); +DEFINE_XCHK_PPTR_CLASS(xchk_dir_slowpath); + /* repair tracepoints */ #if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) From patchwork Fri May 26 02:15:21 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit 
X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13256276 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 3274FC77B7A for ; Fri, 26 May 2023 02:15:26 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229567AbjEZCPZ (ORCPT ); Thu, 25 May 2023 22:15:25 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48910 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229727AbjEZCPY (ORCPT ); Thu, 25 May 2023 22:15:24 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C4ADD135 for ; Thu, 25 May 2023 19:15:22 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 59E026122B for ; Fri, 26 May 2023 02:15:22 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id BCC31C433EF; Fri, 26 May 2023 02:15:21 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685067321; bh=y5kX6E16xebkM21FTOZ5GSG8U7yJKC81uCkAuwZ1jDc=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=unn7XA2lZYo6cdqBO/GPxiOUDfB7uCURmbyVpdts65UN2rN/D/9XGl2K7strvvy6h o/P8VKXmSebJsk33j2gEvpYe/1sI00MUBg+0xcPSmFCAfEP3420aGROJ7ZiTQ34YZN wLvcQRFySxHFyrlNe0uviMGOnIgpvqbeNUEr+QdY85XcMteQcT+qcgRGI3aXKPKk0t ww3UzzKNkbGlPbNZyCPiL6Q9A5toBuLyDc/rddmGxwe/8YLFn2DTjbFBLR/zQGYZuG vbxI//4ysKY6x4uP6bERPk+cKJiKl4oR+igwhakZzi7b55M/yKdka+JBn7pFm4g5sD 2+1bwCCpeHyxw== Date: Thu, 25 May 2023 19:15:21 -0700 Subject: [PATCH 03/17] xfs: scrub parent pointers From: "Darrick J. 
Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073339.3745075.6501855510146805677.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong Actually check parent pointers now. Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/parent.c | 341 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 341 insertions(+) diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 333a1c8d7062..6427f4f14022 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -15,11 +15,15 @@ #include "xfs_icache.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" +#include "xfs_attr.h" +#include "xfs_parent.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/readdir.h" #include "scrub/tempfile.h" #include "scrub/repair.h" +#include "scrub/listxattr.h" +#include "scrub/trace.h" /* Set us up to scrub parents. */ int @@ -197,6 +201,340 @@ xchk_parent_validate( return error; } +/* + * Checking of Parent Pointers + * =========================== + * + * On filesystems with directory parent pointers, we check the referential + * integrity by visiting each parent pointer of a child file and checking that + * the directory referenced by the pointer actually has a dirent pointing + * forward to the child file. + */ + +struct xchk_pptrs { + struct xfs_scrub *sc; + + /* Scratch buffer for scanning pptr xattrs */ + struct xfs_parent_name_irec pptr; + + /* How many parent pointers did we find at the end? */ + unsigned long long pptrs_found; + + /* Parent of this directory. */ + xfs_ino_t parent_ino; +}; + +/* Look up the dotdot entry so that we can check it as we walk the pptrs. 
*/ +STATIC int +xchk_parent_dotdot( + struct xchk_pptrs *pp) +{ + struct xfs_scrub *sc = pp->sc; + int error; + + if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) { + pp->parent_ino = NULLFSINO; + return 0; + } + + /* Look up '..' */ + error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &pp->parent_ino); + if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) + return error; + if (!xfs_verify_dir_ino(sc->mp, pp->parent_ino)) { + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + return 0; + } + + /* Is this the root dir? Then '..' must point to itself. */ + if (sc->ip == sc->mp->m_rootip && sc->ip->i_ino != pp->parent_ino) + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + + return 0; +} + +/* + * Try to lock a parent directory for checking dirents. Returns the inode + * flags for the locks we now hold, or zero if we failed. + */ +STATIC unsigned int +xchk_parent_lock_dir( + struct xfs_scrub *sc, + struct xfs_inode *dp) +{ + if (!xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) + return 0; + + if (!xfs_ilock_nowait(dp, XFS_ILOCK_SHARED)) { + xfs_iunlock(dp, XFS_IOLOCK_SHARED); + return 0; + } + + if (!xfs_need_iread_extents(&dp->i_df)) + return XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED; + + xfs_iunlock(dp, XFS_ILOCK_SHARED); + + if (!xfs_ilock_nowait(dp, XFS_ILOCK_EXCL)) { + xfs_iunlock(dp, XFS_IOLOCK_SHARED); + return 0; + } + + return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL; +} + +/* Check the forward link (dirent) associated with this parent pointer. */ +STATIC int +xchk_parent_dirent( + struct xchk_pptrs *pp, + struct xfs_inode *dp) +{ + struct xfs_name xname = { + .name = pp->pptr.p_name, + .len = pp->pptr.p_namelen, + }; + struct xfs_scrub *sc = pp->sc; + xfs_ino_t child_ino; + int error; + + /* + * Use the name attached to this parent pointer to look up the + * directory entry in the alleged parent. 
+ */ + error = xchk_dir_lookup(sc, dp, &xname, &child_ino); + if (error == -ENOENT) { + xchk_fblock_xref_set_corrupt(sc, XFS_ATTR_FORK, 0); + return 0; + } + if (!xchk_fblock_xref_process_error(sc, XFS_ATTR_FORK, 0, &error)) + return error; + + /* Does the inode number match? */ + if (child_ino != sc->ip->i_ino) { + xchk_fblock_xref_set_corrupt(sc, XFS_ATTR_FORK, 0); + return 0; + } + + /* + * If we're scanning a directory, we should only ever encounter a + * single parent pointer, and it should match the dotdot entry. We set + * the parent_ino from the dotdot entry before the scan, so compare it + * now. + */ + if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) + return 0; + + if (pp->parent_ino != dp->i_ino) { + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, 0); + return 0; + } + + pp->parent_ino = NULLFSINO; + return 0; +} + +/* Try to grab a parent directory. */ +STATIC int +xchk_parent_iget( + struct xchk_pptrs *pp, + struct xfs_inode **dpp) +{ + struct xfs_scrub *sc = pp->sc; + struct xfs_inode *ip; + int error; + + /* Validate inode number. */ + error = xfs_dir_ino_validate(sc->mp, pp->pptr.p_ino); + if (error) { + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, 0); + return -ECANCELED; + } + + error = xchk_iget(sc, pp->pptr.p_ino, &ip); + if (error == -EINVAL || error == -ENOENT) { + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, 0); + return -ECANCELED; + } + if (!xchk_fblock_xref_process_error(sc, XFS_ATTR_FORK, 0, &error)) + return error; + + /* The parent must be a directory. */ + if (!S_ISDIR(VFS_I(ip)->i_mode)) { + xchk_fblock_xref_set_corrupt(sc, XFS_ATTR_FORK, 0); + goto out_rele; + } + + /* Validate generation number. */ + if (VFS_I(ip)->i_generation != pp->pptr.p_gen) { + xchk_fblock_xref_set_corrupt(sc, XFS_ATTR_FORK, 0); + goto out_rele; + } + + *dpp = ip; + return 0; +out_rele: + xchk_irele(sc, ip); + return 0; +} + +/* + * Walk an xattr of a file. 
If this xattr is a parent pointer, follow it up + * to a parent directory and check that the parent has a dirent pointing back + * to us. + */ +STATIC int +xchk_parent_scan_attr( + struct xfs_scrub *sc, + struct xfs_inode *ip, + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen, + const void *value, + unsigned int valuelen, + void *priv) +{ + struct xfs_name dname = { + .name = value, + .len = valuelen, + }; + struct xchk_pptrs *pp = priv; + struct xfs_inode *dp = NULL; + const struct xfs_parent_name_rec *rec = (const void *)name; + unsigned int lockmode; + xfs_dahash_t computed_hash; + int error; + + /* Ignore incomplete xattrs */ + if (attr_flags & XFS_ATTR_INCOMPLETE) + return 0; + + /* Ignore anything that isn't a parent pointer. */ + if (!(attr_flags & XFS_ATTR_PARENT)) + return 0; + + /* Does the ondisk parent pointer structure make sense? */ + if (!xfs_parent_namecheck(sc->mp, rec, namelen, attr_flags)) { + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, 0); + return -ECANCELED; + } + + if (!xfs_parent_valuecheck(sc->mp, value, valuelen)) { + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, 0); + return -ECANCELED; + } + + xfs_parent_irec_from_disk(&pp->pptr, rec, value, valuelen); + + /* + * If the namehash of the dirent name encoded in the parent pointer + * attr value doesn't match the namehash in the parent pointer key, + * the parent pointer is corrupt. + */ + computed_hash = xfs_dir2_hashname(ip->i_mount, &dname); + if (pp->pptr.p_namehash != computed_hash) { + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, 0); + return -ECANCELED; + } + pp->pptrs_found++; + + error = xchk_parent_iget(pp, &dp); + if (error) + return error; + if (!dp) + return 0; + + /* Try to lock the inode. 
*/ + lockmode = xchk_parent_lock_dir(sc, dp); + if (!lockmode) { + xchk_set_incomplete(sc); + error = -ECANCELED; + goto out_rele; + } + + error = xchk_parent_dirent(pp, dp); + if (error) + goto out_unlock; + +out_unlock: + xfs_iunlock(dp, lockmode); +out_rele: + xchk_irele(sc, dp); + return error; +} + +/* + * Compare the number of parent pointers to the link count. For + * non-directories these should be the same. For unlinked directories the + * count should be zero; for linked directories, it should be nonzero. + */ +STATIC int +xchk_parent_count_pptrs( + struct xchk_pptrs *pp) +{ + struct xfs_scrub *sc = pp->sc; + + if (S_ISDIR(VFS_I(sc->ip)->i_mode)) { + if (sc->ip == sc->mp->m_rootip) + pp->pptrs_found++; + + if (VFS_I(sc->ip)->i_nlink == 0 && pp->pptrs_found > 0) + xchk_ino_set_corrupt(sc, sc->ip->i_ino); + else if (VFS_I(sc->ip)->i_nlink > 0 && + pp->pptrs_found == 0) + xchk_ino_set_corrupt(sc, sc->ip->i_ino); + } else { + if (VFS_I(sc->ip)->i_nlink != pp->pptrs_found) + xchk_ino_set_corrupt(sc, sc->ip->i_ino); + } + + return 0; +} + +/* Check parent pointers of a file. */ +STATIC int +xchk_parent_pptr( + struct xfs_scrub *sc) +{ + struct xchk_pptrs *pp; + int error; + + pp = kvzalloc(sizeof(struct xchk_pptrs), XCHK_GFP_FLAGS); + if (!pp) + return -ENOMEM; + pp->sc = sc; + + /* + * Check all the parent pointers of this file, including the dotdot + * entry if there is one. + */ + error = xchk_parent_dotdot(pp); + if (error) + goto out_pp; + + error = xchk_xattr_walk(sc, sc->ip, xchk_parent_scan_attr, pp); + if (error == -ECANCELED) { + error = 0; + goto out_pp; + } + if (error) + goto out_pp; + + if (pp->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + goto out_pp; + + /* + * If the parent pointers aren't corrupt, complain if the number of + * parent pointers doesn't match the link count. + */ + error = xchk_parent_count_pptrs(pp); + if (error) + goto out_pp; + +out_pp: + kvfree(pp); + return error; +} + /* Scrub a parent pointer. 
*/ int xchk_parent( @@ -206,6 +544,9 @@ xchk_parent( xfs_ino_t parent_ino; int error = 0; + if (xfs_has_parent(mp)) + return xchk_parent_pptr(sc); + /* * If we're a directory, check that the '..' link points up to * a directory that has one entry pointing to us. From patchwork Fri May 26 02:15:36 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13256277 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 79B96C7EE2E for ; Fri, 26 May 2023 02:15:47 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230140AbjEZCPq (ORCPT ); Thu, 25 May 2023 22:15:46 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:49158 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229981AbjEZCPn (ORCPT ); Thu, 25 May 2023 22:15:43 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 7B8D819D for ; Thu, 25 May 2023 19:15:38 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id F21F96157B for ; Fri, 26 May 2023 02:15:37 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5CFE6C433D2; Fri, 26 May 2023 02:15:37 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685067337; bh=lluAXURufSR96U0k4fhHVJn2SBw3m2m4u3w7U0FphQ0=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=i7J67WUII6tsPLvZiPUcp4X2hzRb1O0++1dEcL69GpDc6EM6ASkD5gDbLpMGVdTe7 azdQW9m4iIAh+Zsjx0yPvVI4UDfjb4RaywH4uMxvl0O4gfkmfazhSCD+WssaIta0sT 
zyaA/z76tZ7avVXE2qvPnSApGZ7PaW1xUr/mdWi89V5sRy6vky+NZb4KXCpcRSaXEt PLoqcAjQZ1PoustEBHPIvppTJOrXPy2vOjUAxwhwlBiodLUxmRJgUGQOFTRb0dyflZ Zc2eDCFuvD5oFkxQQm2nsIPlqYjZkxr8CJ2+qoNZaNSgaLj1MDYTBYepQrZwSBkxzs OiyaJhH6Jj7Yw== Date: Thu, 25 May 2023 19:15:36 -0700 Subject: [PATCH 04/17] xfs: deferred scrub of parent pointers From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073353.3745075.18102281884357315758.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong If the trylock-based dirent check fails, retain those parent pointers and check them at the end. This may involve dropping the locks on the file being scanned, so yay. Signed-off-by: Darrick J. Wong --- fs/xfs/Makefile | 2 fs/xfs/scrub/parent.c | 269 ++++++++++++++++++++++++++++++++++++++++++++++++- fs/xfs/scrub/trace.h | 2 3 files changed, 266 insertions(+), 7 deletions(-) diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 39b9443608e2..99fd4a5cf051 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -176,6 +176,7 @@ xfs-y += $(addprefix scrub/, \ scrub.o \ symlink.o \ xfarray.o \ + xfblob.o \ xfile.o \ ) @@ -214,7 +215,6 @@ xfs-y += $(addprefix scrub/, \ rmap_repair.o \ symlink_repair.o \ tempfile.o \ - xfblob.o \ xfbtree.o \ ) diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 6427f4f14022..8daf08a627b7 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -23,6 +23,9 @@ #include "scrub/tempfile.h" #include "scrub/repair.h" #include "scrub/listxattr.h" +#include "scrub/xfile.h" +#include "scrub/xfarray.h" +#include "scrub/xfblob.h" #include "scrub/trace.h" /* Set us up to scrub parents. 
*/ @@ -211,17 +214,42 @@ xchk_parent_validate( * forward to the child file. */ +/* Deferred parent pointer entry that we saved for later. */ +struct xchk_pptr { + /* Cookie for retrieval of the pptr name. */ + xfblob_cookie name_cookie; + + /* Parent pointer attr key. */ + xfs_ino_t p_ino; + uint32_t p_gen; + + /* Length of the pptr name. */ + uint8_t namelen; +}; + struct xchk_pptrs { struct xfs_scrub *sc; /* Scratch buffer for scanning pptr xattrs */ struct xfs_parent_name_irec pptr; + /* Fixed-size array of xchk_pptr structures. */ + struct xfarray *pptr_entries; + + /* Blobs containing parent pointer names. */ + struct xfblob *pptr_names; + /* How many parent pointers did we find at the end? */ unsigned long long pptrs_found; /* Parent of this directory. */ xfs_ino_t parent_ino; + + /* If we've cycled the ILOCK, we must revalidate all deferred pptrs. */ + bool need_revalidate; + + /* xattr key and da args for parent pointer revalidation. */ + struct xfs_parent_scratch pptr_scratch; }; /* Look up the dotdot entry so that we can check it as we walk the pptrs. */ @@ -446,8 +474,27 @@ xchk_parent_scan_attr( /* Try to lock the inode. */ lockmode = xchk_parent_lock_dir(sc, dp); if (!lockmode) { - xchk_set_incomplete(sc); - error = -ECANCELED; + struct xchk_pptr save_pp = { + .p_ino = pp->pptr.p_ino, + .p_gen = pp->pptr.p_gen, + .namelen = pp->pptr.p_namelen, + }; + + /* Couldn't lock the inode, so save the pptr for later. 
*/ + trace_xchk_parent_defer(sc->ip, pp->pptr.p_name, + pp->pptr.p_namelen, dp->i_ino); + + /* + * Stash the name first. The process_error helper returns false + * when it has consumed a failure, so only bail out in that case. + */ + error = xfblob_store(pp->pptr_names, &save_pp.name_cookie, + pp->pptr.p_name, pp->pptr.p_namelen); + if (!xchk_fblock_xref_process_error(sc, XFS_ATTR_FORK, 0, + &error)) + goto out_rele; + + /* Queue the entry only now that its name cookie is valid. */ + error = xfarray_append(pp->pptr_entries, &save_pp); + if (!xchk_fblock_xref_process_error(sc, XFS_ATTR_FORK, 0, + &error)) + goto out_rele; + + goto out_rele; } @@ -462,6 +509,180 @@ xchk_parent_scan_attr( return error; } +/* + * Revalidate a parent pointer that we collected in the past but couldn't check + * because of lock contention. Returns 0 if the parent pointer is still valid, + * -ENOENT if it has gone away on us, or a negative errno. + */ +STATIC int +xchk_parent_revalidate_pptr( + struct xchk_pptrs *pp) +{ + struct xfs_scrub *sc = pp->sc; + int error; + + error = xfs_parent_lookup(sc->tp, sc->ip, &pp->pptr, + &pp->pptr_scratch); + if (error == -ENOATTR) { + /* Parent pointer went away, nothing to revalidate. */ + return -ENOENT; + } + + return error; +} + +/* + * Check a parent pointer the slow way, which means we cycle locks a bunch + * and put up with revalidation until we get it done. + */ +STATIC int +xchk_parent_slow_pptr( + struct xchk_pptrs *pp, + struct xchk_pptr *pptr) +{ + struct xfs_scrub *sc = pp->sc; + struct xfs_inode *dp = NULL; + unsigned int lockmode; + int error; + + /* Restore the saved parent pointer into the irec. */ + pp->pptr.p_ino = pptr->p_ino; + pp->pptr.p_gen = pptr->p_gen; + + error = xfblob_load(pp->pptr_names, pptr->name_cookie, pp->pptr.p_name, + pptr->namelen); + if (error) + return error; + pp->pptr.p_name[MAXNAMELEN - 1] = 0; + pp->pptr.p_namelen = pptr->namelen; + xfs_parent_irec_hashname(sc->mp, &pp->pptr); + + /* Check that the deferred parent pointer still exists. 
*/ + if (pp->need_revalidate) { + error = xchk_parent_revalidate_pptr(pp); + if (error == -ENOENT) + return 0; + if (!xchk_fblock_xref_process_error(sc, XFS_ATTR_FORK, 0, + &error)) + return error; + } + + error = xchk_parent_iget(pp, &dp); + if (error) + return error; + if (!dp) + return 0; + + /* + * If we can grab both IOLOCK and ILOCK of the alleged parent, we + * can proceed with the validation. + */ + lockmode = xchk_parent_lock_dir(sc, dp); + if (lockmode) + goto check_dirent; + + /* + * We couldn't lock the parent dir. Drop all the locks and try to + * get them again, one at a time. + */ + xchk_iunlock(sc, sc->ilock_flags); + pp->need_revalidate = true; + + trace_xchk_parent_slowpath(sc->ip, pp->pptr.p_name, pptr->namelen, + dp->i_ino); + + while (true) { + xchk_ilock(sc, XFS_IOLOCK_EXCL); + if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) { + xchk_ilock(sc, XFS_ILOCK_EXCL); + if (xfs_ilock_nowait(dp, XFS_ILOCK_EXCL)) { + break; + } + xchk_iunlock(sc, XFS_ILOCK_EXCL); + /* + * Drop the parent's IOLOCK too, so that a retry (or the + * early exit below) does not leave it held. + */ + xfs_iunlock(dp, XFS_IOLOCK_SHARED); + } + xchk_iunlock(sc, XFS_IOLOCK_EXCL); + + if (xchk_should_terminate(sc, &error)) + goto out_rele; + + delay(1); + } + lockmode = XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL; + + /* + * If we didn't already find a parent pointer matching the dotdot + * entry, re-query the dotdot entry so that we can validate it. + */ + if (pp->parent_ino != NULLFSINO) { + error = xchk_parent_dotdot(pp); + if (error) + goto out_unlock; + } + + /* Revalidate the parent pointer now that we cycled locks. */ + error = xchk_parent_revalidate_pptr(pp); + if (error == -ENOENT) { + /* + * The parent pointer went away while we had the locks + * dropped, so there is nothing left to check. Don't + * report that to the caller as a failure. + */ + error = 0; + goto out_unlock; + } + if (!xchk_fblock_xref_process_error(sc, XFS_ATTR_FORK, 0, &error)) + goto out_unlock; + +check_dirent: + error = xchk_parent_dirent(pp, dp); +out_unlock: + xfs_iunlock(dp, lockmode); +out_rele: + xchk_irele(sc, dp); + return error; +} + +/* Check all the parent pointers that we deferred the first time around. 
*/ +STATIC int +xchk_parent_finish_slow_pptrs( + struct xchk_pptrs *pp) +{ + xfarray_idx_t array_cur; + int error; + + foreach_xfarray_idx(pp->pptr_entries, array_cur) { + struct xchk_pptr pptr; + + if (pp->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return 0; + + error = xfarray_load(pp->pptr_entries, array_cur, &pptr); + if (error) + return error; + + error = xchk_parent_slow_pptr(pp, &pptr); + if (error) + return error; + } + + /* Empty out both xfiles now that we've checked everything. */ + xfarray_truncate(pp->pptr_entries); + xfblob_truncate(pp->pptr_names); + return 0; +} + +/* Count the number of parent pointers. */ +STATIC int +xchk_parent_count_pptr( + struct xfs_scrub *sc, + struct xfs_inode *ip, + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen, + const void *value, + unsigned int valuelen, + void *priv) +{ + struct xchk_pptrs *pp = priv; + + if (attr_flags & XFS_ATTR_PARENT) + pp->pptrs_found++; + return 0; +} + /* * Compare the number of parent pointers to the link count. For * non-directories these should be the same. For unlinked directories the @@ -472,6 +693,20 @@ xchk_parent_count_pptrs( struct xchk_pptrs *pp) { struct xfs_scrub *sc = pp->sc; + int error; + + /* + * If we cycled the ILOCK while cross-checking parent pointers with + * dirents, then we need to recalculate the number of parent pointers. + */ + if (pp->need_revalidate) { + pp->pptrs_found = 0; + error = xchk_xattr_walk(sc, sc->ip, xchk_parent_count_pptr, pp); + if (error == -ECANCELED) + return 0; + if (error) + return error; + } if (S_ISDIR(VFS_I(sc->ip)->i_mode)) { if (sc->ip == sc->mp->m_rootip) @@ -511,16 +746,34 @@ xchk_parent_pptr( if (error) goto out_pp; + /* + * Set up some staging memory for parent pointers that we can't check + * due to locking contention. 
+ */ + error = xfarray_create(sc->mp, "slow parent pointer entries", 0, + sizeof(struct xchk_pptr), &pp->pptr_entries); + if (error) + goto out_pp; + + error = xfblob_create(sc->mp, "slow parent pointer names", + &pp->pptr_names); + if (error) + goto out_entries; + error = xchk_xattr_walk(sc, sc->ip, xchk_parent_scan_attr, pp); if (error == -ECANCELED) { error = 0; - goto out_pp; + goto out_names; } if (error) - goto out_pp; + goto out_names; + + error = xchk_parent_finish_slow_pptrs(pp); + if (error) + goto out_names; if (pp->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - goto out_pp; + goto out_names; /* * If the parent pointers aren't corrupt, complain if the number of @@ -528,8 +781,12 @@ xchk_parent_pptr( */ error = xchk_parent_count_pptrs(pp); if (error) - goto out_pp; + goto out_names; +out_names: + xfblob_destroy(pp->pptr_names); +out_entries: + xfarray_destroy(pp->pptr_entries); out_pp: kvfree(pp); return error; diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 539c51545bcd..28232e4611d7 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -1449,6 +1449,8 @@ DEFINE_EVENT(xchk_pptr_class, name, \ TP_ARGS(ip, name, namelen, parent_ino)) DEFINE_XCHK_PPTR_CLASS(xchk_dir_defer); DEFINE_XCHK_PPTR_CLASS(xchk_dir_slowpath); +DEFINE_XCHK_PPTR_CLASS(xchk_parent_defer); +DEFINE_XCHK_PPTR_CLASS(xchk_parent_slowpath); /* repair tracepoints */ #if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) From patchwork Fri May 26 02:15:52 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13256278 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id A4F59C7EE2E for ; Fri, 26 May 2023 02:15:56 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229832AbjEZCPz (ORCPT ); Thu, 25 May 2023 22:15:55 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:49238 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229727AbjEZCPz (ORCPT ); Thu, 25 May 2023 22:15:55 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id F170513A for ; Thu, 25 May 2023 19:15:53 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 8D3DE614A2 for ; Fri, 26 May 2023 02:15:53 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id ECBADC433D2; Fri, 26 May 2023 02:15:52 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685067353; bh=ORD4utI2Hb5GAtFW2uS2URZTBJTR2Dr9KmT1fC2lGPw=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=ai4HvoHg6FMxpIlgZT/tgVYFWvLEPnLQ8PR3PrKlJDgwJPpu/YIjSq8qrPFBdKdTB n4IAGscmuMkHivuDTFoXGgy5/Rm4yK0zRhmkRByiqv5+1JHji7gi2hmuQAaAbiF9fg itxvBnxUb9GPX1c230j2GNBzbBnrbGKUPmfZmlaRz6rHY6Kig0JpJQ82ro9Nyi8aW4 fHIq6879CqHgUk+zJdM0BkdEbYVSe1sauFzDM+bJtaUOkWVRKDeF4+AsNsYBkJ5OMg /m74cRkukApPAkQQtqqg+UmokGJs5wenUvUjMkodaNlAZpahq3ImETMdoIEUueEk7L SomlwdV1FVksw== Date: Thu, 25 May 2023 19:15:52 -0700 Subject: [PATCH 05/17] xfs: add raw parent pointer apis to support repair From: "Darrick J. 
Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073367.3745075.5122726017349785801.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong Add a couple of utility functions to set or remove parent pointers from a file. These functions will be used by repair code, hence they skip the xattr logging that regular parent pointer updates use. Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_dir2.c | 2 +- fs/xfs/libxfs/xfs_dir2.h | 2 +- fs/xfs/libxfs/xfs_parent.c | 37 +++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_parent.h | 8 ++++++++ 4 files changed, 47 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 51eed639f2df..525b23a3800b 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -440,7 +440,7 @@ int xfs_dir_removename( struct xfs_trans *tp, struct xfs_inode *dp, - struct xfs_name *name, + const struct xfs_name *name, xfs_ino_t ino, xfs_extlen_t total) /* bmap's total block count */ { diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index f99788a1f3e6..ca1949ed4f5e 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -55,7 +55,7 @@ extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, const struct xfs_name *name, xfs_ino_t *inum, struct xfs_name *ci_name); extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, - struct xfs_name *name, xfs_ino_t ino, + const struct xfs_name *name, xfs_ino_t ino, xfs_extlen_t tot); extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, const struct xfs_name *name, xfs_ino_t inum, diff --git a/fs/xfs/libxfs/xfs_parent.c 
b/fs/xfs/libxfs/xfs_parent.c index a0ffff5db76d..27685ce65a16 100644 --- a/fs/xfs/libxfs/xfs_parent.c +++ b/fs/xfs/libxfs/xfs_parent.c @@ -364,3 +364,40 @@ xfs_parent_lookup( return xfs_attr_get_ilocked(&scr->args); } + +/* + * Attach the parent pointer (@pptr -> @name) to @ip immediately. Caller must + * not have a transaction or hold the ILOCK. The update will not use logged + * xattrs. This is for specialized repair functions only. The scratchpad need + * not be initialized. + */ +int +xfs_parent_set( + struct xfs_inode *ip, + const struct xfs_parent_name_irec *pptr, + struct xfs_parent_scratch *scr) +{ + xfs_parent_irec_to_disk(&scr->rec, pptr); + xfs_parent_scratch_init(NULL, ip, pptr, scr); + + return xfs_attr_set(&scr->args); +} + +/* + * Remove the parent pointer (@pptr -> @name) from @ip immediately. Caller must + * not have a transaction or hold the ILOCK. The update will not use logged + * xattrs. This is for specialized repair functions only. The scratchpad need + * not be initialized. 
+ */ +int +xfs_parent_unset( + struct xfs_inode *ip, + const struct xfs_parent_name_irec *pptr, + struct xfs_parent_scratch *scr) +{ + xfs_parent_irec_to_disk(&scr->rec, pptr); + xfs_parent_scratch_init(NULL, ip, pptr, scr); + scr->args.op_flags |= XFS_DA_OP_REMOVE; + + return xfs_attr_set(&scr->args); +} diff --git a/fs/xfs/libxfs/xfs_parent.h b/fs/xfs/libxfs/xfs_parent.h index 25bbb62fce5f..f1ec9cce859e 100644 --- a/fs/xfs/libxfs/xfs_parent.h +++ b/fs/xfs/libxfs/xfs_parent.h @@ -108,4 +108,12 @@ int xfs_parent_lookup(struct xfs_trans *tp, struct xfs_inode *ip, const struct xfs_parent_name_irec *pptr, struct xfs_parent_scratch *scratch); +int xfs_parent_set(struct xfs_inode *ip, + const struct xfs_parent_name_irec *pptr, + struct xfs_parent_scratch *scratch); + +int xfs_parent_unset(struct xfs_inode *ip, + const struct xfs_parent_name_irec *rec, + struct xfs_parent_scratch *scratch); + #endif /* __XFS_PARENT_H__ */ From patchwork Fri May 26 02:16:08 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13256279 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7002EC77B7A for ; Fri, 26 May 2023 02:16:12 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229981AbjEZCQL (ORCPT ); Thu, 25 May 2023 22:16:11 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:49316 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229727AbjEZCQK (ORCPT ); Thu, 25 May 2023 22:16:10 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 92C3E195 for ; Thu, 25 May 2023 19:16:09 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 2805B6157B for ; Fri, 26 May 2023 02:16:09 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 882C8C433D2; Fri, 26 May 2023 02:16:08 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685067368; bh=PgyuZhG8o3dNS1nBirEv76fsv/MvrMYXRvCV8ZutY0o=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=tKJEcU82cPmgPIBHE3ksOb2MZdiPYCBw5V7nQXRhmjiAD3W6mRb4KQUnSE2c9BiUW fOOl2LfwX/iD9eviDNeYKQAPhMRmHDiYkJYROYQn15we/ZhkOEg6iLPWjsoMBYNt49 1EvjqvG09XoNuAjs4iaU+R3u+exsK1l8UU5dnIWmjHU08sgDnVmLVyI6vqejqZl8js FcbV2FjwR0Jeb0w8nANnLH+vw/siMvF1bn4qH5amfcmrPemsH+HogMuv5evoEcqlkx EfubSsFyn0DvJeeTWoE//esz3sVAO55cswU1aDmxmpLv9138hwQ8VGizhyW76rgoym 5Im/7EYL4jUtw== Date: Thu, 25 May 2023 19:16:08 -0700 Subject: [PATCH 06/17] xfs: set child file owner in xfs_da_args when changing parent pointers From: "Darrick J. 
Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073382.3745075.7085494043402000854.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong Now that struct xfs_da_args has an explicit file owner field, we must set it when modifying parent pointers. Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_parent.c | 13 ++++++++++--- fs/xfs/libxfs/xfs_parent.h | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c index 27685ce65a16..cc05d1de9eb0 100644 --- a/fs/xfs/libxfs/xfs_parent.c +++ b/fs/xfs/libxfs/xfs_parent.c @@ -195,6 +195,7 @@ xfs_parent_add( args->trans = tp; args->dp = child; + args->owner = child->i_ino; xfs_init_parent_davalue(&parent->args, parent_name); @@ -227,6 +228,7 @@ xfs_parent_remove( args->trans = tp; args->dp = child; + args->owner = child->i_ino; xfs_init_parent_davalue(&parent->args, parent_name); @@ -265,6 +267,7 @@ xfs_parent_replace( args->trans = tp; args->dp = child; + args->owner = child->i_ino; xfs_init_parent_davalue(&parent->args, old_name); xfs_init_parent_danewvalue(&parent->args, new_name); @@ -327,6 +330,7 @@ static inline void xfs_parent_scratch_init( struct xfs_trans *tp, struct xfs_inode *ip, + xfs_ino_t owner, const struct xfs_parent_name_irec *pptr, struct xfs_parent_scratch *scr) { @@ -343,6 +347,7 @@ xfs_parent_scratch_init( scr->args.whichfork = XFS_ATTR_FORK; scr->args.hashval = xfs_da_hashname((const void *)&scr->rec, sizeof(struct xfs_parent_name_rec)); + scr->args.owner = owner; } /* @@ -359,7 +364,7 @@ xfs_parent_lookup( struct xfs_parent_scratch *scr) { xfs_parent_irec_to_disk(&scr->rec, pptr); - 
xfs_parent_scratch_init(tp, ip, pptr, scr); + xfs_parent_scratch_init(tp, ip, ip->i_ino, pptr, scr); scr->args.op_flags |= XFS_DA_OP_OKNOENT; return xfs_attr_get_ilocked(&scr->args); @@ -374,11 +379,12 @@ xfs_parent_lookup( int xfs_parent_set( struct xfs_inode *ip, + xfs_ino_t owner, const struct xfs_parent_name_irec *pptr, struct xfs_parent_scratch *scr) { xfs_parent_irec_to_disk(&scr->rec, pptr); - xfs_parent_scratch_init(NULL, ip, pptr, scr); + xfs_parent_scratch_init(NULL, ip, owner, pptr, scr); return xfs_attr_set(&scr->args); } @@ -392,11 +398,12 @@ xfs_parent_set( int xfs_parent_unset( struct xfs_inode *ip, + xfs_ino_t owner, const struct xfs_parent_name_irec *pptr, struct xfs_parent_scratch *scr) { xfs_parent_irec_to_disk(&scr->rec, pptr); - xfs_parent_scratch_init(NULL, ip, pptr, scr); + xfs_parent_scratch_init(NULL, ip, owner, pptr, scr); scr->args.op_flags |= XFS_DA_OP_REMOVE; return xfs_attr_set(&scr->args); diff --git a/fs/xfs/libxfs/xfs_parent.h b/fs/xfs/libxfs/xfs_parent.h index f1ec9cce859e..5dbaceb97653 100644 --- a/fs/xfs/libxfs/xfs_parent.h +++ b/fs/xfs/libxfs/xfs_parent.h @@ -108,11 +108,11 @@ int xfs_parent_lookup(struct xfs_trans *tp, struct xfs_inode *ip, const struct xfs_parent_name_irec *pptr, struct xfs_parent_scratch *scratch); -int xfs_parent_set(struct xfs_inode *ip, +int xfs_parent_set(struct xfs_inode *ip, xfs_ino_t owner, const struct xfs_parent_name_irec *pptr, struct xfs_parent_scratch *scratch); -int xfs_parent_unset(struct xfs_inode *ip, +int xfs_parent_unset(struct xfs_inode *ip, xfs_ino_t owner, const struct xfs_parent_name_irec *rec, struct xfs_parent_scratch *scratch); From patchwork Fri May 26 02:16:23 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13256280 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 9873AC77B7E for ; Fri, 26 May 2023 02:16:32 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229727AbjEZCQb (ORCPT ); Thu, 25 May 2023 22:16:31 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:49372 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230049AbjEZCQ2 (ORCPT ); Thu, 25 May 2023 22:16:28 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2718F13D for ; Thu, 25 May 2023 19:16:25 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id B87EC60DCE for ; Fri, 26 May 2023 02:16:24 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 29C61C433D2; Fri, 26 May 2023 02:16:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685067384; bh=aQM0ydoJSP1MBH02CwqhzzRiYaHIwtF6egC6KDcEWL8=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=JX0D6J3BiPGQn+J7V8wMNBfIHp+XKM7P+stJn1sJRg3Ab3QBbleRGqlzvNJw321Fi BAaRFFxmy0fIIKwDCR/17qLRybn561Sg1GH8SytW/cj/llpVjO+6vfUlu+HdvBR/OQ PEebom+RNRCZFXaIpHsRpVVW9My8zj3FJguyN6uNpeOq1SyliOQMsF+yNdGm9s+qOl FExz7lM/tE25O9Qd5tpCnU55hOawcCT7HDQvP92NFBORJKbVxvPvRvM0FWL0IyLpLz Wi/+ih5mm+vvjL32fIrFGzhCrzRDsBn1yLyNLJ7Hl7W2nOjnqoXElQqqCevjSStBVv cqbsz+QJJ1mZw== Date: Thu, 25 May 2023 19:16:23 -0700 Subject: [PATCH 07/17] xfs: salvage parent pointers when rebuilding xattr structures From: "Darrick J. 
Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073396.3745075.1972680528979448636.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong When we're salvaging extended attributes, make sure we validate the ones that claim to be parent pointers before adding them to the salvage pile. Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/attr_repair.c | 41 ++++++++++++++++++++++++++++++++--------- fs/xfs/scrub/trace.c | 1 + fs/xfs/scrub/trace.h | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 9 deletions(-) diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c index 1d5bacbe1b81..489abe1f028a 100644 --- a/fs/xfs/scrub/attr_repair.c +++ b/fs/xfs/scrub/attr_repair.c @@ -28,6 +28,7 @@ #include "xfs_swapext.h" #include "xfs_xchgrange.h" #include "xfs_acl.h" +#include "xfs_parent.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -124,6 +125,13 @@ xrep_xattr_want_salvage( return false; if (valuelen > XATTR_SIZE_MAX || valuelen < 0) return false; + if (attr_flags & XFS_ATTR_PARENT) { + if (!xfs_parent_namecheck(rx->sc->mp, name, namelen, + attr_flags)) + return false; + if (!xfs_parent_valuecheck(rx->sc->mp, value, valuelen)) + return false; + } return true; } @@ -151,14 +159,21 @@ xrep_xattr_salvage_key( * Truncate the name to the first character that would trip namecheck. * If we no longer have a name after that, ignore this attribute. 
*/ - while (i < namelen && name[i] != 0) - i++; - if (i == 0) - return 0; - key.namelen = i; + if (flags & XFS_ATTR_PARENT) { + key.namelen = namelen; - trace_xrep_xattr_salvage_rec(rx->sc->ip, flags, name, key.namelen, - valuelen); + trace_xrep_xattr_salvage_pptr(rx->sc->ip, flags, name, + key.namelen, value, valuelen); + } else { + while (i < namelen && name[i] != 0) + i++; + if (i == 0) + return 0; + key.namelen = i; + + trace_xrep_xattr_salvage_rec(rx->sc->ip, flags, name, + key.namelen, valuelen); + } error = xfblob_store(rx->xattr_blobs, &key.name_cookie, name, key.namelen); @@ -562,6 +577,9 @@ xrep_xattr_insert_rec( struct xchk_xattr_buf *ab = rx->sc->buf; int error; + if (key->flags & XFS_ATTR_PARENT) + args.op_flags |= XFS_DA_OP_NVLOOKUP; + /* * Grab pointers to the scrub buffer so that we can use them to insert * attrs into the temp file. @@ -595,8 +613,13 @@ xrep_xattr_insert_rec( ab->name[key->namelen] = 0; - trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags, ab->name, - key->namelen, key->valuelen); + if (key->flags & XFS_ATTR_PARENT) + trace_xrep_xattr_insert_pptr(rx->sc->tempip, key->flags, + ab->name, key->namelen, ab->value, + key->valuelen); + else + trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags, + ab->name, key->namelen, key->valuelen); /* * xfs_attr_set creates and commits its own transaction. 
If the attr diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 18a1a3d1cbef..913f886380c0 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -18,6 +18,7 @@ #include "xfs_dir2.h" #include "xfs_da_format.h" #include "xfs_rmap.h" +#include "xfs_parent.h" #include "scrub/scrub.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 28232e4611d7..c64594f20f73 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -2481,6 +2481,46 @@ DEFINE_EVENT(xrep_xattr_salvage_class, name, \ DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_salvage_rec); DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_insert_rec); +DECLARE_EVENT_CLASS(xrep_pptr_salvage_class, + TP_PROTO(struct xfs_inode *ip, unsigned int flags, const void *name, + unsigned int namelen, const void *value, unsigned int valuelen), + TP_ARGS(ip, flags, name, namelen, value, valuelen), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_ino_t, parent_ino) + __field(unsigned int, parent_gen) + __field(unsigned int, namelen) + __dynamic_array(char, name, valuelen) + ), + TP_fast_assign( + struct xfs_parent_name_irec pptr; + + xfs_parent_irec_from_disk(&pptr, name, value, valuelen); + + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->parent_ino = pptr.p_ino; + __entry->parent_gen = pptr.p_gen; + __entry->namelen = pptr.p_namelen; + memcpy(__get_str(name), pptr.p_name, pptr.p_namelen); + ), + TP_printk("dev %d:%d ino 0x%llx parent_ino 0x%llx parent_gen 0x%x name '%.*s'", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->parent_ino, + __entry->parent_gen, + __entry->namelen, + __get_str(name)) +) +#define DEFINE_XREP_PPTR_SALVAGE_CLASS(name) \ +DEFINE_EVENT(xrep_pptr_salvage_class, name, \ + TP_PROTO(struct xfs_inode *ip, unsigned int flags, const void *name, \ + unsigned int namelen, const void *value, unsigned int valuelen), \ + TP_ARGS(ip, flags, name, 
namelen, value, valuelen)) +DEFINE_XREP_PPTR_SALVAGE_CLASS(xrep_xattr_salvage_pptr); +DEFINE_XREP_PPTR_SALVAGE_CLASS(xrep_xattr_insert_pptr); + TRACE_EVENT(xrep_xattr_class, TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip), TP_ARGS(ip, arg_ip), From patchwork Fri May 26 02:16:39 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13256281 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0BF3EC77B7A for ; Fri, 26 May 2023 02:16:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230077AbjEZCQn (ORCPT ); Thu, 25 May 2023 22:16:43 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:49406 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229999AbjEZCQm (ORCPT ); Thu, 25 May 2023 22:16:42 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id BDDE213A for ; Thu, 25 May 2023 19:16:40 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 5A500614A2 for ; Fri, 26 May 2023 02:16:40 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id BF8EDC433D2; Fri, 26 May 2023 02:16:39 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685067399; bh=14Ey3VVaOWUuMVFe9oJWmCEOsFjCn+d/2dkDrH03/gE=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=WFAW+3ZsUx8z5/p/H6UuoBYXHstnPdcVYIB1O7QjcNjMoyQi5UhuLyGffFmPnAvLw 1JLN/weK3lCqcz5VcRWpFypngF3poHO5Hs6D+uITnigejazl6QwoyKktVs7n1ujS7T 
Q2V/suR1pODOPDg9Ec3aBCUlwmrUb9FIjsz5DVG6uQ5EzEqoc1kDHaH1Jsy3/RgCFn vMMI3R8xUI4ruL7JLl5P0FRo0fj+be1rDMeWWusjjpOOC08pY51PvgqvlOmqgSpqfi dMXk6DAXIVtrt9TQmso0sZnuiSluB9OyzxTLL70Iket+QDyt4uVE1LMjcxSLNjEy+p Nkgu0N+8ubPfw== Date: Thu, 25 May 2023 19:16:39 -0700 Subject: [PATCH 08/17] xfs: teach the adoption code about parent pointers From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073410.3745075.12787706795487900676.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong Teach the online fsck file adoption code how to create parent pointers for files that are moved to /lost+found. In addition to the parent pointer creation itself, we must also turn on logged xattrs during scrub setup. Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/orphanage.c | 66 +++++++++++++++++++++++++++++++++++++++++++--- fs/xfs/scrub/orphanage.h | 2 + fs/xfs/scrub/scrub.c | 6 ++++ fs/xfs/scrub/scrub.h | 8 +++--- fs/xfs/scrub/trace.h | 1 + 5 files changed, 75 insertions(+), 8 deletions(-) diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index 8285d129db9e..c574ae5a23ec 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c @@ -19,6 +19,10 @@ #include "xfs_icache.h" #include "xfs_bmap.h" #include "xfs_bmap_btree.h" +#include "xfs_parent.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_xattr.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/repair.h" @@ -97,6 +101,31 @@ xrep_chown_orphanage( return error; } +/* + * Enable logged extended attributes for parent pointers. This must get done + * before we create transactions and start making changes. 
+ */ +STATIC int +xrep_adoption_grab_log_assist( + struct xfs_scrub *sc) +{ + int error; + + if (!xfs_has_parent(sc->mp)) + return 0; + + ASSERT(!(sc->flags & XREP_FSGATES_LARP)); + + error = xfs_attr_grab_log_assist(sc->mp); + if (error) + return error; + + trace_xchk_fsgates_enable(sc, XREP_FSGATES_LARP); + + sc->flags |= XREP_FSGATES_LARP; + return 0; +} + #define ORPHANAGE "lost+found" /* Create the orphanage directory, and set sc->orphanage to it. */ @@ -188,6 +217,12 @@ xrep_orphanage_create( out_dput_root: dput(root_dentry); out: + /* + * Turn on whatever log features are required for an adoption to be + * committed correctly. + */ + if (!error) + error = xrep_adoption_grab_log_assist(sc); return error; } @@ -267,6 +302,14 @@ xrep_adoption_init( child_blkres = xfs_rename_space_res(mp, 0, false, xfs_name_dotdot.len, false); adopt->child_blkres = child_blkres; + + if (xfs_has_parent(mp)) { + ASSERT(sc->flags & XREP_FSGATES_LARP); + return xfs_parent_start_locked(mp, &adopt->parent); + } else { + adopt->parent = NULL; + } + return 0; } @@ -466,7 +509,7 @@ xrep_adoption_commit( error = xrep_orphanage_check_dcache(adopt); if (error) - return error; + goto out_parent; /* * Create the new name in the orphanage, and bump the link count of @@ -475,7 +518,7 @@ xrep_adoption_commit( error = xfs_dir_createname(sc->tp, sc->orphanage, xname, sc->ip->i_ino, adopt->orphanage_blkres); if (error) - return error; + goto out_parent; xfs_trans_ichgtime(sc->tp, sc->orphanage, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -488,7 +531,15 @@ xrep_adoption_commit( error = xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot, sc->orphanage->i_ino, adopt->child_blkres); if (error) - return error; + goto out_parent; + } + + /* Add a parent pointer from the file back to the lost+found. 
*/ + if (adopt->parent) { + error = xfs_parent_add(sc->tp, adopt->parent, sc->orphanage, + xname, sc->ip); + if (error) + goto out_parent; } /* @@ -499,11 +550,14 @@ xrep_adoption_commit( xfs_dir_update_hook(sc->orphanage, sc->ip, 1, xname); error = xrep_defer_finish(sc); if (error) - return error; + goto out_parent; /* Remove negative dentries from the lost+found's dcache */ xrep_orphanage_zap_dcache(adopt); - return 0; +out_parent: + xfs_parent_finish(sc->mp, adopt->parent); + adopt->parent = NULL; + return error; } /* Cancel a proposed relocation of a file to the orphanage. */ @@ -521,6 +575,8 @@ xrep_adoption_cancel( * state to manage, we'll need to give that back. */ trace_xrep_adoption_cancel(sc->orphanage, sc->ip, error); + xfs_parent_finish(sc->mp, adopt->parent); + adopt->parent = NULL; } /* Release the orphanage. */ diff --git a/fs/xfs/scrub/orphanage.h b/fs/xfs/scrub/orphanage.h index 31f068198c8a..382c061e2fb6 100644 --- a/fs/xfs/scrub/orphanage.h +++ b/fs/xfs/scrub/orphanage.h @@ -47,6 +47,8 @@ struct xrep_adoption { struct xfs_scrub *sc; + struct xfs_parent_defer *parent; + /* Block reservations for orphanage and child (if directory). 
*/ unsigned int orphanage_blkres; unsigned int child_blkres; diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index b5bd7125ca34..70010b111d9a 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -21,6 +21,9 @@ #include "xfs_rmap.h" #include "xfs_xchgrange.h" #include "xfs_swapext.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_xattr.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -177,6 +180,9 @@ xchk_fsgates_disable( if (sc->flags & XREP_FSGATES_ATOMIC_XCHG) xfs_xchg_range_rele_log_assist(sc->mp); + if (sc->flags & XREP_FSGATES_LARP) + xfs_attr_rele_log_assist(sc->mp); + sc->flags &= ~FSGATES_MASK; } #undef FSGATES_MASK diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 6f23edcac5cd..638c69e1fed9 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -135,6 +135,7 @@ struct xfs_scrub { #define XCHK_FSGATES_QUOTA (1 << 4) /* quota live update enabled */ #define XCHK_FSGATES_DIRENTS (1 << 5) /* directory live update enabled */ #define XCHK_FSGATES_RMAP (1 << 6) /* rmapbt live update enabled */ +#define XREP_FSGATES_LARP (1 << 28) /* logged xattr updates */ #define XREP_FSGATES_ATOMIC_XCHG (1 << 29) /* uses atomic file content exchange */ #define XREP_RESET_PERAG_RESV (1 << 30) /* must reset AG space reservation */ #define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */ @@ -151,10 +152,11 @@ struct xfs_scrub { XCHK_FSGATES_RMAP) /* - * The sole XREP_FSGATES* flag reflects a log intent item that is protected - * by a log-incompat feature flag. No code patching in use here. + * The XREP_FSGATES* flags reflect log intent items protected by + * log-incompat feature flags. No code patching in use here. 
*/ -#define XREP_FSGATES_ALL (XREP_FSGATES_ATOMIC_XCHG) +#define XREP_FSGATES_ALL (XREP_FSGATES_ATOMIC_XCHG | \ + XREP_FSGATES_LARP) /* Metadata scrubbers */ int xchk_tester(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index c64594f20f73..96c88f4419d7 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -124,6 +124,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_HEALTHY); { XCHK_FSGATES_QUOTA, "fsgates_quota" }, \ { XCHK_FSGATES_DIRENTS, "fsgates_dirents" }, \ { XCHK_FSGATES_RMAP, "fsgates_rmap" }, \ + { XREP_FSGATES_LARP, "fsgates_larp" }, \ { XREP_FSGATES_ATOMIC_XCHG, "fsgates_atomic_swapext" }, \ { XREP_RESET_PERAG_RESV, "reset_perag_resv" }, \ { XREP_ALREADY_FIXED, "already_fixed" } From patchwork Fri May 26 02:16:54 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13256282 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 02AAAC77B7E for ; Fri, 26 May 2023 02:16:59 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230099AbjEZCQ6 (ORCPT ); Thu, 25 May 2023 22:16:58 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:49504 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229999AbjEZCQ5 (ORCPT ); Thu, 25 May 2023 22:16:57 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 620EAF7 for ; Thu, 25 May 2023 19:16:56 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id F3FE36122B for ; Fri, 26 May 2023 
02:16:55 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5F7ADC4339B; Fri, 26 May 2023 02:16:55 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685067415; bh=txMVw3rTM5bZCQh6XyjkLVjZdxHjYUR+RI/9z1XIQ3E=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=SR4OM+pzsVoNdYTU1UGJAWm8jOr6zLI6D+0mZzUM3gORA8vpCLPRVT5mx3Ldm2Y6x FdWds27AgUEZLemWUGODVzs25DO1KtSYoMHiq2LLj3lTu2TEd+xqVnPoffjvpPvGQ8 nlCEs1Yn6taLGTJmiZY5WjYcnIW7gdk3kQsqMWMJLV1nqks5dLakvRuCNNgPZ4oEz7 cFiaYio/hRXlrQvLfPI89i1Jegzuyw+JCNRukPwmiDEvnHEyPWxzP1RMnfRa14VFnU GhcJe0D4IV6CHByZeHLkdiSiqOCEzvmtH0SCumHLUejdtCkpy2LcvRLvi3jWsZjNeN h63U0HQUrCuxg== Date: Thu, 25 May 2023 19:16:54 -0700 Subject: [PATCH 09/17] xfs: replace namebuf with parent pointer in directory repair From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073425.3745075.8429021580807757967.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong Replace the dirent name buffer at the end of struct xrep_dir with a xfs_parent_name_irec object. The namebuf and p_name usage do not overlap, so we can save 256 bytes of memory by allowing them to overlap. Doing so makes the code a bit more complex, so this is called out separately. Signed-off-by: Darrick J. 
Wong --- fs/xfs/scrub/dir_repair.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index c44da2f46b76..450c9b38e085 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -28,6 +28,7 @@ #include "xfs_swapext.h" #include "xfs_xchgrange.h" #include "xfs_ag.h" +#include "xfs_parent.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -131,8 +132,14 @@ struct xrep_dir { /* Should we move this directory to the orphanage? */ bool move_orphanage; - /* Directory entry name, plus the trailing null. */ - unsigned char namebuf[MAXNAMELEN]; + /* + * Scratch buffer for reading parent pointers from child files. The + * p_name field is used to flush stashed dirents into the temporary + * directory in between parent pointers. At the very end of the + * repair, it can also be used to compute the lost+found filename + * if we need to reparent the directory. + */ + struct xfs_parent_name_irec pptr; }; /* Tear down all the incore stuff we created. */ @@ -694,7 +701,7 @@ xrep_dir_replay_update( struct xfs_name name = { .len = dirent->namelen, .type = dirent->ftype, - .name = rd->namebuf, + .name = rd->pptr.p_name, }; struct xfs_mount *mp = rd->sc->mp; xfs_ino_t ino; @@ -769,10 +776,10 @@ xrep_dir_replay_updates( /* The dirent name is stored in the in-core buffer. 
*/ error = xfblob_load(rd->dir_names, dirent.name_cookie, - rd->namebuf, dirent.namelen); + rd->pptr.p_name, dirent.namelen); if (error) return error; - rd->namebuf[MAXNAMELEN - 1] = 0; + rd->pptr.p_name[MAXNAMELEN - 1] = 0; error = xrep_dir_replay_update(rd, &dirent); if (error) @@ -1406,7 +1413,7 @@ xrep_dir_move_to_orphanage( if (error) goto err_adoption; - error = xrep_adoption_compute_name(&rd->adoption, rd->namebuf); + error = xrep_adoption_compute_name(&rd->adoption, rd->pptr.p_name); if (error) goto err_adoption; From patchwork Fri May 26 02:17:10 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13256283 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 4C6CEC77B7E for ; Fri, 26 May 2023 02:17:16 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230154AbjEZCRP (ORCPT ); Thu, 25 May 2023 22:17:15 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:49572 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229999AbjEZCRO (ORCPT ); Thu, 25 May 2023 22:17:14 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 38530F7 for ; Thu, 25 May 2023 19:17:12 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 9EE1764768 for ; Fri, 26 May 2023 02:17:11 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 0E9E7C433EF; Fri, 26 May 2023 02:17:10 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; 
d=kernel.org; s=k20201202; t=1685067431; bh=kXpbIGBF+YuICLLJBO3oKTKsd483m1gPQ8X3onEzzRo=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=ivYzJVFQwVUkhcQM8zZGcGnhqGmUIQEK2+cPOjI9QuRI8bKqOSAfutpHLQVNplAI9 oWJOufJQdhM8jWRJO3f7/VUnebxce/liajoTsxp3lAsqtxw/IikN39aOSsukkrsLEK WkxI1x9G34c4OTpo+zJuQ8HXFdu9I1/t32p1TLT94a2a0kRqefBfXOGP/8bAJ8LIVT OPEwNfdti1FZzKRt0JAkp8Jm30+/CKhdVQQLw3mM9JvbVJkPQ9n00eKwx4d3jAecYL p4ZPkOJYcMhOf5/oxlXgoJfTlOXLSEgNSxGKJSdwHhdUte/KvageaoIUEKOSYGO/cv rO7SFzEyy9nHg== Date: Thu, 25 May 2023 19:17:10 -0700 Subject: [PATCH 10/17] xfs: repair directories by scanning directory parent pointers From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073439.3745075.17837311157389108744.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong For filesystems with parent pointers, scan the entire filesystem looking for parent pointers that target the directory we're rebuilding instead of trying to salvage whatever we can from the directory data blocks. This will be more robust than salvaging, but there's more code to come. Signed-off-by: Darrick J. 
Wong --- fs/xfs/scrub/dir_repair.c | 323 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 320 insertions(+), 3 deletions(-) diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index 450c9b38e085..4a0ebab5e245 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -44,6 +44,7 @@ #include "scrub/reap.h" #include "scrub/findparent.h" #include "scrub/orphanage.h" +#include "scrub/listxattr.h" /* * Directory Repair @@ -58,6 +59,15 @@ * being repaired and the temporary directory, and will later become important * for parent pointer scanning. * + * If parent pointers are enabled on this filesystem, we instead reconstruct + * the directory by visiting each parent pointer of each file in the filesystem + * and translating the relevant parent pointer records into dirents. In this + * case, it is advantageous to stash all directory entries created from parent + * pointers for a single child file before replaying them into the temporary + * directory. To save memory, the live filesystem scan reuses the findparent + * fields. Directory repair chooses either parent pointer scanning or + * directory entry salvaging, but not both. + * * Directory entries added to the temporary directory do not elevate the link * counts of the inodes found. When salvaging completes, the remaining stashed * entries are replayed to the temporary directory. An atomic extent swap is @@ -113,7 +123,15 @@ struct xrep_dir { /* * Information used to scan the filesystem to find the inumber of the - * dotdot entry for this directory. + * dotdot entry for this directory. For directory salvaging when + * parent pointers are not enabled, we use the findparent_* functions + * on this object and access only the parent_ino field directly. + * + * When parent pointers are enabled, however, the pptr scanner uses the + * iscan, hooks, lock, and parent_ino fields of this object directly. 
+ * @pscan.lock coordinates access to dir_entries, dir_names, + * parent_ino, subdirs, dirents, and args. This reduces the memory + * requirements of this structure. */ struct xrep_parent_scan_info pscan; @@ -999,6 +1017,259 @@ xrep_dir_salvage_entries( } +/* + * Examine an xattr of a file. If this xattr is a parent pointer that leads us + * back to the directory that we're rebuilding, create an incore dirent from + * the parent pointer and stash it. + */ +STATIC int +xrep_dir_scan_parent_pointer( + struct xfs_scrub *sc, + struct xfs_inode *ip, + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen, + const void *value, + unsigned int valuelen, + void *priv) +{ + struct xfs_name xname; + struct xrep_dir *rd = priv; + const struct xfs_parent_name_rec *rec = (const void *)name; + int error; + + /* Ignore incomplete xattrs */ + if (attr_flags & XFS_ATTR_INCOMPLETE) + return 0; + + /* Ignore anything that isn't a parent pointer. */ + if (!(attr_flags & XFS_ATTR_PARENT)) + return 0; + + /* Does the ondisk parent pointer structure make sense? */ + if (!xfs_parent_namecheck(sc->mp, rec, namelen, attr_flags) || + !xfs_parent_valuecheck(sc->mp, value, valuelen)) + return -EFSCORRUPTED; + + xfs_parent_irec_from_disk(&rd->pptr, rec, value, valuelen); + + /* Ignore parent pointers that point back to a different dir. */ + if (rd->pptr.p_ino != sc->ip->i_ino || + rd->pptr.p_gen != VFS_I(sc->ip)->i_generation) + return 0; + + /* + * Transform this parent pointer into a dirent and queue it for later + * addition to the temporary directory. + */ + xname.name = rd->pptr.p_name; + xname.len = rd->pptr.p_namelen; + xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode); + + mutex_lock(&rd->pscan.lock); + error = xrep_dir_stash_createname(rd, &xname, ip->i_ino); + mutex_unlock(&rd->pscan.lock); + return error; +} + +/* + * If this child dirent points to the directory being repaired, remember that + * fact so that we can reset the dotdot entry if necessary. 
+ */ +STATIC int +xrep_dir_scan_dirent( + struct xfs_scrub *sc, + struct xfs_inode *dp, + xfs_dir2_dataptr_t dapos, + const struct xfs_name *name, + xfs_ino_t ino, + void *priv) +{ + struct xrep_dir *rd = priv; + + /* Dirent doesn't point to this directory. */ + if (ino != rd->sc->ip->i_ino) + return 0; + + /* Ignore garbage inum. */ + if (!xfs_verify_dir_ino(rd->sc->mp, ino)) + return 0; + + /* No weird looking names. */ + if (name->len >= MAXNAMELEN || name->len <= 0) + return 0; + + /* Don't pick up dot or dotdot entries; we only want child dirents. */ + if (xfs_dir2_samename(name, &xfs_name_dotdot) || + xfs_dir2_samename(name, &xfs_name_dot)) + return 0; + + trace_xrep_dir_stash_createname(sc->tempip, &xfs_name_dotdot, + dp->i_ino); + + xrep_findparent_scan_found(&rd->pscan, dp->i_ino); + return 0; +} + +/* + * Decide if we want to look for child dirents or parent pointers in this file. + * Skip the dir being repaired and any files being used to stage repairs. + */ +static inline bool +xrep_dir_want_scan( + struct xrep_dir *rd, + const struct xfs_inode *ip) +{ + return ip != rd->sc->ip && !xrep_is_tempfile(ip); +} + +/* + * Take ILOCK on a file that we want to scan. + * + * Select ILOCK_EXCL if the file is a directory with an unloaded data bmbt or + * has an unloaded attr bmbt. Otherwise, take ILOCK_SHARED. + */ +static inline unsigned int +xrep_dir_scan_ilock( + struct xrep_dir *rd, + struct xfs_inode *ip) +{ + uint lock_mode = XFS_ILOCK_SHARED; + + /* Need to take the shared ILOCK to advance the iscan cursor. */ + if (!xrep_dir_want_scan(rd, ip)) + goto lock; + + if (S_ISDIR(VFS_I(ip)->i_mode) && xfs_need_iread_extents(&ip->i_df)) { + lock_mode = XFS_ILOCK_EXCL; + goto lock; + } + + if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af)) + lock_mode = XFS_ILOCK_EXCL; + +lock: + xfs_ilock(ip, lock_mode); + return lock_mode; +} + +/* + * Scan this file for relevant child dirents or parent pointers that point to + * the directory we're rebuilding. 
+ */ +STATIC int +xrep_dir_scan_file( + struct xrep_dir *rd, + struct xfs_inode *ip) +{ + unsigned int lock_mode; + int error = 0; + + lock_mode = xrep_dir_scan_ilock(rd, ip); + + if (!xrep_dir_want_scan(rd, ip)) + goto scan_done; + + error = xchk_xattr_walk(rd->sc, ip, xrep_dir_scan_parent_pointer, rd); + if (error) + goto scan_done; + + if (S_ISDIR(VFS_I(ip)->i_mode)) { + error = xchk_dir_walk(rd->sc, ip, xrep_dir_scan_dirent, rd); + if (error) + goto scan_done; + } + +scan_done: + xchk_iscan_mark_visited(&rd->pscan.iscan, ip); + xfs_iunlock(ip, lock_mode); + return error; +} + +/* + * Scan all files in the filesystem for parent pointers that we can turn into + * replacement dirents, and a dirent that we can use to set the dotdot pointer. + */ +STATIC int +xrep_dir_scan_dirtree( + struct xrep_dir *rd) +{ + struct xfs_scrub *sc = rd->sc; + struct xfs_inode *ip; + int error; + + /* Roots of directory trees are their own parents. */ + if (sc->ip == sc->mp->m_rootip) + xrep_findparent_scan_found(&rd->pscan, sc->ip->i_ino); + + /* + * Filesystem scans are time consuming. Drop the directory ILOCK and + * all other resources for the duration of the scan and hope for the + * best. The live update hooks will keep our scan information up to + * date even though we've dropped the locks. + */ + xchk_trans_cancel(sc); + if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) + xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED | + XFS_ILOCK_EXCL)); + error = xchk_trans_alloc_empty(sc); + if (error) + return error; + + while ((error = xchk_iscan_iter(&rd->pscan.iscan, &ip)) == 1) { + bool flush; + + error = xrep_dir_scan_file(rd, ip); + xchk_irele(sc, ip); + if (error) + break; + + /* Flush stashed dirent updates to constrain memory usage. 
*/ + mutex_lock(&rd->pscan.lock); + flush = xrep_dir_want_flush_stashed(rd); + mutex_unlock(&rd->pscan.lock); + if (flush) { + xchk_trans_cancel(sc); + + error = xrep_tempfile_iolock_polled(sc); + if (error) + break; + + mutex_lock(&rd->pscan.lock); + error = xrep_dir_replay_updates(rd); + mutex_unlock(&rd->pscan.lock); + xrep_tempfile_iounlock(sc); + if (error) + break; + + error = xchk_trans_alloc_empty(sc); + if (error) + break; + } + + if (xchk_should_terminate(sc, &error)) + break; + } + xchk_iscan_iter_finish(&rd->pscan.iscan); + if (error) { + /* + * If we couldn't grab an inode that was busy with a state + * change, change the error code so that we exit to userspace + * as quickly as possible. + */ + if (error == -EBUSY) + return -ECANCELED; + return error; + } + + /* + * Cancel the empty transaction so that we can (later) use the atomic + * extent swap helpers to lock files and commit the new directory. + */ + xchk_trans_cancel(rd->sc); + return 0; +} + /* * Free all the directory blocks and reset the data fork. The caller must * join the inode to the transaction. This function returns with the inode @@ -1198,6 +1469,44 @@ xrep_dir_set_nlink( return 0; } +/* + * Finish replaying stashed dirent updates, allocate a transaction for swapping + * extents, and take the ILOCKs of both directories before we commit the new + * directory structure. + */ +STATIC int +xrep_dir_finalize_tempdir( + struct xrep_dir *rd) +{ + struct xfs_scrub *sc = rd->sc; + int error; + + if (!xfs_has_parent(sc->mp)) + return xrep_tempswap_trans_alloc(sc, XFS_DATA_FORK, &rd->tx); + + do { + error = xrep_dir_replay_updates(rd); + if (error) + return error; + + error = xrep_tempswap_trans_alloc(sc, XFS_DATA_FORK, &rd->tx); + if (error) + return error; + + /* + * We rely on the ILOCK to quiesce all directory updates + * because the VFS does not take the IOLOCK when moving a + * directory child during a rename. 
+ */ + if (xfarray_length(rd->dir_entries) == 0) + break; + + xchk_trans_cancel(sc); + xrep_tempfile_iunlock_both(sc); + } while (!xchk_should_terminate(sc, &error)); + return error; +} + /* Swap the temporary directory's data fork with the one being repaired. */ STATIC int xrep_dir_swap( @@ -1297,7 +1606,12 @@ xrep_dir_rebuild_tree( if (error) return error; - error = xrep_tempswap_trans_alloc(sc, XFS_DATA_FORK, &rd->tx); + /* + * Allocate transaction, lock inodes, and make sure that we've replayed + * all the stashed dirent updates to the tempdir. After this point, + * we're ready to swapext. + */ + error = xrep_dir_finalize_tempdir(rd); if (error) return error; @@ -1464,7 +1778,10 @@ xrep_directory( if (error) return error; - error = xrep_dir_salvage_entries(rd); + if (xfs_has_parent(sc->mp)) + error = xrep_dir_scan_dirtree(rd); + else + error = xrep_dir_salvage_entries(rd); if (error) goto out_teardown; From patchwork Fri May 26 02:17:26 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13256284 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 05FDBC77B7E for ; Fri, 26 May 2023 02:17:35 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S234031AbjEZCRd (ORCPT ); Thu, 25 May 2023 22:17:33 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:49628 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S233245AbjEZCR3 (ORCPT ); Thu, 25 May 2023 22:17:29 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 9F4F213D for ; Thu, 25 May 2023 19:17:27 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 392B76122B for ; Fri, 26 May 2023 02:17:27 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 97F97C433EF; Fri, 26 May 2023 02:17:26 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685067446; bh=Gw23T4eOQrq8FpkY3pjyre6c30g3Fbd2c1qxipibYnI=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=PaDTUc8gjSiEorVUbbcqV83K0U/z1HYfYViR64xhbI5q5rpEtri9aKnUkZeFDEKID /9JQzMqAZ5keB+WyRq56OtHfaRZADg3U8Ng50B4vCUuVTTlEGQ69Lgt6ekF9PjJZZ9 XkS57be5Z3KK++qiMOpWp/TSL+OwTvyCCEG5ZNYMlIqydBfCnmE9acavHKZ48hQV0T gzHfr3XqVh6A0vYZJ4j0Y/mYiYPE71KjqIn3vosi5ZFcLndNzHgmPGbT0w7ZSV1hqt oT4ou/kChl15HpsGk0TJngw4g5shDaurl6E0ULJraajW1XTz2SyEaBP48eu/hoHO8p grEKPtQbrkuPQ== Date: Thu, 25 May 2023 19:17:26 -0700 Subject: [PATCH 11/17] xfs: implement live updates for directory repairs From: "Darrick J. 
Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073453.3745075.290511236327367590.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong While we're scanning the filesystem for parent pointers that we can turn into dirents, we cannot hold the IOLOCK or ILOCK of the directory being repaired. Therefore, we need to set up a dirent hook so that we can keep the temporary directory up to date with the rest of the filesystem. Hence we add the ability to *remove* entries from the temporary dir. Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/dir_repair.c | 221 +++++++++++++++++++++++++++++++++++++++++---- fs/xfs/scrub/findparent.c | 8 +- fs/xfs/scrub/findparent.h | 10 ++ fs/xfs/scrub/trace.h | 2 4 files changed, 218 insertions(+), 23 deletions(-) diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index 4a0ebab5e245..3a33f556616d 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -85,6 +85,12 @@ * updates from other threads. */ +/* Create a dirent in the tempdir. */ +#define XREP_DIRENT_ADD (1) + +/* Remove a dirent from the tempdir. */ +#define XREP_DIRENT_REMOVE (2) + /* Directory entry to be restored in the new directory. */ struct xrep_dirent { /* Cookie for retrieval of the dirent name. */ @@ -98,6 +104,9 @@ struct xrep_dirent { /* File type of the dirent. 
*/ uint8_t ftype; + + /* XREP_DIRENT_{ADD,REMOVE} */ + uint8_t action; }; /* @@ -339,6 +348,7 @@ xrep_dir_stash_createname( xfs_ino_t ino) { struct xrep_dirent dirent = { + .action = XREP_DIRENT_ADD, .ino = ino, .namelen = name->len, .ftype = name->type, @@ -355,6 +365,34 @@ xrep_dir_stash_createname( return xfarray_append(rd->dir_entries, &dirent); } +/* + * Remember that we want to remove a dirent from the tempdir. These stashed + * actions will be replayed later. + */ +STATIC int +xrep_dir_stash_removename( + struct xrep_dir *rd, + const struct xfs_name *name, + xfs_ino_t ino) +{ + struct xrep_dirent dirent = { + .action = XREP_DIRENT_REMOVE, + .ino = ino, + .namelen = name->len, + .ftype = name->type, + }; + int error; + + trace_xrep_dir_stash_removename(rd->sc->tempip, name, ino); + + error = xfblob_store(rd->dir_names, &dirent.name_cookie, name->name, + name->len); + if (error) + return error; + + return xfarray_append(rd->dir_entries, &dirent); +} + /* Allocate an in-core record to hold entries while we rebuild the dir data. */ STATIC int xrep_dir_salvage_entry( @@ -706,6 +744,43 @@ xrep_dir_replay_createname( return xfs_dir2_node_addname(&rd->args); } +/* Replay a stashed removename onto the temporary directory. 
*/ +STATIC int +xrep_dir_replay_removename( + struct xrep_dir *rd, + const struct xfs_name *name, + xfs_extlen_t total) +{ + struct xfs_inode *dp = rd->args.dp; + bool is_block, is_leaf; + int error; + + ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); + + xrep_dir_init_args(rd, dp, name); + rd->args.op_flags = 0; + rd->args.total = total; + + trace_xrep_dir_replay_removename(dp, name, 0); + + if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) + return xfs_dir2_sf_removename(&rd->args); + + error = xfs_dir2_isblock(&rd->args, &is_block); + if (error) + return error; + if (is_block) + return xfs_dir2_block_removename(&rd->args); + + error = xfs_dir2_isleaf(&rd->args, &is_leaf); + if (error) + return error; + if (is_leaf) + return xfs_dir2_leaf_removename(&rd->args); + + return xfs_dir2_node_removename(&rd->args); +} + /* * Add this stashed incore directory entry to the temporary directory. * The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and @@ -735,26 +810,64 @@ xrep_dir_replay_update( xrep_tempfile_ilock(rd->sc); xfs_trans_ijoin(rd->sc->tp, rd->sc->tempip, 0); - /* - * Create a replacement dirent in the temporary directory. Note that - * _createname doesn't check for existing entries. There shouldn't be - * any in the temporary dir, but we'll verify this in debug mode. - */ + switch (dirent->action) { + case XREP_DIRENT_ADD: + /* + * Create a replacement dirent in the temporary directory. + * Note that _createname doesn't check for existing entries. + * There shouldn't be any in the temporary dir, but we'll + * verify this in debug mode. 
+ */ #ifdef DEBUG - error = xchk_dir_lookup(rd->sc, rd->sc->tempip, &name, &ino); - if (error != -ENOENT) { - ASSERT(error != -ENOENT); + error = xchk_dir_lookup(rd->sc, rd->sc->tempip, &name, &ino); + if (error != -ENOENT) { + ASSERT(error != -ENOENT); + goto out_cancel; + } +#endif + + error = xrep_dir_replay_createname(rd, &name, dirent->ino, + resblks); + if (error) + goto out_cancel; + + if (name.type == XFS_DIR3_FT_DIR) + rd->subdirs++; + rd->dirents++; + break; + case XREP_DIRENT_REMOVE: + /* + * Remove a dirent from the temporary directory. Note that + * _removename doesn't check the inode target of the existing + * entry. There should be a perfect match in the temporary + * dir, but we'll verify this in debug mode. + */ +#ifdef DEBUG + error = xchk_dir_lookup(rd->sc, rd->sc->tempip, &name, &ino); + if (error) { + ASSERT(error != 0); + goto out_cancel; + } + if (ino != dirent->ino) { + ASSERT(ino == dirent->ino); + error = -EIO; + goto out_cancel; + } +#endif + + error = xrep_dir_replay_removename(rd, &name, resblks); + if (error) + goto out_cancel; + + if (name.type == XFS_DIR3_FT_DIR) + rd->subdirs--; + rd->dirents--; + break; + default: + ASSERT(0); + error = -EIO; + goto out_cancel; } -#endif - - error = xrep_dir_replay_createname(rd, &name, dirent->ino, resblks); - if (error) - goto out_cancel; - - if (name.type == XFS_DIR3_FT_DIR) - rd->subdirs++; - rd->dirents++; /* Commit and unlock. */ error = xrep_trans_commit(rd->sc); @@ -1270,6 +1383,71 @@ xrep_dir_scan_dirtree( return 0; } +/* + * Capture dirent updates being made by other threads which are relevant to the + * directory being repaired. 
+ */ +STATIC int +xrep_dir_live_update( + struct notifier_block *nb, + unsigned long action, + void *data) +{ + struct xfs_dir_update_params *p = data; + struct xrep_dir *rd; + struct xfs_scrub *sc; + int error = 0; + + rd = container_of(nb, struct xrep_dir, pscan.hooks.dirent_hook.nb); + sc = rd->sc; + + /* + * This thread updated a child dirent in the directory that we're + * rebuilding. Stash the update for replay against the temporary + * directory. + */ + if (p->dp->i_ino == sc->ip->i_ino && + xchk_iscan_want_live_update(&rd->pscan.iscan, p->ip->i_ino)) { + mutex_lock(&rd->pscan.lock); + if (p->delta > 0) + error = xrep_dir_stash_createname(rd, p->name, + p->ip->i_ino); + else + error = xrep_dir_stash_removename(rd, p->name, + p->ip->i_ino); + mutex_unlock(&rd->pscan.lock); + if (error) + goto out_abort; + } + + /* + * This thread updated another directory's child dirent that points to + * the directory that we're rebuilding, so remember the new dotdot + * target. + */ + if (p->ip->i_ino == sc->ip->i_ino && + xchk_iscan_want_live_update(&rd->pscan.iscan, p->dp->i_ino)) { + if (p->delta > 0) { + trace_xrep_dir_stash_createname(sc->tempip, + &xfs_name_dotdot, + p->dp->i_ino); + + xrep_findparent_scan_found(&rd->pscan, p->dp->i_ino); + } else { + trace_xrep_dir_stash_removename(sc->tempip, + &xfs_name_dotdot, + rd->pscan.parent_ino); + + xrep_findparent_scan_found(&rd->pscan, NULLFSINO); + } + } + + return NOTIFY_DONE; +out_abort: + xchk_iscan_abort(&rd->pscan.iscan); + return NOTIFY_DONE; +} + /* * Free all the directory blocks and reset the data fork. The caller must * join the inode to the transaction. This function returns with the inode @@ -1615,6 +1793,9 @@ xrep_dir_rebuild_tree( if (error) return error; + if (xchk_iscan_aborted(&rd->pscan.iscan)) + return -ECANCELED; + /* * Swap the tempdir's data fork with the file being repaired. 
This * recreates the transaction and re-takes the ILOCK in the scrub @@ -1650,7 +1831,11 @@ xrep_dir_setup_scan( if (error) goto out_xfarray; - error = xrep_findparent_scan_start(sc, &rd->pscan); + if (xfs_has_parent(sc->mp)) + error = __xrep_findparent_scan_start(sc, &rd->pscan, + xrep_dir_live_update); + else + error = xrep_findparent_scan_start(sc, &rd->pscan); if (error) goto out_xfblob; diff --git a/fs/xfs/scrub/findparent.c b/fs/xfs/scrub/findparent.c index 0c3940d397da..da21792758d9 100644 --- a/fs/xfs/scrub/findparent.c +++ b/fs/xfs/scrub/findparent.c @@ -240,9 +240,10 @@ xrep_findparent_live_update( * will be called when there is a dotdot update for the inode being repaired. */ int -xrep_findparent_scan_start( +__xrep_findparent_scan_start( struct xfs_scrub *sc, - struct xrep_parent_scan_info *pscan) + struct xrep_parent_scan_info *pscan, + notifier_fn_t custom_fn) { int error; @@ -264,7 +265,8 @@ xrep_findparent_scan_start( * ILOCK, which means that any in-progress inode updates will finish * before we can scan the inode. */ - xfs_hook_setup(&pscan->hooks.dirent_hook, xrep_findparent_live_update); + xfs_hook_setup(&pscan->hooks.dirent_hook, + custom_fn ? 
custom_fn : xrep_findparent_live_update); error = xfs_dir_hook_add(sc->mp, &pscan->hooks); if (error) goto out_iscan; diff --git a/fs/xfs/scrub/findparent.h b/fs/xfs/scrub/findparent.h index 0bc3921e6ddc..cdd2e4405088 100644 --- a/fs/xfs/scrub/findparent.h +++ b/fs/xfs/scrub/findparent.h @@ -24,8 +24,14 @@ struct xrep_parent_scan_info { bool lookup_parent; }; -int xrep_findparent_scan_start(struct xfs_scrub *sc, - struct xrep_parent_scan_info *pscan); +int __xrep_findparent_scan_start(struct xfs_scrub *sc, + struct xrep_parent_scan_info *pscan, + notifier_fn_t custom_fn); +static inline int xrep_findparent_scan_start(struct xfs_scrub *sc, + struct xrep_parent_scan_info *pscan) +{ + return __xrep_findparent_scan_start(sc, pscan, NULL); +} int xrep_findparent_scan(struct xrep_parent_scan_info *pscan); void xrep_findparent_scan_teardown(struct xrep_parent_scan_info *pscan); diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 96c88f4419d7..a7af7f396a5a 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -2636,6 +2636,8 @@ DEFINE_XREP_DIRENT_CLASS(xrep_dir_salvage_entry); DEFINE_XREP_DIRENT_CLASS(xrep_dir_stash_createname); DEFINE_XREP_DIRENT_CLASS(xrep_dir_replay_createname); DEFINE_XREP_DIRENT_CLASS(xrep_adoption_commit); +DEFINE_XREP_DIRENT_CLASS(xrep_dir_stash_removename); +DEFINE_XREP_DIRENT_CLASS(xrep_dir_replay_removename); TRACE_EVENT(xrep_adoption_cancel, TP_PROTO(struct xfs_inode *dp, struct xfs_inode *ip, int error), From patchwork Fri May 26 02:17:41 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13256285 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7D8D3C77B7E for ; Fri, 26 May 2023 02:17:53 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230091AbjEZCRw (ORCPT ); Thu, 25 May 2023 22:17:52 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:49876 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230465AbjEZCRv (ORCPT ); Thu, 25 May 2023 22:17:51 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 495E8195 for ; Thu, 25 May 2023 19:17:43 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id C27FE64C1F for ; Fri, 26 May 2023 02:17:42 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 2CE9DC433D2; Fri, 26 May 2023 02:17:42 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685067462; bh=Cu6RIaUetzIu5qH4wnYCYMbEo0d/7+YSxIl+XoQNygo=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=D3ZNk5SKFKIHhiOG1UB5YWcTzTbU2WZX8AVp2ZsHY0mrbLSP466tjo99fiEQvsnOy pRE/2bqAZkTOfupwCiGZL8Pw6M0gwLASJoD7L03HmRymuWRRCJjitBlNVf5ddjvCsa tbZWFdHXwiggIQD8TV6WmNhlkaea24m+crb5ujtX+h+N5e4Wfd90Xhfmdz3qCnIXlC WC3yfpsBNOb2Bl5ltgia5J4IQGOllCGywCrbjzmzkJRpO3bBV8otg0STUtVHfMnlYk BQFBJ93NJuXzHpSSY40mb5l6XzfhWzNH3zWrsawTD4//3Eonm0KQgXYHg++E8WSdrX vIqeEGcudevdw== Date: Thu, 25 May 2023 19:17:41 -0700 Subject: [PATCH 12/17] xfs: replay unlocked parent pointer updates that accrue during xattr repair From: "Darrick J. 
Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073467.3745075.6649237914783332833.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong There are a few places where the extended attribute repair code drops the ILOCK to apply stashed xattrs to the temporary file. Although setxattr and removexattr are still locked out because we retain our hold on the IOLOCK, this doesn't prevent renames from updating parent pointers, because the VFS doesn't take i_rwsem on children that are being moved. Therefore, set up a dirent hook to capture parent pointer updates for this file, and replay(?) the updates. Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/attr_repair.c | 451 ++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/trace.h | 72 +++++++ 2 files changed, 521 insertions(+), 2 deletions(-) diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c index 489abe1f028a..23bc72773e33 100644 --- a/fs/xfs/scrub/attr_repair.c +++ b/fs/xfs/scrub/attr_repair.c @@ -95,6 +95,56 @@ struct xrep_xattr { /* Number of attributes that we are salvaging. */ unsigned long long attrs_found; + + /* Can we flush stashed attrs to the tempfile? */ + bool can_flush; + + /* Did the live update fail, and hence the repair is now out of date? */ + bool live_update_aborted; + + /* Lock protecting parent pointer updates */ + struct mutex lock; + + /* Fixed-size array of xrep_xattr_pptr structures. */ + struct xfarray *pptr_recs; + + /* Blobs containing parent pointer names. */ + struct xfblob *pptr_names; + + /* Hook to capture parent pointer updates. */ + struct xfs_dir_hook hooks; + + /* xattr key and da args for parent pointer replay. 
*/ + struct xfs_parent_scratch pptr_scratch; + + /* + * Scratch buffer for scanning dirents to create pptr xattrs. At the + * very end of the repair, it can also be used to compute the + * lost+found filename if we need to reparent the file. + */ + struct xfs_parent_name_irec pptr; +}; + +/* Create a parent pointer in the tempfile. */ +#define XREP_XATTR_PPTR_ADD (1) + +/* Remove a parent pointer from the tempfile. */ +#define XREP_XATTR_PPTR_REMOVE (2) + +/* A stashed parent pointer update. */ +struct xrep_xattr_pptr { + /* Cookie for retrieval of the pptr name. */ + xfblob_cookie name_cookie; + + /* Parent pointer attr key. */ + xfs_ino_t p_ino; + uint32_t p_gen; + + /* Length of the pptr name. */ + uint8_t namelen; + + /* XREP_XATTR_PPTR_{ADD,REMOVE} */ + uint8_t action; }; /* Set up to recreate the extended attributes. */ @@ -102,6 +152,9 @@ int xrep_setup_xattr( struct xfs_scrub *sc) { + if (xfs_has_parent(sc->mp)) + xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS); + return xrep_tempfile_create(sc, S_IFREG); } @@ -712,11 +765,122 @@ xrep_xattr_want_flush_stashed( { unsigned long long bytes; + if (!rx->can_flush) + return false; + bytes = xfarray_bytes(rx->xattr_records) + xfblob_bytes(rx->xattr_blobs); return bytes > XREP_XATTR_MAX_STASH_BYTES; } +/* + * Did we observe rename changing parent pointer xattrs while we were flushing + * salvaged attrs? + */ +static inline bool +xrep_xattr_saw_pptr_conflict( + struct xrep_xattr *rx) +{ + bool ret; + + ASSERT(rx->can_flush); + + if (!xfs_has_parent(rx->sc->mp)) + return false; + + ASSERT(xfs_isilocked(rx->sc->ip, XFS_ILOCK_EXCL)); + + mutex_lock(&rx->lock); + ret = xfarray_bytes(rx->pptr_recs) > 0; + mutex_unlock(&rx->lock); + + return ret; +} + +/* + * Reset the entire repair state back to initial conditions, now that we've + * detected a parent pointer update to the attr structure while we were + * flushing salvaged attrs. See the locking notes in dir_repair.c for more + * information on why this is all necessary. 
+ */ +STATIC int +xrep_xattr_full_reset( + struct xrep_xattr *rx) +{ + struct xfs_scrub *sc = rx->sc; + struct xfs_attr_sf_hdr *hdr; + struct xfs_ifork *ifp = &sc->tempip->i_af; + int error; + + trace_xrep_xattr_full_reset(sc->ip, sc->tempip); + + /* The temporary file's data fork had better not be in btree format. */ + if (sc->tempip->i_df.if_format == XFS_DINODE_FMT_BTREE) { + ASSERT(0); + return -EIO; + } + + /* + * We begin in transaction context with sc->ip ILOCKed but not joined + * to the transaction. To reset to the initial state, we must hold + * sc->ip's ILOCK to prevent rename from updating parent pointer + * information and the tempfile's ILOCK to clear its contents. + */ + xchk_iunlock(rx->sc, XFS_ILOCK_EXCL); + xrep_tempfile_ilock_both(sc); + xfs_trans_ijoin(sc->tp, sc->ip, 0); + xfs_trans_ijoin(sc->tp, sc->tempip, 0); + + /* + * Free all the blocks of the attr fork of the temp file, and reset + * it back to local format. + */ + if (xfs_ifork_has_extents(&sc->tempip->i_af)) { + error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK); + if (error) + return error; + + ASSERT(ifp->if_bytes == 0); + ifp->if_format = XFS_DINODE_FMT_LOCAL; + xfs_idata_realloc(sc->tempip, sizeof(*hdr), XFS_ATTR_FORK); + } + + /* Reinitialize the attr fork to an empty shortform structure. */ + hdr = (struct xfs_attr_sf_hdr *)ifp->if_u1.if_data; + memset(hdr, 0, sizeof(*hdr)); + hdr->totsize = cpu_to_be16(sizeof(*hdr)); + xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE | XFS_ILOG_ADATA); + + /* + * Roll this transaction to commit our reset ondisk. The tempfile + * should no longer be joined to the transaction, so we drop its ILOCK. + * This should leave us in transaction context with sc->ip ILOCKed but + * not joined to the transaction. + */ + error = xrep_roll_trans(sc); + if (error) + return error; + xrep_tempfile_iunlock(sc); + + /* + * Erase any accumulated parent pointer updates now that we've erased + * the tempfile's attr fork. 
We're resetting the entire repair state + * back to where we were initially, except now we won't flush salvaged + * xattrs until the very end. + */ + mutex_lock(&rx->lock); + xfarray_truncate(rx->pptr_recs); + xfblob_truncate(rx->pptr_names); + mutex_unlock(&rx->lock); + + rx->can_flush = false; + rx->attrs_found = 0; + + ASSERT(xfarray_bytes(rx->xattr_records) == 0); + ASSERT(xfblob_bytes(rx->xattr_blobs) == 0); + return 0; +} + /* Extract as many attribute keys and values as we can. */ STATIC int xrep_xattr_recover( @@ -731,6 +895,7 @@ xrep_xattr_recover( int nmap; int error; +restart: /* * Iterate each xattr leaf block in the attr fork to scan them for any * attributes that we might salvage. @@ -769,6 +934,14 @@ xrep_xattr_recover( error = xrep_xattr_flush_stashed(rx); if (error) return error; + + if (xrep_xattr_saw_pptr_conflict(rx)) { + error = xrep_xattr_full_reset(rx); + if (error) + return error; + + goto restart; + } } } } @@ -933,6 +1106,195 @@ xrep_xattr_salvage_attributes( return xrep_xattr_flush_stashed(rx); } +/* + * Add this stashed incore parent pointer to the temporary file. + * The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and + * must not be in transaction context. + */ +STATIC int +xrep_xattr_replay_pptr_update( + struct xrep_xattr *rx, + const struct xrep_xattr_pptr *pptr) +{ + struct xfs_scrub *sc = rx->sc; + int error; + + rx->pptr.p_ino = pptr->p_ino; + rx->pptr.p_gen = pptr->p_gen; + rx->pptr.p_namelen = pptr->namelen; + xfs_parent_irec_hashname(sc->mp, &rx->pptr); + + switch (pptr->action) { + case XREP_XATTR_PPTR_ADD: + /* Create parent pointer. */ + trace_xrep_xattr_replay_parentadd(sc->tempip, &rx->pptr); + + error = xfs_parent_set(sc->tempip, sc->ip->i_ino, &rx->pptr, + &rx->pptr_scratch); + if (error) { + ASSERT(error != -EEXIST); + return error; + } + break; + case XREP_XATTR_PPTR_REMOVE: + /* Remove parent pointer. 
*/ + trace_xrep_xattr_replay_parentremove(sc->tempip, &rx->pptr); + + error = xfs_parent_unset(sc->tempip, sc->ip->i_ino, &rx->pptr, + &rx->pptr_scratch); + if (error) { + ASSERT(error != -ENOATTR); + return error; + } + break; + default: + ASSERT(0); + return -EIO; + } + + return 0; +} + +/* + * Flush stashed parent pointer updates that have been recorded by the scanner. + * This is done to reduce the memory requirements of the parent pointer + * rebuild, since files can have a lot of hardlinks and the fs can be busy. + * + * Caller must not hold transactions or ILOCKs. Caller must hold the tempfile + * IOLOCK. + */ +STATIC int +xrep_xattr_replay_pptr_updates( + struct xrep_xattr *rx) +{ + xfarray_idx_t array_cur; + int error; + + mutex_lock(&rx->lock); + foreach_xfarray_idx(rx->pptr_recs, array_cur) { + struct xrep_xattr_pptr pptr; + + error = xfarray_load(rx->pptr_recs, array_cur, &pptr); + if (error) + goto out_unlock; + + error = xfblob_load(rx->pptr_names, pptr.name_cookie, + rx->pptr.p_name, pptr.namelen); + if (error) + goto out_unlock; + rx->pptr.p_name[MAXNAMELEN - 1] = 0; + mutex_unlock(&rx->lock); + + error = xrep_xattr_replay_pptr_update(rx, &pptr); + if (error) + return error; + + mutex_lock(&rx->lock); + } + + /* Empty out both arrays now that we've added the entries. */ + xfarray_truncate(rx->pptr_recs); + xfblob_truncate(rx->pptr_names); + mutex_unlock(&rx->lock); + return 0; +out_unlock: + mutex_unlock(&rx->lock); + return error; +} + +/* + * Remember that we want to create a parent pointer in the tempfile. These + * stashed actions will be replayed later. 
+ */ +STATIC int +xrep_xattr_stash_parentadd( + struct xrep_xattr *rx, + const struct xfs_name *name, + const struct xfs_inode *dp) +{ + struct xrep_xattr_pptr pptr = { + .action = XREP_XATTR_PPTR_ADD, + .namelen = name->len, + .p_ino = dp->i_ino, + .p_gen = VFS_IC(dp)->i_generation, + }; + int error; + + trace_xrep_xattr_stash_parentadd(rx->sc->tempip, dp, name); + + error = xfblob_store(rx->pptr_names, &pptr.name_cookie, name->name, + name->len); + if (error) + return error; + + return xfarray_append(rx->pptr_recs, &pptr); +} + +/* + * Remember that we want to remove a parent pointer from the tempfile. These + * stashed actions will be replayed later. + */ +STATIC int +xrep_xattr_stash_parentremove( + struct xrep_xattr *rx, + const struct xfs_name *name, + const struct xfs_inode *dp) +{ + struct xrep_xattr_pptr pptr = { + .action = XREP_XATTR_PPTR_REMOVE, + .namelen = name->len, + .p_ino = dp->i_ino, + .p_gen = VFS_IC(dp)->i_generation, + }; + int error; + + trace_xrep_xattr_stash_parentremove(rx->sc->tempip, dp, name); + + error = xfblob_store(rx->pptr_names, &pptr.name_cookie, name->name, + name->len); + if (error) + return error; + + return xfarray_append(rx->pptr_recs, &pptr); +} + +/* + * Capture dirent updates being made by other threads. We will have to replay + * the parent pointer updates before swapping attr forks. + */ +STATIC int +xrep_xattr_live_dirent_update( + struct notifier_block *nb, + unsigned long action, + void *data) +{ + struct xfs_dir_update_params *p = data; + struct xrep_xattr *rx; + struct xfs_scrub *sc; + int error; + + rx = container_of(nb, struct xrep_xattr, hooks.dirent_hook.nb); + sc = rx->sc; + + /* + * This thread updated a dirent that points to the file that we're + * repairing, so stash the update for replay against the temporary + * file. 
+ */ + if (p->ip->i_ino != sc->ip->i_ino) + return NOTIFY_DONE; + + mutex_lock(&rx->lock); + if (p->delta > 0) + error = xrep_xattr_stash_parentadd(rx, p->name, p->dp); + else + error = xrep_xattr_stash_parentremove(rx, p->name, p->dp); + if (error) + rx->live_update_aborted = true; + mutex_unlock(&rx->lock); + return NOTIFY_DONE; +} + /* * Prepare both inodes' attribute forks for extent swapping. Promote the * tempfile from short format to leaf format, and if the file being repaired @@ -1035,6 +1397,44 @@ xrep_xattr_swap( return xrep_tempswap_contents(sc, tx); } +/* + * Finish replaying stashed parent pointer updates, allocate a transaction for + * swapping extents, and take the ILOCKs of both files before we commit the new + * extended attribute structure. + */ +STATIC int +xrep_xattr_finalize_tempfile( + struct xrep_xattr *rx) +{ + struct xfs_scrub *sc = rx->sc; + int error; + + if (!xfs_has_parent(sc->mp)) + return xrep_tempswap_trans_alloc(sc, XFS_ATTR_FORK, &rx->tx); + + do { + error = xrep_xattr_replay_pptr_updates(rx); + if (error) + return error; + + error = xrep_tempswap_trans_alloc(sc, XFS_ATTR_FORK, &rx->tx); + if (error) + return error; + + /* + * We rely on the ILOCK to quiesce all parent pointer updates + * because the VFS does not take the IOLOCK when moving a + * directory child during a rename. + */ + if (xfarray_length(rx->pptr_recs) == 0) + break; + + xchk_trans_cancel(sc); + xrep_tempfile_iunlock_both(sc); + } while (!xchk_should_terminate(sc, &error)); + return error; +} + /* * Swap the new extended attribute data (which we created in the tempfile) into * the file being repaired. @@ -1086,8 +1486,12 @@ xrep_xattr_rebuild_tree( if (error) return error; - /* Allocate swapext transaction and lock both inodes. */ - error = xrep_tempswap_trans_alloc(rx->sc, XFS_ATTR_FORK, &rx->tx); + /* + * Allocate transaction, lock inodes, and make sure that we've replayed + * all the stashed parent pointer updates to the temp file. 
After this + * point, we're ready to swapext. + */ + error = xrep_xattr_finalize_tempfile(rx); if (error) return error; @@ -1116,8 +1520,15 @@ STATIC void xrep_xattr_teardown( struct xrep_xattr *rx) { + if (xfs_has_parent(rx->sc->mp)) + xfs_dir_hook_del(rx->sc->mp, &rx->hooks); + if (rx->pptr_names) + xfblob_destroy(rx->pptr_names); + if (rx->pptr_recs) + xfarray_destroy(rx->pptr_recs); xfblob_destroy(rx->xattr_blobs); xfarray_destroy(rx->xattr_records); + mutex_destroy(&rx->lock); kfree(rx); } @@ -1135,6 +1546,9 @@ xrep_xattr_setup_scan( if (!rx) return -ENOMEM; rx->sc = sc; + rx->can_flush = true; + + mutex_init(&rx->lock); /* * Allocate enough memory to handle loading local attr values from the @@ -1158,11 +1572,39 @@ xrep_xattr_setup_scan( if (error) goto out_keys; + if (xfs_has_parent(sc->mp)) { + ASSERT(sc->flags & XCHK_FSGATES_DIRENTS); + + error = xfarray_create(sc->mp, "xattr parent pointer entries", + 0, sizeof(struct xrep_xattr_pptr), + &rx->pptr_recs); + if (error) + goto out_values; + + error = xfblob_create(sc->mp, "xattr parent pointer names", + &rx->pptr_names); + if (error) + goto out_pprecs; + + xfs_hook_setup(&rx->hooks.dirent_hook, + xrep_xattr_live_dirent_update); + error = xfs_dir_hook_add(sc->mp, &rx->hooks); + if (error) + goto out_ppnames; + } + *rxp = rx; return 0; +out_ppnames: + xfblob_destroy(rx->pptr_names); +out_pprecs: + xfarray_destroy(rx->pptr_recs); +out_values: + xfblob_destroy(rx->xattr_blobs); out_keys: xfarray_destroy(rx->xattr_records); out_rx: + mutex_destroy(&rx->lock); kfree(rx); return error; } @@ -1199,6 +1641,11 @@ xrep_xattr( if (error) goto out_scan; + if (rx->live_update_aborted) { + error = -EIO; + goto out_scan; + } + /* Last chance to abort before we start committing fixes. 
*/ if (xchk_should_terminate(sc, &error)) goto out_scan; diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index a7af7f396a5a..cbbd8863bab0 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -27,6 +27,7 @@ struct xchk_fscounters; struct xfbtree; struct xfbtree_config; struct xfs_rmap_update_params; +struct xfs_parent_name_irec; /* * ftrace's __print_symbolic requires that all enum values be wrapped in the @@ -2546,6 +2547,43 @@ DEFINE_EVENT(xrep_xattr_class, name, \ TP_ARGS(ip, arg_ip)) DEFINE_XREP_XATTR_CLASS(xrep_xattr_rebuild_tree); DEFINE_XREP_XATTR_CLASS(xrep_xattr_reset_fork); +DEFINE_XREP_XATTR_CLASS(xrep_xattr_full_reset); + +DECLARE_EVENT_CLASS(xrep_xattr_pptr_scan_class, + TP_PROTO(struct xfs_inode *ip, const struct xfs_inode *dp, + const struct xfs_name *name), + TP_ARGS(ip, dp, name), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_ino_t, parent_ino) + __field(unsigned int, parent_gen) + __field(unsigned int, namelen) + __dynamic_array(char, name, name->len) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->parent_ino = dp->i_ino; + __entry->parent_gen = VFS_IC(dp)->i_generation; + __entry->namelen = name->len; + memcpy(__get_str(name), name->name, name->len); + ), + TP_printk("dev %d:%d ino 0x%llx parent_ino 0x%llx parent_gen 0x%x name '%.*s'", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->parent_ino, + __entry->parent_gen, + __entry->namelen, + __get_str(name)) +) +#define DEFINE_XREP_XATTR_PPTR_SCAN_CLASS(name) \ +DEFINE_EVENT(xrep_xattr_pptr_scan_class, name, \ + TP_PROTO(struct xfs_inode *ip, const struct xfs_inode *dp, \ + const struct xfs_name *name), \ + TP_ARGS(ip, dp, name)) +DEFINE_XREP_XATTR_PPTR_SCAN_CLASS(xrep_xattr_stash_parentadd); +DEFINE_XREP_XATTR_PPTR_SCAN_CLASS(xrep_xattr_stash_parentremove); TRACE_EVENT(xrep_dir_recover_dirblock, TP_PROTO(struct xfs_inode *dp, xfs_dablk_t dabno, uint32_t magic, @@ 
-2687,6 +2725,40 @@ DEFINE_XREP_PARENT_SALVAGE_CLASS(xrep_dir_salvaged_parent); DEFINE_XREP_PARENT_SALVAGE_CLASS(xrep_findparent_dirent); DEFINE_XREP_PARENT_SALVAGE_CLASS(xrep_findparent_from_dcache); +DECLARE_EVENT_CLASS(xrep_pptr_class, + TP_PROTO(struct xfs_inode *ip, const struct xfs_parent_name_irec *pptr), + TP_ARGS(ip, pptr), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_ino_t, parent_ino) + __field(unsigned int, parent_gen) + __field(unsigned int, namelen) + __dynamic_array(char, name, pptr->p_namelen) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->parent_ino = pptr->p_ino; + __entry->parent_gen = pptr->p_gen; + __entry->namelen = pptr->p_namelen; + memcpy(__get_str(name), pptr->p_name, pptr->p_namelen); + ), + TP_printk("dev %d:%d ino 0x%llx parent_ino 0x%llx parent_gen 0x%x name '%.*s'", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->parent_ino, + __entry->parent_gen, + __entry->namelen, + __get_str(name)) +) +#define DEFINE_XREP_PPTR_CLASS(name) \ +DEFINE_EVENT(xrep_pptr_class, name, \ + TP_PROTO(struct xfs_inode *ip, const struct xfs_parent_name_irec *pptr), \ + TP_ARGS(ip, pptr)) +DEFINE_XREP_PPTR_CLASS(xrep_xattr_replay_parentadd); +DEFINE_XREP_PPTR_CLASS(xrep_xattr_replay_parentremove); + TRACE_EVENT(xrep_nlinks_set_record, TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, const struct xchk_nlink *obs), From patchwork Fri May 26 02:17:57 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13256286 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 31834C7EE2E for ; Fri, 26 May 2023 02:18:01 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233245AbjEZCSA (ORCPT ); Thu, 25 May 2023 22:18:00 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:49946 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230289AbjEZCR7 (ORCPT ); Thu, 25 May 2023 22:17:59 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id BC775B2 for ; Thu, 25 May 2023 19:17:58 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 50D3B64768 for ; Fri, 26 May 2023 02:17:58 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id B05C3C4339B; Fri, 26 May 2023 02:17:57 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685067477; bh=qwnVCCZ8PfKIcqnjTZRp6nmkh+HSSLPe76IwbPtDywQ=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=dy2SDyyZOQzB5ejx2F7SG7EZ/dNx8/DFwb9wFDVqBUko1E0PnyLiySpsSM3T1Ba6a czyUT1h+pXtVrfarparHg8oC3GzMj7Yk5ucAhckA1kdLXBQGvIElQLm/bB0enuaYgg LwL6nzT5a8+WpZdPNJqyFfzVIUBDhV+ySUeFRnK7eQfEARqi1UJSlNX9PQeYBGWMop GNfyEyPNvy4D9erRpTfwK/290ATEEI3iD58h8TiiZrziIFagZ3cGt4efm9YyHVZEm8 0eTf3ehjV8I/bcaStZheaS7LMjIMQA6KJzXUYJ0WfWVAKMwrqrwlIwgYLf8b6zOyVb Coz90xSF7YBig== Date: Thu, 25 May 2023 19:17:57 -0700 Subject: [PATCH 13/17] xfs: replace namebuf with parent pointer in parent pointer repair From: "Darrick J. 
Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073481.3745075.16880906423617126770.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong Replace the dirent name buffer at the end of struct xrep_parent with an xfs_parent_name_irec object. The namebuf and p_name are never in use at the same time, so we can save 256 bytes of memory by letting them share the same storage. Doing so makes the code a bit more complex, so this is called out separately. Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/parent_repair.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c index b57ba7559361..ff73b6c5b77f 100644 --- a/fs/xfs/scrub/parent_repair.c +++ b/fs/xfs/scrub/parent_repair.c @@ -24,6 +24,7 @@ #include "xfs_trans_space.h" #include "xfs_health.h" #include "xfs_swapext.h" +#include "xfs_parent.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -63,8 +64,12 @@ struct xrep_parent { /* Orphanage reparenting request. */ struct xrep_adoption adoption; - /* Directory entry name, plus the trailing null. */ - unsigned char namebuf[MAXNAMELEN]; + /* + * Scratch buffer for scanning dirents to create pptr xattrs. At the + * very end of the repair, it can also be used to compute the + * lost+found filename if we need to reparent the file. + */ + struct xfs_parent_name_irec pptr; }; /* Tear down all the incore stuff we created. 
*/ @@ -234,7 +239,7 @@ xrep_parent_move_to_orphanage( if (error) goto err_adoption; - error = xrep_adoption_compute_name(&rp->adoption, rp->namebuf); + error = xrep_adoption_compute_name(&rp->adoption, rp->pptr.p_name); if (error) goto err_adoption; From patchwork Fri May 26 02:18:12 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13256287 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id E7281C77B7A for ; Fri, 26 May 2023 02:18:17 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231691AbjEZCSR (ORCPT ); Thu, 25 May 2023 22:18:17 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50070 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S233274AbjEZCSQ (ORCPT ); Thu, 25 May 2023 22:18:16 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 5221118D for ; Thu, 25 May 2023 19:18:14 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id D3778614A2 for ; Fri, 26 May 2023 02:18:13 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 42645C433EF; Fri, 26 May 2023 02:18:13 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685067493; bh=b5HxSfIiyvXAMPvUI6JWFLlAtCGfGg3CiECY8kcaH58=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=unudI4oOyDZ4o5kmvj5mYyr0ENBsIy1sHA8/Q2mbsB4gtaiw2iEc+eYC8P4BpSBUD pEPpzC7OCNIGzeQyaINcKyHiggsyAbewC267jazNRI2MT7a51Uulfh8KS+5ajsJmQQ 
lYG5rcv+zO+iKPrdcPNVKhGTsN/N5GTz9yh+MnGeQJL1hTT1zyqM9y/6VbQALuTUz+ OCvz2XbJVk/dzwALH/w7Kp0dfw1TxjfEMqZMhMa0Fgl3if35vrlLfu9YpREnyN35Oq /NdLbsAzGBhoiDQDZm7bLgWKXiDvBDrmBg4w2VYzpoHRSxWnvKnpJN4fdv8iQQqSj7 wOMOVrWN+0jzg== Date: Thu, 25 May 2023 19:18:12 -0700 Subject: [PATCH 14/17] xfs: repair directory parent pointers by scanning for dirents From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073495.3745075.3607357423221197815.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong If parent pointers are enabled on the filesystem, we can repair the entire dataset by walking the directories of the filesystem looking for dirents that we can turn into parent pointers. Once we have a full incore dataset, we'll figure out what to do with it, but that's for a subsequent patch. Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/parent_repair.c | 403 ++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/trace.h | 36 ++++ 2 files changed, 436 insertions(+), 3 deletions(-) diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c index ff73b6c5b77f..adf23180c822 100644 --- a/fs/xfs/scrub/parent_repair.c +++ b/fs/xfs/scrub/parent_repair.c @@ -35,6 +35,9 @@ #include "scrub/readdir.h" #include "scrub/tempfile.h" #include "scrub/orphanage.h" +#include "scrub/xfile.h" +#include "scrub/xfarray.h" +#include "scrub/xfblob.h" /* * Repairing The Directory Parent Pointer @@ -50,20 +53,71 @@ * See the section on locking issues in dir_repair.c for more information about * conflicts with the VFS. The findparent code wll keep our incore parent * inode up to date. 
+ * + * If parent pointers are enabled, we instead reconstruct the parent pointer + * information by visiting every directory entry of every directory in the + * system and translating the relevant dirents into parent pointers. In this + * case, it is advantageous to stash all parent pointers created from dirents + * from a single parent file before replaying them into the temporary file. To + * save memory, the live filesystem scan reuses the findparent object. Parent + * pointer repair chooses either directory scanning or findparent, but not + * both. + * + * When salvaging completes, the remaining stashed entries are replayed to the + * temporary file. All non-parent pointer extended attributes are copied to + * the temporary file's extended attributes. An atomic extent swap is used to + * commit the new xattr blocks to the directory being repaired. This will + * disrupt attrmulti cursors. */ +/* A stashed parent pointer update. */ +struct xrep_pptr { + /* Cookie for retrieval of the pptr name. */ + xfblob_cookie name_cookie; + + /* Parent pointer attr key. */ + xfs_ino_t p_ino; + uint32_t p_gen; + + /* Length of the pptr name. */ + uint8_t namelen; +}; + +/* + * Stash up to 8 pages of recovered parent pointers in pptr_recs and + * pptr_names before we write them to the temp file. + */ +#define XREP_PARENT_MAX_STASH_BYTES (PAGE_SIZE * 8) + struct xrep_parent { struct xfs_scrub *sc; + /* Fixed-size array of xrep_pptr structures. */ + struct xfarray *pptr_recs; + + /* Blobs containing parent pointer names. */ + struct xfblob *pptr_names; + /* * Information used to scan the filesystem to find the inumber of the - * dotdot entry for this directory. + * dotdot entry for this directory. On filesystems without parent + * pointers, we use the findparent_* functions on this object and + * access only the parent_ino field directly. + * + * When parent pointers are enabled, the directory entry scanner uses + * the iscan, hooks, and lock fields of this object directly. 
+ * @pscan.lock coordinates access to pptr_recs, pptr_names, pptr, and + * pptr_scratch. This reduces the memory requirements of this + * structure. */ struct xrep_parent_scan_info pscan; /* Orphanage reparenting request. */ struct xrep_adoption adoption; + /* xattr key and da args for parent pointer replay. */ + struct xfs_parent_scratch pptr_scratch; + /* * Scratch buffer for scanning dirents to create pptr xattrs. At the * very end of the repair, it can also be used to compute the @@ -78,6 +132,12 @@ xrep_parent_teardown( struct xrep_parent *rp) { xrep_findparent_scan_teardown(&rp->pscan); + if (rp->pptr_names) + xfblob_destroy(rp->pptr_names); + rp->pptr_names = NULL; + if (rp->pptr_recs) + xfarray_destroy(rp->pptr_recs); + rp->pptr_recs = NULL; } /* Set up for a parent repair. */ @@ -86,6 +146,7 @@ xrep_setup_parent( struct xfs_scrub *sc) { struct xrep_parent *rp; + int error; xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS); @@ -95,6 +156,10 @@ xrep_setup_parent( rp->sc = sc; sc->buf = rp; + error = xrep_tempfile_create(sc, S_IFREG); + if (error) + return error; + return xrep_orphanage_try_create(sc); } @@ -150,6 +215,309 @@ xrep_parent_find_dotdot( return error; } +/* + * Add this stashed incore parent pointer to the temporary file. + * The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and + * must not be in transaction context. + */ +STATIC int +xrep_parent_replay_update( + struct xrep_parent *rp, + const struct xrep_pptr *pptr) +{ + struct xfs_scrub *sc = rp->sc; + int error; + + rp->pptr.p_ino = pptr->p_ino; + rp->pptr.p_gen = pptr->p_gen; + rp->pptr.p_namelen = pptr->namelen; + xfs_parent_irec_hashname(sc->mp, &rp->pptr); + + /* Create parent pointer. */ + trace_xrep_parent_replay_parentadd(sc->tempip, &rp->pptr); + + error = xfs_parent_set(sc->tempip, sc->ip->i_ino, &rp->pptr, + &rp->pptr_scratch); + if (error) + return error; + + return 0; +} + +/* + * Flush stashed parent pointer updates that have been recorded by the scanner. 
+ * This is done to reduce the memory requirements of the parent pointer + * rebuild, since files can have a lot of hardlinks and the fs can be busy. + * + * Caller must not hold transactions or ILOCKs. Caller must hold the tempfile + * IOLOCK. + */ +STATIC int +xrep_parent_replay_updates( + struct xrep_parent *rp) +{ + xfarray_idx_t array_cur; + int error; + + mutex_lock(&rp->pscan.lock); + foreach_xfarray_idx(rp->pptr_recs, array_cur) { + struct xrep_pptr pptr; + + error = xfarray_load(rp->pptr_recs, array_cur, &pptr); + if (error) + goto out_unlock; + + error = xfblob_load(rp->pptr_names, pptr.name_cookie, + rp->pptr.p_name, pptr.namelen); + if (error) + goto out_unlock; + rp->pptr.p_name[MAXNAMELEN - 1] = 0; + mutex_unlock(&rp->pscan.lock); + + error = xrep_parent_replay_update(rp, &pptr); + if (error) + return error; + + mutex_lock(&rp->pscan.lock); + } + + /* Empty out both arrays now that we've added the entries. */ + xfarray_truncate(rp->pptr_recs); + xfblob_truncate(rp->pptr_names); + mutex_unlock(&rp->pscan.lock); + return 0; +out_unlock: + mutex_unlock(&rp->pscan.lock); + return error; +} + +/* + * Remember that we want to create a parent pointer in the tempfile. These + * stashed actions will be replayed later. + */ +STATIC int +xrep_parent_stash_parentadd( + struct xrep_parent *rp, + const struct xfs_name *name, + const struct xfs_inode *dp) +{ + struct xrep_pptr pptr = { + .namelen = name->len, + .p_ino = dp->i_ino, + .p_gen = VFS_IC(dp)->i_generation, + }; + int error; + + trace_xrep_parent_stash_parentadd(rp->sc->tempip, dp, name); + + error = xfblob_store(rp->pptr_names, &pptr.name_cookie, name->name, + name->len); + if (error) + return error; + + return xfarray_append(rp->pptr_recs, &pptr); +} + +/* + * Examine an entry of a directory. If this dirent leads us back to the file + * whose parent pointers we're rebuilding, add a pptr to the temporary + * directory. 
+ */ +STATIC int +xrep_parent_scan_dirent( + struct xfs_scrub *sc, + struct xfs_inode *dp, + xfs_dir2_dataptr_t dapos, + const struct xfs_name *name, + xfs_ino_t ino, + void *priv) +{ + struct xrep_parent *rp = priv; + int error; + + /* Dirent doesn't point to this directory. */ + if (ino != rp->sc->ip->i_ino) + return 0; + + /* No weird looking names. */ + if (!xfs_dir2_namecheck(name->name, name->len)) + return -EFSCORRUPTED; + + /* No mismatching ftypes. */ + if (name->type != xfs_mode_to_ftype(VFS_I(sc->ip)->i_mode)) + return -EFSCORRUPTED; + + /* Don't pick up dot or dotdot entries; we only want child dirents. */ + if (xfs_dir2_samename(name, &xfs_name_dotdot) || + xfs_dir2_samename(name, &xfs_name_dot)) + return 0; + + /* + * Transform this dirent into a parent pointer and queue it for later + * addition to the temporary file. + */ + mutex_lock(&rp->pscan.lock); + error = xrep_parent_stash_parentadd(rp, name, dp); + mutex_unlock(&rp->pscan.lock); + return error; +} + +/* + * Decide if we want to look for dirents in this directory. Skip the file + * being repaired and any files being used to stage repairs. + */ +static inline bool +xrep_parent_want_scan( + struct xrep_parent *rp, + const struct xfs_inode *ip) +{ + return ip != rp->sc->ip && !xrep_is_tempfile(ip); +} + +/* + * Take ILOCK on a file that we want to scan. + * + * Select ILOCK_EXCL if the file is a directory with an unloaded data bmbt. + * Otherwise, take ILOCK_SHARED. + */ +static inline unsigned int +xrep_parent_scan_ilock( + struct xrep_parent *rp, + struct xfs_inode *ip) +{ + uint lock_mode = XFS_ILOCK_SHARED; + + /* Still need to take the shared ILOCK to advance the iscan cursor. 
*/ + if (!xrep_parent_want_scan(rp, ip)) + goto lock; + + if (S_ISDIR(VFS_I(ip)->i_mode) && xfs_need_iread_extents(&ip->i_df)) { + lock_mode = XFS_ILOCK_EXCL; + goto lock; + } + +lock: + xfs_ilock(ip, lock_mode); + return lock_mode; +} + +/* + * Scan this file for relevant child dirents that point to the file whose + * parent pointers we're rebuilding. + */ +STATIC int +xrep_parent_scan_file( + struct xrep_parent *rp, + struct xfs_inode *ip) +{ + unsigned int lock_mode; + int error = 0; + + lock_mode = xrep_parent_scan_ilock(rp, ip); + + if (!xrep_parent_want_scan(rp, ip)) + goto scan_done; + + if (S_ISDIR(VFS_I(ip)->i_mode)) { + error = xchk_dir_walk(rp->sc, ip, xrep_parent_scan_dirent, rp); + if (error) + goto scan_done; + } + +scan_done: + xchk_iscan_mark_visited(&rp->pscan.iscan, ip); + xfs_iunlock(ip, lock_mode); + return error; +} + +/* Decide if we've stashed too much pptr data in memory. */ +static inline bool +xrep_parent_want_flush_stashed( + struct xrep_parent *rp) +{ + unsigned long long bytes; + + bytes = xfarray_bytes(rp->pptr_recs) + xfblob_bytes(rp->pptr_names); + return bytes > XREP_PARENT_MAX_STASH_BYTES; +} + +/* + * Scan all directories in the filesystem to look for dirents that we can turn + * into parent pointers. + */ +STATIC int +xrep_parent_scan_dirtree( + struct xrep_parent *rp) +{ + struct xfs_scrub *sc = rp->sc; + struct xfs_inode *ip; + int error; + + /* + * Filesystem scans are time consuming. Drop the file ILOCK and all + * other resources for the duration of the scan and hope for the best. + * The live update hooks will keep our scan information up to date. 
+ */ + xchk_trans_cancel(sc); + if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) + xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED | + XFS_ILOCK_EXCL)); + error = xchk_trans_alloc_empty(sc); + if (error) + return error; + + while ((error = xchk_iscan_iter(&rp->pscan.iscan, &ip)) == 1) { + bool flush; + + error = xrep_parent_scan_file(rp, ip); + xchk_irele(sc, ip); + if (error) + break; + + /* Flush stashed pptr updates to constrain memory usage. */ + mutex_lock(&rp->pscan.lock); + flush = xrep_parent_want_flush_stashed(rp); + mutex_unlock(&rp->pscan.lock); + if (flush) { + xchk_trans_cancel(sc); + + error = xrep_tempfile_iolock_polled(sc); + if (error) + break; + + error = xrep_parent_replay_updates(rp); + xrep_tempfile_iounlock(sc); + if (error) + break; + + error = xchk_trans_alloc_empty(sc); + if (error) + break; + } + + if (xchk_should_terminate(sc, &error)) + break; + } + xchk_iscan_iter_finish(&rp->pscan.iscan); + if (error) { + /* + * If we couldn't grab an inode that was busy with a state + * change, change the error code so that we exit to userspace + * as quickly as possible. + */ + if (error == -EBUSY) + return -ECANCELED; + return error; + } + + /* + * Cancel the empty transaction so that we can (later) use the atomic + * extent swap helpers to lock files and commit the new directory. + */ + xchk_trans_cancel(rp->sc); + return 0; +} + /* Reset a directory's dotdot entry, if needed. */ STATIC int xrep_parent_reset_dotdot( @@ -283,8 +651,34 @@ xrep_parent_setup_scan( struct xrep_parent *rp) { struct xfs_scrub *sc = rp->sc; + int error; - return xrep_findparent_scan_start(sc, &rp->pscan); + if (!xfs_has_parent(sc->mp)) + return xrep_findparent_scan_start(sc, &rp->pscan); + + /* Set up some staging memory for logging parent pointer updates. 
*/ + error = xfarray_create(sc->mp, "parent pointer entries", 0, + sizeof(struct xrep_pptr), &rp->pptr_recs); + if (error) + return error; + + error = xfblob_create(sc->mp, "parent pointer names", &rp->pptr_names); + if (error) + goto out_recs; + + error = xrep_findparent_scan_start(sc, &rp->pscan); + if (error) + goto out_names; + + return 0; + +out_names: + xfblob_destroy(rp->pptr_names); + rp->pptr_names = NULL; +out_recs: + xfarray_destroy(rp->pptr_recs); + rp->pptr_recs = NULL; + return error; } int @@ -298,7 +692,10 @@ xrep_parent( if (error) return error; - error = xrep_parent_find_dotdot(rp); + if (xfs_has_parent(sc->mp)) + error = xrep_parent_scan_dirtree(rp); + else + error = xrep_parent_find_dotdot(rp); if (error) goto out_teardown; diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index cbbd8863bab0..e355f8f7a444 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -2758,6 +2758,42 @@ DEFINE_EVENT(xrep_pptr_class, name, \ TP_ARGS(ip, pptr)) DEFINE_XREP_PPTR_CLASS(xrep_xattr_replay_parentadd); DEFINE_XREP_PPTR_CLASS(xrep_xattr_replay_parentremove); +DEFINE_XREP_PPTR_CLASS(xrep_parent_replay_parentadd); + +DECLARE_EVENT_CLASS(xrep_pptr_scan_class, + TP_PROTO(struct xfs_inode *ip, const struct xfs_inode *dp, + const struct xfs_name *name), + TP_ARGS(ip, dp, name), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_ino_t, parent_ino) + __field(unsigned int, parent_gen) + __field(unsigned int, namelen) + __dynamic_array(char, name, name->len) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->parent_ino = dp->i_ino; + __entry->parent_gen = VFS_IC(dp)->i_generation; + __entry->namelen = name->len; + memcpy(__get_str(name), name->name, name->len); + ), + TP_printk("dev %d:%d ino 0x%llx parent_ino 0x%llx parent_gen 0x%x name '%.*s'", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->parent_ino, + __entry->parent_gen, + __entry->namelen, 
+ __get_str(name)) +) +#define DEFINE_XREP_PPTR_SCAN_CLASS(name) \ +DEFINE_EVENT(xrep_pptr_scan_class, name, \ + TP_PROTO(struct xfs_inode *ip, const struct xfs_inode *dp, \ + const struct xfs_name *name), \ + TP_ARGS(ip, dp, name)) +DEFINE_XREP_PPTR_SCAN_CLASS(xrep_parent_stash_parentadd); TRACE_EVENT(xrep_nlinks_set_record, TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, From patchwork Fri May 26 02:18:28 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. Wong" X-Patchwork-Id: 13256288 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 1792BC77B7E for ; Fri, 26 May 2023 02:18:35 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230289AbjEZCSd (ORCPT ); Thu, 25 May 2023 22:18:33 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50116 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229568AbjEZCSb (ORCPT ); Thu, 25 May 2023 22:18:31 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D3BF7B2 for ; Thu, 25 May 2023 19:18:29 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 6E8D26122B for ; Fri, 26 May 2023 02:18:29 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id CECAAC433D2; Fri, 26 May 2023 02:18:28 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685067508; bh=oVcWySd0NkNaU28Yq9j8ViIgoQOKktbJMu72S0pECXs=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; 
b=kQPsB8zp4bSeDdBiAu+jpydvqtMkerHw1m+yVNbsw1uMdSJWvJVITG49PjtiFEdKo 5vs4timAVj+vPpxYdGdDv3leUDeSDza15iJPtYnWZ2JbqxrAH7o4T0UmoxWZoAgqZK XnOUL6fBB2bLK2Md2bQBrsKUBS58ymwLFgwYdjMOIKylLujyNUX+bv1Jx24KFHrn4u D0eIO1MHq7wDcbhAD0vII4t27C3PiWtT6dP51+IhsEF2iF0JG+NFBWWx9S8EkMooXf Q5MU1erC7D3hnPCBx3DycqwY3F/Fz6XfDtqXf/PoriXnMKbitpEalbbD98NBZIY6ch 3k8jAJDaOcqnw== Date: Thu, 25 May 2023 19:18:28 -0700 Subject: [PATCH 15/17] xfs: implement live updates for parent pointer repairs From: "Darrick J. Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073510.3745075.3794570658317339096.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong While we're scanning the filesystem for dirents that we can turn into parent pointers, we cannot hold the IOLOCK or ILOCK of the file being repaired. Therefore, we need to set up a dirent hook so that we can keep the temporary file's parent pointers up to date with the rest of the filesystem. Hence we add the ability to *remove* pptrs from the temporary file. Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/parent_repair.c | 111 +++++++++++++++++++++++++++++++++++++++--- fs/xfs/scrub/trace.h | 2 + 2 files changed, 106 insertions(+), 7 deletions(-) diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c index adf23180c822..b87eb389e45e 100644 --- a/fs/xfs/scrub/parent_repair.c +++ b/fs/xfs/scrub/parent_repair.c @@ -70,6 +70,12 @@ * disrupt attrmulti cursors. */ +/* Create a parent pointer in the tempfile. */ +#define XREP_PPTR_ADD (1) + +/* Remove a parent pointer from the tempfile. */ +#define XREP_PPTR_REMOVE (2) + /* A stashed parent pointer update. 
*/ struct xrep_pptr { /* Cookie for retrieval of the pptr name. */ @@ -81,6 +87,9 @@ struct xrep_pptr { /* Length of the pptr name. */ uint8_t namelen; + + /* XREP_PPTR_{ADD,REMOVE} */ + uint8_t action; }; /* @@ -233,13 +242,29 @@ xrep_parent_replay_update( rp->pptr.p_namelen = pptr->namelen; xfs_parent_irec_hashname(sc->mp, &rp->pptr); - /* Create parent pointer. */ - trace_xrep_parent_replay_parentadd(sc->tempip, &rp->pptr); + switch (pptr->action) { + case XREP_PPTR_ADD: + /* Create parent pointer. */ + trace_xrep_parent_replay_parentadd(sc->tempip, &rp->pptr); - error = xfs_parent_set(sc->tempip, sc->ip->i_ino, &rp->pptr, - &rp->pptr_scratch); - if (error) - return error; + error = xfs_parent_set(sc->tempip, sc->ip->i_ino, &rp->pptr, + &rp->pptr_scratch); + if (error) + return error; + break; + case XREP_PPTR_REMOVE: + /* Remove parent pointer. */ + trace_xrep_parent_replay_parentremove(sc->tempip, &rp->pptr); + + error = xfs_parent_unset(sc->tempip, sc->ip->i_ino, &rp->pptr, + &rp->pptr_scratch); + if (error) + return error; + break; + default: + ASSERT(0); + return -EIO; + } return 0; } @@ -302,6 +327,7 @@ xrep_parent_stash_parentadd( const struct xfs_inode *dp) { struct xrep_pptr pptr = { + .action = XREP_PPTR_ADD, .namelen = name->len, .p_ino = dp->i_ino, .p_gen = VFS_IC(dp)->i_generation, @@ -318,6 +344,34 @@ xrep_parent_stash_parentadd( return xfarray_append(rp->pptr_recs, &pptr); } +/* + * Remember that we want to remove a parent pointer from the tempfile. These + * stashed actions will be replayed later. 
+ */ +STATIC int +xrep_parent_stash_parentremove( + struct xrep_parent *rp, + const struct xfs_name *name, + const struct xfs_inode *dp) +{ + struct xrep_pptr pptr = { + .action = XREP_PPTR_REMOVE, + .namelen = name->len, + .p_ino = dp->i_ino, + .p_gen = VFS_IC(dp)->i_generation, + }; + int error; + + trace_xrep_parent_stash_parentremove(rp->sc->tempip, dp, name); + + error = xfblob_store(rp->pptr_names, &pptr.name_cookie, name->name, + name->len); + if (error) + return error; + + return xfarray_append(rp->pptr_recs, &pptr); +} + /* * Examine an entry of a directory. If this dirent leads us back to the file * whose parent pointers we're rebuilding, add a pptr to the temporary @@ -518,6 +572,48 @@ xrep_parent_scan_dirtree( return 0; } +/* + * Capture dirent updates being made by other threads which are relevant to the + * file being repaired. + */ +STATIC int +xrep_parent_live_update( + struct notifier_block *nb, + unsigned long action, + void *data) +{ + struct xfs_dir_update_params *p = data; + struct xrep_parent *rp; + struct xfs_scrub *sc; + int error; + + rp = container_of(nb, struct xrep_parent, pscan.hooks.dirent_hook.nb); + sc = rp->sc; + + /* + * This thread updated a dirent that points to the file that we're + * repairing, so stash the update for replay against the temporary + * file. + */ + if (p->ip->i_ino == sc->ip->i_ino && + xchk_iscan_want_live_update(&rp->pscan.iscan, p->dp->i_ino)) { + mutex_lock(&rp->pscan.lock); + if (p->delta > 0) + error = xrep_parent_stash_parentadd(rp, p->name, p->dp); + else + error = xrep_parent_stash_parentremove(rp, p->name, + p->dp); + mutex_unlock(&rp->pscan.lock); + if (error) + goto out_abort; + } + + return NOTIFY_DONE; +out_abort: + xchk_iscan_abort(&rp->pscan.iscan); + return NOTIFY_DONE; +} + /* Reset a directory's dotdot entry, if needed. 
*/ STATIC int xrep_parent_reset_dotdot( @@ -666,7 +762,8 @@ xrep_parent_setup_scan( if (error) goto out_recs; - error = xrep_findparent_scan_start(sc, &rp->pscan); + error = __xrep_findparent_scan_start(sc, &rp->pscan, + xrep_parent_live_update); if (error) goto out_names; diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index e355f8f7a444..cc164c34d853 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -2759,6 +2759,7 @@ DEFINE_EVENT(xrep_pptr_class, name, \ DEFINE_XREP_PPTR_CLASS(xrep_xattr_replay_parentadd); DEFINE_XREP_PPTR_CLASS(xrep_xattr_replay_parentremove); DEFINE_XREP_PPTR_CLASS(xrep_parent_replay_parentadd); +DEFINE_XREP_PPTR_CLASS(xrep_parent_replay_parentremove); DECLARE_EVENT_CLASS(xrep_pptr_scan_class, TP_PROTO(struct xfs_inode *ip, const struct xfs_inode *dp, @@ -2794,6 +2795,7 @@ DEFINE_EVENT(xrep_pptr_scan_class, name, \ const struct xfs_name *name), \ TP_ARGS(ip, dp, name)) DEFINE_XREP_PPTR_SCAN_CLASS(xrep_parent_stash_parentadd); +DEFINE_XREP_PPTR_SCAN_CLASS(xrep_parent_stash_parentremove); TRACE_EVENT(xrep_nlinks_set_record, TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, From patchwork Fri May 26 02:18:44 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13256289 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 8CDDEC77B7A for ; Fri, 26 May 2023 02:18:47 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229646AbjEZCSq (ORCPT ); Thu, 25 May 2023 22:18:46 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50152 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229568AbjEZCSq (ORCPT ); Thu, 25 May 2023 22:18:46 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 66A94B2 for ; Thu, 25 May 2023 19:18:45 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 02A90614A2 for ; Fri, 26 May 2023 02:18:45 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 64E88C433EF; Fri, 26 May 2023 02:18:44 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685067524; bh=qRzxaIVpihVpMPYH9kx/kUqB3XOVrn45MruINGd9px0=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=cntLXnQjESln1/4G5OrJioiPbT8LeJ7/R0w5/kMKY/sacbmbB9QaHfkR+eIATmvAH YRBJ8QdIqbJb8Wv64rkEEQYP0O9u0Wm0Ba2DAKuu8JP2zKlqCk3ab0z45gfbfeZQP/ Uo4JhusKZ769Byq09hKSyhyUduB0sghU72WhfjR2mY82sF4+KDCSvQ0oAUlwpjvw/f iggag2w9gYPwifaVpu6k45wXujhG870EshD/ydZqX8Wq5uC1LrNWHVLLcTrVKm/beG /3BX3F27rnushKEO/to10VL+Vp7HrsND+j8r3hVFrP92VGf/4Cf2Q93FOfs7e+UOlY J2onkgRUg1smg== Date: Thu, 25 May 2023 19:18:44 -0700 Subject: [PATCH 16/17] xfs: remove pointless unlocked assertion From: "Darrick J. 
Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073524.3745075.9496123254727679567.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong Remove this assertion about the inode not having an attr fork from xfs_bmap_add_attrfork because the function handles that case just fine. Weirder still, the function actually /requires/ the caller not to hold the ILOCK, which means that its accesses are not stabilized. Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 42932fb55cf3..1570b6210e9e 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1017,8 +1017,6 @@ xfs_bmap_add_attrfork( int logflags; /* logging flags */ int error; /* error return value */ - ASSERT(xfs_inode_has_attr_fork(ip) == 0); - mp = ip->i_mount; ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); From patchwork Fri May 26 02:18:59 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Darrick J. 
Wong" X-Patchwork-Id: 13256290 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 66BE8C77B7A for ; Fri, 26 May 2023 02:19:09 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229865AbjEZCTI (ORCPT ); Thu, 25 May 2023 22:19:08 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50194 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229827AbjEZCTG (ORCPT ); Thu, 25 May 2023 22:19:06 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 1FE2DF7 for ; Thu, 25 May 2023 19:19:03 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 9D33861207 for ; Fri, 26 May 2023 02:19:00 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 06002C433D2; Fri, 26 May 2023 02:18:59 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685067540; bh=4QKU+v29WBL3qPbEAnBfBdKkbqGjUlKpQK6J+2MX2Ps=; h=Date:Subject:From:To:Cc:In-Reply-To:References:From; b=XB4QQSzVVD47s3vDeQ9Dil7rZQMeiiea4Bryoro0Mn1dejCNQ0+XtR2JoUA479p2G qis7WNP3dq5Xs71rzNzcS/S67zP6dEpUM/AG/eGEHqJSiTfic9wAgWDt3XvoGUgKrX ouYaGN242xDf8AlrQE0BTxZMd8M4xq6ELQmr5kfMqxV+5K8M33lmC+2a7Jer9c6rff fSWeLOGpovYjpHvOHZ+Do1b9g5fOwFPWNTol/nGr9Vx+EJnjVdFRdEIg7YxyGrA6k2 LL1/kLHc+wIKl4Sa7XmOS5ZZHeADXZnMJve79OG7hcjrk61pzKZZWlGzaXJ+mNUCbg 7R9N1WOI52Z0g== Date: Thu, 25 May 2023 19:18:59 -0700 Subject: [PATCH 17/17] xfs: actually rebuild the parent pointer xattrs From: "Darrick J. 
Wong" To: djwong@kernel.org Cc: linux-xfs@vger.kernel.org, allison.henderson@oracle.com, catherine.hoang@oracle.com Message-ID: <168506073538.3745075.4196304428643360910.stgit@frogsfrogsfrogs> In-Reply-To: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> References: <168506073275.3745075.7865645835865818396.stgit@frogsfrogsfrogs> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Darrick J. Wong Once we've assembled all the parent pointers for a file, we need to commit the new dataset atomically to that file. Parent pointer records are embedded in the xattr structure, which means that we must write a new extended attribute structure, again, atomically. Therefore, we must copy the non-parent-pointer attributes from the file being repaired into the temporary file's extended attributes and then call the atomic extent swap mechanism to exchange the blocks. Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/attr.c | 2 fs/xfs/scrub/attr_repair.c | 4 fs/xfs/scrub/attr_repair.h | 4 fs/xfs/scrub/dir_repair.c | 3 fs/xfs/scrub/findparent.c | 52 +++ fs/xfs/scrub/findparent.h | 2 fs/xfs/scrub/listxattr.c | 10 + fs/xfs/scrub/listxattr.h | 4 fs/xfs/scrub/parent.c | 5 fs/xfs/scrub/parent_repair.c | 706 +++++++++++++++++++++++++++++++++++++++++- fs/xfs/scrub/trace.h | 2 11 files changed, 770 insertions(+), 24 deletions(-) diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index f213d745746f..555e1b65c78a 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -647,7 +647,7 @@ xchk_xattr( * iteration, which doesn't really follow the usual buffer * locking order. 
*/ - error = xchk_xattr_walk(sc, sc->ip, xchk_xattr_actor, NULL); + error = xchk_xattr_walk(sc, sc->ip, xchk_xattr_actor, NULL, NULL); if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error)) return error; diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c index 23bc72773e33..71dcbe64609f 100644 --- a/fs/xfs/scrub/attr_repair.c +++ b/fs/xfs/scrub/attr_repair.c @@ -1036,7 +1036,7 @@ xrep_xattr_reset_fork( * fork. The caller must ILOCK the tempfile and join it to the transaction. * This function returns with the inode joined to a clean scrub transaction. */ -STATIC int +int xrep_xattr_reset_tempfile_fork( struct xfs_scrub *sc) { @@ -1360,7 +1360,7 @@ xrep_xattr_swap_prep( } /* Swap the temporary file's attribute fork with the one being repaired. */ -STATIC int +int xrep_xattr_swap( struct xfs_scrub *sc, struct xrep_tempswap *tx) diff --git a/fs/xfs/scrub/attr_repair.h b/fs/xfs/scrub/attr_repair.h index 372c2d0eff68..f0f2c7edcb4c 100644 --- a/fs/xfs/scrub/attr_repair.h +++ b/fs/xfs/scrub/attr_repair.h @@ -6,6 +6,10 @@ #ifndef __XFS_SCRUB_ATTR_REPAIR_H__ #define __XFS_SCRUB_ATTR_REPAIR_H__ +struct xrep_tempswap; + +int xrep_xattr_swap(struct xfs_scrub *sc, struct xrep_tempswap *tx); int xrep_xattr_reset_fork(struct xfs_scrub *sc); +int xrep_xattr_reset_tempfile_fork(struct xfs_scrub *sc); #endif /* __XFS_SCRUB_ATTR_REPAIR_H__ */ diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index 3a33f556616d..46ce6d06312e 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -1283,7 +1283,8 @@ xrep_dir_scan_file( if (!xrep_dir_want_scan(rd, ip)) goto scan_done; - error = xchk_xattr_walk(rd->sc, ip, xrep_dir_scan_parent_pointer, rd); + error = xchk_xattr_walk(rd->sc, ip, xrep_dir_scan_parent_pointer, NULL, + rd); if (error) goto scan_done; diff --git a/fs/xfs/scrub/findparent.c b/fs/xfs/scrub/findparent.c index da21792758d9..cc2ac55f57bf 100644 --- a/fs/xfs/scrub/findparent.c +++ b/fs/xfs/scrub/findparent.c @@ -24,6 +24,7 
@@ #include "xfs_trans_space.h" #include "xfs_health.h" #include "xfs_swapext.h" +#include "xfs_parent.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -33,6 +34,7 @@ #include "scrub/findparent.h" #include "scrub/readdir.h" #include "scrub/tempfile.h" +#include "scrub/listxattr.h" /* * Finding the Parent of a Directory @@ -453,3 +455,53 @@ xrep_findparent_from_dcache( out: return ret; } + +/* Pass back the parent inumber if this is a parent pointer */ +STATIC int +xrep_findparent_from_pptr( + struct xfs_scrub *sc, + struct xfs_inode *ip, + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen, + const void *value, + unsigned int valuelen, + void *priv) +{ + struct xfs_parent_name_irec pptr; + struct xfs_mount *mp = sc->mp; + const void *rec = name; + xfs_ino_t *inop = priv; + + if (!(attr_flags & XFS_ATTR_PARENT)) + return 0; + + if (!xfs_parent_namecheck(mp, rec, namelen, attr_flags) || + !xfs_parent_valuecheck(mp, value, valuelen)) + return -EFSCORRUPTED; + + xfs_parent_irec_from_disk(&pptr, rec, value, valuelen); + *inop = pptr.p_ino; + return -ECANCELED; +} + +/* + * Find the first parent of the inode being scrubbed by walking parent + * pointers. Caller must hold sc->ip's ILOCK. 
+ */ +int +xrep_findparent_from_pptrs( + struct xfs_scrub *sc, + xfs_ino_t *inop) +{ + int error; + + *inop = NULLFSINO; + + error = xchk_xattr_walk(sc, sc->ip, xrep_findparent_from_pptr, NULL, + inop); + if (error && error != -ECANCELED) + return error; + return 0; +} + diff --git a/fs/xfs/scrub/findparent.h b/fs/xfs/scrub/findparent.h index cdd2e4405088..3abc4309f89d 100644 --- a/fs/xfs/scrub/findparent.h +++ b/fs/xfs/scrub/findparent.h @@ -53,4 +53,6 @@ int xrep_findparent_confirm(struct xfs_scrub *sc, xfs_ino_t *parent_ino); xfs_ino_t xrep_findparent_self_reference(struct xfs_scrub *sc); xfs_ino_t xrep_findparent_from_dcache(struct xfs_scrub *sc); +int xrep_findparent_from_pptrs(struct xfs_scrub *sc, xfs_ino_t *inop); + #endif /* __XFS_SCRUB_FINDPARENT_H__ */ diff --git a/fs/xfs/scrub/listxattr.c b/fs/xfs/scrub/listxattr.c index 322715b2fd68..3dbf1a2f8bc9 100644 --- a/fs/xfs/scrub/listxattr.c +++ b/fs/xfs/scrub/listxattr.c @@ -220,6 +220,7 @@ xchk_xattr_walk_node( struct xfs_scrub *sc, struct xfs_inode *ip, xchk_xattr_fn attr_fn, + xchk_xattrleaf_fn leaf_fn, void *priv) { struct xfs_attr3_icleaf_hdr leafhdr; @@ -251,6 +252,12 @@ xchk_xattr_walk_node( xfs_trans_brelse(sc->tp, leaf_bp); + if (leaf_fn) { + error = leaf_fn(sc, priv); + if (error) + goto out_bitmap; + } + /* Make sure we haven't seen this new leaf already. 
*/ len = 1; if (xbitmap_test(&seen_blocks, leafhdr.forw, &len)) @@ -285,6 +292,7 @@ xchk_xattr_walk( struct xfs_scrub *sc, struct xfs_inode *ip, xchk_xattr_fn attr_fn, + xchk_xattrleaf_fn leaf_fn, void *priv) { int error; @@ -305,5 +313,5 @@ xchk_xattr_walk( if (xfs_attr_is_leaf(ip)) return xchk_xattr_walk_leaf(sc, ip, attr_fn, priv); - return xchk_xattr_walk_node(sc, ip, attr_fn, priv); + return xchk_xattr_walk_node(sc, ip, attr_fn, leaf_fn, priv); } diff --git a/fs/xfs/scrub/listxattr.h b/fs/xfs/scrub/listxattr.h index fce419255dc0..0cebeecd49ae 100644 --- a/fs/xfs/scrub/listxattr.h +++ b/fs/xfs/scrub/listxattr.h @@ -11,7 +11,9 @@ typedef int (*xchk_xattr_fn)(struct xfs_scrub *sc, struct xfs_inode *ip, unsigned int namelen, const void *value, unsigned int valuelen, void *priv); +typedef int (*xchk_xattrleaf_fn)(struct xfs_scrub *sc, void *priv); + int xchk_xattr_walk(struct xfs_scrub *sc, struct xfs_inode *ip, - xchk_xattr_fn attr_fn, void *priv); + xchk_xattr_fn attr_fn, xchk_xattrleaf_fn leaf_fn, void *priv); #endif /* __XFS_SCRUB_LISTXATTR_H__ */ diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 8daf08a627b7..05ff3896db15 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -701,7 +701,8 @@ xchk_parent_count_pptrs( */ if (pp->need_revalidate) { pp->pptrs_found = 0; - error = xchk_xattr_walk(sc, sc->ip, xchk_parent_count_pptr, pp); + error = xchk_xattr_walk(sc, sc->ip, xchk_parent_count_pptr, + NULL, pp); if (error == -ECANCELED) return 0; if (error) @@ -760,7 +761,7 @@ xchk_parent_pptr( if (error) goto out_entries; - error = xchk_xattr_walk(sc, sc->ip, xchk_parent_scan_attr, pp); + error = xchk_xattr_walk(sc, sc->ip, xchk_parent_scan_attr, NULL, pp); if (error == -ECANCELED) { error = 0; goto out_names; diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c index b87eb389e45e..c5cda42e53ad 100644 --- a/fs/xfs/scrub/parent_repair.c +++ b/fs/xfs/scrub/parent_repair.c @@ -25,6 +25,8 @@ #include "xfs_health.h" #include 
"xfs_swapext.h" #include "xfs_parent.h" +#include "xfs_attr.h" +#include "xfs_bmap.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -34,10 +36,13 @@ #include "scrub/findparent.h" #include "scrub/readdir.h" #include "scrub/tempfile.h" +#include "scrub/tempswap.h" #include "scrub/orphanage.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" #include "scrub/xfblob.h" +#include "scrub/attr_repair.h" +#include "scrub/listxattr.h" /* * Repairing The Directory Parent Pointer @@ -107,6 +112,23 @@ struct xrep_parent { /* Blobs containing parent pointer names. */ struct xfblob *pptr_names; + /* xattr keys */ + struct xfarray *xattr_records; + + /* xattr values */ + struct xfblob *xattr_blobs; + + /* Scratch buffers for saving extended attributes */ + unsigned char *xattr_name; + void *xattr_value; + unsigned int xattr_value_sz; + + /* + * Information used to swap the attr fork, if the fs supports parent + * pointers. + */ + struct xrep_tempswap tx; + /* * Information used to scan the filesystem to find the inumber of the * dotdot entry for this directory. On filesystems without parent @@ -118,12 +140,17 @@ struct xrep_parent { * @pscan.lock coordinates access to pptr_recs, pptr_names, pptr, and * pptr_scratch. This reduces the memory requirements of this * structure. + * + * The lock also controls access to xattr_records and xattr_blobs(?) */ struct xrep_parent_scan_info pscan; /* Orphanage reparenting request. */ struct xrep_adoption adoption; + /* Have we seen any live updates of parent pointers recently? */ + bool saw_pptr_updates; + /* xattr key and da args for parent pointer replay. */ struct xfs_parent_scratch pptr_scratch; @@ -135,12 +162,45 @@ struct xrep_parent { struct xfs_parent_name_irec pptr; }; +struct xrep_parent_xattr { + /* Cookie for retrieval of the xattr name. */ + xfblob_cookie name_cookie; + + /* Cookie for retrieval of the xattr value. 
*/ + xfblob_cookie value_cookie; + + /* XFS_ATTR_* flags */ + int flags; + + /* Length of the value and name. */ + uint32_t valuelen; + uint16_t namelen; +}; + +/* + * Stash up to 8 pages of attrs in xattr_records/xattr_blobs before we write + * them to the temp file. + */ +#define XREP_PARENT_XATTR_MAX_STASH_BYTES (PAGE_SIZE * 8) + /* Tear down all the incore stuff we created. */ static void xrep_parent_teardown( struct xrep_parent *rp) { xrep_findparent_scan_teardown(&rp->pscan); + if (rp->xattr_name) + kvfree(rp->xattr_name); + rp->xattr_name = NULL; + if (rp->xattr_value) + kvfree(rp->xattr_value); + rp->xattr_value = NULL; + if (rp->xattr_blobs) + xfblob_destroy(rp->xattr_blobs); + rp->xattr_blobs = NULL; + if (rp->xattr_records) + xfarray_destroy(rp->xattr_records); + rp->xattr_records = NULL; if (rp->pptr_names) xfblob_destroy(rp->pptr_names); rp->pptr_names = NULL; @@ -565,10 +625,11 @@ xrep_parent_scan_dirtree( } /* - * Cancel the empty transaction so that we can (later) use the atomic - * extent swap helpers to lock files and commit the new directory. + * Retake sc->ip's ILOCK now that we're done flushing stashed parent + * pointers. We end this function with an empty transaction and the + * ILOCK. */ - xchk_trans_cancel(rp->sc); + xchk_ilock(rp->sc, XFS_ILOCK_EXCL); return 0; } @@ -603,6 +664,8 @@ xrep_parent_live_update( else error = xrep_parent_stash_parentremove(rp, p->name, p->dp); + if (!error) + rp->saw_pptr_updates = true; mutex_unlock(&rp->pscan.lock); if (error) goto out_abort; @@ -669,14 +732,25 @@ xrep_parent_move_to_orphanage( if (!sc->orphanage) return -EFSCORRUPTED; - /* - * We are about to drop the ILOCK on sc->ip to lock the orphanage and - * prepare for the adoption. Therefore, look up the old dotdot entry - * for sc->ip so that we can compare it after we re-lock sc->ip. 
- */ - error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &orig_parent); - if (error) - return error; + if (S_ISDIR(VFS_I(sc->ip)->i_mode)) { + /* + * We are about to drop the ILOCK on sc->ip to lock the + * orphanage and prepare for the adoption. Therefore, look up + * the old dotdot entry for sc->ip so that we can compare it + * after we re-lock sc->ip. + */ + error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, + &orig_parent); + if (error) + return error; + } else { + /* + * We haven't dropped the ILOCK since we swapped in the new + * parent pointers, which means that the file cannot have been + * moved in the directory tree, and there are no parents. + */ + orig_parent = NULLFSINO; + } /* * Because the orphanage is just another directory in the filesystem, @@ -711,9 +785,14 @@ xrep_parent_move_to_orphanage( * Now that we've reacquired the ILOCK on sc->ip, look up the dotdot * entry again. If the parent changed or the child was unlinked while * the child directory was unlocked, we don't need to move the child to - * the orphanage after all. + * the orphanage after all. For a non-directory, we have to scan for + * the first parent pointer to see if one has been added. */ - error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &new_parent); + if (S_ISDIR(VFS_I(sc->ip)->i_mode)) + error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, + &new_parent); + else + error = xrep_findparent_from_pptrs(sc, &new_parent); if (error) goto err_adoption; if (orig_parent != new_parent || VFS_I(sc->ip)->i_nlink == 0) { @@ -727,6 +806,546 @@ xrep_parent_move_to_orphanage( return error; } +/* Ensure that the xattr value buffer is large enough. 
*/ +STATIC int +xrep_parent_alloc_xattr_value( + struct xrep_parent *rp, + size_t bufsize) +{ + void *new_val; + + if (rp->xattr_value_sz >= bufsize) + return 0; + + if (rp->xattr_value) { + kvfree(rp->xattr_value); + rp->xattr_value = NULL; + rp->xattr_value_sz = 0; + } + + new_val = kvmalloc(bufsize, XCHK_GFP_FLAGS); + if (!new_val) + return -ENOMEM; + + rp->xattr_value = new_val; + rp->xattr_value_sz = bufsize; + return 0; +} + +/* Retrieve the (remote) value of a non-pptr xattr. */ +STATIC int +xrep_parent_fetch_xattr_remote( + struct xrep_parent *rp, + struct xfs_inode *ip, + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen, + unsigned int valuelen) +{ + struct xfs_scrub *sc = rp->sc; + struct xfs_da_args args = { + .op_flags = XFS_DA_OP_NOTIME, + .attr_filter = attr_flags & XFS_ATTR_NSP_ONDISK_MASK, + .geo = sc->mp->m_attr_geo, + .whichfork = XFS_ATTR_FORK, + .dp = ip, + .name = name, + .namelen = namelen, + .hashval = xfs_da_hashname(name, namelen), + .trans = sc->tp, + .valuelen = valuelen, + .owner = ip->i_ino, + }; + int error; + + /* + * If we need a larger value buffer, try to allocate one. If that + * fails, return with -EDEADLOCK to try harder. + */ + error = xrep_parent_alloc_xattr_value(rp, valuelen); + if (error == -ENOMEM) + return -EDEADLOCK; + if (error) + return error; + + args.value = rp->xattr_value; + return xfs_attr_get_ilocked(&args); +} + +/* Stash non-pptr attributes for later replay into the temporary file. 
*/ +STATIC int +xrep_parent_stash_xattr( + struct xfs_scrub *sc, + struct xfs_inode *ip, + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen, + const void *value, + unsigned int valuelen, + void *priv) +{ + struct xrep_parent_xattr key = { + .valuelen = valuelen, + .namelen = namelen, + .flags = attr_flags & XFS_ATTR_NSP_ONDISK_MASK, + }; + struct xrep_parent *rp = priv; + int error; + + if (attr_flags & (XFS_ATTR_INCOMPLETE | XFS_ATTR_PARENT)) + return 0; + + if (!value) { + error = xrep_parent_fetch_xattr_remote(rp, ip, attr_flags, + name, namelen, valuelen); + if (error) + return error; + + value = rp->xattr_value; + } + + trace_xrep_parent_stash_xattr(rp->sc->tempip, key.flags, (void *)name, + key.namelen, key.valuelen); + + error = xfblob_store(rp->xattr_blobs, &key.name_cookie, name, + key.namelen); + if (error) + return error; + + error = xfblob_store(rp->xattr_blobs, &key.value_cookie, value, + key.valuelen); + if (error) + return error; + + return xfarray_append(rp->xattr_records, &key); +} + +/* Insert one xattr key/value. */ +STATIC int +xrep_parent_insert_xattr( + struct xrep_parent *rp, + const struct xrep_parent_xattr *key) +{ + struct xfs_da_args args = { + .dp = rp->sc->tempip, + .attr_filter = key->flags, + .namelen = key->namelen, + .valuelen = key->valuelen, + .op_flags = XFS_DA_OP_NOTIME, + .owner = rp->sc->ip->i_ino, + }; + int error; + + ASSERT(!(key->flags & XFS_ATTR_PARENT)); + + /* + * Grab pointers to the scrub buffer so that we can use them to insert + * attrs into the temp file. + */ + args.name = rp->xattr_name; + args.value = rp->xattr_value; + + /* + * The attribute name is stored near the end of the in-core buffer, + * though we reserve one more byte to ensure null termination. 
+ */ + rp->xattr_name[XATTR_NAME_MAX] = 0; + + error = xfblob_load(rp->xattr_blobs, key->name_cookie, rp->xattr_name, + key->namelen); + if (error) + return error; + + error = xfblob_free(rp->xattr_blobs, key->name_cookie); + if (error) + return error; + + error = xfblob_load(rp->xattr_blobs, key->value_cookie, args.value, + key->valuelen); + if (error) + return error; + + error = xfblob_free(rp->xattr_blobs, key->value_cookie); + if (error) + return error; + + rp->xattr_name[key->namelen] = 0; + + trace_xrep_parent_insert_xattr(rp->sc->tempip, key->flags, + rp->xattr_name, key->namelen, key->valuelen); + + error = xfs_attr_set(&args); + if (error) { + ASSERT(error != -EEXIST); + return error; + } + + return 0; +} + +/* + * Periodically flush salvaged attributes to the temporary file. This is done + * to reduce the memory requirements of the xattr rebuild because files can + * contain millions of attributes. + */ +STATIC int +xrep_parent_flush_xattrs( + struct xrep_parent *rp) +{ + xfarray_idx_t array_cur; + int error; + + /* + * Entering this function, the scrub context has a reference to the + * inode being repaired, the temporary file, and the empty scrub + * transaction that we created for the xattr scan. We hold ILOCK_EXCL + * on the inode being repaired. + * + * To constrain kernel memory use, we occasionally flush salvaged + * xattrs from the xfarray and xfblob structures into the temporary + * file in preparation for swapping the xattr structures at the end. + * Updating the temporary file requires a transaction, so we commit the + * scrub transaction and drop the ILOCK so that xfs_attr_set can + * allocate whatever transaction it wants. + * + * We still hold IOLOCK_EXCL on the inode being repaired, which + * prevents anyone from adding non-parent pointer xattrs while we're + * flushing. However, the VFS can add parent pointers as part of + * moving a child directory because it doesn't take i_rwsem; see the + * locking issue comment in dir_repair.c. 
+ */ + xchk_trans_cancel(rp->sc); + xchk_iunlock(rp->sc, XFS_ILOCK_EXCL); + + /* + * Take the IOLOCK of the temporary file while we modify xattrs. This + * isn't strictly required because the temporary file is never revealed + * to userspace, but we follow the same locking rules. We still hold + * sc->ip's IOLOCK. + */ + error = xrep_tempfile_iolock_polled(rp->sc); + if (error) + return error; + + /* Add all the salvaged attrs to the temporary file. */ + foreach_xfarray_idx(rp->xattr_records, array_cur) { + struct xrep_parent_xattr key; + + error = xfarray_load(rp->xattr_records, array_cur, &key); + if (error) + return error; + + error = xrep_parent_insert_xattr(rp, &key); + if (error) + return error; + } + + /* Empty out both arrays now that we've added the entries. */ + xfarray_truncate(rp->xattr_records); + xfblob_truncate(rp->xattr_blobs); + + xrep_tempfile_iounlock(rp->sc); + + /* Recreate the empty transaction and relock the inode. */ + error = xchk_trans_alloc_empty(rp->sc); + if (error) + return error; + xchk_ilock(rp->sc, XFS_ILOCK_EXCL); + return 0; +} + +/* Decide if we've stashed too much xattr data in memory. */ +static inline bool +xrep_parent_want_flush_xattrs( + struct xrep_parent *rp) +{ + unsigned long long bytes; + + bytes = xfarray_bytes(rp->xattr_records) + + xfblob_bytes(rp->xattr_blobs); + return bytes > XREP_PARENT_XATTR_MAX_STASH_BYTES; +} + +/* Flush staged attributes to the temporary file if we're over the limit. */ +STATIC int +xrep_parent_try_flush_xattrs( + struct xfs_scrub *sc, + void *priv) +{ + struct xrep_parent *rp = priv; + int error; + + if (!xrep_parent_want_flush_xattrs(rp)) + return 0; + + error = xrep_parent_flush_xattrs(rp); + if (error) + return error; + + /* + * If there were any parent pointer updates to the xattr structure + * while we dropped the ILOCK, the xattr structure is now stale. + * Signal to the attr copy process that we need to start over, but + * this time without opportunistic attr flushing. 
+ * + * This is unlikely to happen, so we're ok with restarting the copy. + */ + mutex_lock(&rp->pscan.lock); + if (rp->saw_pptr_updates) + error = -ESTALE; + mutex_unlock(&rp->pscan.lock); + return error; +} + +/* Copy all the non-pptr extended attributes into the temporary file. */ +STATIC int +xrep_parent_copy_xattrs( + struct xrep_parent *rp) +{ + struct xfs_scrub *sc = rp->sc; + int error; + + /* + * Clear the pptr updates flag. We hold sc->ip ILOCKed, so there + * can't be any parent pointer updates in progress. + */ + mutex_lock(&rp->pscan.lock); + rp->saw_pptr_updates = false; + mutex_unlock(&rp->pscan.lock); + + /* Copy xattrs, stopping periodically to flush the incore buffers. */ + error = xchk_xattr_walk(sc, sc->ip, xrep_parent_stash_xattr, + xrep_parent_try_flush_xattrs, rp); + if (error && error != -ESTALE) + return error; + + if (error == -ESTALE) { + /* + * The xattr copy collided with a parent pointer update. + * Restart the copy, but this time hold the ILOCK all the way + * to the end to lock out any directory parent pointer updates. + */ + error = xchk_xattr_walk(sc, sc->ip, xrep_parent_stash_xattr, + NULL, rp); + if (error) + return error; + } + + /* Flush any remaining stashed xattrs to the temporary file. */ + if (xfarray_bytes(rp->xattr_records) == 0) + return 0; + + return xrep_parent_flush_xattrs(rp); +} + +/* Do we have any attrs (or parent pointers) at all? */ +STATIC int +xrep_parent_has_xattr( + struct xfs_scrub *sc, + struct xfs_inode *ip, + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen, + const void *value, + unsigned int valuelen, + void *priv) +{ + return -ECANCELED; +} + +/* + * Ensure that the file being repaired has an attr fork if it needs one. + * Returns 0 if we're ready to swap; -ENOATTR if there's nothing to swap; + * or a negative errno. In the case of -ENOATTR we leave sc->ip ILOCKed + * to the scrub transaction. 
+ */ +STATIC int +xrep_parent_ensure_attr_fork( + struct xrep_parent *rp) +{ + struct xfs_scrub *sc = rp->sc; + bool tempfile_has_attr = false; + int error; + + error = xchk_trans_alloc(sc, 0); + if (error) + return error; + + xrep_tempfile_ilock_both(sc); + + ASSERT(xfs_ifork_ptr(sc->tempip, XFS_ATTR_FORK) != NULL); + + /* If the file being repaired has an attr fork, we're done. */ + if (xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK) != NULL) { + error = 0; + goto out_cancel; + } + + /* Does the tempfile have any xattrs or parent pointers? */ + error = xchk_xattr_walk(sc, sc->tempip, xrep_parent_has_xattr, NULL, + NULL); + if (error == -ECANCELED) { + tempfile_has_attr = true; + error = 0; + } + if (error) + goto out_cancel; + + /* + * The file does not have an attr fork. If the tempfile has no xattrs + * or parent pointers, there's nothing to swap right now. Drop the + * ILOCK on the temporary file, but leave the transaction and sc->ip's + * ILOCK held. Return -ENOATTR to signal to the caller that we can + * skip the swap. + */ + if (!tempfile_has_attr) { + xrep_tempfile_iunlock(sc); + return -ENOATTR; + } + + /* + * The tempfile has xattrs (or parent pointers). Cancel the + * transaction that we created above and initialize the attr fork so + * that we can swap the attr forks. + */ + xchk_trans_cancel(sc); + xrep_tempfile_iunlock_both(sc); + + return xfs_bmap_add_attrfork(sc->ip, sizeof(struct xfs_attr_sf_hdr), 1); + +out_cancel: + xchk_trans_cancel(sc); + xrep_tempfile_iunlock_both(sc); + return error; +} + +/* + * Finish replaying stashed parent pointer updates, allocate a transaction for + * swapping extents, and take the ILOCKs of both files before we commit the new + * attribute structure. 
*/ +STATIC int +xrep_parent_finalize_tempfile( + struct xrep_parent *rp) +{ + struct xfs_scrub *sc = rp->sc; + int error; + + do { + error = xrep_parent_replay_updates(rp); + if (error) + return error; + + error = xrep_parent_ensure_attr_fork(rp); + if (error) + return error; + + error = xrep_tempswap_trans_alloc(sc, XFS_ATTR_FORK, &rp->tx); + if (error) + return error; + + /* + * We rely on the ILOCK to quiesce all parent pointer updates + * because the VFS does not take the IOLOCK when moving a + * directory child during a rename. + */ + if (xfarray_length(rp->pptr_recs) == 0) + break; + + xchk_trans_cancel(sc); + xrep_tempfile_iunlock_both(sc); + } while (!xchk_should_terminate(sc, &error)); + return error; +} + +/* + * Replay all the stashed parent pointers into the temporary file, copy all + * the non-pptr xattrs from the file being repaired into the temporary file, + * and swap the extents atomically. + */ +STATIC int +xrep_parent_rebuild_pptrs( + struct xrep_parent *rp) +{ + struct xfs_scrub *sc = rp->sc; + xfs_ino_t parent_ino = NULLFSINO; + int error; + + /* + * Copy non-pptr xattrs from the file being repaired into the + * temporary file's xattr structure. We hold sc->ip's IOLOCK, which + * prevents setxattr/removexattr calls from occurring, but renames + * update the parent pointers without holding IOLOCK. If we detect + * stale attr structures, we restart the scan but only flush at the + * end. + */ + error = xrep_parent_copy_xattrs(rp); + if (error) + return error; + + /* + * Cancel the empty transaction that we used to walk and copy attrs, + * and drop the ILOCK so that we can take the IOLOCK on the temporary + * file. We still hold sc->ip's IOLOCK. 
+ */ + xchk_trans_cancel(sc); + xchk_iunlock(sc, XFS_ILOCK_EXCL); + + error = xrep_tempfile_iolock_polled(sc); + if (error) + return error; + + error = xrep_parent_finalize_tempfile(rp); + if (error == -ENOATTR) + goto out_findparents; + if (error) + return error; + + /* Last chance to abort before we start committing pptr fixes. */ + if (xchk_should_terminate(sc, &error)) + return error; + + if (xchk_iscan_aborted(&rp->pscan.iscan)) + return -ECANCELED; + + /* + * Swap the attr fork and junk the old attr fork contents, which are + * now in the tempfile. + */ + error = xrep_xattr_swap(sc, &rp->tx); + if (error) + return error; + error = xrep_xattr_reset_tempfile_fork(sc); + if (error) + return error; + + /* + * Roll transaction to detach both inodes from the transaction, then + * drop the ILOCK of the temporary file since we no longer need it. + */ + error = xfs_trans_roll(&sc->tp); + if (error) + return error; + xrep_tempfile_iunlock(sc); + +out_findparents: + /* + * We've committed the new parent pointers. Find at least one parent + * so that we can decide if we're moving this file to the orphanage. + * For this purpose, root directories are their own parents. + */ + if (sc->ip == sc->mp->m_rootip) { + xrep_findparent_scan_found(&rp->pscan, sc->ip->i_ino); + } else { + error = xrep_findparent_from_pptrs(sc, &parent_ino); + if (error) + return error; + if (parent_ino != NULLFSINO) + xrep_findparent_scan_found(&rp->pscan, parent_ino); + } + return 0; +} + /* * Commit the new parent pointer structure (currently only the dotdot entry) to * the file that we're repairing. 
@@ -735,9 +1354,20 @@ STATIC int xrep_parent_rebuild_tree( struct xrep_parent *rp) { + int error; + + if (xfs_has_parent(rp->sc->mp)) { + error = xrep_parent_rebuild_pptrs(rp); + if (error) + return error; + } + if (rp->pscan.parent_ino == NULLFSINO) return xrep_parent_move_to_orphanage(rp); + if (!S_ISDIR(VFS_I(rp->sc->ip)->i_mode)) + return 0; + return xrep_parent_reset_dotdot(rp); } @@ -747,34 +1377,78 @@ xrep_parent_setup_scan( struct xrep_parent *rp) { struct xfs_scrub *sc = rp->sc; + struct xfs_da_geometry *geo = sc->mp->m_attr_geo; + int max_len; int error; if (!xfs_has_parent(sc->mp)) return xrep_findparent_scan_start(sc, &rp->pscan); + /* Buffers for copying non-pptr attrs to the tempfile */ + rp->xattr_name = kvmalloc(XATTR_NAME_MAX + 1, XCHK_GFP_FLAGS); + if (!rp->xattr_name) + return -ENOMEM; + + /* + * Allocate enough memory to handle loading local attr values from the + * xfblob data while flushing stashed attrs to the temporary file. + * We only realloc the buffer when salvaging remote attr values, so + * TRY_HARDER means we allocate the maximal attr value size. + */ + if (sc->flags & XCHK_TRY_HARDER) + max_len = XATTR_SIZE_MAX; + else + max_len = xfs_attr_leaf_entsize_local_max(geo->blksize); + error = xrep_parent_alloc_xattr_value(rp, max_len); + if (error) + goto out_xattr_name; + /* Set up some staging memory for logging parent pointer updates. 
*/ error = xfarray_create(sc->mp, "parent pointer entries", 0, sizeof(struct xrep_pptr), &rp->pptr_recs); if (error) - return error; + goto out_xattr_value; error = xfblob_create(sc->mp, "parent pointer names", &rp->pptr_names); if (error) goto out_recs; + /* Set up some storage for copying attrs before the swap */ + error = xfarray_create(sc->mp, "parent pointer xattr names", 0, + sizeof(struct xrep_parent_xattr), &rp->xattr_records); + if (error) + goto out_names; + + error = xfblob_create(sc->mp, "parent pointer xattr values", + &rp->xattr_blobs); + if (error) + goto out_attr_keys; + error = __xrep_findparent_scan_start(sc, &rp->pscan, xrep_parent_live_update); if (error) - goto out_names; + goto out_attr_values; return 0; +out_attr_values: + xfblob_destroy(rp->xattr_blobs); + rp->xattr_blobs = NULL; +out_attr_keys: + xfarray_destroy(rp->xattr_records); + rp->xattr_records = NULL; out_names: xfblob_destroy(rp->pptr_names); rp->pptr_names = NULL; out_recs: xfarray_destroy(rp->pptr_recs); rp->pptr_recs = NULL; +out_xattr_value: + kvfree(rp->xattr_value); + rp->xattr_value = NULL; +out_xattr_name: + kvfree(rp->xattr_name); + rp->xattr_name = NULL; return error; } @@ -796,7 +1470,7 @@ xrep_parent( if (error) goto out_teardown; - /* Last chance to abort before we start committing fixes. */ + /* Last chance to abort before we start committing dotdot fixes. 
*/ if (xchk_should_terminate(sc, &error)) goto out_teardown; diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index cc164c34d853..4136bb342326 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -2482,6 +2482,8 @@ DEFINE_EVENT(xrep_xattr_salvage_class, name, \ TP_ARGS(ip, flags, name, namelen, valuelen)) DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_salvage_rec); DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_insert_rec); +DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_parent_stash_xattr); +DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_parent_insert_xattr); DECLARE_EVENT_CLASS(xrep_pptr_salvage_class, TP_PROTO(struct xfs_inode *ip, unsigned int flags, const void *name,