Message ID | 161284384405.3057868.8114203697655713495.stgit@magnolia (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | xfs: add the ability to flag a fs for repair | expand |
On Mon, Feb 08, 2021 at 08:10:44PM -0800, Darrick J. Wong wrote: > From: Darrick J. Wong <djwong@kernel.org> > > There are a few places in xfs_repair's directory checking code where we > deliberately corrupt a directory entry as a sentinel to trigger a > correction in later repair phase. In the mean time, the filesystem is > inconsistent, so set the needsrepair flag to force a re-run of repair if > the system goes down. I guess this is an improvement over what we have now, but I suspect an in-core side band way to notify the later phases would be much better than these corrupt sentinel values..
On Mon, Feb 08, 2021 at 08:10:44PM -0800, Darrick J. Wong wrote: > From: Darrick J. Wong <djwong@kernel.org> > > There are a few places in xfs_repair's directory checking code where we > deliberately corrupt a directory entry as a sentinel to trigger a > correction in later repair phase. In the mean time, the filesystem is > inconsistent, so set the needsrepair flag to force a re-run of repair if > the system goes down. > > Signed-off-by: Darrick J. Wong <djwong@kernel.org> > --- Hmm.. this seems orthogonal to the rest of the series. I'm sure we can come up with various additional uses for the bit, but it seems a little odd to me that repair might set it in some cases after a crash but not others (if the filesystem happens to already be corrupt, for example). Brian > repair/agheader.h | 2 ++ > repair/dir2.c | 3 +++ > repair/phase6.c | 7 +++++++ > repair/xfs_repair.c | 37 +++++++++++++++++++++++++++++++++++++ > 4 files changed, 49 insertions(+) > > > diff --git a/repair/agheader.h b/repair/agheader.h > index a63827c8..fa6fe596 100644 > --- a/repair/agheader.h > +++ b/repair/agheader.h > @@ -82,3 +82,5 @@ typedef struct fs_geo_list { > #define XR_AG_AGF 0x2 > #define XR_AG_AGI 0x4 > #define XR_AG_SB_SEC 0x8 > + > +void force_needsrepair(struct xfs_mount *mp); > diff --git a/repair/dir2.c b/repair/dir2.c > index eabdb4f2..922b8a3e 100644 > --- a/repair/dir2.c > +++ b/repair/dir2.c > @@ -15,6 +15,7 @@ > #include "da_util.h" > #include "prefetch.h" > #include "progress.h" > +#include "agheader.h" > > /* > * Known bad inode list. 
These are seen when the leaf and node > @@ -774,6 +775,7 @@ _("entry at block %u offset %" PRIdPTR " in directory inode %" PRIu64 > do_warn( > _("\tclearing inode number in entry at offset %" PRIdPTR "...\n"), > (intptr_t)ptr - (intptr_t)d); > + force_needsrepair(mp); > dep->name[0] = '/'; > *dirty = 1; > } else { > @@ -914,6 +916,7 @@ _("entry \"%*.*s\" in directory inode %" PRIu64 " points to self: "), > */ > if (junkit) { > if (!no_modify) { > + force_needsrepair(mp); > dep->name[0] = '/'; > *dirty = 1; > do_warn(_("clearing entry\n")); > diff --git a/repair/phase6.c b/repair/phase6.c > index 14464bef..5ecbe9b2 100644 > --- a/repair/phase6.c > +++ b/repair/phase6.c > @@ -1649,6 +1649,7 @@ longform_dir2_entry_check_data( > if (entry_junked( > _("entry \"%s\" in directory inode %" PRIu64 " points to non-existent inode %" PRIu64 ""), > fname, ip->i_ino, inum)) { > + force_needsrepair(mp); > dep->name[0] = '/'; > libxfs_dir2_data_log_entry(&da, bp, dep); > } > @@ -1666,6 +1667,7 @@ longform_dir2_entry_check_data( > if (entry_junked( > _("entry \"%s\" in directory inode %" PRIu64 " points to free inode %" PRIu64), > fname, ip->i_ino, inum)) { > + force_needsrepair(mp); > dep->name[0] = '/'; > libxfs_dir2_data_log_entry(&da, bp, dep); > } > @@ -1684,6 +1686,7 @@ longform_dir2_entry_check_data( > if (entry_junked( > _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"), > ORPHANAGE, inum, ip->i_ino)) { > + force_needsrepair(mp); > dep->name[0] = '/'; > libxfs_dir2_data_log_entry(&da, bp, dep); > } > @@ -1706,6 +1709,7 @@ longform_dir2_entry_check_data( > if (entry_junked( > _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), > fname, inum, ip->i_ino)) { > + force_needsrepair(mp); > dep->name[0] = '/'; > libxfs_dir2_data_log_entry(&da, bp, dep); > } > @@ -1737,6 +1741,7 @@ longform_dir2_entry_check_data( > if (entry_junked( > _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname, > inum, 
ip->i_ino)) { > + force_needsrepair(mp); > dep->name[0] = '/'; > libxfs_dir2_data_log_entry(&da, bp, dep); > } > @@ -1764,6 +1769,7 @@ longform_dir2_entry_check_data( > if (entry_junked( > _("entry \"%s\" in dir %" PRIu64 " is not the first entry"), > fname, inum, ip->i_ino)) { > + force_needsrepair(mp); > dep->name[0] = '/'; > libxfs_dir2_data_log_entry(&da, bp, dep); > } > @@ -1852,6 +1858,7 @@ _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 " > orphanage_ino = 0; > nbad++; > if (!no_modify) { > + force_needsrepair(mp); > dep->name[0] = '/'; > libxfs_dir2_data_log_entry(&da, bp, dep); > if (verbose) > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c > index f607afcb..9dc73854 100644 > --- a/repair/xfs_repair.c > +++ b/repair/xfs_repair.c > @@ -754,6 +754,43 @@ clear_needsrepair( > libxfs_buf_relse(bp); > } > > +/* > + * Mark the filesystem as needing repair. This should only be called by code > + * that deliberately sets invalid sentinel values in the on-disk metadata to > + * trigger a later reconstruction, and only after we've settled the primary > + * super contents (i.e. after phase 1). > + */ > +void > +force_needsrepair( > + struct xfs_mount *mp) > +{ > + struct xfs_buf *bp; > + int error; > + > + if (!xfs_sb_version_hascrc(&mp->m_sb) || > + xfs_sb_version_needsrepair(&mp->m_sb)) > + return; > + > + bp = libxfs_getsb(mp); > + if (!bp || bp->b_error) { > + do_log( > + _("couldn't get superblock to set needsrepair, err=%d\n"), > + bp ? bp->b_error : ENOMEM); > + return; > + } else { > + mp->m_sb.sb_features_incompat |= > + XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR; > + libxfs_sb_to_disk(bp->b_addr, &mp->m_sb); > + > + /* Force the primary super to disk immediately. */ > + error = -libxfs_bwrite(bp); > + if (error) > + do_log(_("couldn't force needsrepair, err=%d\n"), error); > + } > + if (bp) > + libxfs_buf_relse(bp); > +} > + > int > main(int argc, char **argv) > { >
On Tue, Feb 09, 2021 at 12:20:59PM -0500, Brian Foster wrote: > On Mon, Feb 08, 2021 at 08:10:44PM -0800, Darrick J. Wong wrote: > > From: Darrick J. Wong <djwong@kernel.org> > > > > There are a few places in xfs_repair's directory checking code where we > > deliberately corrupt a directory entry as a sentinel to trigger a > > correction in later repair phase. In the mean time, the filesystem is > > inconsistent, so set the needsrepair flag to force a re-run of repair if > > the system goes down. > > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org> > > --- > > Hmm.. this seems orthogonal to the rest of the series. I'm sure we can > come up with various additional uses for the bit, but it seems a little > odd to me that repair might set it in some cases after a crash but not > others (if the filesystem happens to already be corrupt, for example). <nod> Another option I thought of is to add a hook to the buffer cache so that the first time anyone tries to bwrite a buffer (either directly or via a delwri list or normal buffer cache writeback) we'll also set needsrepair on the ondisk primary super. That would protect us against other scenarios like crashing after writing a new AGF but before writing the new AGI, where the fs is left in an indeterminate state. Hmm, maybe I should pursue /that/ instead. 
--D > Brian > > > repair/agheader.h | 2 ++ > > repair/dir2.c | 3 +++ > > repair/phase6.c | 7 +++++++ > > repair/xfs_repair.c | 37 +++++++++++++++++++++++++++++++++++++ > > 4 files changed, 49 insertions(+) > > > > > > diff --git a/repair/agheader.h b/repair/agheader.h > > index a63827c8..fa6fe596 100644 > > --- a/repair/agheader.h > > +++ b/repair/agheader.h > > @@ -82,3 +82,5 @@ typedef struct fs_geo_list { > > #define XR_AG_AGF 0x2 > > #define XR_AG_AGI 0x4 > > #define XR_AG_SB_SEC 0x8 > > + > > +void force_needsrepair(struct xfs_mount *mp); > > diff --git a/repair/dir2.c b/repair/dir2.c > > index eabdb4f2..922b8a3e 100644 > > --- a/repair/dir2.c > > +++ b/repair/dir2.c > > @@ -15,6 +15,7 @@ > > #include "da_util.h" > > #include "prefetch.h" > > #include "progress.h" > > +#include "agheader.h" > > > > /* > > * Known bad inode list. These are seen when the leaf and node > > @@ -774,6 +775,7 @@ _("entry at block %u offset %" PRIdPTR " in directory inode %" PRIu64 > > do_warn( > > _("\tclearing inode number in entry at offset %" PRIdPTR "...\n"), > > (intptr_t)ptr - (intptr_t)d); > > + force_needsrepair(mp); > > dep->name[0] = '/'; > > *dirty = 1; > > } else { > > @@ -914,6 +916,7 @@ _("entry \"%*.*s\" in directory inode %" PRIu64 " points to self: "), > > */ > > if (junkit) { > > if (!no_modify) { > > + force_needsrepair(mp); > > dep->name[0] = '/'; > > *dirty = 1; > > do_warn(_("clearing entry\n")); > > diff --git a/repair/phase6.c b/repair/phase6.c > > index 14464bef..5ecbe9b2 100644 > > --- a/repair/phase6.c > > +++ b/repair/phase6.c > > @@ -1649,6 +1649,7 @@ longform_dir2_entry_check_data( > > if (entry_junked( > > _("entry \"%s\" in directory inode %" PRIu64 " points to non-existent inode %" PRIu64 ""), > > fname, ip->i_ino, inum)) { > > + force_needsrepair(mp); > > dep->name[0] = '/'; > > libxfs_dir2_data_log_entry(&da, bp, dep); > > } > > @@ -1666,6 +1667,7 @@ longform_dir2_entry_check_data( > > if (entry_junked( > > _("entry \"%s\" in directory inode %" 
PRIu64 " points to free inode %" PRIu64), > > fname, ip->i_ino, inum)) { > > + force_needsrepair(mp); > > dep->name[0] = '/'; > > libxfs_dir2_data_log_entry(&da, bp, dep); > > } > > @@ -1684,6 +1686,7 @@ longform_dir2_entry_check_data( > > if (entry_junked( > > _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"), > > ORPHANAGE, inum, ip->i_ino)) { > > + force_needsrepair(mp); > > dep->name[0] = '/'; > > libxfs_dir2_data_log_entry(&da, bp, dep); > > } > > @@ -1706,6 +1709,7 @@ longform_dir2_entry_check_data( > > if (entry_junked( > > _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), > > fname, inum, ip->i_ino)) { > > + force_needsrepair(mp); > > dep->name[0] = '/'; > > libxfs_dir2_data_log_entry(&da, bp, dep); > > } > > @@ -1737,6 +1741,7 @@ longform_dir2_entry_check_data( > > if (entry_junked( > > _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname, > > inum, ip->i_ino)) { > > + force_needsrepair(mp); > > dep->name[0] = '/'; > > libxfs_dir2_data_log_entry(&da, bp, dep); > > } > > @@ -1764,6 +1769,7 @@ longform_dir2_entry_check_data( > > if (entry_junked( > > _("entry \"%s\" in dir %" PRIu64 " is not the first entry"), > > fname, inum, ip->i_ino)) { > > + force_needsrepair(mp); > > dep->name[0] = '/'; > > libxfs_dir2_data_log_entry(&da, bp, dep); > > } > > @@ -1852,6 +1858,7 @@ _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 " > > orphanage_ino = 0; > > nbad++; > > if (!no_modify) { > > + force_needsrepair(mp); > > dep->name[0] = '/'; > > libxfs_dir2_data_log_entry(&da, bp, dep); > > if (verbose) > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c > > index f607afcb..9dc73854 100644 > > --- a/repair/xfs_repair.c > > +++ b/repair/xfs_repair.c > > @@ -754,6 +754,43 @@ clear_needsrepair( > > libxfs_buf_relse(bp); > > } > > > > +/* > > + * Mark the filesystem as needing repair. 
This should only be called by code > > + * that deliberately sets invalid sentinel values in the on-disk metadata to > > + * trigger a later reconstruction, and only after we've settled the primary > > + * super contents (i.e. after phase 1). > > + */ > > +void > > +force_needsrepair( > > + struct xfs_mount *mp) > > +{ > > + struct xfs_buf *bp; > > + int error; > > + > > + if (!xfs_sb_version_hascrc(&mp->m_sb) || > > + xfs_sb_version_needsrepair(&mp->m_sb)) > > + return; > > + > > + bp = libxfs_getsb(mp); > > + if (!bp || bp->b_error) { > > + do_log( > > + _("couldn't get superblock to set needsrepair, err=%d\n"), > > + bp ? bp->b_error : ENOMEM); > > + return; > > + } else { > > + mp->m_sb.sb_features_incompat |= > > + XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR; > > + libxfs_sb_to_disk(bp->b_addr, &mp->m_sb); > > + > > + /* Force the primary super to disk immediately. */ > > + error = -libxfs_bwrite(bp); > > + if (error) > > + do_log(_("couldn't force needsrepair, err=%d\n"), error); > > + } > > + if (bp) > > + libxfs_buf_relse(bp); > > +} > > + > > int > > main(int argc, char **argv) > > { > > >
On Tue, Feb 09, 2021 at 09:13:36AM +0000, Christoph Hellwig wrote: > On Mon, Feb 08, 2021 at 08:10:44PM -0800, Darrick J. Wong wrote: > > From: Darrick J. Wong <djwong@kernel.org> > > > > There are a few places in xfs_repair's directory checking code where we > > deliberately corrupt a directory entry as a sentinel to trigger a > > correction in later repair phase. In the mean time, the filesystem is > > inconsistent, so set the needsrepair flag to force a re-run of repair if > > the system goes down. > > I guess this is an improvement over what we have no, but I suspect an > in-core side band way to notify the later phases would be much better > than these corrupt sentinel values.. It would be, but we'd still have to track which directory entries are the ones that we don't want to preserve. We could rewrite the directory entry to point to a plausible inode number that isn't allocated, but that runs the risk that someone will come along and allocate that inode for lost+found and then we're really in a mess. Alternately, I suppose we could rewrite the directory entry to be a DHT_WHITEOUT entry that doesn't look like any that the kernel would produce ... insofar as I'm not even sure what the real ones were supposed to look like, given the weird left turn that overlayfs took on that. :( --D
On Tue, Feb 09, 2021 at 10:35:42AM -0800, Darrick J. Wong wrote: > On Tue, Feb 09, 2021 at 12:20:59PM -0500, Brian Foster wrote: > > On Mon, Feb 08, 2021 at 08:10:44PM -0800, Darrick J. Wong wrote: > > > From: Darrick J. Wong <djwong@kernel.org> > > > > > > There are a few places in xfs_repair's directory checking code where we > > > deliberately corrupt a directory entry as a sentinel to trigger a > > > correction in later repair phase. In the mean time, the filesystem is > > > inconsistent, so set the needsrepair flag to force a re-run of repair if > > > the system goes down. > > > > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org> > > > --- > > > > Hmm.. this seems orthogonal to the rest of the series. I'm sure we can > > come up with various additional uses for the bit, but it seems a little > > odd to me that repair might set it in some cases after a crash but not > > others (if the filesystem happens to already be corrupt, for example). > > <nod> Another option I thought of is to add a hook to the buffer cache > so that the first time anyone tries to bwrite a buffer (either directly > or via a delwri list or normal buffer cache writeback) we'll also set > needsrepair on the ondisk primary super. That would protect us against > other scenarios like crashing after writing a new AGF but before writing > the new AGI, where the fs is left in an indeterminate state. > Yeah, that _seems_ more appropriate to me. It's not immediately clear to me what the implementation should look like, but in general behavior that sets needsrepair on first modification and clears it as a final step sounds more practical. Then the behavior can be easily explained as "once repair starts on an fs, it must be completed before it is allowed to mount." I do think this should be lifted off for a followon series so we can make progress on the feature upgrade bits without growing more requirements and complexity.. Brian > Hmm, maybe I should pursue /that/ instead. 
> > --D > > > Brian > > > > > repair/agheader.h | 2 ++ > > > repair/dir2.c | 3 +++ > > > repair/phase6.c | 7 +++++++ > > > repair/xfs_repair.c | 37 +++++++++++++++++++++++++++++++++++++ > > > 4 files changed, 49 insertions(+) > > > > > > > > > diff --git a/repair/agheader.h b/repair/agheader.h > > > index a63827c8..fa6fe596 100644 > > > --- a/repair/agheader.h > > > +++ b/repair/agheader.h > > > @@ -82,3 +82,5 @@ typedef struct fs_geo_list { > > > #define XR_AG_AGF 0x2 > > > #define XR_AG_AGI 0x4 > > > #define XR_AG_SB_SEC 0x8 > > > + > > > +void force_needsrepair(struct xfs_mount *mp); > > > diff --git a/repair/dir2.c b/repair/dir2.c > > > index eabdb4f2..922b8a3e 100644 > > > --- a/repair/dir2.c > > > +++ b/repair/dir2.c > > > @@ -15,6 +15,7 @@ > > > #include "da_util.h" > > > #include "prefetch.h" > > > #include "progress.h" > > > +#include "agheader.h" > > > > > > /* > > > * Known bad inode list. These are seen when the leaf and node > > > @@ -774,6 +775,7 @@ _("entry at block %u offset %" PRIdPTR " in directory inode %" PRIu64 > > > do_warn( > > > _("\tclearing inode number in entry at offset %" PRIdPTR "...\n"), > > > (intptr_t)ptr - (intptr_t)d); > > > + force_needsrepair(mp); > > > dep->name[0] = '/'; > > > *dirty = 1; > > > } else { > > > @@ -914,6 +916,7 @@ _("entry \"%*.*s\" in directory inode %" PRIu64 " points to self: "), > > > */ > > > if (junkit) { > > > if (!no_modify) { > > > + force_needsrepair(mp); > > > dep->name[0] = '/'; > > > *dirty = 1; > > > do_warn(_("clearing entry\n")); > > > diff --git a/repair/phase6.c b/repair/phase6.c > > > index 14464bef..5ecbe9b2 100644 > > > --- a/repair/phase6.c > > > +++ b/repair/phase6.c > > > @@ -1649,6 +1649,7 @@ longform_dir2_entry_check_data( > > > if (entry_junked( > > > _("entry \"%s\" in directory inode %" PRIu64 " points to non-existent inode %" PRIu64 ""), > > > fname, ip->i_ino, inum)) { > > > + force_needsrepair(mp); > > > dep->name[0] = '/'; > > > libxfs_dir2_data_log_entry(&da, bp, dep); > > > } > 
> > @@ -1666,6 +1667,7 @@ longform_dir2_entry_check_data( > > > if (entry_junked( > > > _("entry \"%s\" in directory inode %" PRIu64 " points to free inode %" PRIu64), > > > fname, ip->i_ino, inum)) { > > > + force_needsrepair(mp); > > > dep->name[0] = '/'; > > > libxfs_dir2_data_log_entry(&da, bp, dep); > > > } > > > @@ -1684,6 +1686,7 @@ longform_dir2_entry_check_data( > > > if (entry_junked( > > > _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"), > > > ORPHANAGE, inum, ip->i_ino)) { > > > + force_needsrepair(mp); > > > dep->name[0] = '/'; > > > libxfs_dir2_data_log_entry(&da, bp, dep); > > > } > > > @@ -1706,6 +1709,7 @@ longform_dir2_entry_check_data( > > > if (entry_junked( > > > _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), > > > fname, inum, ip->i_ino)) { > > > + force_needsrepair(mp); > > > dep->name[0] = '/'; > > > libxfs_dir2_data_log_entry(&da, bp, dep); > > > } > > > @@ -1737,6 +1741,7 @@ longform_dir2_entry_check_data( > > > if (entry_junked( > > > _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname, > > > inum, ip->i_ino)) { > > > + force_needsrepair(mp); > > > dep->name[0] = '/'; > > > libxfs_dir2_data_log_entry(&da, bp, dep); > > > } > > > @@ -1764,6 +1769,7 @@ longform_dir2_entry_check_data( > > > if (entry_junked( > > > _("entry \"%s\" in dir %" PRIu64 " is not the first entry"), > > > fname, inum, ip->i_ino)) { > > > + force_needsrepair(mp); > > > dep->name[0] = '/'; > > > libxfs_dir2_data_log_entry(&da, bp, dep); > > > } > > > @@ -1852,6 +1858,7 @@ _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. 
value (%" PRIu64 " > > > orphanage_ino = 0; > > > nbad++; > > > if (!no_modify) { > > > + force_needsrepair(mp); > > > dep->name[0] = '/'; > > > libxfs_dir2_data_log_entry(&da, bp, dep); > > > if (verbose) > > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c > > > index f607afcb..9dc73854 100644 > > > --- a/repair/xfs_repair.c > > > +++ b/repair/xfs_repair.c > > > @@ -754,6 +754,43 @@ clear_needsrepair( > > > libxfs_buf_relse(bp); > > > } > > > > > > +/* > > > + * Mark the filesystem as needing repair. This should only be called by code > > > + * that deliberately sets invalid sentinel values in the on-disk metadata to > > > + * trigger a later reconstruction, and only after we've settled the primary > > > + * super contents (i.e. after phase 1). > > > + */ > > > +void > > > +force_needsrepair( > > > + struct xfs_mount *mp) > > > +{ > > > + struct xfs_buf *bp; > > > + int error; > > > + > > > + if (!xfs_sb_version_hascrc(&mp->m_sb) || > > > + xfs_sb_version_needsrepair(&mp->m_sb)) > > > + return; > > > + > > > + bp = libxfs_getsb(mp); > > > + if (!bp || bp->b_error) { > > > + do_log( > > > + _("couldn't get superblock to set needsrepair, err=%d\n"), > > > + bp ? bp->b_error : ENOMEM); > > > + return; > > > + } else { > > > + mp->m_sb.sb_features_incompat |= > > > + XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR; > > > + libxfs_sb_to_disk(bp->b_addr, &mp->m_sb); > > > + > > > + /* Force the primary super to disk immediately. */ > > > + error = -libxfs_bwrite(bp); > > > + if (error) > > > + do_log(_("couldn't force needsrepair, err=%d\n"), error); > > > + } > > > + if (bp) > > > + libxfs_buf_relse(bp); > > > +} > > > + > > > int > > > main(int argc, char **argv) > > > { > > > > > >
On Tue, Feb 09, 2021 at 02:14:22PM -0500, Brian Foster wrote: > On Tue, Feb 09, 2021 at 10:35:42AM -0800, Darrick J. Wong wrote: > > On Tue, Feb 09, 2021 at 12:20:59PM -0500, Brian Foster wrote: > > > On Mon, Feb 08, 2021 at 08:10:44PM -0800, Darrick J. Wong wrote: > > > > From: Darrick J. Wong <djwong@kernel.org> > > > > > > > > There are a few places in xfs_repair's directory checking code where we > > > > deliberately corrupt a directory entry as a sentinel to trigger a > > > > correction in later repair phase. In the mean time, the filesystem is > > > > inconsistent, so set the needsrepair flag to force a re-run of repair if > > > > the system goes down. > > > > > > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org> > > > > --- > > > > > > Hmm.. this seems orthogonal to the rest of the series. I'm sure we can > > > come up with various additional uses for the bit, but it seems a little > > > odd to me that repair might set it in some cases after a crash but not > > > others (if the filesystem happens to already be corrupt, for example). > > > > <nod> Another option I thought of is to add a hook to the buffer cache > > so that the first time anyone tries to bwrite a buffer (either directly > > or via a delwri list or normal buffer cache writeback) we'll also set > > needsrepair on the ondisk primary super. That would protect us against > > other scenarios like crashing after writing a new AGF but before writing > > the new AGI, where the fs is left in an indeterminate state. > > > > Yeah, that _seems_ more appropriate to me. It's not immediately clear to > me what the implementation should look like, but in general behavior > that sets needsrepair on first modification and clears it as a final > step sounds more practical. Then the behavior can be easily explained as > "once repair starts on an fs, it must be completed before it is allowed > to mount." 
I do think this should be lifted off for a followon series so > we can make progress on the feature upgrade bits without growing more > requirements and complexity.. Oh, definitely. I'll withdraw this patch for now in the interests of getting everything else going for Eric. :) --D > Brian > > > Hmm, maybe I should pursue /that/ instead. > > > > --D > > > > > Brian > > > > > > > repair/agheader.h | 2 ++ > > > > repair/dir2.c | 3 +++ > > > > repair/phase6.c | 7 +++++++ > > > > repair/xfs_repair.c | 37 +++++++++++++++++++++++++++++++++++++ > > > > 4 files changed, 49 insertions(+) > > > > > > > > > > > > diff --git a/repair/agheader.h b/repair/agheader.h > > > > index a63827c8..fa6fe596 100644 > > > > --- a/repair/agheader.h > > > > +++ b/repair/agheader.h > > > > @@ -82,3 +82,5 @@ typedef struct fs_geo_list { > > > > #define XR_AG_AGF 0x2 > > > > #define XR_AG_AGI 0x4 > > > > #define XR_AG_SB_SEC 0x8 > > > > + > > > > +void force_needsrepair(struct xfs_mount *mp); > > > > diff --git a/repair/dir2.c b/repair/dir2.c > > > > index eabdb4f2..922b8a3e 100644 > > > > --- a/repair/dir2.c > > > > +++ b/repair/dir2.c > > > > @@ -15,6 +15,7 @@ > > > > #include "da_util.h" > > > > #include "prefetch.h" > > > > #include "progress.h" > > > > +#include "agheader.h" > > > > > > > > /* > > > > * Known bad inode list. 
These are seen when the leaf and node > > > > @@ -774,6 +775,7 @@ _("entry at block %u offset %" PRIdPTR " in directory inode %" PRIu64 > > > > do_warn( > > > > _("\tclearing inode number in entry at offset %" PRIdPTR "...\n"), > > > > (intptr_t)ptr - (intptr_t)d); > > > > + force_needsrepair(mp); > > > > dep->name[0] = '/'; > > > > *dirty = 1; > > > > } else { > > > > @@ -914,6 +916,7 @@ _("entry \"%*.*s\" in directory inode %" PRIu64 " points to self: "), > > > > */ > > > > if (junkit) { > > > > if (!no_modify) { > > > > + force_needsrepair(mp); > > > > dep->name[0] = '/'; > > > > *dirty = 1; > > > > do_warn(_("clearing entry\n")); > > > > diff --git a/repair/phase6.c b/repair/phase6.c > > > > index 14464bef..5ecbe9b2 100644 > > > > --- a/repair/phase6.c > > > > +++ b/repair/phase6.c > > > > @@ -1649,6 +1649,7 @@ longform_dir2_entry_check_data( > > > > if (entry_junked( > > > > _("entry \"%s\" in directory inode %" PRIu64 " points to non-existent inode %" PRIu64 ""), > > > > fname, ip->i_ino, inum)) { > > > > + force_needsrepair(mp); > > > > dep->name[0] = '/'; > > > > libxfs_dir2_data_log_entry(&da, bp, dep); > > > > } > > > > @@ -1666,6 +1667,7 @@ longform_dir2_entry_check_data( > > > > if (entry_junked( > > > > _("entry \"%s\" in directory inode %" PRIu64 " points to free inode %" PRIu64), > > > > fname, ip->i_ino, inum)) { > > > > + force_needsrepair(mp); > > > > dep->name[0] = '/'; > > > > libxfs_dir2_data_log_entry(&da, bp, dep); > > > > } > > > > @@ -1684,6 +1686,7 @@ longform_dir2_entry_check_data( > > > > if (entry_junked( > > > > _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"), > > > > ORPHANAGE, inum, ip->i_ino)) { > > > > + force_needsrepair(mp); > > > > dep->name[0] = '/'; > > > > libxfs_dir2_data_log_entry(&da, bp, dep); > > > > } > > > > @@ -1706,6 +1709,7 @@ longform_dir2_entry_check_data( > > > > if (entry_junked( > > > > _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), > > > > fname, inum, 
ip->i_ino)) { > > > > + force_needsrepair(mp); > > > > dep->name[0] = '/'; > > > > libxfs_dir2_data_log_entry(&da, bp, dep); > > > > } > > > > @@ -1737,6 +1741,7 @@ longform_dir2_entry_check_data( > > > > if (entry_junked( > > > > _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname, > > > > inum, ip->i_ino)) { > > > > + force_needsrepair(mp); > > > > dep->name[0] = '/'; > > > > libxfs_dir2_data_log_entry(&da, bp, dep); > > > > } > > > > @@ -1764,6 +1769,7 @@ longform_dir2_entry_check_data( > > > > if (entry_junked( > > > > _("entry \"%s\" in dir %" PRIu64 " is not the first entry"), > > > > fname, inum, ip->i_ino)) { > > > > + force_needsrepair(mp); > > > > dep->name[0] = '/'; > > > > libxfs_dir2_data_log_entry(&da, bp, dep); > > > > } > > > > @@ -1852,6 +1858,7 @@ _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 " > > > > orphanage_ino = 0; > > > > nbad++; > > > > if (!no_modify) { > > > > + force_needsrepair(mp); > > > > dep->name[0] = '/'; > > > > libxfs_dir2_data_log_entry(&da, bp, dep); > > > > if (verbose) > > > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c > > > > index f607afcb..9dc73854 100644 > > > > --- a/repair/xfs_repair.c > > > > +++ b/repair/xfs_repair.c > > > > @@ -754,6 +754,43 @@ clear_needsrepair( > > > > libxfs_buf_relse(bp); > > > > } > > > > > > > > +/* > > > > + * Mark the filesystem as needing repair. This should only be called by code > > > > + * that deliberately sets invalid sentinel values in the on-disk metadata to > > > > + * trigger a later reconstruction, and only after we've settled the primary > > > > + * super contents (i.e. after phase 1). 
> > > > + */ > > > > +void > > > > +force_needsrepair( > > > > + struct xfs_mount *mp) > > > > +{ > > > > + struct xfs_buf *bp; > > > > + int error; > > > > + > > > > + if (!xfs_sb_version_hascrc(&mp->m_sb) || > > > > + xfs_sb_version_needsrepair(&mp->m_sb)) > > > > + return; > > > > + > > > > + bp = libxfs_getsb(mp); > > > > + if (!bp || bp->b_error) { > > > > + do_log( > > > > + _("couldn't get superblock to set needsrepair, err=%d\n"), > > > > + bp ? bp->b_error : ENOMEM); > > > > + return; > > > > + } else { > > > > + mp->m_sb.sb_features_incompat |= > > > > + XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR; > > > > + libxfs_sb_to_disk(bp->b_addr, &mp->m_sb); > > > > + > > > > + /* Force the primary super to disk immediately. */ > > > > + error = -libxfs_bwrite(bp); > > > > + if (error) > > > > + do_log(_("couldn't force needsrepair, err=%d\n"), error); > > > > + } > > > > + if (bp) > > > > + libxfs_buf_relse(bp); > > > > +} > > > > + > > > > int > > > > main(int argc, char **argv) > > > > { > > > > > > > > > >
On 2/9/21 1:43 PM, Darrick J. Wong wrote: > On Tue, Feb 09, 2021 at 02:14:22PM -0500, Brian Foster wrote: >> On Tue, Feb 09, 2021 at 10:35:42AM -0800, Darrick J. Wong wrote: >>> On Tue, Feb 09, 2021 at 12:20:59PM -0500, Brian Foster wrote: >>>> On Mon, Feb 08, 2021 at 08:10:44PM -0800, Darrick J. Wong wrote: >>>>> From: Darrick J. Wong <djwong@kernel.org> >>>>> >>>>> There are a few places in xfs_repair's directory checking code where we >>>>> deliberately corrupt a directory entry as a sentinel to trigger a >>>>> correction in later repair phase. In the mean time, the filesystem is >>>>> inconsistent, so set the needsrepair flag to force a re-run of repair if >>>>> the system goes down. >>>>> >>>>> Signed-off-by: Darrick J. Wong <djwong@kernel.org> >>>>> --- >>>> >>>> Hmm.. this seems orthogonal to the rest of the series. I'm sure we can >>>> come up with various additional uses for the bit, but it seems a little >>>> odd to me that repair might set it in some cases after a crash but not >>>> others (if the filesystem happens to already be corrupt, for example). >>> >>> <nod> Another option I thought of is to add a hook to the buffer cache >>> so that the first time anyone tries to bwrite a buffer (either directly >>> or via a delwri list or normal buffer cache writeback) we'll also set >>> needsrepair on the ondisk primary super. That would protect us against >>> other scenarios like crashing after writing a new AGF but before writing >>> the new AGI, where the fs is left in an indeterminate state. >>> >> >> Yeah, that _seems_ more appropriate to me. It's not immediately clear to >> me what the implementation should look like, but in general behavior >> that sets needsrepair on first modification and clears it as a final >> step sounds more practical. Then the behavior can be easily explained as >> "once repair starts on an fs, it must be completed before it is allowed >> to mount." 
I do think this should be lifted off for a followon series so >> we can make progress on the feature upgrade bits without growing more >> requirements and complexity.. > > Oh, definitely. I'll withdraw this patch for now in the interests of > getting everything else going for Eric. :) Noted, I'll drop this one for now, thanks. -Eric
diff --git a/repair/agheader.h b/repair/agheader.h index a63827c8..fa6fe596 100644 --- a/repair/agheader.h +++ b/repair/agheader.h @@ -82,3 +82,5 @@ typedef struct fs_geo_list { #define XR_AG_AGF 0x2 #define XR_AG_AGI 0x4 #define XR_AG_SB_SEC 0x8 + +void force_needsrepair(struct xfs_mount *mp); diff --git a/repair/dir2.c b/repair/dir2.c index eabdb4f2..922b8a3e 100644 --- a/repair/dir2.c +++ b/repair/dir2.c @@ -15,6 +15,7 @@ #include "da_util.h" #include "prefetch.h" #include "progress.h" +#include "agheader.h" /* * Known bad inode list. These are seen when the leaf and node @@ -774,6 +775,7 @@ _("entry at block %u offset %" PRIdPTR " in directory inode %" PRIu64 do_warn( _("\tclearing inode number in entry at offset %" PRIdPTR "...\n"), (intptr_t)ptr - (intptr_t)d); + force_needsrepair(mp); dep->name[0] = '/'; *dirty = 1; } else { @@ -914,6 +916,7 @@ _("entry \"%*.*s\" in directory inode %" PRIu64 " points to self: "), */ if (junkit) { if (!no_modify) { + force_needsrepair(mp); dep->name[0] = '/'; *dirty = 1; do_warn(_("clearing entry\n")); diff --git a/repair/phase6.c b/repair/phase6.c index 14464bef..5ecbe9b2 100644 --- a/repair/phase6.c +++ b/repair/phase6.c @@ -1649,6 +1649,7 @@ longform_dir2_entry_check_data( if (entry_junked( _("entry \"%s\" in directory inode %" PRIu64 " points to non-existent inode %" PRIu64 ""), fname, ip->i_ino, inum)) { + force_needsrepair(mp); dep->name[0] = '/'; libxfs_dir2_data_log_entry(&da, bp, dep); } @@ -1666,6 +1667,7 @@ longform_dir2_entry_check_data( if (entry_junked( _("entry \"%s\" in directory inode %" PRIu64 " points to free inode %" PRIu64), fname, ip->i_ino, inum)) { + force_needsrepair(mp); dep->name[0] = '/'; libxfs_dir2_data_log_entry(&da, bp, dep); } @@ -1684,6 +1686,7 @@ longform_dir2_entry_check_data( if (entry_junked( _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"), ORPHANAGE, inum, ip->i_ino)) { + force_needsrepair(mp); dep->name[0] = '/'; libxfs_dir2_data_log_entry(&da, bp, dep); } @@ -1706,6 
+1709,7 @@ longform_dir2_entry_check_data( if (entry_junked( _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), fname, inum, ip->i_ino)) { + force_needsrepair(mp); dep->name[0] = '/'; libxfs_dir2_data_log_entry(&da, bp, dep); } @@ -1737,6 +1741,7 @@ longform_dir2_entry_check_data( if (entry_junked( _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname, inum, ip->i_ino)) { + force_needsrepair(mp); dep->name[0] = '/'; libxfs_dir2_data_log_entry(&da, bp, dep); } @@ -1764,6 +1769,7 @@ longform_dir2_entry_check_data( if (entry_junked( _("entry \"%s\" in dir %" PRIu64 " is not the first entry"), fname, inum, ip->i_ino)) { + force_needsrepair(mp); dep->name[0] = '/'; libxfs_dir2_data_log_entry(&da, bp, dep); } @@ -1852,6 +1858,7 @@ _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 " orphanage_ino = 0; nbad++; if (!no_modify) { + force_needsrepair(mp); dep->name[0] = '/'; libxfs_dir2_data_log_entry(&da, bp, dep); if (verbose) diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c index f607afcb..9dc73854 100644 --- a/repair/xfs_repair.c +++ b/repair/xfs_repair.c @@ -754,6 +754,43 @@ clear_needsrepair( libxfs_buf_relse(bp); } +/* + * Mark the filesystem as needing repair. This should only be called by code + * that deliberately sets invalid sentinel values in the on-disk metadata to + * trigger a later reconstruction, and only after we've settled the primary + * super contents (i.e. after phase 1). + */ +void +force_needsrepair( + struct xfs_mount *mp) +{ + struct xfs_buf *bp; + int error; + + if (!xfs_sb_version_hascrc(&mp->m_sb) || + xfs_sb_version_needsrepair(&mp->m_sb)) + return; + + bp = libxfs_getsb(mp); + if (!bp || bp->b_error) { + do_log( + _("couldn't get superblock to set needsrepair, err=%d\n"), + bp ? 
bp->b_error : ENOMEM); + return; + } else { + mp->m_sb.sb_features_incompat |= + XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR; + libxfs_sb_to_disk(bp->b_addr, &mp->m_sb); + + /* Force the primary super to disk immediately. */ + error = -libxfs_bwrite(bp); + if (error) + do_log(_("couldn't force needsrepair, err=%d\n"), error); + } + if (bp) + libxfs_buf_relse(bp); +} + int main(int argc, char **argv) {