diff mbox series

xfs: use log_incompat feature instead of speculate matching

Message ID 20200825100601.2529-1-hsiangkao@redhat.com (mailing list archive)
State New, archived
Headers show
Series xfs: use log_incompat feature instead of speculate matching | expand

Commit Message

Gao Xiang Aug. 25, 2020, 10:06 a.m. UTC
Add a log_incompat (v5) or sb_features2 (v4) feature
of a single long iunlinked list just to be safe. Hence,
older kernels will refuse to replay log for v5 images
or mount entirely for v4 images.

If the current mount is in RO state, it will defer
to the next RW (re)mount to add such flag instead.

Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
Different combinations have been tested (v4/v5 and before/after patch).

Based on the top of
`[PATCH 13/13] xfs: reorder iunlink remove operation in xfs_ifree`
https://lore.kernel.org/r/20200812092556.2567285-14-david@fromorbit.com

Either folding or rearranging this patch would be okay.

Maybe xfsprogs could be also patched as well to change the default
feature setting, but let me send out this first...

(It's possible that I'm still missing something...
 Kindly point out any time.)

 fs/xfs/libxfs/xfs_format.h | 29 +++++++++++++++++++++++++++--
 fs/xfs/xfs_inode.c         |  2 +-
 fs/xfs/xfs_mount.c         |  6 ++++++
 3 files changed, 34 insertions(+), 3 deletions(-)

Comments

Darrick J. Wong Aug. 25, 2020, 2:54 p.m. UTC | #1
On Tue, Aug 25, 2020 at 06:06:01PM +0800, Gao Xiang wrote:
> Add a log_incompat (v5) or sb_features2 (v4) feature
> of a single long iunlinked list just to be safe. Hence,
> older kernels will refuse to replay log for v5 images
> or mount entirely for v4 images.
> 
> If the current mount is in RO state, it will defer
> to the next RW (re)mount to add such flag instead.

This commit log needs to state /why/ we need a new feature flag in
addition to summarizing what is being added here.  For example,

"Introduce a new feature flag to collapse the unlinked hash to a single
bucket.  Doing so removes the need to lock the AGI in addition to the
previous and next items in the unlinked list.  Older kernels will think
that inodes are in the wrong unlinked hash bucket and declare the fs
corrupt, so the new feature is needed to prevent them from touching the
filesystem."

(or whatever the real reason is, I'm attending DebConf and LPC and
wasn't following 100%...)

Note that the above was a guess, because I actually can't tell if this
feature is needed to prevent old kernels from tripping over our new
strategy, or to prevent new kernels from running off the road if an old
kernel wrote all the hash buckets.  I would've thought both cases would
be fine...?

> Suggested-by: Christoph Hellwig <hch@infradead.org>
> Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
> ---
> Different combinations have been tested (v4/v5 and before/after patch).
> 
> Based on the top of
> `[PATCH 13/13] xfs: reorder iunlink remove operation in xfs_ifree`
> https://lore.kernel.org/r/20200812092556.2567285-14-david@fromorbit.com
> 
> Either folding or rearranging this patch would be okay.
> 
> Maybe xfsprogs could be also patched as well to change the default
> feature setting, but let me send out this first...
> 
> (It's possible that I'm still missing something...
>  Kindly point out any time.)
> 
>  fs/xfs/libxfs/xfs_format.h | 29 +++++++++++++++++++++++++++--
>  fs/xfs/xfs_inode.c         |  2 +-
>  fs/xfs/xfs_mount.c         |  6 ++++++
>  3 files changed, 34 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
> index 31b7ece985bb..a859fe601f6e 100644
> --- a/fs/xfs/libxfs/xfs_format.h
> +++ b/fs/xfs/libxfs/xfs_format.h
> @@ -79,12 +79,14 @@ struct xfs_ifork;
>  #define XFS_SB_VERSION2_PROJID32BIT	0x00000080	/* 32 bit project id */
>  #define XFS_SB_VERSION2_CRCBIT		0x00000100	/* metadata CRCs */
>  #define XFS_SB_VERSION2_FTYPE		0x00000200	/* inode type in dir */
> +#define XFS_SB_VERSION2_NEW_IUNLINK	0x00000400	/* (v4) new iunlink */
>  
>  #define	XFS_SB_VERSION2_OKBITS		\
>  	(XFS_SB_VERSION2_LAZYSBCOUNTBIT	| \
>  	 XFS_SB_VERSION2_ATTR2BIT	| \
>  	 XFS_SB_VERSION2_PROJID32BIT	| \
> -	 XFS_SB_VERSION2_FTYPE)
> +	 XFS_SB_VERSION2_FTYPE		| \
> +	 XFS_SB_VERSION2_NEW_IUNLINK)

NAK on this part; as I said earlier, don't add things to V4 filesystems.

If the rest of you have compelling reasons to want V4 support, now is
the time to speak up.

>  /* Maximum size of the xfs filesystem label, no terminating NULL */
>  #define XFSLABEL_MAX			12
> @@ -479,7 +481,9 @@ xfs_sb_has_incompat_feature(
>  	return (sbp->sb_features_incompat & feature) != 0;
>  }
>  
> -#define XFS_SB_FEAT_INCOMPAT_LOG_ALL 0
> +#define XFS_SB_FEAT_INCOMPAT_LOG_NEW_IUNLINK	(1 << 0)
> +#define XFS_SB_FEAT_INCOMPAT_LOG_ALL	\
> +		(XFS_SB_FEAT_INCOMPAT_LOG_NEW_IUNLINK)

There's a trick here: Define the feature flag at the very start of your
patchset, then make the last patch in the set add it to the _ALL macro
so that people bisecting their way through the git tree (with this
feature turned on) won't unwittingly build a kernel with the feature
half built and blow their filesystem to pieces.

>  #define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN	~XFS_SB_FEAT_INCOMPAT_LOG_ALL
>  static inline bool
>  xfs_sb_has_incompat_log_feature(
> @@ -563,6 +567,27 @@ static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp)
>  		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK);
>  }
>  
> +static inline bool xfs_sb_has_new_iunlink(struct xfs_sb *sbp)
> +{
> +	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5)
> +		return sbp->sb_features_log_incompat &
> +			XFS_SB_FEAT_INCOMPAT_LOG_NEW_IUNLINK;
> +
> +	return xfs_sb_version_hasmorebits(sbp) &&
> +		(sbp->sb_features2 & XFS_SB_VERSION2_NEW_IUNLINK);
> +}
> +
> +static inline void xfs_sb_add_new_iunlink(struct xfs_sb *sbp)
> +{
> +	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
> +		sbp->sb_features_log_incompat |=
> +			XFS_SB_FEAT_INCOMPAT_LOG_NEW_IUNLINK;
> +		return;
> +	}
> +	sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
> +	sbp->sb_features2 |= XFS_SB_VERSION2_NEW_IUNLINK;

All metadata updates need to be logged.  Dave just spent a bunch of time
heckling me for that in the y2038 patchset. ;)

Also, I don't think it's a good idea to enable new incompat features
automatically, since this makes the fs unmountable on old kernels.

> +}
> +
>  /*
>   * end of superblock version macros
>   */
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 7ee778bcde06..1656ed7dcadf 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -1952,7 +1952,7 @@ xfs_iunlink_update_bucket(
>  	if (!log || log->l_flags & XLOG_RECOVERY_NEEDED) {
>  		ASSERT(cur_agino != NULLAGINO);
>  
> -		if (be32_to_cpu(agi->agi_unlinked[0]) != cur_agino)
> +		if (!xfs_sb_has_new_iunlink(&mp->m_sb))
>  			bucket_index = cur_agino % XFS_AGI_UNLINKED_BUCKETS;

Oh, is this the one change added by the feature? :)

>  	}
>  
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index f28c969af272..a3b2e3c3d32f 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -836,6 +836,12 @@ xfs_mountfs(
>  		goto out_fail_wait;
>  	}
>  
> +	if (!xfs_sb_has_new_iunlink(sbp)) {
> +		xfs_warn(mp, "will switch to long iunlinked list on r/w");
> +		xfs_sb_add_new_iunlink(sbp);
> +		mp->m_update_sb = true;
> +	}
> +
>  	/* Make sure the summary counts are ok. */
>  	error = xfs_check_summary_counts(mp);
>  	if (error)
> -- 
> 2.18.1
>
Gao Xiang Aug. 25, 2020, 3:30 p.m. UTC | #2
Hi Darrick,

On Tue, Aug 25, 2020 at 07:54:58AM -0700, Darrick J. Wong wrote:
> On Tue, Aug 25, 2020 at 06:06:01PM +0800, Gao Xiang wrote:
> > Add a log_incompat (v5) or sb_features2 (v4) feature
> > of a single long iunlinked list just to be safe. Hence,
> > older kernels will refuse to replay log for v5 images
> > or mount entirely for v4 images.
> > 
> > If the current mount is in RO state, it will defer
> > to the next RW (re)mount to add such flag instead.
> 
> This commit log needs to state /why/ we need a new feature flag in
> addition to summarizing what is being added here.  For example,
> 
> "Introduce a new feature flag to collapse the unlinked hash to a single
> bucket.  Doing so removes the need to lock the AGI in addition to the
> previous and next items in the unlinked list.  Older kernels will think
> that inodes are in the wrong unlinked hash bucket and declare the fs
> corrupt, so the new feature is needed to prevent them from touching the
> filesystem."
> 
> (or whatever the real reason is, I'm attending DebConf and LPC and
> wasn't following 100%...)
> 
> Note that the above was a guess, because I actually can't tell if this
> feature is needed to prevent old kernels from tripping over our new
> strategy, or to prevent new kernels from running off the road if an old
> kernel wrote all the hash buckets.  I would've thought both cases would
> be fine...?

To prevent old kernels from tripping over our new strategy.

Images generated by old kernels would be fine.

> 

...

> >  #define	XFS_SB_VERSION2_OKBITS		\
> >  	(XFS_SB_VERSION2_LAZYSBCOUNTBIT	| \
> >  	 XFS_SB_VERSION2_ATTR2BIT	| \
> >  	 XFS_SB_VERSION2_PROJID32BIT	| \
> > -	 XFS_SB_VERSION2_FTYPE)
> > +	 XFS_SB_VERSION2_FTYPE		| \
> > +	 XFS_SB_VERSION2_NEW_IUNLINK)
> 
> NAK on this part; as I said earlier, don't add things to V4 filesystems.
> 
> If the rest of you have compelling reasons to want V4 support, now is
> the time to speak up.

The simple reason is that the current xfs_iunlink() code only generates
unlinked list in the new way but no multiple buckets. So, we must have
a choice for V4 since it's still supported by the current kernel:

 1) add some feature to entirely refuse new v4 images on older kernels;
 2) allow speculate matching so older kernel would bail out as fs corruption
    (but I have no idea if it has any harm);  

> 
> >  /* Maximum size of the xfs filesystem label, no terminating NULL */
> >  #define XFSLABEL_MAX			12
> > @@ -479,7 +481,9 @@ xfs_sb_has_incompat_feature(
> >  	return (sbp->sb_features_incompat & feature) != 0;
> >  }
> >  
> > -#define XFS_SB_FEAT_INCOMPAT_LOG_ALL 0
> > +#define XFS_SB_FEAT_INCOMPAT_LOG_NEW_IUNLINK	(1 << 0)
> > +#define XFS_SB_FEAT_INCOMPAT_LOG_ALL	\
> > +		(XFS_SB_FEAT_INCOMPAT_LOG_NEW_IUNLINK)
> 
> There's a trick here: Define the feature flag at the very start of your
> patchset, then make the last patch in the set add it to the _ALL macro
> so that people bisecting their way through the git tree (with this
> feature turned on) won't unwittingly build a kernel with the feature
> half built and blow their filesystem to pieces.

hmmm... not quite get the point though.
For this specific patch, I think it'll be folded into some patch or
rearranged.

It should not be a followed-up patch (we must do some decision in advance
 -- whether or not add this feature).

> 
> >  #define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN	~XFS_SB_FEAT_INCOMPAT_LOG_ALL
> >  static inline bool
> >  xfs_sb_has_incompat_log_feature(
> > @@ -563,6 +567,27 @@ static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp)
> >  		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK);
> >  }
> >  
> > +static inline bool xfs_sb_has_new_iunlink(struct xfs_sb *sbp)
> > +{
> > +	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5)
> > +		return sbp->sb_features_log_incompat &
> > +			XFS_SB_FEAT_INCOMPAT_LOG_NEW_IUNLINK;
> > +
> > +	return xfs_sb_version_hasmorebits(sbp) &&
> > +		(sbp->sb_features2 & XFS_SB_VERSION2_NEW_IUNLINK);
> > +}
> > +
> > +static inline void xfs_sb_add_new_iunlink(struct xfs_sb *sbp)
> > +{
> > +	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
> > +		sbp->sb_features_log_incompat |=
> > +			XFS_SB_FEAT_INCOMPAT_LOG_NEW_IUNLINK;
> > +		return;
> > +	}
> > +	sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
> > +	sbp->sb_features2 |= XFS_SB_VERSION2_NEW_IUNLINK;
> 
> All metadata updates need to be logged.  Dave just spent a bunch of time
> heckling me for that in the y2038 patchset. ;)

hmmm... xfs_sync_sb in xfs_mountfs() will generate a sb transaction,
right? I don't get the risk here.

> 
> Also, I don't think it's a good idea to enable new incompat features
> automatically, since this makes the fs unmountable on old kernels.

As I said above, new xfs_iunlink() doesn't support multiple buckets
anymore (just support it for log recovery). So this feature would be
needed.

If supporting old multiple buckets xfs_iunlink() is needed, that's
a quite large modification of this entire patchset.

Thanks,
Gao Xiang
Christoph Hellwig Aug. 27, 2020, 7:19 a.m. UTC | #3
On Tue, Aug 25, 2020 at 07:54:58AM -0700, Darrick J. Wong wrote:
> > +	 XFS_SB_VERSION2_FTYPE		| \
> > +	 XFS_SB_VERSION2_NEW_IUNLINK)
> 
> NAK on this part; as I said earlier, don't add things to V4 filesystems.
> 
> If the rest of you have compelling reasons to want V4 support, now is
> the time to speak up.

I think that it because the series uses the longer unhashed chains
unconditionally.  And given that old kernels can't deal with that
at all I suspect that needs to be changed and support the old buckets
for old file systems.  And once that is done I don't think we should
enable the long single chain without the log item anyway, and remove
another possibly combination of flags.
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 31b7ece985bb..a859fe601f6e 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -79,12 +79,14 @@  struct xfs_ifork;
 #define XFS_SB_VERSION2_PROJID32BIT	0x00000080	/* 32 bit project id */
 #define XFS_SB_VERSION2_CRCBIT		0x00000100	/* metadata CRCs */
 #define XFS_SB_VERSION2_FTYPE		0x00000200	/* inode type in dir */
+#define XFS_SB_VERSION2_NEW_IUNLINK	0x00000400	/* (v4) new iunlink */
 
 #define	XFS_SB_VERSION2_OKBITS		\
 	(XFS_SB_VERSION2_LAZYSBCOUNTBIT	| \
 	 XFS_SB_VERSION2_ATTR2BIT	| \
 	 XFS_SB_VERSION2_PROJID32BIT	| \
-	 XFS_SB_VERSION2_FTYPE)
+	 XFS_SB_VERSION2_FTYPE		| \
+	 XFS_SB_VERSION2_NEW_IUNLINK)
 
 /* Maximum size of the xfs filesystem label, no terminating NULL */
 #define XFSLABEL_MAX			12
@@ -479,7 +481,9 @@  xfs_sb_has_incompat_feature(
 	return (sbp->sb_features_incompat & feature) != 0;
 }
 
-#define XFS_SB_FEAT_INCOMPAT_LOG_ALL 0
+#define XFS_SB_FEAT_INCOMPAT_LOG_NEW_IUNLINK	(1 << 0)
+#define XFS_SB_FEAT_INCOMPAT_LOG_ALL	\
+		(XFS_SB_FEAT_INCOMPAT_LOG_NEW_IUNLINK)
 #define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN	~XFS_SB_FEAT_INCOMPAT_LOG_ALL
 static inline bool
 xfs_sb_has_incompat_log_feature(
@@ -563,6 +567,27 @@  static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp)
 		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK);
 }
 
+static inline bool xfs_sb_has_new_iunlink(struct xfs_sb *sbp)
+{
+	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5)
+		return sbp->sb_features_log_incompat &
+			XFS_SB_FEAT_INCOMPAT_LOG_NEW_IUNLINK;
+
+	return xfs_sb_version_hasmorebits(sbp) &&
+		(sbp->sb_features2 & XFS_SB_VERSION2_NEW_IUNLINK);
+}
+
+static inline void xfs_sb_add_new_iunlink(struct xfs_sb *sbp)
+{
+	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
+		sbp->sb_features_log_incompat |=
+			XFS_SB_FEAT_INCOMPAT_LOG_NEW_IUNLINK;
+		return;
+	}
+	sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
+	sbp->sb_features2 |= XFS_SB_VERSION2_NEW_IUNLINK;
+}
+
 /*
  * end of superblock version macros
  */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7ee778bcde06..1656ed7dcadf 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1952,7 +1952,7 @@  xfs_iunlink_update_bucket(
 	if (!log || log->l_flags & XLOG_RECOVERY_NEEDED) {
 		ASSERT(cur_agino != NULLAGINO);
 
-		if (be32_to_cpu(agi->agi_unlinked[0]) != cur_agino)
+		if (!xfs_sb_has_new_iunlink(&mp->m_sb))
 			bucket_index = cur_agino % XFS_AGI_UNLINKED_BUCKETS;
 	}
 
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index f28c969af272..a3b2e3c3d32f 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -836,6 +836,12 @@  xfs_mountfs(
 		goto out_fail_wait;
 	}
 
+	if (!xfs_sb_has_new_iunlink(sbp)) {
+		xfs_warn(mp, "will switch to long iunlinked list on r/w");
+		xfs_sb_add_new_iunlink(sbp);
+		mp->m_update_sb = true;
+	}
+
 	/* Make sure the summary counts are ok. */
 	error = xfs_check_summary_counts(mp);
 	if (error)