diff mbox series

xfs: stop advertising SB_I_VERSION

Message ID 20240228042859.841623-1-david@fromorbit.com (mailing list archive)
State New
Headers show
Series xfs: stop advertising SB_I_VERSION | expand

Commit Message

Dave Chinner Feb. 28, 2024, 4:28 a.m. UTC
From: Dave Chinner <dchinner@redhat.com>

The redefinition of how NFS wants inode->i_version to be updated is
incomaptible with the XFS i_version mechanism. The VFS now wants
inode->i_version to only change when ctime changes (i.e. it has
become a ctime change counter, not an inode change counter). XFS has
fine grained timestamps, so it can just use ctime for the NFS change
cookie like it still does for V4 XFS filesystems.

We still want XFS to update the inode change counter as it currently
does, so convert all the code that checks SB_I_VERSION to check for
v5 format support. Then we can remove the SB_I_VERSION flag from the
VFS superblock to indicate that inode->i_version is not a valid
change counter and should not be used as such.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/libxfs/xfs_trans_inode.c | 15 +++++----------
 fs/xfs/xfs_iops.c               | 16 +++-------------
 fs/xfs/xfs_super.c              |  8 --------
 3 files changed, 8 insertions(+), 31 deletions(-)

Comments

Darrick J. Wong Feb. 28, 2024, 4:08 p.m. UTC | #1
On Wed, Feb 28, 2024 at 03:28:59PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> The redefinition of how NFS wants inode->i_version to be updated is
> incomaptible with the XFS i_version mechanism. The VFS now wants
> inode->i_version to only change when ctime changes (i.e. it has
> become a ctime change counter, not an inode change counter). XFS has
> fine grained timestamps, so it can just use ctime for the NFS change
> cookie like it still does for V4 XFS filesystems.
> 
> We still want XFS to update the inode change counter as it currently
> does, so convert all the code that checks SB_I_VERSION to check for
> v5 format support. Then we can remove the SB_I_VERSION flag from the
> VFS superblock to indicate that inode->i_version is not a valid
> change counter and should not be used as such.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>

Seeing as NFS and XFS' definition of i_version have diverged, I suppose
divorce is the only option.  But please, let's get rid of all the
*iversion() calls in the codebase.

With my paranoia hat on: let's add an i_changecounter to xfs_inode and
completely stop using the inode.i_version to prevent the vfs from
messing with us.

At some point we can rev the ondisk format to add a new field so that
"di_version" can be whatever u64 cookie the vfs passes us through
i_version.

--D

> ---
>  fs/xfs/libxfs/xfs_trans_inode.c | 15 +++++----------
>  fs/xfs/xfs_iops.c               | 16 +++-------------
>  fs/xfs/xfs_super.c              |  8 --------
>  3 files changed, 8 insertions(+), 31 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
> index 70e97ea6eee7..8071aefad728 100644
> --- a/fs/xfs/libxfs/xfs_trans_inode.c
> +++ b/fs/xfs/libxfs/xfs_trans_inode.c
> @@ -97,17 +97,12 @@ xfs_trans_log_inode(
>  
>  	/*
>  	 * First time we log the inode in a transaction, bump the inode change
> -	 * counter if it is configured for this to occur. While we have the
> -	 * inode locked exclusively for metadata modification, we can usually
> -	 * avoid setting XFS_ILOG_CORE if no one has queried the value since
> -	 * the last time it was incremented. If we have XFS_ILOG_CORE already
> -	 * set however, then go ahead and bump the i_version counter
> -	 * unconditionally.
> +	 * counter if it is configured for this to occur.
>  	 */
> -	if (!test_and_set_bit(XFS_LI_DIRTY, &iip->ili_item.li_flags)) {
> -		if (IS_I_VERSION(inode) &&
> -		    inode_maybe_inc_iversion(inode, flags & XFS_ILOG_CORE))
> -			flags |= XFS_ILOG_IVERSION;
> +	if (!test_and_set_bit(XFS_LI_DIRTY, &iip->ili_item.li_flags) &&
> +	    xfs_has_crc(ip->i_mount)) {
> +		inode->i_version++;
> +		flags |= XFS_ILOG_IVERSION;
>  	}
>  
>  	iip->ili_dirty_flags |= flags;
> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index be102fd49560..97e792d9d79a 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -584,11 +584,6 @@ xfs_vn_getattr(
>  		}
>  	}
>  
> -	if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
> -		stat->change_cookie = inode_query_iversion(inode);
> -		stat->result_mask |= STATX_CHANGE_COOKIE;
> -	}
> -
>  	/*
>  	 * Note: If you add another clause to set an attribute flag, please
>  	 * update attributes_mask below.
> @@ -1044,16 +1039,11 @@ xfs_vn_update_time(
>  	struct timespec64	now;
>  
>  	trace_xfs_update_time(ip);
> +	ASSERT(!(flags & S_VERSION));
>  
>  	if (inode->i_sb->s_flags & SB_LAZYTIME) {
> -		if (!((flags & S_VERSION) &&
> -		      inode_maybe_inc_iversion(inode, false))) {
> -			generic_update_time(inode, flags);
> -			return 0;
> -		}
> -
> -		/* Capture the iversion update that just occurred */
> -		log_flags |= XFS_ILOG_CORE;
> +		generic_update_time(inode, flags);
> +		return 0;
>  	}
>  
>  	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index 6ce1e6deb7ec..657ce0423f1d 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -1692,10 +1692,6 @@ xfs_fs_fill_super(
>  
>  	set_posix_acl_flag(sb);
>  
> -	/* version 5 superblocks support inode version counters. */
> -	if (xfs_has_crc(mp))
> -		sb->s_flags |= SB_I_VERSION;
> -
>  	if (xfs_has_dax_always(mp)) {
>  		error = xfs_setup_dax_always(mp);
>  		if (error)
> @@ -1917,10 +1913,6 @@ xfs_fs_reconfigure(
>  	int			flags = fc->sb_flags;
>  	int			error;
>  
> -	/* version 5 superblocks always support version counters. */
> -	if (xfs_has_crc(mp))
> -		fc->sb_flags |= SB_I_VERSION;
> -
>  	error = xfs_fs_validate_params(new_mp);
>  	if (error)
>  		return error;
> -- 
> 2.43.0
> 
>
Dave Chinner Feb. 28, 2024, 11:55 p.m. UTC | #2
On Wed, Feb 28, 2024 at 08:08:48AM -0800, Darrick J. Wong wrote:
> On Wed, Feb 28, 2024 at 03:28:59PM +1100, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > The redefinition of how NFS wants inode->i_version to be updated is
> > incomaptible with the XFS i_version mechanism. The VFS now wants
> > inode->i_version to only change when ctime changes (i.e. it has
> > become a ctime change counter, not an inode change counter). XFS has
> > fine grained timestamps, so it can just use ctime for the NFS change
> > cookie like it still does for V4 XFS filesystems.
> > 
> > We still want XFS to update the inode change counter as it currently
> > does, so convert all the code that checks SB_I_VERSION to check for
> > v5 format support. Then we can remove the SB_I_VERSION flag from the
> > VFS superblock to indicate that inode->i_version is not a valid
> > change counter and should not be used as such.
> > 
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> 
> Seeing as NFS and XFS' definition of i_version have diverged, I suppose
> divorce is the only option.  But please, let's get rid of all the
> *iversion() calls in the codebase.
> 
> With my paranoia hat on: let's add an i_changecounter to xfs_inode and
> completely stop using the inode.i_version to prevent the vfs from
> messing with us.

Ok, I'll do that in a new patch rather than try to do everything in
a single complicated patch.

Cheers,

Dave.
Darrick J. Wong Feb. 29, 2024, 12:09 a.m. UTC | #3
On Thu, Feb 29, 2024 at 10:55:22AM +1100, Dave Chinner wrote:
> On Wed, Feb 28, 2024 at 08:08:48AM -0800, Darrick J. Wong wrote:
> > On Wed, Feb 28, 2024 at 03:28:59PM +1100, Dave Chinner wrote:
> > > From: Dave Chinner <dchinner@redhat.com>
> > > 
> > > The redefinition of how NFS wants inode->i_version to be updated is
> > > incomaptible with the XFS i_version mechanism. The VFS now wants
> > > inode->i_version to only change when ctime changes (i.e. it has
> > > become a ctime change counter, not an inode change counter). XFS has
> > > fine grained timestamps, so it can just use ctime for the NFS change
> > > cookie like it still does for V4 XFS filesystems.
> > > 
> > > We still want XFS to update the inode change counter as it currently
> > > does, so convert all the code that checks SB_I_VERSION to check for
> > > v5 format support. Then we can remove the SB_I_VERSION flag from the
> > > VFS superblock to indicate that inode->i_version is not a valid
> > > change counter and should not be used as such.
> > > 
> > > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > 
> > Seeing as NFS and XFS' definition of i_version have diverged, I suppose
> > divorce is the only option.  But please, let's get rid of all the
> > *iversion() calls in the codebase.
> > 
> > With my paranoia hat on: let's add an i_changecounter to xfs_inode and
> > completely stop using the inode.i_version to prevent the vfs from
> > messing with us.
> 
> Ok, I'll do that in a new patch rather than try to do everything in
> a single complicated patch.

Sounds good!

--D

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
>
Jeffrey Layton March 1, 2024, 1:42 p.m. UTC | #4
On Wed, 2024-02-28 at 15:28 +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> The redefinition of how NFS wants inode->i_version to be updated is
> incomaptible with the XFS i_version mechanism. The VFS now wants
> inode->i_version to only change when ctime changes (i.e. it has
> become a ctime change counter, not an inode change counter). XFS has
> fine grained timestamps, so it can just use ctime for the NFS change
> cookie like it still does for V4 XFS filesystems.
> 

Are you saying that XFS has timestamp granularity finer than
current_time() reports? I thought XFS used the same clocksource as
everyone else.

At LPC, you mentioned you had some patches in progress to use the unused
bits in the tv_nsec field as a change counter to track changes that
occurred within the same timer tick.

Did that not pan out for some reason? I'd like to understand why if so.
It sounded like a reasonable solution to the problem.

> 
> We still want XFS to update the inode change counter as it currently
> does, so convert all the code that checks SB_I_VERSION to check for
> v5 format support. Then we can remove the SB_I_VERSION flag from the
> VFS superblock to indicate that inode->i_version is not a valid
> change counter and should not be used as such.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/libxfs/xfs_trans_inode.c | 15 +++++----------
>  fs/xfs/xfs_iops.c               | 16 +++-------------
>  fs/xfs/xfs_super.c              |  8 --------
>  3 files changed, 8 insertions(+), 31 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
> index 70e97ea6eee7..8071aefad728 100644
> --- a/fs/xfs/libxfs/xfs_trans_inode.c
> +++ b/fs/xfs/libxfs/xfs_trans_inode.c
> @@ -97,17 +97,12 @@ xfs_trans_log_inode(
>  
>  	/*
>  	 * First time we log the inode in a transaction, bump the inode change
> -	 * counter if it is configured for this to occur. While we have the
> -	 * inode locked exclusively for metadata modification, we can usually
> -	 * avoid setting XFS_ILOG_CORE if no one has queried the value since
> -	 * the last time it was incremented. If we have XFS_ILOG_CORE already
> -	 * set however, then go ahead and bump the i_version counter
> -	 * unconditionally.
> +	 * counter if it is configured for this to occur.
>  	 */
> -	if (!test_and_set_bit(XFS_LI_DIRTY, &iip->ili_item.li_flags)) {
> -		if (IS_I_VERSION(inode) &&
> -		    inode_maybe_inc_iversion(inode, flags & XFS_ILOG_CORE))
> -			flags |= XFS_ILOG_IVERSION;
> +	if (!test_and_set_bit(XFS_LI_DIRTY, &iip->ili_item.li_flags) &&
> +	    xfs_has_crc(ip->i_mount)) {
> +		inode->i_version++;
> +		flags |= XFS_ILOG_IVERSION;
>  	}
>  
>  	iip->ili_dirty_flags |= flags;
> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index be102fd49560..97e792d9d79a 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -584,11 +584,6 @@ xfs_vn_getattr(
>  		}
>  	}
>  
> -	if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
> -		stat->change_cookie = inode_query_iversion(inode);
> -		stat->result_mask |= STATX_CHANGE_COOKIE;
> -	}
> -
>  	/*
>  	 * Note: If you add another clause to set an attribute flag, please
>  	 * update attributes_mask below.
> @@ -1044,16 +1039,11 @@ xfs_vn_update_time(
>  	struct timespec64	now;
>  
>  	trace_xfs_update_time(ip);
> +	ASSERT(!(flags & S_VERSION));
>  
>  	if (inode->i_sb->s_flags & SB_LAZYTIME) {
> -		if (!((flags & S_VERSION) &&
> -		      inode_maybe_inc_iversion(inode, false))) {
> -			generic_update_time(inode, flags);
> -			return 0;
> -		}
> -
> -		/* Capture the iversion update that just occurred */
> -		log_flags |= XFS_ILOG_CORE;
> +		generic_update_time(inode, flags);
> +		return 0;
>  	}
>  
>  	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index 6ce1e6deb7ec..657ce0423f1d 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -1692,10 +1692,6 @@ xfs_fs_fill_super(
>  
>  	set_posix_acl_flag(sb);
>  
> -	/* version 5 superblocks support inode version counters. */
> -	if (xfs_has_crc(mp))
> -		sb->s_flags |= SB_I_VERSION;
> -
>  	if (xfs_has_dax_always(mp)) {
>  		error = xfs_setup_dax_always(mp);
>  		if (error)
> @@ -1917,10 +1913,6 @@ xfs_fs_reconfigure(
>  	int			flags = fc->sb_flags;
>  	int			error;
>  
> -	/* version 5 superblocks always support version counters. */
> -	if (xfs_has_crc(mp))
> -		fc->sb_flags |= SB_I_VERSION;
> -
>  	error = xfs_fs_validate_params(new_mp);
>  	if (error)
>  		return error;

Acked-by: Jeff Layton <jlayton@kernel.org>
Dave Chinner March 3, 2024, 11:35 p.m. UTC | #5
On Fri, Mar 01, 2024 at 08:42:17AM -0500, Jeff Layton wrote:
> On Wed, 2024-02-28 at 15:28 +1100, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > The redefinition of how NFS wants inode->i_version to be updated is
> > incomaptible with the XFS i_version mechanism. The VFS now wants
> > inode->i_version to only change when ctime changes (i.e. it has
> > become a ctime change counter, not an inode change counter). XFS has
> > fine grained timestamps, so it can just use ctime for the NFS change
> > cookie like it still does for V4 XFS filesystems.
> > 
> 
> Are you saying that XFS has timestamp granularity finer than
> current_time() reports?

No.

> I thought XFS used the same clocksource as
> everyone else.

It does.

> At LPC, you mentioned you had some patches in progress to use the unused
> bits in the tv_nsec field as a change counter to track changes that
> occurred within the same timer tick.

Still a possibility, but I wasn't going to do anything in that
direction because it still seemed like you were still trying to make
progress down the path of generic timestamp granularity
improvements.

> Did that not pan out for some reason? I'd like to understand why if so.
> It sounded like a reasonable solution to the problem.

Time. And the fact that ctime granularity isn't SB_I_VERSION at all,
so whilst we might support statx change cookies in the future, that
will not be via a SB_I_VERSION mechanism.

i.e. statx doesn't require us to support SB_I_VERSION for the change
cookies, so until we are in a position to present a higher
resolution change cookie via ctime we're just going to remove
support for both.

> Acked-by: Jeff Layton <jlayton@kernel.org>

Thanks!

-Dave.
Jeffrey Layton March 4, 2024, 12:45 a.m. UTC | #6
On Mon, 2024-03-04 at 10:35 +1100, Dave Chinner wrote:
> On Fri, Mar 01, 2024 at 08:42:17AM -0500, Jeff Layton wrote:
> > On Wed, 2024-02-28 at 15:28 +1100, Dave Chinner wrote:
> > > From: Dave Chinner <dchinner@redhat.com>
> > > 
> > > The redefinition of how NFS wants inode->i_version to be updated is
> > > incomaptible with the XFS i_version mechanism. The VFS now wants
> > > inode->i_version to only change when ctime changes (i.e. it has
> > > become a ctime change counter, not an inode change counter). XFS has
> > > fine grained timestamps, so it can just use ctime for the NFS change
> > > cookie like it still does for V4 XFS filesystems.
> > > 
> > 
> > Are you saying that XFS has timestamp granularity finer than
> > current_time() reports?
> 
> No.
> 
> > I thought XFS used the same clocksource as
> > everyone else.
> 
> It does.
> 
> > At LPC, you mentioned you had some patches in progress to use the unused
> > bits in the tv_nsec field as a change counter to track changes that
> > occurred within the same timer tick.
> 
> Still a possibility, but I wasn't going to do anything in that
> direction because it still seemed like you were still trying to make
> progress down the path of generic timestamp granularity
> improvements.
> 

Ok, good. I'm glad to hear that that approach isn't unworkable (yet!).

As far as timestamp granularity goes, Linus made it clear that he was
not on board with any approach that would even more complexity than the
original multigrain ctime patches. That pretty much ended my work in
that area.


> > Did that not pan out for some reason? I'd like to understand why if so.
> > It sounded like a reasonable solution to the problem.
> 
> Time. And the fact that ctime granularity isn't SB_I_VERSION at all,
> so whilst we might support statx change cookies in the future, that
> will not be via a SB_I_VERSION mechanism.
>
> i.e. statx doesn't require us to support SB_I_VERSION for the change
> cookies, so until we are in a position to present a higher
> resolution change cookie via ctime we're just going to remove
> support for both.
>

Yeah, that makes sense.

> > Acked-by: Jeff Layton <jlayton@kernel.org>
> 
> Thanks!
> 
> -Dave.
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 70e97ea6eee7..8071aefad728 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -97,17 +97,12 @@  xfs_trans_log_inode(
 
 	/*
 	 * First time we log the inode in a transaction, bump the inode change
-	 * counter if it is configured for this to occur. While we have the
-	 * inode locked exclusively for metadata modification, we can usually
-	 * avoid setting XFS_ILOG_CORE if no one has queried the value since
-	 * the last time it was incremented. If we have XFS_ILOG_CORE already
-	 * set however, then go ahead and bump the i_version counter
-	 * unconditionally.
+	 * counter if it is configured for this to occur.
 	 */
-	if (!test_and_set_bit(XFS_LI_DIRTY, &iip->ili_item.li_flags)) {
-		if (IS_I_VERSION(inode) &&
-		    inode_maybe_inc_iversion(inode, flags & XFS_ILOG_CORE))
-			flags |= XFS_ILOG_IVERSION;
+	if (!test_and_set_bit(XFS_LI_DIRTY, &iip->ili_item.li_flags) &&
+	    xfs_has_crc(ip->i_mount)) {
+		inode->i_version++;
+		flags |= XFS_ILOG_IVERSION;
 	}
 
 	iip->ili_dirty_flags |= flags;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index be102fd49560..97e792d9d79a 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -584,11 +584,6 @@  xfs_vn_getattr(
 		}
 	}
 
-	if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
-		stat->change_cookie = inode_query_iversion(inode);
-		stat->result_mask |= STATX_CHANGE_COOKIE;
-	}
-
 	/*
 	 * Note: If you add another clause to set an attribute flag, please
 	 * update attributes_mask below.
@@ -1044,16 +1039,11 @@  xfs_vn_update_time(
 	struct timespec64	now;
 
 	trace_xfs_update_time(ip);
+	ASSERT(!(flags & S_VERSION));
 
 	if (inode->i_sb->s_flags & SB_LAZYTIME) {
-		if (!((flags & S_VERSION) &&
-		      inode_maybe_inc_iversion(inode, false))) {
-			generic_update_time(inode, flags);
-			return 0;
-		}
-
-		/* Capture the iversion update that just occurred */
-		log_flags |= XFS_ILOG_CORE;
+		generic_update_time(inode, flags);
+		return 0;
 	}
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 6ce1e6deb7ec..657ce0423f1d 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1692,10 +1692,6 @@  xfs_fs_fill_super(
 
 	set_posix_acl_flag(sb);
 
-	/* version 5 superblocks support inode version counters. */
-	if (xfs_has_crc(mp))
-		sb->s_flags |= SB_I_VERSION;
-
 	if (xfs_has_dax_always(mp)) {
 		error = xfs_setup_dax_always(mp);
 		if (error)
@@ -1917,10 +1913,6 @@  xfs_fs_reconfigure(
 	int			flags = fc->sb_flags;
 	int			error;
 
-	/* version 5 superblocks always support version counters. */
-	if (xfs_has_crc(mp))
-		fc->sb_flags |= SB_I_VERSION;
-
 	error = xfs_fs_validate_params(new_mp);
 	if (error)
 		return error;