diff mbox series

[RFC,3/3,RESEND] xfs: mark the inode for high-res timestamp update in getattr

Message ID 20230411143702.64495-4-jlayton@kernel.org (mailing list archive)
State Mainlined, archived
Headers show
Series fs: opportunistic high-res file timestamps | expand

Commit Message

Jeffrey Layton April 11, 2023, 2:37 p.m. UTC
When the mtime or ctime is being queried via getattr, ensure that we
mark the inode for a high-res timestamp update on the next pass. Also,
switch to current_cmtime for other c/mtime updates.

With this change, we're better off having the NFS server just ignore
the i_version field and have it use the ctime instead, so clear the
STATX_CHANGE_COOKIE flag in the result mask in ->getattr.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/xfs/libxfs/xfs_trans_inode.c |  2 +-
 fs/xfs/xfs_acl.c                |  2 +-
 fs/xfs/xfs_inode.c              |  2 +-
 fs/xfs/xfs_iops.c               | 15 ++++++++++++---
 4 files changed, 15 insertions(+), 6 deletions(-)

Comments

Darrick J. Wong April 11, 2023, 2:54 p.m. UTC | #1
On Tue, Apr 11, 2023 at 10:37:02AM -0400, Jeff Layton wrote:
> When the mtime or ctime is being queried via getattr, ensure that we
> mark the inode for a high-res timestamp update on the next pass. Also,
> switch to current_cmtime for other c/mtime updates.
> 
> With this change, we're better off having the NFS server just ignore
> the i_version field and have it use the ctime instead, so clear the
> STATX_CHANGE_COOKIE flag in the result mask in ->getattr.
> 
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/xfs/libxfs/xfs_trans_inode.c |  2 +-
>  fs/xfs/xfs_acl.c                |  2 +-
>  fs/xfs/xfs_inode.c              |  2 +-
>  fs/xfs/xfs_iops.c               | 15 ++++++++++++---
>  4 files changed, 15 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
> index 8b5547073379..9ad7c229c617 100644
> --- a/fs/xfs/libxfs/xfs_trans_inode.c
> +++ b/fs/xfs/libxfs/xfs_trans_inode.c
> @@ -63,7 +63,7 @@ xfs_trans_ichgtime(
>  	ASSERT(tp);
>  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
>  
> -	tv = current_time(inode);
> +	tv = current_cmtime(inode);
>  
>  	if (flags & XFS_ICHGTIME_MOD)
>  		inode->i_mtime = tv;
> diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
> index 791db7d9c849..461adc58cf8c 100644
> --- a/fs/xfs/xfs_acl.c
> +++ b/fs/xfs/xfs_acl.c
> @@ -233,7 +233,7 @@ xfs_acl_set_mode(
>  	xfs_ilock(ip, XFS_ILOCK_EXCL);
>  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
>  	inode->i_mode = mode;
> -	inode->i_ctime = current_time(inode);
> +	inode->i_ctime = current_cmtime(inode);

Hmm, now we're adding a spinlock to all these updates.
Does lockdep have anything exciting to say about this?

(I don't think it will, just wondering aloud...)

>  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
>  
>  	if (xfs_has_wsync(mp))
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 5808abab786c..80f9d731e261 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -843,7 +843,7 @@ xfs_init_new_inode(
>  	ip->i_df.if_nextents = 0;
>  	ASSERT(ip->i_nblocks == 0);
>  
> -	tv = current_time(inode);
> +	tv = current_cmtime(inode);
>  	inode->i_mtime = tv;
>  	inode->i_atime = tv;
>  	inode->i_ctime = tv;
> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index 24718adb3c16..a0b07f90e16c 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -565,6 +565,15 @@ xfs_vn_getattr(
>  	if (xfs_is_shutdown(mp))
>  		return -EIO;
>  
> +	/*
> +	 * XFS uses the i_version infrastructure to track any change to
> +	 * the inode, including atime updates. This means that the i_version
> +	 * returned by getattr doesn't conform to what the callers expect.
> +	 * Clear it here so that nfsd will fake up a change cookie from the
> +	 * ctime instead.
> +	 */
> +	stat->result_mask &= ~STATX_CHANGE_COOKIE;
> +
>  	stat->size = XFS_ISIZE(ip);
>  	stat->dev = inode->i_sb->s_dev;
>  	stat->mode = inode->i_mode;
> @@ -573,8 +582,8 @@ xfs_vn_getattr(
>  	stat->gid = vfsgid_into_kgid(vfsgid);
>  	stat->ino = ip->i_ino;
>  	stat->atime = inode->i_atime;
> -	stat->mtime = inode->i_mtime;
> -	stat->ctime = inode->i_ctime;
> +	if (request_mask & (STATX_CTIME|STATX_MTIME))
> +		fill_cmtime_and_mark(inode, stat);

Should we be setting STATX_[CM]TIME in the result_mask, just in case the
caller asked for ctime and not mtime?

--D

>  	stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks);
>  
>  	if (xfs_has_v3inodes(mp)) {
> @@ -917,7 +926,7 @@ xfs_setattr_size(
>  	if (newsize != oldsize &&
>  	    !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
>  		iattr->ia_ctime = iattr->ia_mtime =
> -			current_time(inode);
> +			current_cmtime(inode);
>  		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
>  	}
>  
> -- 
> 2.39.2
>
Christian Brauner April 11, 2023, 3:15 p.m. UTC | #2
On Tue, Apr 11, 2023 at 07:54:46AM -0700, Darrick J. Wong wrote:
> On Tue, Apr 11, 2023 at 10:37:02AM -0400, Jeff Layton wrote:
> > When the mtime or ctime is being queried via getattr, ensure that we
> > mark the inode for a high-res timestamp update on the next pass. Also,
> > switch to current_cmtime for other c/mtime updates.
> > 
> > With this change, we're better off having the NFS server just ignore
> > the i_version field and have it use the ctime instead, so clear the
> > STATX_CHANGE_COOKIE flag in the result mask in ->getattr.
> > 
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> >  fs/xfs/libxfs/xfs_trans_inode.c |  2 +-
> >  fs/xfs/xfs_acl.c                |  2 +-
> >  fs/xfs/xfs_inode.c              |  2 +-
> >  fs/xfs/xfs_iops.c               | 15 ++++++++++++---
> >  4 files changed, 15 insertions(+), 6 deletions(-)
> > 
> > diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
> > index 8b5547073379..9ad7c229c617 100644
> > --- a/fs/xfs/libxfs/xfs_trans_inode.c
> > +++ b/fs/xfs/libxfs/xfs_trans_inode.c
> > @@ -63,7 +63,7 @@ xfs_trans_ichgtime(
> >  	ASSERT(tp);
> >  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
> >  
> > -	tv = current_time(inode);
> > +	tv = current_cmtime(inode);
> >  
> >  	if (flags & XFS_ICHGTIME_MOD)
> >  		inode->i_mtime = tv;
> > diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
> > index 791db7d9c849..461adc58cf8c 100644
> > --- a/fs/xfs/xfs_acl.c
> > +++ b/fs/xfs/xfs_acl.c
> > @@ -233,7 +233,7 @@ xfs_acl_set_mode(
> >  	xfs_ilock(ip, XFS_ILOCK_EXCL);
> >  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
> >  	inode->i_mode = mode;
> > -	inode->i_ctime = current_time(inode);
> > +	inode->i_ctime = current_cmtime(inode);
> 
> Hmm, now we're adding a spinlock to all these updates.
> Does lockdep have anything exciting to say about this?
> 
> (I don't think it will, just wondering aloud...)
> 
> >  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
> >  
> >  	if (xfs_has_wsync(mp))
> > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > index 5808abab786c..80f9d731e261 100644
> > --- a/fs/xfs/xfs_inode.c
> > +++ b/fs/xfs/xfs_inode.c
> > @@ -843,7 +843,7 @@ xfs_init_new_inode(
> >  	ip->i_df.if_nextents = 0;
> >  	ASSERT(ip->i_nblocks == 0);
> >  
> > -	tv = current_time(inode);
> > +	tv = current_cmtime(inode);
> >  	inode->i_mtime = tv;
> >  	inode->i_atime = tv;
> >  	inode->i_ctime = tv;
> > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> > index 24718adb3c16..a0b07f90e16c 100644
> > --- a/fs/xfs/xfs_iops.c
> > +++ b/fs/xfs/xfs_iops.c
> > @@ -565,6 +565,15 @@ xfs_vn_getattr(
> >  	if (xfs_is_shutdown(mp))
> >  		return -EIO;
> >  
> > +	/*
> > +	 * XFS uses the i_version infrastructure to track any change to
> > +	 * the inode, including atime updates. This means that the i_version
> > +	 * returned by getattr doesn't conform to what the callers expect.
> > +	 * Clear it here so that nfsd will fake up a change cookie from the
> > +	 * ctime instead.
> > +	 */
> > +	stat->result_mask &= ~STATX_CHANGE_COOKIE;
> > +
> >  	stat->size = XFS_ISIZE(ip);
> >  	stat->dev = inode->i_sb->s_dev;
> >  	stat->mode = inode->i_mode;
> > @@ -573,8 +582,8 @@ xfs_vn_getattr(
> >  	stat->gid = vfsgid_into_kgid(vfsgid);
> >  	stat->ino = ip->i_ino;
> >  	stat->atime = inode->i_atime;
> > -	stat->mtime = inode->i_mtime;
> > -	stat->ctime = inode->i_ctime;
> > +	if (request_mask & (STATX_CTIME|STATX_MTIME))
> > +		fill_cmtime_and_mark(inode, stat);
> 
> Should we be setting STATX_[CM]TIME in the result_mask, just in case the
> caller asked for ctime and not mtime?

I think the expectation is that everything in STATX_BASIC_MASK is always
returned to allow equivalence between stat() and statx(). So requesting
STATX_CTIME separately from STATX_MTIME isn't implemented widely, maybe
even not at atll?, yet.
Jeffrey Layton April 11, 2023, 3:58 p.m. UTC | #3
On Tue, 2023-04-11 at 07:54 -0700, Darrick J. Wong wrote:
> On Tue, Apr 11, 2023 at 10:37:02AM -0400, Jeff Layton wrote:
> > When the mtime or ctime is being queried via getattr, ensure that we
> > mark the inode for a high-res timestamp update on the next pass. Also,
> > switch to current_cmtime for other c/mtime updates.
> > 
> > With this change, we're better off having the NFS server just ignore
> > the i_version field and have it use the ctime instead, so clear the
> > STATX_CHANGE_COOKIE flag in the result mask in ->getattr.
> > 
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> >  fs/xfs/libxfs/xfs_trans_inode.c |  2 +-
> >  fs/xfs/xfs_acl.c                |  2 +-
> >  fs/xfs/xfs_inode.c              |  2 +-
> >  fs/xfs/xfs_iops.c               | 15 ++++++++++++---
> >  4 files changed, 15 insertions(+), 6 deletions(-)
> > 
> > diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
> > index 8b5547073379..9ad7c229c617 100644
> > --- a/fs/xfs/libxfs/xfs_trans_inode.c
> > +++ b/fs/xfs/libxfs/xfs_trans_inode.c
> > @@ -63,7 +63,7 @@ xfs_trans_ichgtime(
> >  	ASSERT(tp);
> >  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
> >  
> > -	tv = current_time(inode);
> > +	tv = current_cmtime(inode);
> >  
> >  	if (flags & XFS_ICHGTIME_MOD)
> >  		inode->i_mtime = tv;
> > diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
> > index 791db7d9c849..461adc58cf8c 100644
> > --- a/fs/xfs/xfs_acl.c
> > +++ b/fs/xfs/xfs_acl.c
> > @@ -233,7 +233,7 @@ xfs_acl_set_mode(
> >  	xfs_ilock(ip, XFS_ILOCK_EXCL);
> >  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
> >  	inode->i_mode = mode;
> > -	inode->i_ctime = current_time(inode);
> > +	inode->i_ctime = current_cmtime(inode);
> 
> Hmm, now we're adding a spinlock to all these updates.
> Does lockdep have anything exciting to say about this?
> 
> (I don't think it will, just wondering aloud...)
> 

I haven't seen anything, but I haven't done any extensive testing yet
either. Still, that might be a good reason to avoid locking in this
scheme if we can.

> >  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
> >  
> >  	if (xfs_has_wsync(mp))
> > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > index 5808abab786c..80f9d731e261 100644
> > --- a/fs/xfs/xfs_inode.c
> > +++ b/fs/xfs/xfs_inode.c
> > @@ -843,7 +843,7 @@ xfs_init_new_inode(
> >  	ip->i_df.if_nextents = 0;
> >  	ASSERT(ip->i_nblocks == 0);
> >  
> > -	tv = current_time(inode);
> > +	tv = current_cmtime(inode);
> >  	inode->i_mtime = tv;
> >  	inode->i_atime = tv;
> >  	inode->i_ctime = tv;
> > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> > index 24718adb3c16..a0b07f90e16c 100644
> > --- a/fs/xfs/xfs_iops.c
> > +++ b/fs/xfs/xfs_iops.c
> > @@ -565,6 +565,15 @@ xfs_vn_getattr(
> >  	if (xfs_is_shutdown(mp))
> >  		return -EIO;
> >  
> > +	/*
> > +	 * XFS uses the i_version infrastructure to track any change to
> > +	 * the inode, including atime updates. This means that the i_version
> > +	 * returned by getattr doesn't conform to what the callers expect.
> > +	 * Clear it here so that nfsd will fake up a change cookie from the
> > +	 * ctime instead.
> > +	 */
> > +	stat->result_mask &= ~STATX_CHANGE_COOKIE;
> > +
> >  	stat->size = XFS_ISIZE(ip);
> >  	stat->dev = inode->i_sb->s_dev;
> >  	stat->mode = inode->i_mode;
> > @@ -573,8 +582,8 @@ xfs_vn_getattr(
> >  	stat->gid = vfsgid_into_kgid(vfsgid);
> >  	stat->ino = ip->i_ino;
> >  	stat->atime = inode->i_atime;
> > -	stat->mtime = inode->i_mtime;
> > -	stat->ctime = inode->i_ctime;
> > +	if (request_mask & (STATX_CTIME|STATX_MTIME))
> > +		fill_cmtime_and_mark(inode, stat);
> 
> Should we be setting STATX_[CM]TIME in the result_mask, just in case the
> caller asked for ctime and not mtime?
> 
> 

We already set the result_mask in vfs_getattr_nosec:

    stat->result_mask |= STATX_BASIC_STATS;

...and it's OK to return more attrs than were requested.

The bigger issue here is that it's not setting the ctime and mtime
fields while leaving the STATX_CTIME and STATX_MTIME flags set. I'll
plan to fix that up in some fashion in the next post.

Thanks!

> >  	stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks);
> >  
> >  	if (xfs_has_v3inodes(mp)) {
> > @@ -917,7 +926,7 @@ xfs_setattr_size(
> >  	if (newsize != oldsize &&
> >  	    !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
> >  		iattr->ia_ctime = iattr->ia_mtime =
> > -			current_time(inode);
> > +			current_cmtime(inode);
> >  		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
> >  	}
> >  
> > -- 
> > 2.39.2
> >
Jeffrey Layton April 11, 2023, 4:05 p.m. UTC | #4
On Tue, 2023-04-11 at 17:15 +0200, Christian Brauner wrote:
> On Tue, Apr 11, 2023 at 07:54:46AM -0700, Darrick J. Wong wrote:
> > On Tue, Apr 11, 2023 at 10:37:02AM -0400, Jeff Layton wrote:
> > > When the mtime or ctime is being queried via getattr, ensure that we
> > > mark the inode for a high-res timestamp update on the next pass. Also,
> > > switch to current_cmtime for other c/mtime updates.
> > > 
> > > With this change, we're better off having the NFS server just ignore
> > > the i_version field and have it use the ctime instead, so clear the
> > > STATX_CHANGE_COOKIE flag in the result mask in ->getattr.
> > > 
> > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > ---
> > >  fs/xfs/libxfs/xfs_trans_inode.c |  2 +-
> > >  fs/xfs/xfs_acl.c                |  2 +-
> > >  fs/xfs/xfs_inode.c              |  2 +-
> > >  fs/xfs/xfs_iops.c               | 15 ++++++++++++---
> > >  4 files changed, 15 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
> > > index 8b5547073379..9ad7c229c617 100644
> > > --- a/fs/xfs/libxfs/xfs_trans_inode.c
> > > +++ b/fs/xfs/libxfs/xfs_trans_inode.c
> > > @@ -63,7 +63,7 @@ xfs_trans_ichgtime(
> > >  	ASSERT(tp);
> > >  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
> > >  
> > > -	tv = current_time(inode);
> > > +	tv = current_cmtime(inode);
> > >  
> > >  	if (flags & XFS_ICHGTIME_MOD)
> > >  		inode->i_mtime = tv;
> > > diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
> > > index 791db7d9c849..461adc58cf8c 100644
> > > --- a/fs/xfs/xfs_acl.c
> > > +++ b/fs/xfs/xfs_acl.c
> > > @@ -233,7 +233,7 @@ xfs_acl_set_mode(
> > >  	xfs_ilock(ip, XFS_ILOCK_EXCL);
> > >  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
> > >  	inode->i_mode = mode;
> > > -	inode->i_ctime = current_time(inode);
> > > +	inode->i_ctime = current_cmtime(inode);
> > 
> > Hmm, now we're adding a spinlock to all these updates.
> > Does lockdep have anything exciting to say about this?
> > 
> > (I don't think it will, just wondering aloud...)
> > 
> > >  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
> > >  
> > >  	if (xfs_has_wsync(mp))
> > > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > > index 5808abab786c..80f9d731e261 100644
> > > --- a/fs/xfs/xfs_inode.c
> > > +++ b/fs/xfs/xfs_inode.c
> > > @@ -843,7 +843,7 @@ xfs_init_new_inode(
> > >  	ip->i_df.if_nextents = 0;
> > >  	ASSERT(ip->i_nblocks == 0);
> > >  
> > > -	tv = current_time(inode);
> > > +	tv = current_cmtime(inode);
> > >  	inode->i_mtime = tv;
> > >  	inode->i_atime = tv;
> > >  	inode->i_ctime = tv;
> > > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> > > index 24718adb3c16..a0b07f90e16c 100644
> > > --- a/fs/xfs/xfs_iops.c
> > > +++ b/fs/xfs/xfs_iops.c
> > > @@ -565,6 +565,15 @@ xfs_vn_getattr(
> > >  	if (xfs_is_shutdown(mp))
> > >  		return -EIO;
> > >  
> > > +	/*
> > > +	 * XFS uses the i_version infrastructure to track any change to
> > > +	 * the inode, including atime updates. This means that the i_version
> > > +	 * returned by getattr doesn't conform to what the callers expect.
> > > +	 * Clear it here so that nfsd will fake up a change cookie from the
> > > +	 * ctime instead.
> > > +	 */
> > > +	stat->result_mask &= ~STATX_CHANGE_COOKIE;
> > > +
> > >  	stat->size = XFS_ISIZE(ip);
> > >  	stat->dev = inode->i_sb->s_dev;
> > >  	stat->mode = inode->i_mode;
> > > @@ -573,8 +582,8 @@ xfs_vn_getattr(
> > >  	stat->gid = vfsgid_into_kgid(vfsgid);
> > >  	stat->ino = ip->i_ino;
> > >  	stat->atime = inode->i_atime;
> > > -	stat->mtime = inode->i_mtime;
> > > -	stat->ctime = inode->i_ctime;
> > > +	if (request_mask & (STATX_CTIME|STATX_MTIME))
> > > +		fill_cmtime_and_mark(inode, stat);
> > 
> > Should we be setting STATX_[CM]TIME in the result_mask, just in case the
> > caller asked for ctime and not mtime?
> 
> I think the expectation is that everything in STATX_BASIC_MASK is always
> returned to allow equivalence between stat() and statx(). So requesting
> STATX_CTIME separately from STATX_MTIME isn't implemented widely, maybe
> even not at atll?, yet.

Right. Probably we ought to be more selective with how the result_mask
gets set in vfs_getattr_nosec. Applications that use statx() effectively
are still pretty rare, so most calls will fetch both times anyway.
kernel test robot April 21, 2023, 2:04 a.m. UTC | #5
Hello,

kernel test robot noticed a -4.8% regression of fsmark.files_per_sec on:


commit: 219c0ed2b1f5d2ad921dc206b7b23a0e29e5c516 ("[RFC PATCH 3/3][RESEND] xfs: mark the inode for high-res timestamp update in getattr")
url: https://github.com/intel-lab-lkp/linux/commits/Jeff-Layton/fs-add-infrastructure-for-opportunistic-high-res-ctime-mtime-updates/20230411-223803
base: https://git.kernel.org/cgit/fs/xfs/xfs-linux.git for-next
patch link: https://lore.kernel.org/all/20230411143702.64495-4-jlayton@kernel.org/
patch subject: [RFC PATCH 3/3][RESEND] xfs: mark the inode for high-res timestamp update in getattr

testcase: fsmark
test machine: 96 threads 2 sockets (Ice Lake) with 256G memory
parameters:

	iterations: 1x
	nr_threads: 1t
	disk: 1BRD_32G
	fs: xfs
	fs2: nfsv4
	filesize: 4K
	test_size: 4G
	sync_method: fsyncBeforeClose
	nr_files_per_directory: 1fpd
	cpufreq_governor: performance

test-description: The fsmark is a file system benchmark to test synchronous write workloads, for example, mail servers workload.
test-url: https://sourceforge.net/projects/fsmark/



If you fix the issue, kindly add following tag
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Link: https://lore.kernel.org/oe-lkp/202304210920.e679570a-oliver.sang@intel.com


Details are as below:
-------------------------------------------------------------------------------------------------->


To reproduce:

        git clone https://github.com/intel/lkp-tests.git
        cd lkp-tests
        sudo bin/lkp install job.yaml           # job file is attached in this email
        bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run
        sudo bin/lkp run generated-yaml-file

        # if come across any failure that blocks the test,
        # please remove ~/.lkp and /lkp dir to run from a clean state.

=========================================================================================
compiler/cpufreq_governor/disk/filesize/fs2/fs/iterations/kconfig/nr_files_per_directory/nr_threads/rootfs/sync_method/tbox_group/test_size/testcase:
  gcc-11/performance/1BRD_32G/4K/nfsv4/xfs/1x/x86_64-rhel-8.3/1fpd/1t/debian-11.1-x86_64-20220510.cgz/fsyncBeforeClose/lkp-icl-2sp1/4G/fsmark

commit: 
  1aa3422f44 ("shmem: mark for high-res timestamps on next update after getattr")
  219c0ed2b1 ("xfs: mark the inode for high-res timestamp update in getattr")

1aa3422f44c3001f 219c0ed2b1f5d2ad921dc206b7b 
---------------- --------------------------- 
         %stddev     %change         %stddev
             \          |                \  
      3993            -4.8%       3802        fsmark.files_per_sec
    250.44            +5.0%     263.03        fsmark.time.elapsed_time
    250.44            +5.0%     263.03        fsmark.time.elapsed_time.max
      3182 ±  3%     +23.7%       3935 ±  6%  fsmark.time.involuntary_context_switches
   5761785           +12.9%    6502395        fsmark.time.voluntary_context_switches
   4682497 ± 28%     -49.1%    2381683 ± 51%  numa-meminfo.node0.FilePages
   1170661 ± 28%     -49.1%     595420 ± 51%  numa-vmstat.node0.nr_file_pages
   1771594 ±  7%     -19.5%    1425973 ±  6%  sched_debug.cpu.nr_switches.max
    126209            -6.2%     118428        vmstat.io.bo
    349577            +2.1%     356924        vmstat.system.cs
   1342752 ±  4%      +8.3%    1454235 ±  4%  turbostat.C1
      0.27            -0.0        0.25 ±  2%  turbostat.C1%
     42.83 ±  3%     +36.6%      58.50 ± 21%  turbostat.CoreTmp
     42.83 ±  3%     +36.6%      58.50 ± 21%  turbostat.PkgTmp
    219.76            +3.5%     227.36 ±  2%  turbostat.PkgWatt
    167.86            +2.1%     171.31        turbostat.RAMWatt
  12018871            +6.4%   12790775        proc-vmstat.numa_hit
  11930268            +6.4%   12699729        proc-vmstat.numa_local
    313977            +2.1%     320644        proc-vmstat.pgactivate
  18057605            +4.3%   18838501        proc-vmstat.pgalloc_normal
    781753            +4.3%     815507        proc-vmstat.pgfault
  12737726            +6.1%   13516747        proc-vmstat.pgfree
   1980800            +4.2%    2064896        proc-vmstat.unevictable_pgs_scanned
 1.061e+09            +2.9%  1.092e+09        perf-stat.i.branch-instructions
   6265739 ±  7%     +16.1%    7274779 ±  7%  perf-stat.i.cache-misses
 1.751e+08            +2.5%  1.795e+08        perf-stat.i.cache-references
    353475            +2.1%     361037        perf-stat.i.context-switches
 8.402e+08            +2.0%  8.568e+08        perf-stat.i.dTLB-stores
 5.501e+09            +1.8%    5.6e+09        perf-stat.i.instructions
     38.10            +2.2%      38.93        perf-stat.i.metric.M/sec
      3.58 ±  7%      +0.5        4.06 ±  7%  perf-stat.overall.cache-miss-rate%
      1409 ±  7%     -12.8%       1228 ±  6%  perf-stat.overall.cycles-between-cache-misses
 1.057e+09            +2.9%  1.088e+09        perf-stat.ps.branch-instructions
   6241536 ±  7%     +16.2%    7250504 ±  7%  perf-stat.ps.cache-misses
 1.744e+08            +2.5%  1.788e+08        perf-stat.ps.cache-references
    352052            +2.2%     359651        perf-stat.ps.context-switches
 8.369e+08            +2.0%  8.535e+08        perf-stat.ps.dTLB-stores
 5.479e+09            +1.8%  5.579e+09        perf-stat.ps.instructions
 1.377e+12            +7.0%  1.474e+12        perf-stat.total.instructions
      1.34 ± 12%      -0.2        1.10 ± 10%  perf-profile.calltrace.cycles-pp.nfs_do_access.nfs_permission.inode_permission.link_path_walk.path_openat
      1.35 ± 12%      -0.2        1.11 ± 10%  perf-profile.calltrace.cycles-pp.nfs_permission.inode_permission.link_path_walk.path_openat.do_filp_open
      1.36 ± 12%      -0.2        1.12 ± 10%  perf-profile.calltrace.cycles-pp.inode_permission.link_path_walk.path_openat.do_filp_open.do_sys_openat2
      1.20 ± 12%      -0.2        0.96 ± 11%  perf-profile.calltrace.cycles-pp.rpc_run_task.nfs4_call_sync_sequence._nfs4_proc_access.nfs4_proc_access.nfs_do_access
      1.26 ± 12%      -0.2        1.02 ± 11%  perf-profile.calltrace.cycles-pp._nfs4_proc_access.nfs4_proc_access.nfs_do_access.nfs_permission.inode_permission
      1.26 ± 13%      -0.2        1.03 ± 11%  perf-profile.calltrace.cycles-pp.nfs4_proc_access.nfs_do_access.nfs_permission.inode_permission.link_path_walk
      1.21 ± 12%      -0.2        0.98 ± 11%  perf-profile.calltrace.cycles-pp.nfs4_call_sync_sequence._nfs4_proc_access.nfs4_proc_access.nfs_do_access.nfs_permission
      1.16 ± 12%      -0.2        0.94 ± 11%  perf-profile.calltrace.cycles-pp.rpc_execute.rpc_run_task.nfs4_call_sync_sequence._nfs4_proc_access.nfs4_proc_access
      1.15 ± 12%      -0.2        0.93 ± 11%  perf-profile.calltrace.cycles-pp.__rpc_execute.rpc_execute.rpc_run_task.nfs4_call_sync_sequence._nfs4_proc_access
      1.84 ±  3%      -0.2        1.66 ±  5%  perf-profile.calltrace.cycles-pp.xlog_cil_push_work.process_one_work.worker_thread.kthread.ret_from_fork
      1.39 ±  9%      -0.2        1.21 ±  7%  perf-profile.calltrace.cycles-pp.nfsd_create_setattr.nfsd_create_locked.nfsd_create.nfsd4_create.nfsd4_proc_compound
      1.00 ± 10%      -0.2        0.84 ±  9%  perf-profile.calltrace.cycles-pp.xlog_state_release_iclog.xlog_force_lsn.xfs_log_force_seq.nfsd_create_setattr.nfsd_create_locked
      1.03 ± 10%      -0.2        0.86 ±  9%  perf-profile.calltrace.cycles-pp.xlog_force_lsn.xfs_log_force_seq.nfsd_create_setattr.nfsd_create_locked.nfsd_create
      2.02 ±  4%      +0.2        2.27 ±  5%  perf-profile.calltrace.cycles-pp.tcp_write_xmit.__tcp_push_pending_frames.tcp_sock_set_cork.svc_tcp_sendto.svc_send
      2.04 ±  4%      +0.3        2.29 ±  5%  perf-profile.calltrace.cycles-pp.__tcp_push_pending_frames.tcp_sock_set_cork.svc_tcp_sendto.svc_send.nfsd
      2.20 ±  3%      +0.3        2.46 ±  5%  perf-profile.calltrace.cycles-pp.tcp_sock_set_cork.svc_tcp_sendto.svc_send.nfsd.kthread
      2.29 ±  9%      +0.6        2.85 ±  8%  perf-profile.calltrace.cycles-pp.path_openat.do_filp_open.do_sys_openat2.__x64_sys_openat.do_syscall_64
      2.29 ±  9%      +0.6        2.85 ±  8%  perf-profile.calltrace.cycles-pp.do_filp_open.do_sys_openat2.__x64_sys_openat.do_syscall_64.entry_SYSCALL_64_after_hwframe
      2.34 ±  9%      +0.6        2.90 ±  7%  perf-profile.calltrace.cycles-pp.do_sys_openat2.__x64_sys_openat.do_syscall_64.entry_SYSCALL_64_after_hwframe.open64
      2.35 ±  9%      +0.6        2.92 ±  8%  perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.open64
      2.34 ±  9%      +0.6        2.90 ±  7%  perf-profile.calltrace.cycles-pp.__x64_sys_openat.do_syscall_64.entry_SYSCALL_64_after_hwframe.open64
      2.39 ±  9%      +0.6        2.96 ±  7%  perf-profile.calltrace.cycles-pp.open64
      2.35 ±  9%      +0.6        2.92 ±  8%  perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.open64
      1.41 ± 12%      +0.6        2.04 ±  8%  perf-profile.calltrace.cycles-pp.link_path_walk.path_openat.do_filp_open.do_sys_openat2.__x64_sys_openat
      0.00            +0.8        0.77 ±  8%  perf-profile.calltrace.cycles-pp.__rpc_execute.rpc_execute.rpc_run_task.nfs4_do_call_sync._nfs4_proc_getattr
      0.00            +0.8        0.77 ±  9%  perf-profile.calltrace.cycles-pp.rpc_execute.rpc_run_task.nfs4_do_call_sync._nfs4_proc_getattr.nfs4_proc_getattr
      0.00            +0.8        0.79 ±  8%  perf-profile.calltrace.cycles-pp.rpc_run_task.nfs4_do_call_sync._nfs4_proc_getattr.nfs4_proc_getattr.__nfs_revalidate_inode
      0.00            +0.8        0.80 ±  9%  perf-profile.calltrace.cycles-pp.nfs4_do_call_sync._nfs4_proc_getattr.nfs4_proc_getattr.__nfs_revalidate_inode.nfs_check_verifier
      0.00            +0.8        0.80 ±  9%  perf-profile.calltrace.cycles-pp._nfs4_proc_getattr.nfs4_proc_getattr.__nfs_revalidate_inode.nfs_check_verifier.nfs_do_lookup_revalidate
      0.00            +0.8        0.81 ±  9%  perf-profile.calltrace.cycles-pp.nfs4_proc_getattr.__nfs_revalidate_inode.nfs_check_verifier.nfs_do_lookup_revalidate.__nfs_lookup_revalidate
      0.00            +0.8        0.84 ±  9%  perf-profile.calltrace.cycles-pp.__nfs_revalidate_inode.nfs_check_verifier.nfs_do_lookup_revalidate.__nfs_lookup_revalidate.lookup_fast
      0.00            +0.8        0.85 ±  8%  perf-profile.calltrace.cycles-pp.nfs_check_verifier.nfs_do_lookup_revalidate.__nfs_lookup_revalidate.lookup_fast.walk_component
      0.00            +0.9        0.85 ±  8%  perf-profile.calltrace.cycles-pp.nfs_do_lookup_revalidate.__nfs_lookup_revalidate.lookup_fast.walk_component.link_path_walk
      0.00            +0.9        0.87 ±  8%  perf-profile.calltrace.cycles-pp.__nfs_lookup_revalidate.lookup_fast.walk_component.link_path_walk.path_openat
      0.00            +0.9        0.88 ±  9%  perf-profile.calltrace.cycles-pp.lookup_fast.walk_component.link_path_walk.path_openat.do_filp_open
      0.00            +0.9        0.88 ±  8%  perf-profile.calltrace.cycles-pp.walk_component.link_path_walk.path_openat.do_filp_open.do_sys_openat2
      1.37 ± 11%      -0.2        1.13 ±  9%  perf-profile.children.cycles-pp.nfs_do_access
      1.46 ± 11%      -0.2        1.22 ±  8%  perf-profile.children.cycles-pp.inode_permission
      1.40 ± 11%      -0.2        1.16 ±  8%  perf-profile.children.cycles-pp.nfs_permission
      1.27 ± 12%      -0.2        1.03 ± 11%  perf-profile.children.cycles-pp.nfs4_proc_access
      1.26 ± 12%      -0.2        1.02 ± 11%  perf-profile.children.cycles-pp._nfs4_proc_access
      1.87 ±  3%      -0.2        1.68 ±  5%  perf-profile.children.cycles-pp.xlog_cil_push_work
      0.08 ±  9%      -0.0        0.06 ± 13%  perf-profile.children.cycles-pp.tick_program_event
      0.08 ± 11%      +0.0        0.10 ± 10%  perf-profile.children.cycles-pp.refcount_dec_not_one
      0.26 ±  8%      +0.0        0.30 ±  8%  perf-profile.children.cycles-pp.update_curr
      0.11 ±  9%      +0.0        0.15 ± 11%  perf-profile.children.cycles-pp.tcp_rcv_space_adjust
      0.24 ±  7%      +0.0        0.28 ± 10%  perf-profile.children.cycles-pp.skb_release_data
      0.17 ± 12%      +0.0        0.21 ±  8%  perf-profile.children.cycles-pp.napi_consume_skb
      0.44 ±  5%      +0.0        0.49 ±  3%  perf-profile.children.cycles-pp.__might_resched
      0.00            +0.1        0.05 ±  8%  perf-profile.children.cycles-pp.prepare_to_wait
      0.26 ±  8%      +0.1        0.32 ± 12%  perf-profile.children.cycles-pp.nfsd4_encode_fattr
      0.26 ±  8%      +0.1        0.32 ± 12%  perf-profile.children.cycles-pp.nfsd4_encode_getattr
      0.36 ±  5%      +0.1        0.46 ±  8%  perf-profile.children.cycles-pp.put_cred_rcu
      1.49 ±  4%      +0.1        1.60 ±  5%  perf-profile.children.cycles-pp.inet6_recvmsg
      1.47 ±  4%      +0.1        1.58 ±  5%  perf-profile.children.cycles-pp.tcp_recvmsg
      1.72 ±  3%      +0.1        1.85 ±  5%  perf-profile.children.cycles-pp.sock_recvmsg
      1.69 ±  3%      +0.2        1.88 ±  3%  perf-profile.children.cycles-pp.tcp_rcv_established
      1.72 ±  3%      +0.2        1.92 ±  3%  perf-profile.children.cycles-pp.tcp_v6_do_rcv
      2.26 ±  4%      +0.2        2.46 ±  4%  perf-profile.children.cycles-pp.__netif_receive_skb_one_core
      2.15 ±  3%      +0.2        2.36 ±  4%  perf-profile.children.cycles-pp.ip6_protocol_deliver_rcu
      2.09 ±  3%      +0.2        2.30 ±  4%  perf-profile.children.cycles-pp.tcp_v6_rcv
      2.16 ±  3%      +0.2        2.36 ±  4%  perf-profile.children.cycles-pp.ip6_input_finish
      2.32 ±  4%      +0.2        2.54 ±  4%  perf-profile.children.cycles-pp.__napi_poll
      2.32 ±  4%      +0.2        2.54 ±  4%  perf-profile.children.cycles-pp.process_backlog
      2.82 ±  4%      +0.3        3.12 ±  3%  perf-profile.children.cycles-pp.do_softirq
      2.70 ±  4%      +0.3        3.00 ±  3%  perf-profile.children.cycles-pp.net_rx_action
      2.89 ±  4%      +0.3        3.20 ±  3%  perf-profile.children.cycles-pp.__local_bh_enable_ip
      3.43 ±  3%      +0.3        3.77 ±  3%  perf-profile.children.cycles-pp.ip6_xmit
      3.28 ±  4%      +0.4        3.64 ±  3%  perf-profile.children.cycles-pp.ip6_finish_output2
      3.85 ±  2%      +0.4        4.22 ±  4%  perf-profile.children.cycles-pp.__tcp_transmit_skb
      3.50 ±  3%      +0.4        3.87 ±  3%  perf-profile.children.cycles-pp.inet6_csk_xmit
      4.23 ±  2%      +0.4        4.62 ±  4%  perf-profile.children.cycles-pp.tcp_write_xmit
      4.26 ±  2%      +0.4        4.66 ±  4%  perf-profile.children.cycles-pp.__tcp_push_pending_frames
      4.34 ±  2%      +0.4        4.75 ±  4%  perf-profile.children.cycles-pp.tcp_sock_set_cork
      5.24 ±  3%      +0.4        5.66        perf-profile.children.cycles-pp.__do_softirq
      2.66 ±  9%      +0.5        3.12 ±  9%  perf-profile.children.cycles-pp.rpc_execute
      2.34 ±  9%      +0.6        2.89 ±  8%  perf-profile.children.cycles-pp.do_filp_open
      2.33 ±  9%      +0.6        2.89 ±  8%  perf-profile.children.cycles-pp.path_openat
      2.40 ±  9%      +0.6        2.96 ±  7%  perf-profile.children.cycles-pp.open64
      2.38 ±  9%      +0.6        2.95 ±  7%  perf-profile.children.cycles-pp.__x64_sys_openat
      2.38 ±  8%      +0.6        2.95 ±  7%  perf-profile.children.cycles-pp.do_sys_openat2
      1.52 ± 10%      +0.6        2.14 ±  8%  perf-profile.children.cycles-pp.link_path_walk
      0.00            +0.8        0.80 ±  9%  perf-profile.children.cycles-pp.nfs4_do_call_sync
      0.00            +0.8        0.80 ±  9%  perf-profile.children.cycles-pp._nfs4_proc_getattr
      0.00            +0.8        0.81 ±  9%  perf-profile.children.cycles-pp.nfs4_proc_getattr
      0.00            +0.8        0.84 ±  8%  perf-profile.children.cycles-pp.__nfs_revalidate_inode
      0.00            +0.8        0.85 ±  8%  perf-profile.children.cycles-pp.nfs_check_verifier
      0.00            +0.9        0.86 ±  9%  perf-profile.children.cycles-pp.nfs_do_lookup_revalidate
      0.06 ± 46%      +0.9        0.92 ±  8%  perf-profile.children.cycles-pp.walk_component
      0.04 ± 71%      +0.9        0.91 ±  9%  perf-profile.children.cycles-pp.lookup_fast
      0.00            +0.9        0.87 ±  8%  perf-profile.children.cycles-pp.__nfs_lookup_revalidate
      0.12 ±  8%      +0.0        0.13 ±  8%  perf-profile.self.cycles-pp.nfsd_setuser
      0.08 ± 14%      +0.0        0.10 ±  7%  perf-profile.self.cycles-pp.refcount_dec_not_one
      0.16 ± 10%      +0.0        0.19 ±  6%  perf-profile.self.cycles-pp.__might_sleep
      0.16 ±  8%      +0.0        0.20 ± 10%  perf-profile.self.cycles-pp.call_cpuidle
      0.06 ± 47%      +0.0        0.10 ± 17%  perf-profile.self.cycles-pp.put_cred_rcu
      0.40 ±  5%      +0.0        0.45 ±  3%  perf-profile.self.cycles-pp.__might_resched
      0.02 ±142%      +0.0        0.06 ± 11%  perf-profile.self.cycles-pp.apparmor_cred_free




Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 8b5547073379..9ad7c229c617 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -63,7 +63,7 @@  xfs_trans_ichgtime(
 	ASSERT(tp);
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
-	tv = current_time(inode);
+	tv = current_cmtime(inode);
 
 	if (flags & XFS_ICHGTIME_MOD)
 		inode->i_mtime = tv;
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 791db7d9c849..461adc58cf8c 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -233,7 +233,7 @@  xfs_acl_set_mode(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 	inode->i_mode = mode;
-	inode->i_ctime = current_time(inode);
+	inode->i_ctime = current_cmtime(inode);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
 	if (xfs_has_wsync(mp))
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5808abab786c..80f9d731e261 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -843,7 +843,7 @@  xfs_init_new_inode(
 	ip->i_df.if_nextents = 0;
 	ASSERT(ip->i_nblocks == 0);
 
-	tv = current_time(inode);
+	tv = current_cmtime(inode);
 	inode->i_mtime = tv;
 	inode->i_atime = tv;
 	inode->i_ctime = tv;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 24718adb3c16..a0b07f90e16c 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -565,6 +565,15 @@  xfs_vn_getattr(
 	if (xfs_is_shutdown(mp))
 		return -EIO;
 
+	/*
+	 * XFS uses the i_version infrastructure to track any change to
+	 * the inode, including atime updates. This means that the i_version
+	 * returned by getattr doesn't conform to what the callers expect.
+	 * Clear it here so that nfsd will fake up a change cookie from the
+	 * ctime instead.
+	 */
+	stat->result_mask &= ~STATX_CHANGE_COOKIE;
+
 	stat->size = XFS_ISIZE(ip);
 	stat->dev = inode->i_sb->s_dev;
 	stat->mode = inode->i_mode;
@@ -573,8 +582,8 @@  xfs_vn_getattr(
 	stat->gid = vfsgid_into_kgid(vfsgid);
 	stat->ino = ip->i_ino;
 	stat->atime = inode->i_atime;
-	stat->mtime = inode->i_mtime;
-	stat->ctime = inode->i_ctime;
+	if (request_mask & (STATX_CTIME|STATX_MTIME))
+		fill_cmtime_and_mark(inode, stat);
 	stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks);
 
 	if (xfs_has_v3inodes(mp)) {
@@ -917,7 +926,7 @@  xfs_setattr_size(
 	if (newsize != oldsize &&
 	    !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
 		iattr->ia_ctime = iattr->ia_mtime =
-			current_time(inode);
+			current_cmtime(inode);
 		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
 	}