diff mbox series

[1/4] vfs: report change attribute in statx for IS_I_VERSION inodes

Message ID 20220816132759.43248-2-jlayton@kernel.org (mailing list archive)
State New, archived
Headers show
Series vfs: expose the inode change attribute via statx | expand

Commit Message

Jeff Layton Aug. 16, 2022, 1:27 p.m. UTC
From: Jeff Layton <jlayton@redhat.com>

Claim one of the spare fields in struct statx to hold a 64-bit change
attribute. When statx requests this attribute, do an
inode_query_iversion and fill the result in the field.

Also update the test-statx.c program to display the change attribute and
the mountid as well.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/stat.c                 | 7 +++++++
 include/linux/stat.h      | 1 +
 include/uapi/linux/stat.h | 3 ++-
 samples/vfs/test-statx.c  | 8 ++++++--
 4 files changed, 16 insertions(+), 3 deletions(-)

Comments

Christian Brauner Aug. 16, 2022, 1:44 p.m. UTC | #1
On Tue, Aug 16, 2022 at 09:27:56AM -0400, Jeff Layton wrote:
> From: Jeff Layton <jlayton@redhat.com>
> 
> Claim one of the spare fields in struct statx to hold a 64-bit change
> attribute. When statx requests this attribute, do an
> inode_query_iversion and fill the result in the field.
> 
> Also update the test-statx.c program to display the change attribute and
> the mountid as well.
> 
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/stat.c                 | 7 +++++++
>  include/linux/stat.h      | 1 +
>  include/uapi/linux/stat.h | 3 ++-
>  samples/vfs/test-statx.c  | 8 ++++++--
>  4 files changed, 16 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/stat.c b/fs/stat.c
> index 9ced8860e0f3..7c3d063c31ba 100644
> --- a/fs/stat.c
> +++ b/fs/stat.c
> @@ -17,6 +17,7 @@
>  #include <linux/syscalls.h>
>  #include <linux/pagemap.h>
>  #include <linux/compat.h>
> +#include <linux/iversion.h>
>  
>  #include <linux/uaccess.h>
>  #include <asm/unistd.h>
> @@ -118,6 +119,11 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
>  	stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
>  				  STATX_ATTR_DAX);
>  
> +	if ((request_mask & STATX_CHANGE_ATTR) && IS_I_VERSION(inode)) {
> +		stat->result_mask |= STATX_CHANGE_ATTR;
> +		stat->change_attr = inode_query_iversion(inode);
> +	}
> +
>  	mnt_userns = mnt_user_ns(path->mnt);
>  	if (inode->i_op->getattr)
>  		return inode->i_op->getattr(mnt_userns, path, stat,
> @@ -611,6 +617,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
>  	tmp.stx_dev_major = MAJOR(stat->dev);
>  	tmp.stx_dev_minor = MINOR(stat->dev);
>  	tmp.stx_mnt_id = stat->mnt_id;
> +	tmp.stx_change_attr = stat->change_attr;
>  
>  	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
>  }
> diff --git a/include/linux/stat.h b/include/linux/stat.h
> index 7df06931f25d..7b444c2ad0ad 100644
> --- a/include/linux/stat.h
> +++ b/include/linux/stat.h
> @@ -50,6 +50,7 @@ struct kstat {
>  	struct timespec64 btime;			/* File creation time */
>  	u64		blocks;
>  	u64		mnt_id;
> +	u64		change_attr;
>  };
>  
>  #endif
> diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
> index 1500a0f58041..fd839ec76aa4 100644
> --- a/include/uapi/linux/stat.h
> +++ b/include/uapi/linux/stat.h
> @@ -124,7 +124,7 @@ struct statx {
>  	__u32	stx_dev_minor;
>  	/* 0x90 */
>  	__u64	stx_mnt_id;
> -	__u64	__spare2;
> +	__u64	stx_change_attr; /* Inode change attribute */
>  	/* 0xa0 */
>  	__u64	__spare3[12];	/* Spare space for future expansion */
>  	/* 0x100 */
> @@ -152,6 +152,7 @@ struct statx {
>  #define STATX_BASIC_STATS	0x000007ffU	/* The stuff in the normal stat struct */
>  #define STATX_BTIME		0x00000800U	/* Want/got stx_btime */
>  #define STATX_MNT_ID		0x00001000U	/* Got stx_mnt_id */
> +#define STATX_CHANGE_ATTR	0x00002000U	/* Want/got stx_change_attr */

I'm a bit worried that STATX_CHANGE_ATTR isn't a good name for the flag
and field. Or I fail to understand what exact information this will
expose and how userspace will consume it.
To me the naming gives the impression that some set of generic
attributes have changed but given that statx is about querying file
attributes this becomes confusing.

Wouldn't it make more sense this time to expose it as what it is and
call this STATX_INO_VERSION and __u64 stx_ino_version?
Jeff Layton Aug. 16, 2022, 1:52 p.m. UTC | #2
On Tue, 2022-08-16 at 15:44 +0200, Christian Brauner wrote:
> On Tue, Aug 16, 2022 at 09:27:56AM -0400, Jeff Layton wrote:
> > From: Jeff Layton <jlayton@redhat.com>
> > 
> > Claim one of the spare fields in struct statx to hold a 64-bit change
> > attribute. When statx requests this attribute, do an
> > inode_query_iversion and fill the result in the field.
> > 
> > Also update the test-statx.c program to display the change attribute and
> > the mountid as well.
> > 
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> >  fs/stat.c                 | 7 +++++++
> >  include/linux/stat.h      | 1 +
> >  include/uapi/linux/stat.h | 3 ++-
> >  samples/vfs/test-statx.c  | 8 ++++++--
> >  4 files changed, 16 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fs/stat.c b/fs/stat.c
> > index 9ced8860e0f3..7c3d063c31ba 100644
> > --- a/fs/stat.c
> > +++ b/fs/stat.c
> > @@ -17,6 +17,7 @@
> >  #include <linux/syscalls.h>
> >  #include <linux/pagemap.h>
> >  #include <linux/compat.h>
> > +#include <linux/iversion.h>
> >  
> >  #include <linux/uaccess.h>
> >  #include <asm/unistd.h>
> > @@ -118,6 +119,11 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
> >  	stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
> >  				  STATX_ATTR_DAX);
> >  
> > +	if ((request_mask & STATX_CHANGE_ATTR) && IS_I_VERSION(inode)) {
> > +		stat->result_mask |= STATX_CHANGE_ATTR;
> > +		stat->change_attr = inode_query_iversion(inode);
> > +	}
> > +
> >  	mnt_userns = mnt_user_ns(path->mnt);
> >  	if (inode->i_op->getattr)
> >  		return inode->i_op->getattr(mnt_userns, path, stat,
> > @@ -611,6 +617,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
> >  	tmp.stx_dev_major = MAJOR(stat->dev);
> >  	tmp.stx_dev_minor = MINOR(stat->dev);
> >  	tmp.stx_mnt_id = stat->mnt_id;
> > +	tmp.stx_change_attr = stat->change_attr;
> >  
> >  	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
> >  }
> > diff --git a/include/linux/stat.h b/include/linux/stat.h
> > index 7df06931f25d..7b444c2ad0ad 100644
> > --- a/include/linux/stat.h
> > +++ b/include/linux/stat.h
> > @@ -50,6 +50,7 @@ struct kstat {
> >  	struct timespec64 btime;			/* File creation time */
> >  	u64		blocks;
> >  	u64		mnt_id;
> > +	u64		change_attr;
> >  };
> >  
> >  #endif
> > diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
> > index 1500a0f58041..fd839ec76aa4 100644
> > --- a/include/uapi/linux/stat.h
> > +++ b/include/uapi/linux/stat.h
> > @@ -124,7 +124,7 @@ struct statx {
> >  	__u32	stx_dev_minor;
> >  	/* 0x90 */
> >  	__u64	stx_mnt_id;
> > -	__u64	__spare2;
> > +	__u64	stx_change_attr; /* Inode change attribute */
> >  	/* 0xa0 */
> >  	__u64	__spare3[12];	/* Spare space for future expansion */
> >  	/* 0x100 */
> > @@ -152,6 +152,7 @@ struct statx {
> >  #define STATX_BASIC_STATS	0x000007ffU	/* The stuff in the normal stat struct */
> >  #define STATX_BTIME		0x00000800U	/* Want/got stx_btime */
> >  #define STATX_MNT_ID		0x00001000U	/* Got stx_mnt_id */
> > +#define STATX_CHANGE_ATTR	0x00002000U	/* Want/got stx_change_attr */
> 
> I'm a bit worried that STATX_CHANGE_ATTR isn't a good name for the flag
> and field. Or I fail to understand what exact information this will
> expose and how userspace will consume it.
> To me the naming gives the impression that some set of generic
> attributes have changed but given that statx is about querying file
> attributes this becomes confusing.
> 
> Wouldn't it make more sense this time to expose it as what it is and
> call this STATX_INO_VERSION and __u64 stx_ino_version?

"Let the great bikesheddening begin!"

In all seriousness though, you do have a good point. The NFS RFCs call
this the "change attribute", so I went forward with that parlance here.
I'm not opposed to changing the naming -- STATX_INO_VERSION sounds fine
to me. 

Let's see if anyone else has a better name before I make any changes
though.
David Howells Aug. 16, 2022, 1:55 p.m. UTC | #3
Christian Brauner <brauner@kernel.org> wrote:

> > +#define STATX_CHANGE_ATTR	0x00002000U	/* Want/got stx_change_attr */
> 
> I'm a bit worried that STATX_CHANGE_ATTR isn't a good name for the flag
> and field. Or I fail to understand what exact information this will
> expose and how userspace will consume it.
> To me the naming gives the impression that some set of generic
> attributes have changed but given that statx is about querying file
> attributes this becomes confusing.
> 
> Wouldn't it make more sense this time to expose it as what it is and
> call this STATX_INO_VERSION and __u64 stx_ino_version?

I'm not sure that STATX_INO_VERSION is better that might get confused with the
version number that's used to uniquify inode slots (ie. deal with inode number
reuse).

The problem is that we need fsinfo() or similar to qualify what this means.
On some filesystems, it's only changed when the data content changes, but on
others it may get changed when, say, xattrs get changed; further, on some
filesystems it might be monotonically incremented, but on others it's just
supposed to be different between two consecutive changes (nfs, IIRC).

David
Jeff Layton Aug. 16, 2022, 2:02 p.m. UTC | #4
On Tue, 2022-08-16 at 14:55 +0100, David Howells wrote:
> Christian Brauner <brauner@kernel.org> wrote:
> 
> > > +#define STATX_CHANGE_ATTR	0x00002000U	/* Want/got stx_change_attr */
> > 
> > I'm a bit worried that STATX_CHANGE_ATTR isn't a good name for the flag
> > and field. Or I fail to understand what exact information this will
> > expose and how userspace will consume it.
> > To me the naming gives the impression that some set of generic
> > attributes have changed but given that statx is about querying file
> > attributes this becomes confusing.
> > 
> > Wouldn't it make more sense this time to expose it as what it is and
> > call this STATX_INO_VERSION and __u64 stx_ino_version?
> 
> I'm not sure that STATX_INO_VERSION is better that might get confused with the
> version number that's used to uniquify inode slots (ie. deal with inode number
> reuse).
> 
> The problem is that we need fsinfo() or similar to qualify what this means.
> On some filesystems, it's only changed when the data content changes, but on
> others it may get changed when, say, xattrs get changed; further, on some
> filesystems it might be monotonically incremented, but on others it's just
> supposed to be different between two consecutive changes (nfs, IIRC).
> 

I think we'll just have to ensure that before we expose this for any
filesystem that it conforms to some minimum standards. i.e.: it must
change if there are data or metadata changes to the inode, modulo atime
changes due to reads on regular files or readdir on dirs.

The local filesystems, ceph and NFS should all be fine. I guess that
just leaves AFS. If it can't guarantee that, then we might want to avoid
exposing the counter for it.
David Howells Aug. 16, 2022, 3:15 p.m. UTC | #5
Jeff Layton <jlayton@kernel.org> wrote:

> I think we'll just have to ensure that before we expose this for any
> filesystem that it conforms to some minimum standards. i.e.: it must
> change if there are data or metadata changes to the inode, modulo atime
> changes due to reads on regular files or readdir on dirs.
> 
> The local filesystems, ceph and NFS should all be fine. I guess that
> just leaves AFS. If it can't guarantee that, then we might want to avoid
> exposing the counter for it.

AFS monotonically increments the counter on data changes; doesn't make any
change for metadata changes (other than the file size).

But you can't assume NFS works as per your suggestion as you don't know what's
backing it (it could be AFS, for example - there's a converter for that).

Further, for ordinary disk filesystems, two data changes may get elided and
only increment the counter once.  And then there's mmap...

It might be better to reduce the scope of your definition and just say that it
must change if there's a data change and may also be changed if there's a
metadata change.

David
Jeff Layton Aug. 16, 2022, 3:32 p.m. UTC | #6
On Tue, 2022-08-16 at 16:15 +0100, David Howells wrote:
> Jeff Layton <jlayton@kernel.org> wrote:
> 
> > I think we'll just have to ensure that before we expose this for any
> > filesystem that it conforms to some minimum standards. i.e.: it must
> > change if there are data or metadata changes to the inode, modulo atime
> > changes due to reads on regular files or readdir on dirs.
> > 
> > The local filesystems, ceph and NFS should all be fine. I guess that
> > just leaves AFS. If it can't guarantee that, then we might want to avoid
> > exposing the counter for it.
> 
> AFS monotonically increments the counter on data changes; doesn't make any
> change for metadata changes (other than the file size).
> 
> But you can't assume NFS works as per your suggestion as you don't know what's
> backing it (it could be AFS, for example - there's a converter for that).
> 

In that case, the NFS server must synthesize a proper change attr. The
NFS spec mandates that it change on most metadata changes.

> Further, for ordinary disk filesystems, two data changes may get elided and
> only increment the counter once.
> 

Not a problem as long as nothing queried the counter in between the
changes.

> And then there's mmap...
> 

Not sure how that matters here.

> It might be better to reduce the scope of your definition and just say that it
> must change if there's a data change and may also be changed if there's a
> metadata change.
> 

I'd prefer that we mandate that it change on metadata changes as well.
That's what most of the in-kernel users want, and what most of the
existing filesystems provide. If AFS can't give that guarantee then we
can just omit exposing i_version on it.
Darrick J. Wong Aug. 16, 2022, 3:51 p.m. UTC | #7
On Tue, Aug 16, 2022 at 11:32:24AM -0400, Jeff Layton wrote:
> On Tue, 2022-08-16 at 16:15 +0100, David Howells wrote:
> > Jeff Layton <jlayton@kernel.org> wrote:
> > 
> > > I think we'll just have to ensure that before we expose this for any
> > > filesystem that it conforms to some minimum standards. i.e.: it must
> > > change if there are data or metadata changes to the inode, modulo atime
> > > changes due to reads on regular files or readdir on dirs.
> > > 
> > > The local filesystems, ceph and NFS should all be fine. I guess that
> > > just leaves AFS. If it can't guarantee that, then we might want to avoid
> > > exposing the counter for it.
> > 
> > AFS monotonically increments the counter on data changes; doesn't make any
> > change for metadata changes (other than the file size).
> > 
> > But you can't assume NFS works as per your suggestion as you don't know what's
> > backing it (it could be AFS, for example - there's a converter for that).
> > 
> 
> In that case, the NFS server must synthesize a proper change attr. The
> NFS spec mandates that it change on most metadata changes.
> 
> > Further, for ordinary disk filesystems, two data changes may get elided and
> > only increment the counter once.
> > 
> 
> Not a problem as long as nothing queried the counter in between the
> changes.
> 
> > And then there's mmap...
> > 
> 
> Not sure how that matters here.
> 
> > It might be better to reduce the scope of your definition and just say that it
> > must change if there's a data change and may also be changed if there's a
> > metadata change.
> > 
> 
> I'd prefer that we mandate that it change on metadata changes as well.

...in that case, why not leave the i_version bump in
xfs_trans_log_inode?  That will capture all changes to file data,
attribues, and metadata. ;)

--D

> That's what most of the in-kernel users want, and what most of the
> existing filesystems provide. If AFS can't give that guarantee then we
> can just omit exposing i_version on it.
> -- 
> Jeff Layton <jlayton@kernel.org>
Jeff Layton Aug. 16, 2022, 4:05 p.m. UTC | #8
On Tue, 2022-08-16 at 08:51 -0700, Darrick J. Wong wrote:
> On Tue, Aug 16, 2022 at 11:32:24AM -0400, Jeff Layton wrote:
> > On Tue, 2022-08-16 at 16:15 +0100, David Howells wrote:
> > > Jeff Layton <jlayton@kernel.org> wrote:
> > > 
> > > > I think we'll just have to ensure that before we expose this for any
> > > > filesystem that it conforms to some minimum standards. i.e.: it must
> > > > change if there are data or metadata changes to the inode, modulo atime
> > > > changes due to reads on regular files or readdir on dirs.
> > > > 
> > > > The local filesystems, ceph and NFS should all be fine. I guess that
> > > > just leaves AFS. If it can't guarantee that, then we might want to avoid
> > > > exposing the counter for it.
> > > 
> > > AFS monotonically increments the counter on data changes; doesn't make any
> > > change for metadata changes (other than the file size).
> > > 
> > > But you can't assume NFS works as per your suggestion as you don't know what's
> > > backing it (it could be AFS, for example - there's a converter for that).
> > > 
> > 
> > In that case, the NFS server must synthesize a proper change attr. The
> > NFS spec mandates that it change on most metadata changes.
> > 
> > > Further, for ordinary disk filesystems, two data changes may get elided and
> > > only increment the counter once.
> > > 
> > 
> > Not a problem as long as nothing queried the counter in between the
> > changes.
> > 
> > > And then there's mmap...
> > > 
> > 
> > Not sure how that matters here.
> > 
> > > It might be better to reduce the scope of your definition and just say that it
> > > must change if there's a data change and may also be changed if there's a
> > > metadata change.
> > > 
> > 
> > I'd prefer that we mandate that it change on metadata changes as well.
> 
> ...in that case, why not leave the i_version bump in
> xfs_trans_log_inode?  That will capture all changes to file data,
> attribues, and metadata. ;)
> 
> 

Because that includes changes to the atime due to reads which should be
specifically omitted. We could still keep that callsite instead, if you
can see some way to exclude those.

In practice, we are using a change to i_version to mean that "something
changed" in the inode, which usually implies a change to the ctime and
mtime.

Trond pointed out that the NFSv4 spec implies that time_access updates
should be omitted from what we consider to be "metadata" here:

https://mailarchive.ietf.org/arch/msg/nfsv4/yrRBMrVwWWDCrgHPAzq_yAEc7BU/

IMA (which is the only other in-kernel consumer of i_version) also wants
the same behavior.

> > That's what most of the in-kernel users want, and what most of the
> > existing filesystems provide. If AFS can't give that guarantee then we
> > can just omit exposing i_version on it.
Jeff Layton Aug. 18, 2022, 8:24 p.m. UTC | #9
On Tue, 2022-08-16 at 15:44 +0200, Christian Brauner wrote:
> On Tue, Aug 16, 2022 at 09:27:56AM -0400, Jeff Layton wrote:
> > From: Jeff Layton <jlayton@redhat.com>
> > 
> > Claim one of the spare fields in struct statx to hold a 64-bit change
> > attribute. When statx requests this attribute, do an
> > inode_query_iversion and fill the result in the field.
> > 
> > Also update the test-statx.c program to display the change attribute and
> > the mountid as well.
> > 
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> >  fs/stat.c                 | 7 +++++++
> >  include/linux/stat.h      | 1 +
> >  include/uapi/linux/stat.h | 3 ++-
> >  samples/vfs/test-statx.c  | 8 ++++++--
> >  4 files changed, 16 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fs/stat.c b/fs/stat.c
> > index 9ced8860e0f3..7c3d063c31ba 100644
> > --- a/fs/stat.c
> > +++ b/fs/stat.c
> > @@ -17,6 +17,7 @@
> >  #include <linux/syscalls.h>
> >  #include <linux/pagemap.h>
> >  #include <linux/compat.h>
> > +#include <linux/iversion.h>
> >  
> >  #include <linux/uaccess.h>
> >  #include <asm/unistd.h>
> > @@ -118,6 +119,11 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
> >  	stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
> >  				  STATX_ATTR_DAX);
> >  
> > +	if ((request_mask & STATX_CHANGE_ATTR) && IS_I_VERSION(inode)) {
> > +		stat->result_mask |= STATX_CHANGE_ATTR;
> > +		stat->change_attr = inode_query_iversion(inode);
> > +	}
> > +
> >  	mnt_userns = mnt_user_ns(path->mnt);
> >  	if (inode->i_op->getattr)
> >  		return inode->i_op->getattr(mnt_userns, path, stat,
> > @@ -611,6 +617,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
> >  	tmp.stx_dev_major = MAJOR(stat->dev);
> >  	tmp.stx_dev_minor = MINOR(stat->dev);
> >  	tmp.stx_mnt_id = stat->mnt_id;
> > +	tmp.stx_change_attr = stat->change_attr;
> >  
> >  	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
> >  }
> > diff --git a/include/linux/stat.h b/include/linux/stat.h
> > index 7df06931f25d..7b444c2ad0ad 100644
> > --- a/include/linux/stat.h
> > +++ b/include/linux/stat.h
> > @@ -50,6 +50,7 @@ struct kstat {
> >  	struct timespec64 btime;			/* File creation time */
> >  	u64		blocks;
> >  	u64		mnt_id;
> > +	u64		change_attr;
> >  };
> >  
> >  #endif
> > diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
> > index 1500a0f58041..fd839ec76aa4 100644
> > --- a/include/uapi/linux/stat.h
> > +++ b/include/uapi/linux/stat.h
> > @@ -124,7 +124,7 @@ struct statx {
> >  	__u32	stx_dev_minor;
> >  	/* 0x90 */
> >  	__u64	stx_mnt_id;
> > -	__u64	__spare2;
> > +	__u64	stx_change_attr; /* Inode change attribute */
> >  	/* 0xa0 */
> >  	__u64	__spare3[12];	/* Spare space for future expansion */
> >  	/* 0x100 */
> > @@ -152,6 +152,7 @@ struct statx {
> >  #define STATX_BASIC_STATS	0x000007ffU	/* The stuff in the normal stat struct */
> >  #define STATX_BTIME		0x00000800U	/* Want/got stx_btime */
> >  #define STATX_MNT_ID		0x00001000U	/* Got stx_mnt_id */
> > +#define STATX_CHANGE_ATTR	0x00002000U	/* Want/got stx_change_attr */
> 
> I'm a bit worried that STATX_CHANGE_ATTR isn't a good name for the flag
> and field. Or I fail to understand what exact information this will
> expose and how userspace will consume it.
> To me the naming gives the impression that some set of generic
> attributes have changed but given that statx is about querying file
> attributes this becomes confusing.
> 
> Wouldn't it make more sense this time to expose it as what it is and
> call this STATX_INO_VERSION and __u64 stx_ino_version?

Ok, having thought about this some more, I think this is a reasonable
name. It _does_ sort of imply that this value will increase over time.
That's true of all of the existing implementations, but I think we ought
to define such that there could be alternative implementations.

I'll respin this patch and resend it with a wider audience.

Thanks for the input so far!
diff mbox series

Patch

diff --git a/fs/stat.c b/fs/stat.c
index 9ced8860e0f3..7c3d063c31ba 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -17,6 +17,7 @@ 
 #include <linux/syscalls.h>
 #include <linux/pagemap.h>
 #include <linux/compat.h>
+#include <linux/iversion.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -118,6 +119,11 @@  int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
 	stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
 				  STATX_ATTR_DAX);
 
+	if ((request_mask & STATX_CHANGE_ATTR) && IS_I_VERSION(inode)) {
+		stat->result_mask |= STATX_CHANGE_ATTR;
+		stat->change_attr = inode_query_iversion(inode);
+	}
+
 	mnt_userns = mnt_user_ns(path->mnt);
 	if (inode->i_op->getattr)
 		return inode->i_op->getattr(mnt_userns, path, stat,
@@ -611,6 +617,7 @@  cp_statx(const struct kstat *stat, struct statx __user *buffer)
 	tmp.stx_dev_major = MAJOR(stat->dev);
 	tmp.stx_dev_minor = MINOR(stat->dev);
 	tmp.stx_mnt_id = stat->mnt_id;
+	tmp.stx_change_attr = stat->change_attr;
 
 	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
 }
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 7df06931f25d..7b444c2ad0ad 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -50,6 +50,7 @@  struct kstat {
 	struct timespec64 btime;			/* File creation time */
 	u64		blocks;
 	u64		mnt_id;
+	u64		change_attr;
 };
 
 #endif
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 1500a0f58041..fd839ec76aa4 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -124,7 +124,7 @@  struct statx {
 	__u32	stx_dev_minor;
 	/* 0x90 */
 	__u64	stx_mnt_id;
-	__u64	__spare2;
+	__u64	stx_change_attr; /* Inode change attribute */
 	/* 0xa0 */
 	__u64	__spare3[12];	/* Spare space for future expansion */
 	/* 0x100 */
@@ -152,6 +152,7 @@  struct statx {
 #define STATX_BASIC_STATS	0x000007ffU	/* The stuff in the normal stat struct */
 #define STATX_BTIME		0x00000800U	/* Want/got stx_btime */
 #define STATX_MNT_ID		0x00001000U	/* Got stx_mnt_id */
+#define STATX_CHANGE_ATTR	0x00002000U	/* Want/got stx_change_attr */
 
 #define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */
 
diff --git a/samples/vfs/test-statx.c b/samples/vfs/test-statx.c
index 49c7a46cee07..b104909721c4 100644
--- a/samples/vfs/test-statx.c
+++ b/samples/vfs/test-statx.c
@@ -107,6 +107,8 @@  static void dump_statx(struct statx *stx)
 	printf("Device: %-15s", buffer);
 	if (stx->stx_mask & STATX_INO)
 		printf(" Inode: %-11llu", (unsigned long long) stx->stx_ino);
+	if (stx->stx_mask & STATX_MNT_ID)
+		printf(" MountId: %llx"), stx->stx_mnt_id;
 	if (stx->stx_mask & STATX_NLINK)
 		printf(" Links: %-5u", stx->stx_nlink);
 	if (stx->stx_mask & STATX_TYPE) {
@@ -145,7 +147,9 @@  static void dump_statx(struct statx *stx)
 	if (stx->stx_mask & STATX_CTIME)
 		print_time("Change: ", &stx->stx_ctime);
 	if (stx->stx_mask & STATX_BTIME)
-		print_time(" Birth: ", &stx->stx_btime);
+		print_time("Birth: ", &stx->stx_btime);
+	if (stx->stx_mask & STATX_CHANGE_ATTR)
+		printf(" Change Attr: 0x%llx\n", stx->stx_change_attr);
 
 	if (stx->stx_attributes_mask) {
 		unsigned char bits, mbits;
@@ -218,7 +222,7 @@  int main(int argc, char **argv)
 	struct statx stx;
 	int ret, raw = 0, atflag = AT_SYMLINK_NOFOLLOW;
 
-	unsigned int mask = STATX_BASIC_STATS | STATX_BTIME;
+	unsigned int mask = STATX_BASIC_STATS | STATX_BTIME | STATX_MNT_ID | STATX_CHANGE_ATTR;
 
 	for (argv++; *argv; argv++) {
 		if (strcmp(*argv, "-F") == 0) {