diff mbox series

[v8,RESEND,3/8] vfs: plumb i_version handling into struct kstat

Message ID 20230124193025.185781-4-jlayton@kernel.org (mailing list archive)
State New, archived
Headers show
Series fs: clean up internal i_version handling | expand

Commit Message

Jeff Layton Jan. 24, 2023, 7:30 p.m. UTC
The NFS server has a lot of special handling for different types of
change attribute access, depending on the underlying filesystem. In
most cases, it's doing a getattr anyway and then fetching that value
after the fact.

Rather that do that, add a new STATX_CHANGE_COOKIE flag that is a
kernel-only symbol (for now). If requested and getattr can implement it,
it can fill out this field. For IS_I_VERSION inodes, add a generic
implementation in vfs_getattr_nosec. Take care to mask
STATX_CHANGE_COOKIE off in requests from userland and in the result
mask.

Since not all filesystems can give the same guarantees of monotonicity,
claim a STATX_ATTR_CHANGE_MONOTONIC flag that filesystems can set to
indicate that they offer an i_version value that can never go backward.

Eventually if we decide to make the i_version available to userland, we
can just designate a field for it in struct statx, and move the
STATX_CHANGE_COOKIE definition to the uapi header.

Reviewed-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/stat.c            | 17 +++++++++++++++--
 include/linux/stat.h |  9 +++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

Comments

Christian Brauner Jan. 25, 2023, 3:50 p.m. UTC | #1
On Tue, Jan 24, 2023 at 02:30:20PM -0500, Jeff Layton wrote:
> The NFS server has a lot of special handling for different types of
> change attribute access, depending on the underlying filesystem. In
> most cases, it's doing a getattr anyway and then fetching that value
> after the fact.
> 
> Rather that do that, add a new STATX_CHANGE_COOKIE flag that is a
> kernel-only symbol (for now). If requested and getattr can implement it,
> it can fill out this field. For IS_I_VERSION inodes, add a generic
> implementation in vfs_getattr_nosec. Take care to mask
> STATX_CHANGE_COOKIE off in requests from userland and in the result
> mask.
> 
> Since not all filesystems can give the same guarantees of monotonicity,
> claim a STATX_ATTR_CHANGE_MONOTONIC flag that filesystems can set to
> indicate that they offer an i_version value that can never go backward.
> 
> Eventually if we decide to make the i_version available to userland, we
> can just designate a field for it in struct statx, and move the
> STATX_CHANGE_COOKIE definition to the uapi header.
> 
> Reviewed-by: NeilBrown <neilb@suse.de>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/stat.c            | 17 +++++++++++++++--
>  include/linux/stat.h |  9 +++++++++
>  2 files changed, 24 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/stat.c b/fs/stat.c
> index d6cc74ca8486..f43afe0081fe 100644
> --- a/fs/stat.c
> +++ b/fs/stat.c
> @@ -18,6 +18,7 @@
>  #include <linux/syscalls.h>
>  #include <linux/pagemap.h>
>  #include <linux/compat.h>
> +#include <linux/iversion.h>
>  
>  #include <linux/uaccess.h>
>  #include <asm/unistd.h>
> @@ -122,6 +123,11 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
>  	stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
>  				  STATX_ATTR_DAX);
>  
> +	if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
> +		stat->result_mask |= STATX_CHANGE_COOKIE;
> +		stat->change_cookie = inode_query_iversion(inode);
> +	}
> +
>  	mnt_userns = mnt_user_ns(path->mnt);
>  	if (inode->i_op->getattr)
>  		return inode->i_op->getattr(mnt_userns, path, stat,
> @@ -602,9 +608,11 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
>  
>  	memset(&tmp, 0, sizeof(tmp));
>  
> -	tmp.stx_mask = stat->result_mask;
> +	/* STATX_CHANGE_COOKIE is kernel-only for now */
> +	tmp.stx_mask = stat->result_mask & ~STATX_CHANGE_COOKIE;
>  	tmp.stx_blksize = stat->blksize;
> -	tmp.stx_attributes = stat->attributes;
> +	/* STATX_ATTR_CHANGE_MONOTONIC is kernel-only for now */
> +	tmp.stx_attributes = stat->attributes & ~STATX_ATTR_CHANGE_MONOTONIC;
>  	tmp.stx_nlink = stat->nlink;
>  	tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
>  	tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
> @@ -643,6 +651,11 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags,
>  	if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
>  		return -EINVAL;
>  
> +	/* STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests
> +	 * from userland.
> +	 */
> +	mask &= ~STATX_CHANGE_COOKIE;
> +
>  	error = vfs_statx(dfd, filename, flags, &stat, mask);
>  	if (error)
>  		return error;
> diff --git a/include/linux/stat.h b/include/linux/stat.h
> index ff277ced50e9..52150570d37a 100644
> --- a/include/linux/stat.h
> +++ b/include/linux/stat.h

Sorry being late to the party once again...

> @@ -52,6 +52,15 @@ struct kstat {
>  	u64		mnt_id;
>  	u32		dio_mem_align;
>  	u32		dio_offset_align;
> +	u64		change_cookie;
>  };
>  
> +/* These definitions are internal to the kernel for now. Mainly used by nfsd. */
> +
> +/* mask values */
> +#define STATX_CHANGE_COOKIE		0x40000000U	/* Want/got stx_change_attr */
> +
> +/* file attribute values */
> +#define STATX_ATTR_CHANGE_MONOTONIC	0x8000000000000000ULL /* version monotonically increases */

maybe it would be better to copy what we do for SB_* vs SB_I_* flags and
at least rename them to:

STATX_I_CHANGE_COOKIE
STATX_I_ATTR_CHANGE_MONOTONIC
i_change_cookie

to visually distinguish internal and external flags.

And also if possible it might be useful to move STATX_I_* flags to the
higher 32 bits and then one can use upper_32_bits to retrieve kernel
internal flags and lower_32_bits for userspace flags in tiny wrappers.

(I did something similar for clone3() a few years ago but there to
distinguish between flags available both in clone() and clone3() and
such that are only available in clone3().)

But just a thought. I mostly worry about accidently leaking this to
userspace so ideally we'd even have separate fields in struct kstat for
internal and external attributes but that might bump kstat size, though
I don't think struct kstat is actually ever really allocated all that
much.
Jan Kara Jan. 25, 2023, 4:20 p.m. UTC | #2
On Tue 24-01-23 14:30:20, Jeff Layton wrote:
> The NFS server has a lot of special handling for different types of
> change attribute access, depending on the underlying filesystem. In
> most cases, it's doing a getattr anyway and then fetching that value
> after the fact.
> 
> Rather that do that, add a new STATX_CHANGE_COOKIE flag that is a
> kernel-only symbol (for now). If requested and getattr can implement it,
> it can fill out this field. For IS_I_VERSION inodes, add a generic
> implementation in vfs_getattr_nosec. Take care to mask
> STATX_CHANGE_COOKIE off in requests from userland and in the result
> mask.
> 
> Since not all filesystems can give the same guarantees of monotonicity,
> claim a STATX_ATTR_CHANGE_MONOTONIC flag that filesystems can set to
> indicate that they offer an i_version value that can never go backward.
> 
> Eventually if we decide to make the i_version available to userland, we
> can just designate a field for it in struct statx, and move the
> STATX_CHANGE_COOKIE definition to the uapi header.
> 
> Reviewed-by: NeilBrown <neilb@suse.de>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>

Looks good to me. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  fs/stat.c            | 17 +++++++++++++++--
>  include/linux/stat.h |  9 +++++++++
>  2 files changed, 24 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/stat.c b/fs/stat.c
> index d6cc74ca8486..f43afe0081fe 100644
> --- a/fs/stat.c
> +++ b/fs/stat.c
> @@ -18,6 +18,7 @@
>  #include <linux/syscalls.h>
>  #include <linux/pagemap.h>
>  #include <linux/compat.h>
> +#include <linux/iversion.h>
>  
>  #include <linux/uaccess.h>
>  #include <asm/unistd.h>
> @@ -122,6 +123,11 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
>  	stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
>  				  STATX_ATTR_DAX);
>  
> +	if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
> +		stat->result_mask |= STATX_CHANGE_COOKIE;
> +		stat->change_cookie = inode_query_iversion(inode);
> +	}
> +
>  	mnt_userns = mnt_user_ns(path->mnt);
>  	if (inode->i_op->getattr)
>  		return inode->i_op->getattr(mnt_userns, path, stat,
> @@ -602,9 +608,11 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
>  
>  	memset(&tmp, 0, sizeof(tmp));
>  
> -	tmp.stx_mask = stat->result_mask;
> +	/* STATX_CHANGE_COOKIE is kernel-only for now */
> +	tmp.stx_mask = stat->result_mask & ~STATX_CHANGE_COOKIE;
>  	tmp.stx_blksize = stat->blksize;
> -	tmp.stx_attributes = stat->attributes;
> +	/* STATX_ATTR_CHANGE_MONOTONIC is kernel-only for now */
> +	tmp.stx_attributes = stat->attributes & ~STATX_ATTR_CHANGE_MONOTONIC;
>  	tmp.stx_nlink = stat->nlink;
>  	tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
>  	tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
> @@ -643,6 +651,11 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags,
>  	if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
>  		return -EINVAL;
>  
> +	/* STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests
> +	 * from userland.
> +	 */
> +	mask &= ~STATX_CHANGE_COOKIE;
> +
>  	error = vfs_statx(dfd, filename, flags, &stat, mask);
>  	if (error)
>  		return error;
> diff --git a/include/linux/stat.h b/include/linux/stat.h
> index ff277ced50e9..52150570d37a 100644
> --- a/include/linux/stat.h
> +++ b/include/linux/stat.h
> @@ -52,6 +52,15 @@ struct kstat {
>  	u64		mnt_id;
>  	u32		dio_mem_align;
>  	u32		dio_offset_align;
> +	u64		change_cookie;
>  };
>  
> +/* These definitions are internal to the kernel for now. Mainly used by nfsd. */
> +
> +/* mask values */
> +#define STATX_CHANGE_COOKIE		0x40000000U	/* Want/got stx_change_attr */
> +
> +/* file attribute values */
> +#define STATX_ATTR_CHANGE_MONOTONIC	0x8000000000000000ULL /* version monotonically increases */
> +
>  #endif
> -- 
> 2.39.1
>
Jeff Layton Jan. 25, 2023, 6:30 p.m. UTC | #3
On Wed, 2023-01-25 at 16:50 +0100, Christian Brauner wrote:
> On Tue, Jan 24, 2023 at 02:30:20PM -0500, Jeff Layton wrote:
> > The NFS server has a lot of special handling for different types of
> > change attribute access, depending on the underlying filesystem. In
> > most cases, it's doing a getattr anyway and then fetching that value
> > after the fact.
> > 
> > Rather that do that, add a new STATX_CHANGE_COOKIE flag that is a
> > kernel-only symbol (for now). If requested and getattr can implement it,
> > it can fill out this field. For IS_I_VERSION inodes, add a generic
> > implementation in vfs_getattr_nosec. Take care to mask
> > STATX_CHANGE_COOKIE off in requests from userland and in the result
> > mask.
> > 
> > Since not all filesystems can give the same guarantees of monotonicity,
> > claim a STATX_ATTR_CHANGE_MONOTONIC flag that filesystems can set to
> > indicate that they offer an i_version value that can never go backward.
> > 
> > Eventually if we decide to make the i_version available to userland, we
> > can just designate a field for it in struct statx, and move the
> > STATX_CHANGE_COOKIE definition to the uapi header.
> > 
> > Reviewed-by: NeilBrown <neilb@suse.de>
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> >  fs/stat.c            | 17 +++++++++++++++--
> >  include/linux/stat.h |  9 +++++++++
> >  2 files changed, 24 insertions(+), 2 deletions(-)
> > 
> > diff --git a/fs/stat.c b/fs/stat.c
> > index d6cc74ca8486..f43afe0081fe 100644
> > --- a/fs/stat.c
> > +++ b/fs/stat.c
> > @@ -18,6 +18,7 @@
> >  #include <linux/syscalls.h>
> >  #include <linux/pagemap.h>
> >  #include <linux/compat.h>
> > +#include <linux/iversion.h>
> >  
> >  #include <linux/uaccess.h>
> >  #include <asm/unistd.h>
> > @@ -122,6 +123,11 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
> >  	stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
> >  				  STATX_ATTR_DAX);
> >  
> > +	if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
> > +		stat->result_mask |= STATX_CHANGE_COOKIE;
> > +		stat->change_cookie = inode_query_iversion(inode);
> > +	}
> > +
> >  	mnt_userns = mnt_user_ns(path->mnt);
> >  	if (inode->i_op->getattr)
> >  		return inode->i_op->getattr(mnt_userns, path, stat,
> > @@ -602,9 +608,11 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
> >  
> >  	memset(&tmp, 0, sizeof(tmp));
> >  
> > -	tmp.stx_mask = stat->result_mask;
> > +	/* STATX_CHANGE_COOKIE is kernel-only for now */
> > +	tmp.stx_mask = stat->result_mask & ~STATX_CHANGE_COOKIE;
> >  	tmp.stx_blksize = stat->blksize;
> > -	tmp.stx_attributes = stat->attributes;
> > +	/* STATX_ATTR_CHANGE_MONOTONIC is kernel-only for now */
> > +	tmp.stx_attributes = stat->attributes & ~STATX_ATTR_CHANGE_MONOTONIC;
> >  	tmp.stx_nlink = stat->nlink;
> >  	tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
> >  	tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
> > @@ -643,6 +651,11 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags,
> >  	if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
> >  		return -EINVAL;
> >  
> > +	/* STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests
> > +	 * from userland.
> > +	 */
> > +	mask &= ~STATX_CHANGE_COOKIE;
> > +
> >  	error = vfs_statx(dfd, filename, flags, &stat, mask);
> >  	if (error)
> >  		return error;
> > diff --git a/include/linux/stat.h b/include/linux/stat.h
> > index ff277ced50e9..52150570d37a 100644
> > --- a/include/linux/stat.h
> > +++ b/include/linux/stat.h
> 
> Sorry being late to the party once again...
> 
> > @@ -52,6 +52,15 @@ struct kstat {
> >  	u64		mnt_id;
> >  	u32		dio_mem_align;
> >  	u32		dio_offset_align;
> > +	u64		change_cookie;
> >  };
> >  
> > +/* These definitions are internal to the kernel for now. Mainly used by nfsd. */
> > +
> > +/* mask values */
> > +#define STATX_CHANGE_COOKIE		0x40000000U	/* Want/got stx_change_attr */
> > +
> > +/* file attribute values */
> > +#define STATX_ATTR_CHANGE_MONOTONIC	0x8000000000000000ULL /* version monotonically increases */
> 
> maybe it would be better to copy what we do for SB_* vs SB_I_* flags and
> at least rename them to:
> 
> STATX_I_CHANGE_COOKIE
> STATX_I_ATTR_CHANGE_MONOTONIC
> i_change_cookie

An "i_"/"I_" prefix says "inode" to me. Maybe I've been at this too
long. ;)

> 
> to visually distinguish internal and external flags.
> 
> And also if possible it might be useful to move STATX_I_* flags to the
> higher 32 bits and then one can use upper_32_bits to retrieve kernel
> internal flags and lower_32_bits for userspace flags in tiny wrappers.
> 
> (I did something similar for clone3() a few years ago but there to
> distinguish between flags available both in clone() and clone3() and
> such that are only available in clone3().)
> 
> But just a thought. I mostly worry about accidently leaking this to
> userspace so ideally we'd even have separate fields in struct kstat for
> internal and external attributes but that might bump kstat size, though
> I don't think struct kstat is actually ever really allocated all that
> much.

I'm not sure that the internal/external distinction matters much for
filesystem providers or consumers of it. The place that it matters is at
the userland interface level, where statx or something similar is
called.

At some point we may want to make STATX_CHANGE_COOKIE queryable via
statx, at which point we'll have to have a big flag day where we do
s/STATX_I_CHANGE_COOKIE/STATX_CHANGE_COOKIE/.

I don't think it's worth it here.
Christian Brauner Jan. 26, 2023, 8:23 a.m. UTC | #4
On Wed, Jan 25, 2023 at 01:30:07PM -0500, Jeff Layton wrote:
> On Wed, 2023-01-25 at 16:50 +0100, Christian Brauner wrote:
> > On Tue, Jan 24, 2023 at 02:30:20PM -0500, Jeff Layton wrote:
> > > The NFS server has a lot of special handling for different types of
> > > change attribute access, depending on the underlying filesystem. In
> > > most cases, it's doing a getattr anyway and then fetching that value
> > > after the fact.
> > > 
> > > Rather that do that, add a new STATX_CHANGE_COOKIE flag that is a
> > > kernel-only symbol (for now). If requested and getattr can implement it,
> > > it can fill out this field. For IS_I_VERSION inodes, add a generic
> > > implementation in vfs_getattr_nosec. Take care to mask
> > > STATX_CHANGE_COOKIE off in requests from userland and in the result
> > > mask.
> > > 
> > > Since not all filesystems can give the same guarantees of monotonicity,
> > > claim a STATX_ATTR_CHANGE_MONOTONIC flag that filesystems can set to
> > > indicate that they offer an i_version value that can never go backward.
> > > 
> > > Eventually if we decide to make the i_version available to userland, we
> > > can just designate a field for it in struct statx, and move the
> > > STATX_CHANGE_COOKIE definition to the uapi header.
> > > 
> > > Reviewed-by: NeilBrown <neilb@suse.de>
> > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > ---
> > >  fs/stat.c            | 17 +++++++++++++++--
> > >  include/linux/stat.h |  9 +++++++++
> > >  2 files changed, 24 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/fs/stat.c b/fs/stat.c
> > > index d6cc74ca8486..f43afe0081fe 100644
> > > --- a/fs/stat.c
> > > +++ b/fs/stat.c
> > > @@ -18,6 +18,7 @@
> > >  #include <linux/syscalls.h>
> > >  #include <linux/pagemap.h>
> > >  #include <linux/compat.h>
> > > +#include <linux/iversion.h>
> > >  
> > >  #include <linux/uaccess.h>
> > >  #include <asm/unistd.h>
> > > @@ -122,6 +123,11 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
> > >  	stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
> > >  				  STATX_ATTR_DAX);
> > >  
> > > +	if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
> > > +		stat->result_mask |= STATX_CHANGE_COOKIE;
> > > +		stat->change_cookie = inode_query_iversion(inode);
> > > +	}
> > > +
> > >  	mnt_userns = mnt_user_ns(path->mnt);
> > >  	if (inode->i_op->getattr)
> > >  		return inode->i_op->getattr(mnt_userns, path, stat,
> > > @@ -602,9 +608,11 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
> > >  
> > >  	memset(&tmp, 0, sizeof(tmp));
> > >  
> > > -	tmp.stx_mask = stat->result_mask;
> > > +	/* STATX_CHANGE_COOKIE is kernel-only for now */
> > > +	tmp.stx_mask = stat->result_mask & ~STATX_CHANGE_COOKIE;
> > >  	tmp.stx_blksize = stat->blksize;
> > > -	tmp.stx_attributes = stat->attributes;
> > > +	/* STATX_ATTR_CHANGE_MONOTONIC is kernel-only for now */
> > > +	tmp.stx_attributes = stat->attributes & ~STATX_ATTR_CHANGE_MONOTONIC;
> > >  	tmp.stx_nlink = stat->nlink;
> > >  	tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
> > >  	tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
> > > @@ -643,6 +651,11 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags,
> > >  	if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
> > >  		return -EINVAL;
> > >  
> > > +	/* STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests
> > > +	 * from userland.
> > > +	 */
> > > +	mask &= ~STATX_CHANGE_COOKIE;
> > > +
> > >  	error = vfs_statx(dfd, filename, flags, &stat, mask);
> > >  	if (error)
> > >  		return error;
> > > diff --git a/include/linux/stat.h b/include/linux/stat.h
> > > index ff277ced50e9..52150570d37a 100644
> > > --- a/include/linux/stat.h
> > > +++ b/include/linux/stat.h
> > 
> > Sorry being late to the party once again...
> > 
> > > @@ -52,6 +52,15 @@ struct kstat {
> > >  	u64		mnt_id;
> > >  	u32		dio_mem_align;
> > >  	u32		dio_offset_align;
> > > +	u64		change_cookie;
> > >  };
> > >  
> > > +/* These definitions are internal to the kernel for now. Mainly used by nfsd. */
> > > +
> > > +/* mask values */
> > > +#define STATX_CHANGE_COOKIE		0x40000000U	/* Want/got stx_change_attr */
> > > +
> > > +/* file attribute values */
> > > +#define STATX_ATTR_CHANGE_MONOTONIC	0x8000000000000000ULL /* version monotonically increases */
> > 
> > maybe it would be better to copy what we do for SB_* vs SB_I_* flags and
> > at least rename them to:
> > 
> > STATX_I_CHANGE_COOKIE
> > STATX_I_ATTR_CHANGE_MONOTONIC
> > i_change_cookie
> 
> An "i_"/"I_" prefix says "inode" to me. Maybe I've been at this too
> long. ;)
> 
> > 
> > to visually distinguish internal and external flags.
> > 
> > And also if possible it might be useful to move STATX_I_* flags to the
> > higher 32 bits and then one can use upper_32_bits to retrieve kernel
> > internal flags and lower_32_bits for userspace flags in tiny wrappers.
> > 
> > (I did something similar for clone3() a few years ago but there to
> > distinguish between flags available both in clone() and clone3() and
> > such that are only available in clone3().)
> > 
> > But just a thought. I mostly worry about accidently leaking this to
> > userspace so ideally we'd even have separate fields in struct kstat for
> > internal and external attributes but that might bump kstat size, though
> > I don't think struct kstat is actually ever really allocated all that
> > much.
> 
> I'm not sure that the internal/external distinction matters much for
> filesystem providers or consumers of it. The place that it matters is at
> the userland interface level, where statx or something similar is
> called.
> 
> At some point we may want to make STATX_CHANGE_COOKIE queryable via
> statx, at which point we'll have to have a big flag day where we do
> s/STATX_I_CHANGE_COOKIE/STATX_CHANGE_COOKIE/.
> 
> I don't think it's worth it here.

I'm not fond of internal and external STATX_* flags having the exact
same name but fine. :)
diff mbox series

Patch

diff --git a/fs/stat.c b/fs/stat.c
index d6cc74ca8486..f43afe0081fe 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -18,6 +18,7 @@ 
 #include <linux/syscalls.h>
 #include <linux/pagemap.h>
 #include <linux/compat.h>
+#include <linux/iversion.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -122,6 +123,11 @@  int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
 	stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
 				  STATX_ATTR_DAX);
 
+	if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
+		stat->result_mask |= STATX_CHANGE_COOKIE;
+		stat->change_cookie = inode_query_iversion(inode);
+	}
+
 	mnt_userns = mnt_user_ns(path->mnt);
 	if (inode->i_op->getattr)
 		return inode->i_op->getattr(mnt_userns, path, stat,
@@ -602,9 +608,11 @@  cp_statx(const struct kstat *stat, struct statx __user *buffer)
 
 	memset(&tmp, 0, sizeof(tmp));
 
-	tmp.stx_mask = stat->result_mask;
+	/* STATX_CHANGE_COOKIE is kernel-only for now */
+	tmp.stx_mask = stat->result_mask & ~STATX_CHANGE_COOKIE;
 	tmp.stx_blksize = stat->blksize;
-	tmp.stx_attributes = stat->attributes;
+	/* STATX_ATTR_CHANGE_MONOTONIC is kernel-only for now */
+	tmp.stx_attributes = stat->attributes & ~STATX_ATTR_CHANGE_MONOTONIC;
 	tmp.stx_nlink = stat->nlink;
 	tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
 	tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
@@ -643,6 +651,11 @@  int do_statx(int dfd, struct filename *filename, unsigned int flags,
 	if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
 		return -EINVAL;
 
+	/* STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests
+	 * from userland.
+	 */
+	mask &= ~STATX_CHANGE_COOKIE;
+
 	error = vfs_statx(dfd, filename, flags, &stat, mask);
 	if (error)
 		return error;
diff --git a/include/linux/stat.h b/include/linux/stat.h
index ff277ced50e9..52150570d37a 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -52,6 +52,15 @@  struct kstat {
 	u64		mnt_id;
 	u32		dio_mem_align;
 	u32		dio_offset_align;
+	u64		change_cookie;
 };
 
+/* These definitions are internal to the kernel for now. Mainly used by nfsd. */
+
+/* mask values */
+#define STATX_CHANGE_COOKIE		0x40000000U	/* Want/got stx_change_attr */
+
+/* file attribute values */
+#define STATX_ATTR_CHANGE_MONOTONIC	0x8000000000000000ULL /* version monotonically increases */
+
 #endif