diff mbox series

[v4,09/11] fs: handle delegated timestamps in setattr_copy_mgtime

Message ID 20240905-delstid-v4-9-d3e5fd34d107@kernel.org (mailing list archive)
State New
Headers show
Series nfsd: implement the "delstid" draft | expand

Commit Message

Jeff Layton Sept. 5, 2024, 12:41 p.m. UTC
When updating the ctime on an inode for a SETATTR with a multigrain
filesystem, we usually want to take the latest time we can get for the
ctime. The exception to this rule is when there is a nfsd write
delegation and the server is proxying timestamps from the client.

When nfsd gets a CB_GETATTR response, we want to update the timestamp
value in the inode to the values that the client is tracking. The client
doesn't send a ctime value (since that's always determined by the
exported filesystem), but it can send a mtime value. In the case where
it does, then we may need to update the ctime to a value commensurate
with that instead of the current time.

If ATTR_DELEG is set, then use ia_ctime value instead of setting the
timestamp to the current time.

With the addition of delegated timestamps we can also receive a request
to update only the atime, but we may not need to set the ctime. Trust
the ATTR_CTIME flag in the update and only update the ctime when it's
set.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/attr.c          | 28 +++++++++++++--------
 fs/inode.c         | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |  2 ++
 3 files changed, 94 insertions(+), 10 deletions(-)

Comments

Chuck Lever Sept. 5, 2024, 3:44 p.m. UTC | #1
On Thu, Sep 05, 2024 at 08:41:53AM -0400, Jeff Layton wrote:
> When updating the ctime on an inode for a SETATTR with a multigrain
> filesystem, we usually want to take the latest time we can get for the
> ctime. The exception to this rule is when there is a nfsd write
> delegation and the server is proxying timestamps from the client.
> 
> When nfsd gets a CB_GETATTR response, we want to update the timestamp
> value in the inode to the values that the client is tracking. The client
> doesn't send a ctime value (since that's always determined by the
> exported filesystem), but it can send a mtime value. In the case where
> it does, then we may need to update the ctime to a value commensurate
> with that instead of the current time.
> 
> If ATTR_DELEG is set, then use ia_ctime value instead of setting the
> timestamp to the current time.
> 
> With the addition of delegated timestamps we can also receive a request
> to update only the atime, but we may not need to set the ctime. Trust
> the ATTR_CTIME flag in the update and only update the ctime when it's
> set.
> 
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/attr.c          | 28 +++++++++++++--------
>  fs/inode.c         | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/fs.h |  2 ++
>  3 files changed, 94 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/attr.c b/fs/attr.c
> index 3bcbc45708a3..392eb62aa609 100644
> --- a/fs/attr.c
> +++ b/fs/attr.c
> @@ -286,16 +286,20 @@ static void setattr_copy_mgtime(struct inode *inode, const struct iattr *attr)
>  	unsigned int ia_valid = attr->ia_valid;
>  	struct timespec64 now;
>  
> -	/*
> -	 * If the ctime isn't being updated then nothing else should be
> -	 * either.
> -	 */
> -	if (!(ia_valid & ATTR_CTIME)) {
> -		WARN_ON_ONCE(ia_valid & (ATTR_ATIME|ATTR_MTIME));
> -		return;
> +	if (ia_valid & ATTR_CTIME) {
> +		/*
> +		 * In the case of an update for a write delegation, we must respect
> +		 * the value in ia_ctime and not use the current time.
> +		 */
> +		if (ia_valid & ATTR_DELEG)
> +			now = inode_set_ctime_deleg(inode, attr->ia_ctime);
> +		else
> +			now = inode_set_ctime_current(inode);
> +	} else {
> +		/* If ATTR_CTIME isn't set, then ATTR_MTIME shouldn't be either. */
> +		WARN_ON_ONCE(ia_valid & ATTR_MTIME);
>  	}
>  
> -	now = inode_set_ctime_current(inode);
>  	if (ia_valid & ATTR_ATIME_SET)
>  		inode_set_atime_to_ts(inode, attr->ia_atime);
>  	else if (ia_valid & ATTR_ATIME)
> @@ -354,8 +358,12 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode,
>  		inode_set_atime_to_ts(inode, attr->ia_atime);
>  	if (ia_valid & ATTR_MTIME)
>  		inode_set_mtime_to_ts(inode, attr->ia_mtime);
> -	if (ia_valid & ATTR_CTIME)
> -		inode_set_ctime_to_ts(inode, attr->ia_ctime);
> +	if (ia_valid & ATTR_CTIME) {
> +		if (ia_valid & ATTR_DELEG)
> +			inode_set_ctime_deleg(inode, attr->ia_ctime);
> +		else
> +			inode_set_ctime_to_ts(inode, attr->ia_ctime);
> +	}
>  }
>  EXPORT_SYMBOL(setattr_copy);
>  

This patch fails to apply cleanly to my copy of nfsd-next:

  error: `git apply --index`: error: patch failed: fs/attr.c:286
  error: fs/attr.c: patch does not apply

Before I try jiggling this to get it to apply, is there anything
I should know? I worry about a potential merge conflict here,
hopefully it will be no more complicated than that.


> diff --git a/fs/inode.c b/fs/inode.c
> index 01f7df1973bd..f0fbfd470d8e 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -2835,6 +2835,80 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
>  }
>  EXPORT_SYMBOL(inode_set_ctime_current);
>  
> +/**
> + * inode_set_ctime_deleg - try to update the ctime on a delegated inode
> + * @inode: inode to update
> + * @update: timespec64 to set the ctime
> + *
> + * Attempt to atomically update the ctime on behalf of a delegation holder.
> + *
> + * The nfs server can call back the holder of a delegation to get updated
> + * inode attributes, including the mtime. When updating the mtime we may
> + * need to update the ctime to a value at least equal to that.
> + *
> + * This can race with concurrent updates to the inode, in which
> + * case we just don't do the update.
> + *
> + * Note that this works even when multigrain timestamps are not enabled,
> + * so use it in either case.
> + */
> +struct timespec64 inode_set_ctime_deleg(struct inode *inode, struct timespec64 update)
> +{
> +	ktime_t now, floor = atomic64_read(&ctime_floor);
> +	struct timespec64 now_ts, cur_ts;
> +	u32 cur, old;
> +
> +	/* pairs with try_cmpxchg below */
> +	cur = smp_load_acquire(&inode->i_ctime_nsec);
> +	cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
> +	cur_ts.tv_sec = inode->i_ctime_sec;
> +
> +	/* If the update is older than the existing value, skip it. */
> +	if (timespec64_compare(&update, &cur_ts) <= 0)
> +		return cur_ts;
> +
> +	now = coarse_ctime(floor);
> +	now_ts = ktime_to_timespec64(now);
> +
> +	/* Clamp the update to "now" if it's in the future */
> +	if (timespec64_compare(&update, &now_ts) > 0)
> +		update = now_ts;
> +
> +	update = timestamp_truncate(update, inode);
> +
> +	/* No need to update if the values are already the same */
> +	if (timespec64_equal(&update, &cur_ts))
> +		return cur_ts;
> +
> +	/*
> +	 * Try to swap the nsec value into place. If it fails, that means
> +	 * we raced with an update due to a write or similar activity. That
> +	 * stamp takes precedence, so just skip the update.
> +	 */
> +retry:
> +	old = cur;
> +	if (try_cmpxchg(&inode->i_ctime_nsec, &cur, update.tv_nsec)) {
> +		inode->i_ctime_sec = update.tv_sec;
> +		mgtime_counter_inc(mg_ctime_swaps);
> +		return update;
> +	}
> +
> +	/*
> +	 * Was the change due to someone marking the old ctime QUERIED?
> +	 * If so then retry the swap. This can only happen once since
> +	 * the only way to clear I_CTIME_QUERIED is to stamp the inode
> +	 * with a new ctime.
> +	 */
> +	if (!(old & I_CTIME_QUERIED) && (cur == (old | I_CTIME_QUERIED)))
> +		goto retry;
> +
> +	/* Otherwise, it was a new timestamp. */
> +	cur_ts.tv_sec = inode->i_ctime_sec;
> +	cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
> +	return cur_ts;
> +}
> +EXPORT_SYMBOL(inode_set_ctime_deleg);
> +
>  /**
>   * in_group_or_capable - check whether caller is CAP_FSETID privileged
>   * @idmap:	idmap of the mount @inode was found from
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index eff688e75f2f..ea7ed437d2b1 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1544,6 +1544,8 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
>  
>  struct timespec64 current_time(struct inode *inode);
>  struct timespec64 inode_set_ctime_current(struct inode *inode);
> +struct timespec64 inode_set_ctime_deleg(struct inode *inode,
> +					struct timespec64 update);
>  
>  static inline time64_t inode_get_atime_sec(const struct inode *inode)
>  {
> 
> -- 
> 2.46.0
>
Jeff Layton Sept. 5, 2024, 5:46 p.m. UTC | #2
On Thu, 2024-09-05 at 11:44 -0400, Chuck Lever wrote:
> On Thu, Sep 05, 2024 at 08:41:53AM -0400, Jeff Layton wrote:
> > When updating the ctime on an inode for a SETATTR with a multigrain
> > filesystem, we usually want to take the latest time we can get for the
> > ctime. The exception to this rule is when there is a nfsd write
> > delegation and the server is proxying timestamps from the client.
> > 
> > When nfsd gets a CB_GETATTR response, we want to update the timestamp
> > value in the inode to the values that the client is tracking. The client
> > doesn't send a ctime value (since that's always determined by the
> > exported filesystem), but it can send a mtime value. In the case where
> > it does, then we may need to update the ctime to a value commensurate
> > with that instead of the current time.
> > 
> > If ATTR_DELEG is set, then use ia_ctime value instead of setting the
> > timestamp to the current time.
> > 
> > With the addition of delegated timestamps we can also receive a request
> > to update only the atime, but we may not need to set the ctime. Trust
> > the ATTR_CTIME flag in the update and only update the ctime when it's
> > set.
> > 
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> >  fs/attr.c          | 28 +++++++++++++--------
> >  fs/inode.c         | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  include/linux/fs.h |  2 ++
> >  3 files changed, 94 insertions(+), 10 deletions(-)
> > 
> > diff --git a/fs/attr.c b/fs/attr.c
> > index 3bcbc45708a3..392eb62aa609 100644
> > --- a/fs/attr.c
> > +++ b/fs/attr.c
> > @@ -286,16 +286,20 @@ static void setattr_copy_mgtime(struct inode *inode, const struct iattr *attr)
> >  	unsigned int ia_valid = attr->ia_valid;
> >  	struct timespec64 now;
> >  
> > -	/*
> > -	 * If the ctime isn't being updated then nothing else should be
> > -	 * either.
> > -	 */
> > -	if (!(ia_valid & ATTR_CTIME)) {
> > -		WARN_ON_ONCE(ia_valid & (ATTR_ATIME|ATTR_MTIME));
> > -		return;
> > +	if (ia_valid & ATTR_CTIME) {
> > +		/*
> > +		 * In the case of an update for a write delegation, we must respect
> > +		 * the value in ia_ctime and not use the current time.
> > +		 */
> > +		if (ia_valid & ATTR_DELEG)
> > +			now = inode_set_ctime_deleg(inode, attr->ia_ctime);
> > +		else
> > +			now = inode_set_ctime_current(inode);
> > +	} else {
> > +		/* If ATTR_CTIME isn't set, then ATTR_MTIME shouldn't be either. */
> > +		WARN_ON_ONCE(ia_valid & ATTR_MTIME);
> >  	}
> >  
> > -	now = inode_set_ctime_current(inode);
> >  	if (ia_valid & ATTR_ATIME_SET)
> >  		inode_set_atime_to_ts(inode, attr->ia_atime);
> >  	else if (ia_valid & ATTR_ATIME)
> > @@ -354,8 +358,12 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode,
> >  		inode_set_atime_to_ts(inode, attr->ia_atime);
> >  	if (ia_valid & ATTR_MTIME)
> >  		inode_set_mtime_to_ts(inode, attr->ia_mtime);
> > -	if (ia_valid & ATTR_CTIME)
> > -		inode_set_ctime_to_ts(inode, attr->ia_ctime);
> > +	if (ia_valid & ATTR_CTIME) {
> > +		if (ia_valid & ATTR_DELEG)
> > +			inode_set_ctime_deleg(inode, attr->ia_ctime);
> > +		else
> > +			inode_set_ctime_to_ts(inode, attr->ia_ctime);
> > +	}
> >  }
> >  EXPORT_SYMBOL(setattr_copy);
> >  
> 
> This patch fails to apply cleanly to my copy of nfsd-next:
> 
>   error: `git apply --index`: error: patch failed: fs/attr.c:286
>   error: fs/attr.c: patch does not apply
> 
> Before I try jiggling this to get it to apply, is there anything
> I should know? I worry about a potential merge conflict here,
> hopefully it will be no more complicated than that.
> 
> 

This is based on a combo of your nfsd-next branch and Christian's
vfs.mgtime branch. This patch in particular depends on the multigrain
patches in his tree.

I think we can just drop this patch from the series in your tree, and
I'll get Christian to pick it up in his tree. The ctime setting will be
a bit racy without it but everything should still work.

Sound ok? Christian, are you OK with picking this up in vfs.mgtime?

> > diff --git a/fs/inode.c b/fs/inode.c
> > index 01f7df1973bd..f0fbfd470d8e 100644
> > --- a/fs/inode.c
> > +++ b/fs/inode.c
> > @@ -2835,6 +2835,80 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
> >  }
> >  EXPORT_SYMBOL(inode_set_ctime_current);
> >  
> > +/**
> > + * inode_set_ctime_deleg - try to update the ctime on a delegated inode
> > + * @inode: inode to update
> > + * @update: timespec64 to set the ctime
> > + *
> > + * Attempt to atomically update the ctime on behalf of a delegation holder.
> > + *
> > + * The nfs server can call back the holder of a delegation to get updated
> > + * inode attributes, including the mtime. When updating the mtime we may
> > + * need to update the ctime to a value at least equal to that.
> > + *
> > + * This can race with concurrent updates to the inode, in which
> > + * case we just don't do the update.
> > + *
> > + * Note that this works even when multigrain timestamps are not enabled,
> > + * so use it in either case.
> > + */
> > +struct timespec64 inode_set_ctime_deleg(struct inode *inode, struct timespec64 update)
> > +{
> > +	ktime_t now, floor = atomic64_read(&ctime_floor);
> > +	struct timespec64 now_ts, cur_ts;
> > +	u32 cur, old;
> > +
> > +	/* pairs with try_cmpxchg below */
> > +	cur = smp_load_acquire(&inode->i_ctime_nsec);
> > +	cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
> > +	cur_ts.tv_sec = inode->i_ctime_sec;
> > +
> > +	/* If the update is older than the existing value, skip it. */
> > +	if (timespec64_compare(&update, &cur_ts) <= 0)
> > +		return cur_ts;
> > +
> > +	now = coarse_ctime(floor);
> > +	now_ts = ktime_to_timespec64(now);
> > +
> > +	/* Clamp the update to "now" if it's in the future */
> > +	if (timespec64_compare(&update, &now_ts) > 0)
> > +		update = now_ts;
> > +
> > +	update = timestamp_truncate(update, inode);
> > +
> > +	/* No need to update if the values are already the same */
> > +	if (timespec64_equal(&update, &cur_ts))
> > +		return cur_ts;
> > +
> > +	/*
> > +	 * Try to swap the nsec value into place. If it fails, that means
> > +	 * we raced with an update due to a write or similar activity. That
> > +	 * stamp takes precedence, so just skip the update.
> > +	 */
> > +retry:
> > +	old = cur;
> > +	if (try_cmpxchg(&inode->i_ctime_nsec, &cur, update.tv_nsec)) {
> > +		inode->i_ctime_sec = update.tv_sec;
> > +		mgtime_counter_inc(mg_ctime_swaps);
> > +		return update;
> > +	}
> > +
> > +	/*
> > +	 * Was the change due to someone marking the old ctime QUERIED?
> > +	 * If so then retry the swap. This can only happen once since
> > +	 * the only way to clear I_CTIME_QUERIED is to stamp the inode
> > +	 * with a new ctime.
> > +	 */
> > +	if (!(old & I_CTIME_QUERIED) && (cur == (old | I_CTIME_QUERIED)))
> > +		goto retry;
> > +
> > +	/* Otherwise, it was a new timestamp. */
> > +	cur_ts.tv_sec = inode->i_ctime_sec;
> > +	cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
> > +	return cur_ts;
> > +}
> > +EXPORT_SYMBOL(inode_set_ctime_deleg);
> > +
> >  /**
> >   * in_group_or_capable - check whether caller is CAP_FSETID privileged
> >   * @idmap:	idmap of the mount @inode was found from
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index eff688e75f2f..ea7ed437d2b1 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -1544,6 +1544,8 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
> >  
> >  struct timespec64 current_time(struct inode *inode);
> >  struct timespec64 inode_set_ctime_current(struct inode *inode);
> > +struct timespec64 inode_set_ctime_deleg(struct inode *inode,
> > +					struct timespec64 update);
> >  
> >  static inline time64_t inode_get_atime_sec(const struct inode *inode)
> >  {
> > 
> > -- 
> > 2.46.0
> > 
>
Christian Brauner Sept. 7, 2024, 10:21 a.m. UTC | #3
On Thu, Sep 05, 2024 at 01:46:14PM GMT, Jeff Layton wrote:
> On Thu, 2024-09-05 at 11:44 -0400, Chuck Lever wrote:
> > On Thu, Sep 05, 2024 at 08:41:53AM -0400, Jeff Layton wrote:
> > > When updating the ctime on an inode for a SETATTR with a multigrain
> > > filesystem, we usually want to take the latest time we can get for the
> > > ctime. The exception to this rule is when there is a nfsd write
> > > delegation and the server is proxying timestamps from the client.
> > > 
> > > When nfsd gets a CB_GETATTR response, we want to update the timestamp
> > > value in the inode to the values that the client is tracking. The client
> > > doesn't send a ctime value (since that's always determined by the
> > > exported filesystem), but it can send a mtime value. In the case where
> > > it does, then we may need to update the ctime to a value commensurate
> > > with that instead of the current time.
> > > 
> > > If ATTR_DELEG is set, then use ia_ctime value instead of setting the
> > > timestamp to the current time.
> > > 
> > > With the addition of delegated timestamps we can also receive a request
> > > to update only the atime, but we may not need to set the ctime. Trust
> > > the ATTR_CTIME flag in the update and only update the ctime when it's
> > > set.
> > > 
> > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > ---
> > >  fs/attr.c          | 28 +++++++++++++--------
> > >  fs/inode.c         | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  include/linux/fs.h |  2 ++
> > >  3 files changed, 94 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/fs/attr.c b/fs/attr.c
> > > index 3bcbc45708a3..392eb62aa609 100644
> > > --- a/fs/attr.c
> > > +++ b/fs/attr.c
> > > @@ -286,16 +286,20 @@ static void setattr_copy_mgtime(struct inode *inode, const struct iattr *attr)
> > >  	unsigned int ia_valid = attr->ia_valid;
> > >  	struct timespec64 now;
> > >  
> > > -	/*
> > > -	 * If the ctime isn't being updated then nothing else should be
> > > -	 * either.
> > > -	 */
> > > -	if (!(ia_valid & ATTR_CTIME)) {
> > > -		WARN_ON_ONCE(ia_valid & (ATTR_ATIME|ATTR_MTIME));
> > > -		return;
> > > +	if (ia_valid & ATTR_CTIME) {
> > > +		/*
> > > +		 * In the case of an update for a write delegation, we must respect
> > > +		 * the value in ia_ctime and not use the current time.
> > > +		 */
> > > +		if (ia_valid & ATTR_DELEG)
> > > +			now = inode_set_ctime_deleg(inode, attr->ia_ctime);
> > > +		else
> > > +			now = inode_set_ctime_current(inode);
> > > +	} else {
> > > +		/* If ATTR_CTIME isn't set, then ATTR_MTIME shouldn't be either. */
> > > +		WARN_ON_ONCE(ia_valid & ATTR_MTIME);
> > >  	}
> > >  
> > > -	now = inode_set_ctime_current(inode);
> > >  	if (ia_valid & ATTR_ATIME_SET)
> > >  		inode_set_atime_to_ts(inode, attr->ia_atime);
> > >  	else if (ia_valid & ATTR_ATIME)
> > > @@ -354,8 +358,12 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode,
> > >  		inode_set_atime_to_ts(inode, attr->ia_atime);
> > >  	if (ia_valid & ATTR_MTIME)
> > >  		inode_set_mtime_to_ts(inode, attr->ia_mtime);
> > > -	if (ia_valid & ATTR_CTIME)
> > > -		inode_set_ctime_to_ts(inode, attr->ia_ctime);
> > > +	if (ia_valid & ATTR_CTIME) {
> > > +		if (ia_valid & ATTR_DELEG)
> > > +			inode_set_ctime_deleg(inode, attr->ia_ctime);
> > > +		else
> > > +			inode_set_ctime_to_ts(inode, attr->ia_ctime);
> > > +	}
> > >  }
> > >  EXPORT_SYMBOL(setattr_copy);
> > >  
> > 
> > This patch fails to apply cleanly to my copy of nfsd-next:
> > 
> >   error: `git apply --index`: error: patch failed: fs/attr.c:286
> >   error: fs/attr.c: patch does not apply
> > 
> > Before I try jiggling this to get it to apply, is there anything
> > I should know? I worry about a potential merge conflict here,
> > hopefully it will be no more complicated than that.
> > 
> > 
> 
> This is based on a combo of your nfsd-next branch and Christian's
> vfs.mgtime branch. This patch in particular depends on the multigrain
> patches in his tree.
> 
> I think we can just drop this patch from the series in your tree, and
> I'll get Christian to pick it up in his tree. The ctime setting will be
> a bit racy without it but everything should still work.
> 
> Sound ok? Christian, are you OK with picking this up in vfs.mgtime?

Yep, of course. Already picked it up.
Christian Brauner Sept. 7, 2024, 10:22 a.m. UTC | #4
On Thu, 05 Sep 2024 08:41:53 -0400, Jeff Layton wrote:
> When updating the ctime on an inode for a SETATTR with a multigrain
> filesystem, we usually want to take the latest time we can get for the
> ctime. The exception to this rule is when there is a nfsd write
> delegation and the server is proxying timestamps from the client.
> 
> When nfsd gets a CB_GETATTR response, we want to update the timestamp
> value in the inode to the values that the client is tracking. The client
> doesn't send a ctime value (since that's always determined by the
> exported filesystem), but it can send a mtime value. In the case where
> it does, then we may need to update the ctime to a value commensurate
> with that instead of the current time.
> 
> [...]

Applied to the vfs.mgtime branch of the vfs/vfs.git tree.
Patches in the vfs.mgtime branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs.mgtime

[09/11] fs: handle delegated timestamps in setattr_copy_mgtime
        https://git.kernel.org/vfs/vfs/c/b0d5ea249d88
diff mbox series

Patch

diff --git a/fs/attr.c b/fs/attr.c
index 3bcbc45708a3..392eb62aa609 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -286,16 +286,20 @@  static void setattr_copy_mgtime(struct inode *inode, const struct iattr *attr)
 	unsigned int ia_valid = attr->ia_valid;
 	struct timespec64 now;
 
-	/*
-	 * If the ctime isn't being updated then nothing else should be
-	 * either.
-	 */
-	if (!(ia_valid & ATTR_CTIME)) {
-		WARN_ON_ONCE(ia_valid & (ATTR_ATIME|ATTR_MTIME));
-		return;
+	if (ia_valid & ATTR_CTIME) {
+		/*
+		 * In the case of an update for a write delegation, we must respect
+		 * the value in ia_ctime and not use the current time.
+		 */
+		if (ia_valid & ATTR_DELEG)
+			now = inode_set_ctime_deleg(inode, attr->ia_ctime);
+		else
+			now = inode_set_ctime_current(inode);
+	} else {
+		/* If ATTR_CTIME isn't set, then ATTR_MTIME shouldn't be either. */
+		WARN_ON_ONCE(ia_valid & ATTR_MTIME);
 	}
 
-	now = inode_set_ctime_current(inode);
 	if (ia_valid & ATTR_ATIME_SET)
 		inode_set_atime_to_ts(inode, attr->ia_atime);
 	else if (ia_valid & ATTR_ATIME)
@@ -354,8 +358,12 @@  void setattr_copy(struct mnt_idmap *idmap, struct inode *inode,
 		inode_set_atime_to_ts(inode, attr->ia_atime);
 	if (ia_valid & ATTR_MTIME)
 		inode_set_mtime_to_ts(inode, attr->ia_mtime);
-	if (ia_valid & ATTR_CTIME)
-		inode_set_ctime_to_ts(inode, attr->ia_ctime);
+	if (ia_valid & ATTR_CTIME) {
+		if (ia_valid & ATTR_DELEG)
+			inode_set_ctime_deleg(inode, attr->ia_ctime);
+		else
+			inode_set_ctime_to_ts(inode, attr->ia_ctime);
+	}
 }
 EXPORT_SYMBOL(setattr_copy);
 
diff --git a/fs/inode.c b/fs/inode.c
index 01f7df1973bd..f0fbfd470d8e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2835,6 +2835,80 @@  struct timespec64 inode_set_ctime_current(struct inode *inode)
 }
 EXPORT_SYMBOL(inode_set_ctime_current);
 
+/**
+ * inode_set_ctime_deleg - try to update the ctime on a delegated inode
+ * @inode: inode to update
+ * @update: timespec64 to set the ctime
+ *
+ * Attempt to atomically update the ctime on behalf of a delegation holder.
+ *
+ * The nfs server can call back the holder of a delegation to get updated
+ * inode attributes, including the mtime. When updating the mtime we may
+ * need to update the ctime to a value at least equal to that.
+ *
+ * This can race with concurrent updates to the inode, in which
+ * case we just don't do the update.
+ *
+ * Note that this works even when multigrain timestamps are not enabled,
+ * so use it in either case.
+ */
+struct timespec64 inode_set_ctime_deleg(struct inode *inode, struct timespec64 update)
+{
+	ktime_t now, floor = atomic64_read(&ctime_floor);
+	struct timespec64 now_ts, cur_ts;
+	u32 cur, old;
+
+	/* pairs with try_cmpxchg below */
+	cur = smp_load_acquire(&inode->i_ctime_nsec);
+	cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
+	cur_ts.tv_sec = inode->i_ctime_sec;
+
+	/* If the update is older than the existing value, skip it. */
+	if (timespec64_compare(&update, &cur_ts) <= 0)
+		return cur_ts;
+
+	now = coarse_ctime(floor);
+	now_ts = ktime_to_timespec64(now);
+
+	/* Clamp the update to "now" if it's in the future */
+	if (timespec64_compare(&update, &now_ts) > 0)
+		update = now_ts;
+
+	update = timestamp_truncate(update, inode);
+
+	/* No need to update if the values are already the same */
+	if (timespec64_equal(&update, &cur_ts))
+		return cur_ts;
+
+	/*
+	 * Try to swap the nsec value into place. If it fails, that means
+	 * we raced with an update due to a write or similar activity. That
+	 * stamp takes precedence, so just skip the update.
+	 */
+retry:
+	old = cur;
+	if (try_cmpxchg(&inode->i_ctime_nsec, &cur, update.tv_nsec)) {
+		inode->i_ctime_sec = update.tv_sec;
+		mgtime_counter_inc(mg_ctime_swaps);
+		return update;
+	}
+
+	/*
+	 * Was the change due to someone marking the old ctime QUERIED?
+	 * If so then retry the swap. This can only happen once since
+	 * the only way to clear I_CTIME_QUERIED is to stamp the inode
+	 * with a new ctime.
+	 */
+	if (!(old & I_CTIME_QUERIED) && (cur == (old | I_CTIME_QUERIED)))
+		goto retry;
+
+	/* Otherwise, it was a new timestamp. */
+	cur_ts.tv_sec = inode->i_ctime_sec;
+	cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
+	return cur_ts;
+}
+EXPORT_SYMBOL(inode_set_ctime_deleg);
+
 /**
  * in_group_or_capable - check whether caller is CAP_FSETID privileged
  * @idmap:	idmap of the mount @inode was found from
diff --git a/include/linux/fs.h b/include/linux/fs.h
index eff688e75f2f..ea7ed437d2b1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1544,6 +1544,8 @@  static inline bool fsuidgid_has_mapping(struct super_block *sb,
 
 struct timespec64 current_time(struct inode *inode);
 struct timespec64 inode_set_ctime_current(struct inode *inode);
+struct timespec64 inode_set_ctime_deleg(struct inode *inode,
+					struct timespec64 update);
 
 static inline time64_t inode_get_atime_sec(const struct inode *inode)
 {