
[01/10] fs: turn inode ctime fields into a single ktime_t

Message ID 20240626-mgtime-v1-1-a189352d0f8f@kernel.org (mailing list archive)
State Superseded
Series fs: multigrain timestamp redux

Commit Message

Jeff Layton June 27, 2024, 1 a.m. UTC
The ctime is not settable to arbitrary values. It always comes from the
system clock, so we'll never stamp an inode with a value that can't be
represented there. If we disregard people setting their system clock
past the year 2262, there is no reason we can't replace the ctime fields
with a ktime_t.

Switch the ctime fields to a single ktime_t. Move the i_generation down
above i_fsnotify_mask and then move the i_version into the resulting 8
byte hole. This shrinks struct inode by 8 bytes total, and should
improve the cache footprint as the i_version and ctime are usually
updated together.

The one downside I can see to switching to a ktime_t is that if someone
has a filesystem with files on it that have ctimes outside the ktime_t
range (before ~1678 AD or after ~2262 AD), we won't be able to display
them properly in stat() without some special treatment in the
filesystem. The operating assumption here is that that is not a
practical problem.
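
As a quick sanity check of that range (a back-of-the-envelope userspace
sketch for illustration, not part of the patch): ktime_t is a signed
64-bit count of nanoseconds from the Unix epoch, so:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* 2^63 - 1 nanoseconds on either side of 1970-01-01 */
		double years = (double)INT64_MAX / 1e9 / 86400.0 / 365.25;

		printf("+/- %.1f years => ~%.0f AD .. ~%.0f AD\n",
		       years, 1970.0 - years, 1970.0 + years);
		return 0;
	}

which works out to about +/- 292.3 years, i.e. roughly 1678 AD to 2262 AD.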

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 include/linux/fs.h | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

Comments

Darrick J. Wong July 1, 2024, 10:49 p.m. UTC | #1
On Wed, Jun 26, 2024 at 09:00:21PM -0400, Jeff Layton wrote:
> The ctime is not settable to arbitrary values. It always comes from the
> system clock, so we'll never stamp an inode with a value that can't be
> represented there. If we disregard people setting their system clock
> past the year 2262, there is no reason we can't replace the ctime fields
> with a ktime_t.
> 
> Switch the ctime fields to a single ktime_t. Move the i_generation down
> above i_fsnotify_mask and then move the i_version into the resulting 8
> byte hole. This shrinks struct inode by 8 bytes total, and should
> improve the cache footprint as the i_version and ctime are usually
> updated together.
> 
> The one downside I can see to switching to a ktime_t is that if someone
> has a filesystem with files on it that have ctimes outside the ktime_t
> range (before ~1678 AD or after ~2262 AD), we won't be able to display
> them properly in stat() without some special treatment in the
> filesystem. The operating assumption here is that that is not a
> practical problem.

What happens on a filesystem that can store ctimes beyond whatever
ktime_t supports (AFAICT 2^63-1 nanoseconds on either side of the Unix
epoch)?  I think the behavior with your patch is that ktime_set
clamps the ctime on iget because the kernel can't handle it?

It's a little surprising that the ctime will suddenly jump back in time
to 2262, but maybe you're right that nobody will notice or care? ;)

--D

> 
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  include/linux/fs.h | 26 +++++++++++---------------
>  1 file changed, 11 insertions(+), 15 deletions(-)
> 
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 5ff362277834..5139dec085f2 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -662,11 +662,10 @@ struct inode {
>  	loff_t			i_size;
>  	time64_t		i_atime_sec;
>  	time64_t		i_mtime_sec;
> -	time64_t		i_ctime_sec;
>  	u32			i_atime_nsec;
>  	u32			i_mtime_nsec;
> -	u32			i_ctime_nsec;
> -	u32			i_generation;
> +	ktime_t			__i_ctime;
> +	atomic64_t		i_version;
>  	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
>  	unsigned short          i_bytes;
>  	u8			i_blkbits;
> @@ -701,7 +700,6 @@ struct inode {
>  		struct hlist_head	i_dentry;
>  		struct rcu_head		i_rcu;
>  	};
> -	atomic64_t		i_version;
>  	atomic64_t		i_sequence; /* see futex */
>  	atomic_t		i_count;
>  	atomic_t		i_dio_count;
> @@ -724,6 +722,8 @@ struct inode {
>  	};
>  
>  
> +	u32			i_generation;
> +
>  #ifdef CONFIG_FSNOTIFY
>  	__u32			i_fsnotify_mask; /* all events this inode cares about */
>  	/* 32-bit hole reserved for expanding i_fsnotify_mask */
> @@ -1608,29 +1608,25 @@ static inline struct timespec64 inode_set_mtime(struct inode *inode,
>  	return inode_set_mtime_to_ts(inode, ts);
>  }
>  
> -static inline time64_t inode_get_ctime_sec(const struct inode *inode)
> +static inline struct timespec64 inode_get_ctime(const struct inode *inode)
>  {
> -	return inode->i_ctime_sec;
> +	return ktime_to_timespec64(inode->__i_ctime);
>  }
>  
> -static inline long inode_get_ctime_nsec(const struct inode *inode)
> +static inline time64_t inode_get_ctime_sec(const struct inode *inode)
>  {
> -	return inode->i_ctime_nsec;
> +	return inode_get_ctime(inode).tv_sec;
>  }
>  
> -static inline struct timespec64 inode_get_ctime(const struct inode *inode)
> +static inline long inode_get_ctime_nsec(const struct inode *inode)
>  {
> -	struct timespec64 ts = { .tv_sec  = inode_get_ctime_sec(inode),
> -				 .tv_nsec = inode_get_ctime_nsec(inode) };
> -
> -	return ts;
> +	return inode_get_ctime(inode).tv_nsec;
>  }
>  
>  static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
>  						      struct timespec64 ts)
>  {
> -	inode->i_ctime_sec = ts.tv_sec;
> -	inode->i_ctime_nsec = ts.tv_nsec;
> +	inode->__i_ctime = ktime_set(ts.tv_sec, ts.tv_nsec);
>  	return ts;
>  }
>  
> 
> -- 
> 2.45.2
> 
>
Jeff Layton July 2, 2024, 12:22 a.m. UTC | #2
On Mon, 2024-07-01 at 15:49 -0700, Darrick J. Wong wrote:
> On Wed, Jun 26, 2024 at 09:00:21PM -0400, Jeff Layton wrote:
> > The ctime is not settable to arbitrary values. It always comes from the
> > system clock, so we'll never stamp an inode with a value that can't be
> > represented there. If we disregard people setting their system clock
> > past the year 2262, there is no reason we can't replace the ctime fields
> > with a ktime_t.
> > 
> > Switch the ctime fields to a single ktime_t. Move the i_generation down
> > above i_fsnotify_mask and then move the i_version into the resulting 8
> > byte hole. This shrinks struct inode by 8 bytes total, and should
> > improve the cache footprint as the i_version and ctime are usually
> > updated together.
> > 
> > The one downside I can see to switching to a ktime_t is that if someone
> > has a filesystem with files on it that have ctimes outside the ktime_t
> > range (before ~1678 AD or after ~2262 AD), we won't be able to display
> > them properly in stat() without some special treatment in the
> > filesystem. The operating assumption here is that that is not a
> > practical problem.
> 
> What happens on a filesystem that can store ctimes beyond whatever
> ktime_t supports (AFAICT 2^63-1 nanoseconds on either side of the Unix
> epoch)?  I think the behavior with your patch is that ktime_set
> clamps the ctime on iget because the kernel can't handle it?
> 
> It's a little surprising that the ctime will suddenly jump back in time
> to 2262, but maybe you're right that nobody will notice or care? ;)
> 
> 

Yeah, it'd be clamped at KTIME_MAX when we pull in the inode from disk,
à la ktime_set.
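
For reference, ktime_set() as currently defined in include/linux/ktime.h
only saturates at the top end:

	static inline ktime_t ktime_set(const s64 secs, const unsigned long nsecs)
	{
		if (unlikely(secs >= KTIME_SEC_MAX))
			return KTIME_MAX;

		return secs * NSEC_PER_SEC + (s64)nsecs;
	}

so an out-of-range on-disk ctime comes back as KTIME_MAX rather than as
an error.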

I think it's important to note that the ctime is not settable from
userland, so if an on-disk ctime is outside of the ktime_t range, there
are only two possibilities:

1) the system clock was set to some time (far) in the future when the
file's metadata was last altered (bad clock? time traveling fs?).

...or...

2) the filesystem has been altered (fuzzing? deliberate doctoring?).

Neither of these seems like a legitimate use case, so I'm arguing that we
shouldn't worry about them.

(...ok maybe the time travel one could be legit, but someone needs to
step up and make a case for it, if so.)

> > 
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> >  include/linux/fs.h | 26 +++++++++++---------------
> >  1 file changed, 11 insertions(+), 15 deletions(-)
> > 
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index 5ff362277834..5139dec085f2 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -662,11 +662,10 @@ struct inode {
> >  	loff_t			i_size;
> >  	time64_t		i_atime_sec;
> >  	time64_t		i_mtime_sec;
> > -	time64_t		i_ctime_sec;
> >  	u32			i_atime_nsec;
> >  	u32			i_mtime_nsec;
> > -	u32			i_ctime_nsec;
> > -	u32			i_generation;
> > +	ktime_t			__i_ctime;
> > +	atomic64_t		i_version;
> >  	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
> >  	unsigned short          i_bytes;
> >  	u8			i_blkbits;
> > @@ -701,7 +700,6 @@ struct inode {
> >  		struct hlist_head	i_dentry;
> >  		struct rcu_head		i_rcu;
> >  	};
> > -	atomic64_t		i_version;
> >  	atomic64_t		i_sequence; /* see futex */
> >  	atomic_t		i_count;
> >  	atomic_t		i_dio_count;
> > @@ -724,6 +722,8 @@ struct inode {
> >  	};
> >  
> >  
> > +	u32			i_generation;
> > +
> >  #ifdef CONFIG_FSNOTIFY
> >  	__u32			i_fsnotify_mask; /* all events this inode cares about */
> >  	/* 32-bit hole reserved for expanding i_fsnotify_mask */
> > @@ -1608,29 +1608,25 @@ static inline struct timespec64 inode_set_mtime(struct inode *inode,
> >  	return inode_set_mtime_to_ts(inode, ts);
> >  }
> >  
> > -static inline time64_t inode_get_ctime_sec(const struct inode *inode)
> > +static inline struct timespec64 inode_get_ctime(const struct inode *inode)
> >  {
> > -	return inode->i_ctime_sec;
> > +	return ktime_to_timespec64(inode->__i_ctime);
> >  }
> >  
> > -static inline long inode_get_ctime_nsec(const struct inode *inode)
> > +static inline time64_t inode_get_ctime_sec(const struct inode *inode)
> >  {
> > -	return inode->i_ctime_nsec;
> > +	return inode_get_ctime(inode).tv_sec;
> >  }
> >  
> > -static inline struct timespec64 inode_get_ctime(const struct inode *inode)
> > +static inline long inode_get_ctime_nsec(const struct inode *inode)
> >  {
> > -	struct timespec64 ts = { .tv_sec  = inode_get_ctime_sec(inode),
> > -				 .tv_nsec = inode_get_ctime_nsec(inode) };
> > -
> > -	return ts;
> > +	return inode_get_ctime(inode).tv_nsec;
> >  }
> >  
> >  static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
> >  						      struct timespec64 ts)
> >  {
> > -	inode->i_ctime_sec = ts.tv_sec;
> > -	inode->i_ctime_nsec = ts.tv_nsec;
> > +	inode->__i_ctime = ktime_set(ts.tv_sec, ts.tv_nsec);
> >  	return ts;
> >  }
> >  
> > 
> > -- 
> > 2.45.2
> > 
> >
Christoph Hellwig July 2, 2024, 7:37 a.m. UTC | #3
On Mon, Jul 01, 2024 at 08:22:07PM -0400, Jeff Layton wrote:
> 2) the filesystem has been altered (fuzzing? deliberate doctoring?).
> 
> Neither of these seems like a legitimate use case, so I'm arguing that we
> shouldn't worry about them.

Not worrying seems like the wrong answer here.  Either we decide they
are legitimate enough and we preserve them, or we decide they are
bogus and refuse reading the inode.  But we'll need to consciously
deal with the case.
Jeff Layton July 2, 2024, 9:56 a.m. UTC | #4
On Tue, 2024-07-02 at 00:37 -0700, Christoph Hellwig wrote:
> On Mon, Jul 01, 2024 at 08:22:07PM -0400, Jeff Layton wrote:
> > 2) the filesystem has been altered (fuzzing? deliberate doctoring?).
> > 
> > Neither of these seems like a legitimate use case, so I'm arguing that we
> > shouldn't worry about them.
> 
> Not worrying seems like the wrong answer here.  Either we decide they
> are legitimate enough and we preserve them, or we decide they are
> bogus and refuse reading the inode.  But we'll need to consciously
> deal with the case.
> 

Is there a problem with consciously dealing with it by clamping the
time at KTIME_MAX? If I had a fs with corrupt timestamps, the last
thing I'd want is the filesystem refusing to let me at my data because
of them.
Jan Kara July 2, 2024, 10:19 a.m. UTC | #5
On Tue 02-07-24 05:56:37, Jeff Layton wrote:
> On Tue, 2024-07-02 at 00:37 -0700, Christoph Hellwig wrote:
> > On Mon, Jul 01, 2024 at 08:22:07PM -0400, Jeff Layton wrote:
> > > 2) the filesystem has been altered (fuzzing? deliberate doctoring?).
> > > 
> > > Neither of these seems like a legitimate use case, so I'm arguing that we
> > > shouldn't worry about them.
> > 
> > Not worrying seems like the wrong answer here.  Either we decide they
> > are legitimate enough and we preserve them, or we decide they are
> > bogus and refuse reading the inode.  But we'll need to consciously
> > deal with the case.
> > 
> 
> Is there a problem with consciously dealing with it by clamping the
> time at KTIME_MAX? If I had a fs with corrupt timestamps, the last
> thing I'd want is the filesystem refusing to let me at my data because
> of them.

Well, you could also view it differently: if I have a fs that corrupts
timestamps, the last thing I'd like is the kernel silently accepting it
without telling me about it :)

But more seriously, my filesystem development experience shows that if the
kernel silently tries to accept and fix up the breakage, it is nice in the
short term (no complaining users) but it tends to get ugly in the long term
(people tend to come up with nasty cases where it was wrong to fix it
up). So I think Christoph's idea of refusing to load inodes with ctimes out
of range makes sense. Or at least complain about it if nothing else (which
has some precedent in the year 2038 problem).

								Honza
Christian Brauner July 2, 2024, 11:42 a.m. UTC | #6
> if the
> kernel silently tries to accept and fix up the breakage, it is nice in the
> short term (no complaining users) but it tends to get ugly in the long term
> (people tend to come up with nasty cases where it was wrong to fix it
> up).

Yeah, very much agree. It works for simple APIs sometimes but it's
certainly not something we should just do for filesystems with actual
on-disk format.
Jeff Layton July 2, 2024, 11:44 a.m. UTC | #7
On Tue, 2024-07-02 at 12:19 +0200, Jan Kara wrote:
> On Tue 02-07-24 05:56:37, Jeff Layton wrote:
> > On Tue, 2024-07-02 at 00:37 -0700, Christoph Hellwig wrote:
> > > On Mon, Jul 01, 2024 at 08:22:07PM -0400, Jeff Layton wrote:
> > > > 2) the filesystem has been altered (fuzzing? deliberate doctoring?).
> > > > 
> > > > Neither of these seems like a legitimate use case, so I'm arguing that we
> > > > shouldn't worry about them.
> > > 
> > > Not worrying seems like the wrong answer here.  Either we decide they
> > > are legitimate enough and we preserve them, or we decide they are
> > > bogus and refuse reading the inode.  But we'll need to consciously
> > > deal with the case.
> > > 
> > 
> > Is there a problem with consciously dealing with it by clamping the
> > time at KTIME_MAX? If I had a fs with corrupt timestamps, the last
> > thing I'd want is the filesystem refusing to let me at my data because
> > of them.
> 
> Well, you could also view it differently: if I have a fs that corrupts
> timestamps, the last thing I'd like is the kernel silently accepting it
> without telling me about it :)
> 

Fair enough.

> But more seriously, my filesystem development experience shows that if the
> kernel silently tries to accept and fix up the breakage, it is nice in the
> short term (no complaining users) but it tends to get ugly in the long term
> (people tend to come up with nasty cases where it was wrong to fix it
> up). So I think Christoph's idea of refusing to load inodes with ctimes out
> of range makes sense. Or at least complain about it if nothing else (which
> has some precedent in the year 2038 problem).

Complaining about it is fairly simple. We could just throw a pr_warn in
inode_set_ctime_to_ts when the time comes back as KTIME_MAX. This might
also be what we need to do for filesystems like NFS, where a future
ctime on the server is not necessarily a problem for the client.
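
Something like this, say (a sketch of the warn-and-clamp variant; the
message text and the use of pr_warn_once are illustrative, not from the
posted series):

	static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
							      struct timespec64 ts)
	{
		inode->__i_ctime = ktime_set(ts.tv_sec, ts.tv_nsec);

		/* ktime_set() saturates at KTIME_MAX when ts.tv_sec is out of range */
		if (inode->__i_ctime == KTIME_MAX)
			pr_warn_once("inode %lu on %s: ctime out of ktime_t range, clamped\n",
				     inode->i_ino, inode->i_sb->s_id);
		return ts;
	}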

Refusing to load the inode on disk-based filesystems is harder, but is
probably possible. There are ~90 calls to inode_set_ctime_to_ts in the
kernel, so we'd need to vet the places that are loading times from disk
images or the like and fix them to return errors in this situation.

Is warning acceptable, or do we really need to reject inodes that have
corrupt timestamps like this?
Christoph Hellwig July 2, 2024, 12:04 p.m. UTC | #8
On Tue, Jul 02, 2024 at 07:44:19AM -0400, Jeff Layton wrote:
> Complaining about it is fairly simple. We could just throw a pr_warn in
> inode_set_ctime_to_ts when the time comes back as KTIME_MAX. This might
> also be what we need to do for filesystems like NFS, where a future
> ctime on the server is not necessarily a problem for the client.
> 
> Refusing to load the inode on disk-based filesystems is harder, but is
> probably possible. There are ~90 calls to inode_set_ctime_to_ts in the
> kernel, so we'd need to vet the places that are loading times from disk
> images or the like and fix them to return errors in this situation.
> 
> Is warning acceptable, or do we really need to reject inodes that have
> corrupt timestamps like this?

inode_set_ctime_to_ts should return an error if things are out of range.
How do we currently catch this when it comes from userland?
Jeff Layton July 2, 2024, 12:09 p.m. UTC | #9
On Tue, 2024-07-02 at 05:04 -0700, Christoph Hellwig wrote:
> On Tue, Jul 02, 2024 at 07:44:19AM -0400, Jeff Layton wrote:
> > Complaining about it is fairly simple. We could just throw a pr_warn in
> > inode_set_ctime_to_ts when the time comes back as KTIME_MAX. This might
> > also be what we need to do for filesystems like NFS, where a future
> > ctime on the server is not necessarily a problem for the client.
> > 
> > Refusing to load the inode on disk-based filesystems is harder, but is
> > probably possible. There are ~90 calls to inode_set_ctime_to_ts in the
> > kernel, so we'd need to vet the places that are loading times from disk
> > images or the like and fix them to return errors in this situation.
> > 
> > Is warning acceptable, or do we really need to reject inodes that have
> > corrupt timestamps like this?
> 
> inode_set_ctime_to_ts should return an error if things are out of range.

Currently it just returns the timespec64 we're setting it to (which
makes it easy to do several assignments), so we'd need to change its
prototype to handle this case, and fix up the callers to recognize the
error.

Alternatively, it may be easier to just add a test for
__i_ctime == KTIME_MAX in the appropriate callers and have them error
out. I'll have a look and see what makes sense.
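
For the caller-side test, something along these lines in a
fill-inode-from-disk path (a sketch; the helper name and the -EUCLEAN
return are assumptions):

	/* sketch: detect a clamped ctime after loading times from disk */
	static int example_validate_ctime(struct inode *inode, struct timespec64 disk_ts)
	{
		inode_set_ctime_to_ts(inode, disk_ts);

		/* ktime_set() returned KTIME_MAX: the on-disk value didn't fit
		 * (or happened to be exactly KTIME_MAX, which we treat the same) */
		if (inode->__i_ctime == KTIME_MAX)
			return -EUCLEAN;
		return 0;
	}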

> How do we currently catch this when it comes from userland?
> 

Not sure I understand this question. ctime values should never come
from userland. They should only ever come from the system clock.
Christoph Hellwig July 2, 2024, 12:15 p.m. UTC | #10
On Tue, Jul 02, 2024 at 08:09:46AM -0400, Jeff Layton wrote:
> > > corrupt timestamps like this?
> > 
> > inode_set_ctime_to_ts should return an error if things are out of range.
> 
> Currently it just returns the timespec64 we're setting it to (which
> makes it easy to do several assignments), so we'd need to change its
> prototype to handle this case, and fix up the callers to recognize the
> error.
> 
> Alternatively, it may be easier to just add a test for
> __i_ctime == KTIME_MAX in the appropriate callers and have them error
> out. I'll have a look and see what makes sense.

That seems like a more awkward interface vs. one that explicitly checks.

> 
> > How do we currently catch this when it comes from userland?
> > 
> 
> Not sure I understand this question. ctime values should never come
> from userland. They should only ever come from the system clock.

Ah, yes, utimes only changes mtime.
Jeff Layton July 2, 2024, 12:21 p.m. UTC | #11
On Tue, 2024-07-02 at 05:15 -0700, Christoph Hellwig wrote:
> On Tue, Jul 02, 2024 at 08:09:46AM -0400, Jeff Layton wrote:
> > > > corrupt timestamps like this?
> > > 
> > > inode_set_ctime_to_ts should return an error if things are out of
> > > range.
> > 
> > Currently it just returns the timespec64 we're setting it to (which
> > makes it easy to do several assignments), so we'd need to change
> > its
> > prototype to handle this case, and fix up the callers to recognize
> > the
> > error.
> > 
> > Alternatively, it may be easier to just add a test for
> > __i_ctime == KTIME_MAX in the appropriate callers and have them
> > error
> > out. I'll have a look and see what makes sense.
> 
> That seems like a more awkward interface vs. one that explicitly
> checks.
> 

Many of the existing callers of inode_set_ctime_to_ts are in void-return
functions. They're just copying data from an internal representation to
struct inode and assume it always succeeds. For those we'll probably
have to catch bad ctime values earlier.

So, I think I'll probably have to roll bespoke error handling in all of
the relevant filesystems if we go this route. There are also
differences between filesystems -- does it make sense to refuse to load
an inode with a bogus ctime on NFS or AFS? Probably not.

Hell, it may be simpler to just ditch this patch and reimplement
mgtimes using the nanosecond fields like the earlier versions did.

I'll need to study this a bit and figure out what's best.

> > 
> > > How do we currently catch this when it comes from userland?
> > > 
> > 
> > Not sure I understand this question. ctime values should never come
> > from userland. They should only ever come from the system clock.
> 
> Ah, yes, utimes only changes mtime.

Yep. That's the main reason I see the ctime as very different from the
atime or mtime.
Christoph Hellwig July 2, 2024, 3:12 p.m. UTC | #12
On Tue, Jul 02, 2024 at 08:21:42AM -0400, Jeff Layton wrote:
> Many of the existing callers of inode_set_ctime_to_ts are in void-return
> functions. They're just copying data from an internal representation to
> struct inode and assume it always succeeds. For those we'll probably
> have to catch bad ctime values earlier.
> 
> So, I think I'll probably have to roll bespoke error handling in all of
> the relevant filesystems if we go this route. There are also
> differences between filesystems -- does it make sense to refuse to load
> an inode with a bogus ctime on NFS or AFS? Probably not.
> 
> Hell, it may be simpler to just ditch this patch and reimplement
> mgtimes using the nanosecond fields like the earlier versions did.

That does for sure sound simpler.  What is the big advantage of the
ktime_t?  Smaller size?
Jeff Layton July 2, 2024, 3:58 p.m. UTC | #13
On Tue, 2024-07-02 at 08:12 -0700, Christoph Hellwig wrote:
> On Tue, Jul 02, 2024 at 08:21:42AM -0400, Jeff Layton wrote:
> > Many of the existing callers of inode_set_ctime_to_ts are in
> > void-return
> > functions. They're just copying data from an internal
> > representation to
> > struct inode and assume it always succeeds. For those we'll
> > probably
> > have to catch bad ctime values earlier.
> > 
> > So, I think I'll probably have to roll bespoke error handling in
> > all of
> > the relevant filesystems if we go this route. There are also
> > differences between filesystems -- does it make sense to refuse to
> > load
> > an inode with a bogus ctime on NFS or AFS? Probably not.
> > 
> > Hell, it may be simpler to just ditch this patch and reimplement
> > mgtimes using the nanosecond fields like the earlier versions did.
> 
> That does for sure sound simpler.  What is the big advantage of the
> ktime_t?  Smaller size?
> 

Yeah, mostly. We shrink struct inode by 8 bytes with that patch, and we
(probably) get a better cache footprint, since i_version ends up in the
same cacheline as the ctime. That's really a separate issue though, so
I'm not too worked up about dropping that patch.

As a bonus, leaving it split across separate fields means that we can
use unused bits in the nsec field for the flag, so we don't need to
sacrifice any timestamp granularity either.
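
For context, the flag-in-nsec trick works because tv_nsec is always less
than NSEC_PER_SEC, which fits in 30 bits, leaving the top bits of a u32
nsec field free. A sketch (the names here are placeholders, not from
this series):

	#define I_CTIME_QUERIED		((u32)BIT(31))	/* placeholder name */

	static inline u32 inode_ctime_nsec_raw(const struct inode *inode)
	{
		/* mask off the flag bit to recover the true nanoseconds */
		return inode->i_ctime_nsec & ~I_CTIME_QUERIED;
	}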

I've got a draft rework that does this that I'm testing now. Assuming
it works OK, I'll resend in a few days.

Thanks for the feedback!
Christian Brauner July 2, 2024, 4:18 p.m. UTC | #14
On Tue, Jul 02, 2024 at 08:21:42AM GMT, Jeff Layton wrote:
> On Tue, 2024-07-02 at 05:15 -0700, Christoph Hellwig wrote:
> > On Tue, Jul 02, 2024 at 08:09:46AM -0400, Jeff Layton wrote:
> > > > > corrupt timestamps like this?
> > > > 
> > > > inode_set_ctime_to_ts should return an error if things are out of
> > > > range.
> > > 
> > > Currently it just returns the timespec64 we're setting it to (which
> > > makes it easy to do several assignments), so we'd need to change
> > > its
> > > prototype to handle this case, and fix up the callers to recognize
> > > the
> > > error.
> > > 
> > > Alternatively, it may be easier to just add a test for
> > > __i_ctime == KTIME_MAX in the appropriate callers and have them
> > > error
> > > out. I'll have a look and see what makes sense.
> > 
> > That seems like a more awkward interface vs. one that explicitly
> > checks.
> > 
> 
> Many of the existing callers of inode_set_ctime_to_ts are in void-return
> functions. They're just copying data from an internal representation to
> struct inode and assume it always succeeds. For those we'll probably
> have to catch bad ctime values earlier.
> 
> So, I think I'll probably have to roll bespoke error handling in all of
> the relevant filesystems if we go this route. There are also

Shudder, let's try and avoid that.
Christoph Hellwig July 3, 2024, 5:26 a.m. UTC | #15
On Tue, Jul 02, 2024 at 11:58:02AM -0400, Jeff Layton wrote:
> Yeah, mostly. We shrink struct inode by 8 bytes with that patch, and we
> (probably) get a better cache footprint, since i_version ends up in the
> same cacheline as the ctime. That's really a separate issue though, so
> I'm not too worked up about dropping that patch.
> 
> As a bonus, leaving it split across separate fields means that we can
> use unused bits in the nsec field for the flag, so we don't need to
> sacrifice any timestamp granularity either.
> 
> I've got a draft rework that does this that I'm testing now. Assuming
> it works OK, I'll resend in a few days.

So while shrinking the inodes sounds nice, the tradeoff of having to
check all timestamps from disk / the server for validity doesn't
sound as positive.  So I'm glad we're avoiding this at least for.
Christoph Hellwig July 3, 2024, 5:27 a.m. UTC | #16
On Tue, Jul 02, 2024 at 10:26:55PM -0700, Christoph Hellwig wrote:
> So while shrinking the inodes sounds nice, the tradeoff of having to
> check all timestamps from disk / the server for validity doesn't
> sound as positive.  So I'm glad we're avoiding this at least for.

... now.

Patch

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5ff362277834..5139dec085f2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -662,11 +662,10 @@  struct inode {
 	loff_t			i_size;
 	time64_t		i_atime_sec;
 	time64_t		i_mtime_sec;
-	time64_t		i_ctime_sec;
 	u32			i_atime_nsec;
 	u32			i_mtime_nsec;
-	u32			i_ctime_nsec;
-	u32			i_generation;
+	ktime_t			__i_ctime;
+	atomic64_t		i_version;
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
 	unsigned short          i_bytes;
 	u8			i_blkbits;
@@ -701,7 +700,6 @@  struct inode {
 		struct hlist_head	i_dentry;
 		struct rcu_head		i_rcu;
 	};
-	atomic64_t		i_version;
 	atomic64_t		i_sequence; /* see futex */
 	atomic_t		i_count;
 	atomic_t		i_dio_count;
@@ -724,6 +722,8 @@  struct inode {
 	};
 
 
+	u32			i_generation;
+
 #ifdef CONFIG_FSNOTIFY
 	__u32			i_fsnotify_mask; /* all events this inode cares about */
 	/* 32-bit hole reserved for expanding i_fsnotify_mask */
@@ -1608,29 +1608,25 @@  static inline struct timespec64 inode_set_mtime(struct inode *inode,
 	return inode_set_mtime_to_ts(inode, ts);
 }
 
-static inline time64_t inode_get_ctime_sec(const struct inode *inode)
+static inline struct timespec64 inode_get_ctime(const struct inode *inode)
 {
-	return inode->i_ctime_sec;
+	return ktime_to_timespec64(inode->__i_ctime);
 }
 
-static inline long inode_get_ctime_nsec(const struct inode *inode)
+static inline time64_t inode_get_ctime_sec(const struct inode *inode)
 {
-	return inode->i_ctime_nsec;
+	return inode_get_ctime(inode).tv_sec;
 }
 
-static inline struct timespec64 inode_get_ctime(const struct inode *inode)
+static inline long inode_get_ctime_nsec(const struct inode *inode)
 {
-	struct timespec64 ts = { .tv_sec  = inode_get_ctime_sec(inode),
-				 .tv_nsec = inode_get_ctime_nsec(inode) };
-
-	return ts;
+	return inode_get_ctime(inode).tv_nsec;
 }
 
 static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
 						      struct timespec64 ts)
 {
-	inode->i_ctime_sec = ts.tv_sec;
-	inode->i_ctime_nsec = ts.tv_nsec;
+	inode->__i_ctime = ktime_set(ts.tv_sec, ts.tv_nsec);
 	return ts;
 }