diff mbox

[RFC,1/2] vfs: update overlay inode times on write

Message ID 1514896009-11468-2-git-send-email-amir73il@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Amir Goldstein Jan. 2, 2018, 12:26 p.m. UTC
Currently with overlayfs, the real upper inode's i_mtime is updated on
write, but not overlay vfs inode. The vfs inode's i_mtime is used by nfsd
to check if nfs client cache is stale, so updating the overlay vfs inode
i_mtime on write is required for overlayfs NFS export support.

The non uptodate mtime issue was found and verified with the
nfstest_posix test when run over NFS exported overlayfs:

 $ nfstest_posix --runtest=write
 ...
 FAIL: write - file st_mtime should be updated

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/inode.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

Comments

Vivek Goyal Jan. 2, 2018, 4:51 p.m. UTC | #1
On Tue, Jan 02, 2018 at 02:26:48PM +0200, Amir Goldstein wrote:
> Currently with overlayfs, the real upper inode's i_mtime is updated on
> write, but not overlay vfs inode. The vfs inode's i_mtime is used by nfsd
> to check if nfs client cache is stale, so updating the overlay vfs inode
> i_mtime on write is required for overlayfs NFS export support.
> 
> The non uptodate mtime issue was found and verified with the
> nfstest_posix test when run over NFS exported overlayfs:
> 
>  $ nfstest_posix --runtest=write
>  ...
>  FAIL: write - file st_mtime should be updated
> 
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  fs/inode.c | 9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/inode.c b/fs/inode.c
> index 03102d6ef044..a252256f4e51 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -1567,8 +1567,7 @@ EXPORT_SYMBOL(bmap);
>  /*
>   * Update times in overlayed inode from underlying real inode
>   */
> -static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
> -			       bool rcu)
> +static void update_ovl_d_inode_times(struct dentry *dentry, bool rcu)
>  {
>  	struct dentry *upperdentry;
>  
> @@ -1585,6 +1584,7 @@ static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
>  	 * stale mtime/ctime.
>  	 */
>  	if (upperdentry) {
> +		struct inode *inode = d_inode(dentry);
>  		struct inode *realinode = d_inode(upperdentry);
>  
>  		if ((!timespec_equal(&inode->i_mtime, &realinode->i_mtime) ||
> @@ -1607,7 +1607,8 @@ static int relatime_need_update(const struct path *path, struct inode *inode,
>  	if (!(path->mnt->mnt_flags & MNT_RELATIME))
>  		return 1;
>  
> -	update_ovl_inode_times(path->dentry, inode, rcu);
> +	update_ovl_d_inode_times(path->dentry, rcu);
> +

Hi Amir,

If we update overlay inode mtime, ctime in write path, then do we still
need this call in relatime_need_update() path? I mean what other path
is there where real inode mtime/ctime will be out of sync with overlay
inode mtime/ctime.

Vivek

>  	/*
>  	 * Is mtime younger than atime? If yes, update atime:
>  	 */
> @@ -1876,6 +1877,8 @@ int file_update_time(struct file *file)
>  	ret = update_time(inode, &now, sync_it);
>  	__mnt_drop_write_file(file);
>  
> +	update_ovl_d_inode_times(file->f_path.dentry, false);
> +
>  	return ret;
>  }
>  EXPORT_SYMBOL(file_update_time);
> -- 
> 2.7.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-unionfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Amir Goldstein Jan. 2, 2018, 5:27 p.m. UTC | #2
On Tue, Jan 2, 2018 at 6:51 PM, Vivek Goyal <vgoyal@redhat.com> wrote:
> On Tue, Jan 02, 2018 at 02:26:48PM +0200, Amir Goldstein wrote:
>> Currently with overlayfs, the real upper inode's i_mtime is updated on
>> write, but not overlay vfs inode. The vfs inode's i_mtime is used by nfsd
>> to check if nfs client cache is stale, so updating the overlay vfs inode
>> i_mtime on write is required for overlayfs NFS export support.
>>
>> The non uptodate mtime issue was found and verified with the
>> nfstest_posix test when run over NFS exported overlayfs:
>>
>>  $ nfstest_posix --runtest=write
>>  ...
>>  FAIL: write - file st_mtime should be updated
>>
>> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
>> ---
>>  fs/inode.c | 9 ++++++---
>>  1 file changed, 6 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/inode.c b/fs/inode.c
>> index 03102d6ef044..a252256f4e51 100644
>> --- a/fs/inode.c
>> +++ b/fs/inode.c
>> @@ -1567,8 +1567,7 @@ EXPORT_SYMBOL(bmap);
>>  /*
>>   * Update times in overlayed inode from underlying real inode
>>   */
>> -static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
>> -                            bool rcu)
>> +static void update_ovl_d_inode_times(struct dentry *dentry, bool rcu)
>>  {
>>       struct dentry *upperdentry;
>>
>> @@ -1585,6 +1584,7 @@ static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
>>        * stale mtime/ctime.
>>        */
>>       if (upperdentry) {
>> +             struct inode *inode = d_inode(dentry);
>>               struct inode *realinode = d_inode(upperdentry);
>>
>>               if ((!timespec_equal(&inode->i_mtime, &realinode->i_mtime) ||
>> @@ -1607,7 +1607,8 @@ static int relatime_need_update(const struct path *path, struct inode *inode,
>>       if (!(path->mnt->mnt_flags & MNT_RELATIME))
>>               return 1;
>>
>> -     update_ovl_inode_times(path->dentry, inode, rcu);
>> +     update_ovl_d_inode_times(path->dentry, rcu);
>> +
>
> Hi Amir,
>
> If we update overlay inode mtime, ctime in write path, then do we still
> need this call in relatime_need_update() path? I mean what other path
> is there where real inode mtime/ctime will be out of sync with overlay
> inode mtime/ctime.
>

Specifically, in the mentioned nfstest_posix test, i_mtime was not
updated on write of regular file and i_mtime of parent was not updated on link.
I guess there are more cases where i_mtime is updated not in the
write() path. I can think of punch hole for regular files and even file system
specific ioctls, which overlayfs is not aware of.
To handle those cases, I added the 2nd patch, which is "mtime reader
oriented", just like the relatime_need_update() case.

The problem is that AFAIK there is no simple place where all
i_mtime updates can be intercepted. Perhaps the recent work by Jeff
to sort out i_version readers/writers will provide a better place
to intercept all mtime updates?? At least for underlying filesystems that
support i_version, we could update overlay inode times only if i_version
was updated, but I don't even know what would be the best implementation
choice for relatime_need_update() in that case.

Amir.
Jeff Layton Jan. 2, 2018, 6:48 p.m. UTC | #3
On Tue, 2018-01-02 at 14:26 +0200, Amir Goldstein wrote:
> Currently with overlayfs, the real upper inode's i_mtime is updated on
> write, but not overlay vfs inode. The vfs inode's i_mtime is used by nfsd
> to check if nfs client cache is stale, so updating the overlay vfs inode
> i_mtime on write is required for overlayfs NFS export support.
> 
> The non uptodate mtime issue was found and verified with the
> nfstest_posix test when run over NFS exported overlayfs:
> 
>  $ nfstest_posix --runtest=write
>  ...
>  FAIL: write - file st_mtime should be updated
> 
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  fs/inode.c | 9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/inode.c b/fs/inode.c
> index 03102d6ef044..a252256f4e51 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -1567,8 +1567,7 @@ EXPORT_SYMBOL(bmap);
>  /*
>   * Update times in overlayed inode from underlying real inode
>   */
> -static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
> -			       bool rcu)
> +static void update_ovl_d_inode_times(struct dentry *dentry, bool rcu)
>  {
>  	struct dentry *upperdentry;
>  
> @@ -1585,6 +1584,7 @@ static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
>  	 * stale mtime/ctime.
>  	 */
>  	if (upperdentry) {
> +		struct inode *inode = d_inode(dentry);
>  		struct inode *realinode = d_inode(upperdentry);
>  
>  		if ((!timespec_equal(&inode->i_mtime, &realinode->i_mtime) ||
> @@ -1607,7 +1607,8 @@ static int relatime_need_update(const struct path *path, struct inode *inode,
>  	if (!(path->mnt->mnt_flags & MNT_RELATIME))
>  		return 1;
>  
> -	update_ovl_inode_times(path->dentry, inode, rcu);
> +	update_ovl_d_inode_times(path->dentry, rcu);
> +
>  	/*
>  	 * Is mtime younger than atime? If yes, update atime:
>  	 */
> @@ -1876,6 +1877,8 @@ int file_update_time(struct file *file)
>  	ret = update_time(inode, &now, sync_it);
>  	__mnt_drop_write_file(file);
>  
> +	update_ovl_d_inode_times(file->f_path.dentry, false);
> +
>  	return ret;
>  }
>  EXPORT_SYMBOL(file_update_time);

This code all seems to be called from the relatime handling codepath,
but the problem statement is all about the mtime.

Is there a reason for that, or would this be better done in (e.g.)
ovl_update_time? Is this code trying to delay updating times until
something is trying to access it?
--
Jeff Layton <jlayton@kernel.org>
Amir Goldstein Jan. 2, 2018, 7:45 p.m. UTC | #4
On Tue, Jan 2, 2018 at 8:48 PM, Jeff Layton <jlayton@kernel.org> wrote:
> On Tue, 2018-01-02 at 14:26 +0200, Amir Goldstein wrote:
>> Currently with overlayfs, the real upper inode's i_mtime is updated on
>> write, but not overlay vfs inode. The vfs inode's i_mtime is used by nfsd
>> to check if nfs client cache is stale, so updating the overlay vfs inode
>> i_mtime on write is required for overlayfs NFS export support.
>>
>> The non uptodate mtime issue was found and verified with the
>> nfstest_posix test when run over NFS exported overlayfs:
>>
>>  $ nfstest_posix --runtest=write
>>  ...
>>  FAIL: write - file st_mtime should be updated
>>
>> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
>> ---
>>  fs/inode.c | 9 ++++++---
>>  1 file changed, 6 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/inode.c b/fs/inode.c
>> index 03102d6ef044..a252256f4e51 100644
>> --- a/fs/inode.c
>> +++ b/fs/inode.c
>> @@ -1567,8 +1567,7 @@ EXPORT_SYMBOL(bmap);
>>  /*
>>   * Update times in overlayed inode from underlying real inode
>>   */
>> -static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
>> -                            bool rcu)
>> +static void update_ovl_d_inode_times(struct dentry *dentry, bool rcu)
>>  {
>>       struct dentry *upperdentry;
>>
>> @@ -1585,6 +1584,7 @@ static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
>>        * stale mtime/ctime.
>>        */
>>       if (upperdentry) {
>> +             struct inode *inode = d_inode(dentry);
>>               struct inode *realinode = d_inode(upperdentry);
>>
>>               if ((!timespec_equal(&inode->i_mtime, &realinode->i_mtime) ||
>> @@ -1607,7 +1607,8 @@ static int relatime_need_update(const struct path *path, struct inode *inode,
>>       if (!(path->mnt->mnt_flags & MNT_RELATIME))
>>               return 1;
>>
>> -     update_ovl_inode_times(path->dentry, inode, rcu);
>> +     update_ovl_d_inode_times(path->dentry, rcu);
>> +
>>       /*
>>        * Is mtime younger than atime? If yes, update atime:
>>        */
>> @@ -1876,6 +1877,8 @@ int file_update_time(struct file *file)
>>       ret = update_time(inode, &now, sync_it);
>>       __mnt_drop_write_file(file);
>>
>> +     update_ovl_d_inode_times(file->f_path.dentry, false);
>> +
>>       return ret;
>>  }
>>  EXPORT_SYMBOL(file_update_time);
>
> This code all seems to be called from the relatime handling codepath,
> but the problem statement is all about the mtime.

See, currently update_ovl_inode_times() purpose is to update overlay
inode before the mtime reader relatime_need_update() access i_mtime.
Any other access to i_mtime (e.g. lease_get_mtime()) gets a non-update
version of i_mtime, because nobody bothers to call update_ovl_inode_times().

>
> Is there a reason for that, or would this be better done in (e.g.)
> ovl_update_time? Is this code trying to delay updating times until
> something is trying to access it?

There is no delaying requirement. The problem statement is that with
overlayfs there are 2 inodes in play, The overlay inode and the real underlying
inode. After an overlay regular file is open, file_inode(file) points
to the underlying real inode,
so write() only knowns about the real underlying inode and i_mtime is typically
only ever updated on real underlying inode. ovl_update_time() is thus
never called
on the write() path.

locks_inode(file) := file->f_path.dentry->d_inode is the overlay
inode, which is the
one used throughout locks.c, so the patch above calls  update_ovl_inode_times()
from file_update_time() with the overlay inode to copy i_mtime from
real to overlay
inode.

Patch 2/2, which seems like the better option to me, calls
update_ovl_inode_times()
from the i_mtime reader lease_get_mtime() before i_mtime access.

Is this making more sense to you?

Amir.
Dave Chinner Jan. 2, 2018, 9:26 p.m. UTC | #5
On Tue, Jan 02, 2018 at 07:27:41PM +0200, Amir Goldstein wrote:
> On Tue, Jan 2, 2018 at 6:51 PM, Vivek Goyal <vgoyal@redhat.com> wrote:
> > On Tue, Jan 02, 2018 at 02:26:48PM +0200, Amir Goldstein wrote:
> >> Currently with overlayfs, the real upper inode's i_mtime is updated on
> >> write, but not overlay vfs inode. The vfs inode's i_mtime is used by nfsd
> >> to check if nfs client cache is stale, so updating the overlay vfs inode
> >> i_mtime on write is required for overlayfs NFS export support.
> >>
> >> The non uptodate mtime issue was found and verified with the
> >> nfstest_posix test when run over NFS exported overlayfs:
> >>
> >>  $ nfstest_posix --runtest=write
> >>  ...
> >>  FAIL: write - file st_mtime should be updated
> >>
> >> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> >> ---
> >>  fs/inode.c | 9 ++++++---
> >>  1 file changed, 6 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/fs/inode.c b/fs/inode.c
> >> index 03102d6ef044..a252256f4e51 100644
> >> --- a/fs/inode.c
> >> +++ b/fs/inode.c
> >> @@ -1567,8 +1567,7 @@ EXPORT_SYMBOL(bmap);
> >>  /*
> >>   * Update times in overlayed inode from underlying real inode
> >>   */
> >> -static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
> >> -                            bool rcu)
> >> +static void update_ovl_d_inode_times(struct dentry *dentry, bool rcu)
> >>  {
> >>       struct dentry *upperdentry;
> >>
> >> @@ -1585,6 +1584,7 @@ static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
> >>        * stale mtime/ctime.
> >>        */
> >>       if (upperdentry) {
> >> +             struct inode *inode = d_inode(dentry);
> >>               struct inode *realinode = d_inode(upperdentry);
> >>
> >>               if ((!timespec_equal(&inode->i_mtime, &realinode->i_mtime) ||
> >> @@ -1607,7 +1607,8 @@ static int relatime_need_update(const struct path *path, struct inode *inode,
> >>       if (!(path->mnt->mnt_flags & MNT_RELATIME))
> >>               return 1;
> >>
> >> -     update_ovl_inode_times(path->dentry, inode, rcu);
> >> +     update_ovl_d_inode_times(path->dentry, rcu);
> >> +
> >
> > Hi Amir,
> >
> > If we update overlay inode mtime, ctime in write path, then do we still
> > need this call in relatime_need_update() path? I mean what other path
> > is there where real inode mtime/ctime will be out of sync with overlay
> > inode mtime/ctime.
> >
> 
> Specifically, in the mentioned nfstest_posix test, i_mtime was not
> updated on write of regular file and i_mtime of parent was not updated on link.
> I guess there are more cases where i_mtime is updated not in the
> write() path. I can think of punch hole for regular files and even file system
> specific ioctls, which overlayfs is not aware of.
> To handle those cases, I added the 2nd patch, which is "mtime reader
> oriented", just like the relatime_need_update() case.
> 
> The problem is that AFAIK there is no simple place where all
> i_mtime updates can be intercepted. Perhaps the recent work by Jeff
> to sort out i_version readers/writers will provide a better place
> to intercept all mtime updates?? At least for underlying filesystems that
> support i_version, we could update overlay inode times only if i_version
> was updated, but I don't even know what would be the best implementation
> choice for relatime_need_update() in that case.

You might be able to intercept m/ctime changes at notify_change(),
but you are not going to be able to catch all timestamp changes
because many of them are deep inside filesystem specific
implementations of various functions. e.g. see all the internal XFS
calls to xfs_trans_ichgtime() to change inode timestamps during
various modifications because they are conditional on the
modifications those operations make to the filesystem and hence they
can't be specified/directed by the VFS....

Cheers,

Dave.
diff mbox

Patch

diff --git a/fs/inode.c b/fs/inode.c
index 03102d6ef044..a252256f4e51 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1567,8 +1567,7 @@  EXPORT_SYMBOL(bmap);
 /*
  * Update times in overlayed inode from underlying real inode
  */
-static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
-			       bool rcu)
+static void update_ovl_d_inode_times(struct dentry *dentry, bool rcu)
 {
 	struct dentry *upperdentry;
 
@@ -1585,6 +1584,7 @@  static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
 	 * stale mtime/ctime.
 	 */
 	if (upperdentry) {
+		struct inode *inode = d_inode(dentry);
 		struct inode *realinode = d_inode(upperdentry);
 
 		if ((!timespec_equal(&inode->i_mtime, &realinode->i_mtime) ||
@@ -1607,7 +1607,8 @@  static int relatime_need_update(const struct path *path, struct inode *inode,
 	if (!(path->mnt->mnt_flags & MNT_RELATIME))
 		return 1;
 
-	update_ovl_inode_times(path->dentry, inode, rcu);
+	update_ovl_d_inode_times(path->dentry, rcu);
+
 	/*
 	 * Is mtime younger than atime? If yes, update atime:
 	 */
@@ -1876,6 +1877,8 @@  int file_update_time(struct file *file)
 	ret = update_time(inode, &now, sync_it);
 	__mnt_drop_write_file(file);
 
+	update_ovl_d_inode_times(file->f_path.dentry, false);
+
 	return ret;
 }
 EXPORT_SYMBOL(file_update_time);