diff mbox series

[RESEND,V12,8/8] fuse: Introduce passthrough for mmap

Message ID 20210125153057.3623715-9-balsini@android.com (mailing list archive)
State New, archived
Headers show
Series fuse: Add support for passthrough read/write | expand

Commit Message

Alessio Balsini Jan. 25, 2021, 3:30 p.m. UTC
Enabling FUSE passthrough for mmap-ed operations not only affects
performance, but has also been shown as mandatory for the correct
functioning of FUSE passthrough.
yanwu noticed [1] that a FUSE file with passthrough enabled may suffer
data inconsistencies if the same file is also accessed with mmap. What
happens is that read/write operations are directly applied to the lower
file system (and its cache), while mmap-ed operations are affecting the
FUSE cache.

Extend the FUSE passthrough implementation to also handle memory-mapped
FUSE file, to both fix the cache inconsistencies and extend the
passthrough performance benefits to mmap-ed operations.

[1] https://lore.kernel.org/lkml/20210119110654.11817-1-wu-yan@tcl.com/

Signed-off-by: Alessio Balsini <balsini@android.com>
---
 fs/fuse/file.c        |  3 +++
 fs/fuse/fuse_i.h      |  1 +
 fs/fuse/passthrough.c | 41 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+)

Comments

Miklos Szeredi Feb. 17, 2021, 2:05 p.m. UTC | #1
On Mon, Jan 25, 2021 at 4:31 PM Alessio Balsini <balsini@android.com> wrote:
>
> Enabling FUSE passthrough for mmap-ed operations not only affects
> performance, but has also been shown as mandatory for the correct
> functioning of FUSE passthrough.
> yanwu noticed [1] that a FUSE file with passthrough enabled may suffer
> data inconsistencies if the same file is also accessed with mmap. What
> happens is that read/write operations are directly applied to the lower
> file system (and its cache), while mmap-ed operations are affecting the
> FUSE cache.
>
> Extend the FUSE passthrough implementation to also handle memory-mapped
> FUSE file, to both fix the cache inconsistencies and extend the
> passthrough performance benefits to mmap-ed operations.
>
> [1] https://lore.kernel.org/lkml/20210119110654.11817-1-wu-yan@tcl.com/
>
> Signed-off-by: Alessio Balsini <balsini@android.com>
> ---
>  fs/fuse/file.c        |  3 +++
>  fs/fuse/fuse_i.h      |  1 +
>  fs/fuse/passthrough.c | 41 +++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 45 insertions(+)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index cddada1e8bd9..e3741a94c1f9 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -2370,6 +2370,9 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
>         if (FUSE_IS_DAX(file_inode(file)))
>                 return fuse_dax_mmap(file, vma);
>
> +       if (ff->passthrough.filp)
> +               return fuse_passthrough_mmap(file, vma);
> +
>         if (ff->open_flags & FOPEN_DIRECT_IO) {
>                 /* Can't provide the coherency needed for MAP_SHARED */
>                 if (vma->vm_flags & VM_MAYSHARE)
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 815af1845b16..7b0d65984608 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -1244,5 +1244,6 @@ int fuse_passthrough_setup(struct fuse_conn *fc, struct fuse_file *ff,
>  void fuse_passthrough_release(struct fuse_passthrough *passthrough);
>  ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to);
>  ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from);
> +ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma);
>
>  #endif /* _FS_FUSE_I_H */
> diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
> index 24866c5fe7e2..284979f87747 100644
> --- a/fs/fuse/passthrough.c
> +++ b/fs/fuse/passthrough.c
> @@ -135,6 +135,47 @@ ssize_t fuse_passthrough_write_iter(struct kiocb *iocb_fuse,
>         return ret;
>  }
>
> +ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> +       int ret;
> +       const struct cred *old_cred;
> +       struct fuse_file *ff = file->private_data;
> +       struct inode *fuse_inode = file_inode(file);
> +       struct file *passthrough_filp = ff->passthrough.filp;
> +       struct inode *passthrough_inode = file_inode(passthrough_filp);
> +
> +       if (!passthrough_filp->f_op->mmap)
> +               return -ENODEV;
> +
> +       if (WARN_ON(file != vma->vm_file))
> +               return -EIO;
> +
> +       vma->vm_file = get_file(passthrough_filp);
> +
> +       old_cred = override_creds(ff->passthrough.cred);
> +       ret = call_mmap(vma->vm_file, vma);
> +       revert_creds(old_cred);
> +
> +       if (ret)
> +               fput(passthrough_filp);
> +       else
> +               fput(file);
> +
> +       if (file->f_flags & O_NOATIME)
> +               return ret;
> +
> +       if ((!timespec64_equal(&fuse_inode->i_mtime,
> +                              &passthrough_inode->i_mtime) ||
> +            !timespec64_equal(&fuse_inode->i_ctime,
> +                              &passthrough_inode->i_ctime))) {
> +               fuse_inode->i_mtime = passthrough_inode->i_mtime;
> +               fuse_inode->i_ctime = passthrough_inode->i_ctime;

Again, violation of rules.   Not sure why this is needed, mmap(2)
isn't supposed to change mtime or ctime, AFAIK.

Thanks,
Miklos
Alessio Balsini April 1, 2021, 11:24 a.m. UTC | #2
On Wed, Feb 17, 2021 at 03:05:07PM +0100, Miklos Szeredi wrote:
> On Mon, Jan 25, 2021 at 4:31 PM Alessio Balsini <balsini@android.com> wrote:
> >
> > Enabling FUSE passthrough for mmap-ed operations not only affects
> > performance, but has also been shown as mandatory for the correct
> > functioning of FUSE passthrough.
> > yanwu noticed [1] that a FUSE file with passthrough enabled may suffer
> > data inconsistencies if the same file is also accessed with mmap. What
> > happens is that read/write operations are directly applied to the lower
> > file system (and its cache), while mmap-ed operations are affecting the
> > FUSE cache.
> >
> > Extend the FUSE passthrough implementation to also handle memory-mapped
> > FUSE file, to both fix the cache inconsistencies and extend the
> > passthrough performance benefits to mmap-ed operations.
> >
> > [1] https://lore.kernel.org/lkml/20210119110654.11817-1-wu-yan@tcl.com/
> >
> > Signed-off-by: Alessio Balsini <balsini@android.com>
> > ---
> >  fs/fuse/file.c        |  3 +++
> >  fs/fuse/fuse_i.h      |  1 +
> >  fs/fuse/passthrough.c | 41 +++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 45 insertions(+)
> >
> > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> > index cddada1e8bd9..e3741a94c1f9 100644
> > --- a/fs/fuse/file.c
> > +++ b/fs/fuse/file.c
> > @@ -2370,6 +2370,9 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
> >         if (FUSE_IS_DAX(file_inode(file)))
> >                 return fuse_dax_mmap(file, vma);
> >
> > +       if (ff->passthrough.filp)
> > +               return fuse_passthrough_mmap(file, vma);
> > +
> >         if (ff->open_flags & FOPEN_DIRECT_IO) {
> >                 /* Can't provide the coherency needed for MAP_SHARED */
> >                 if (vma->vm_flags & VM_MAYSHARE)
> > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> > index 815af1845b16..7b0d65984608 100644
> > --- a/fs/fuse/fuse_i.h
> > +++ b/fs/fuse/fuse_i.h
> > @@ -1244,5 +1244,6 @@ int fuse_passthrough_setup(struct fuse_conn *fc, struct fuse_file *ff,
> >  void fuse_passthrough_release(struct fuse_passthrough *passthrough);
> >  ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to);
> >  ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from);
> > +ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma);
> >
> >  #endif /* _FS_FUSE_I_H */
> > diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
> > index 24866c5fe7e2..284979f87747 100644
> > --- a/fs/fuse/passthrough.c
> > +++ b/fs/fuse/passthrough.c
> > @@ -135,6 +135,47 @@ ssize_t fuse_passthrough_write_iter(struct kiocb *iocb_fuse,
> >         return ret;
> >  }
> >
> > +ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma)
> > +{
> > +       int ret;
> > +       const struct cred *old_cred;
> > +       struct fuse_file *ff = file->private_data;
> > +       struct inode *fuse_inode = file_inode(file);
> > +       struct file *passthrough_filp = ff->passthrough.filp;
> > +       struct inode *passthrough_inode = file_inode(passthrough_filp);
> > +
> > +       if (!passthrough_filp->f_op->mmap)
> > +               return -ENODEV;
> > +
> > +       if (WARN_ON(file != vma->vm_file))
> > +               return -EIO;
> > +
> > +       vma->vm_file = get_file(passthrough_filp);
> > +
> > +       old_cred = override_creds(ff->passthrough.cred);
> > +       ret = call_mmap(vma->vm_file, vma);
> > +       revert_creds(old_cred);
> > +
> > +       if (ret)
> > +               fput(passthrough_filp);
> > +       else
> > +               fput(file);
> > +
> > +       if (file->f_flags & O_NOATIME)
> > +               return ret;
> > +
> > +       if ((!timespec64_equal(&fuse_inode->i_mtime,
> > +                              &passthrough_inode->i_mtime) ||
> > +            !timespec64_equal(&fuse_inode->i_ctime,
> > +                              &passthrough_inode->i_ctime))) {
> > +               fuse_inode->i_mtime = passthrough_inode->i_mtime;
> > +               fuse_inode->i_ctime = passthrough_inode->i_ctime;
> 
> Again, violation of rules.   Not sure why this is needed, mmap(2)
> isn't supposed to change mtime or ctime, AFAIK.
> 
> Thanks,
> Miklos

Hi Miklos,

I don't have a strong preference for this and will drop the ctime/atime
updates in v13.


For the records, here follows my reasoning for which I decided to update
atime/ctime here.

From the stats(2) man it just says that it's not guaranteed that atime
would be updated, as `Other routines, like mmap(2), may or may not
update st_atime.`

Something similar according to the inotify(7) man that warns not to trigger events
after mmap(2), msync(2), and munmap(2) operations.

The mmap(2) man mentions that st_ctime and st_mtime would be updated for
file mappings with PROT_WRITE and MAP_SHARED, before a msync(2) with
MS_SYNC or MS_ASYNC.
This passthrough scenario is slightly different from the standard mmap,
but it seems to me that we are kind of falling into a similar use case
for the atime/ctime update.
I would imagine this is why OverlayFS updates atime/ctime too in
ovl_mmap(), through ovl_copyattr().

Thanks,
Alessio
diff mbox series

Patch

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index cddada1e8bd9..e3741a94c1f9 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2370,6 +2370,9 @@  static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
 	if (FUSE_IS_DAX(file_inode(file)))
 		return fuse_dax_mmap(file, vma);
 
+	if (ff->passthrough.filp)
+		return fuse_passthrough_mmap(file, vma);
+
 	if (ff->open_flags & FOPEN_DIRECT_IO) {
 		/* Can't provide the coherency needed for MAP_SHARED */
 		if (vma->vm_flags & VM_MAYSHARE)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 815af1845b16..7b0d65984608 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1244,5 +1244,6 @@  int fuse_passthrough_setup(struct fuse_conn *fc, struct fuse_file *ff,
 void fuse_passthrough_release(struct fuse_passthrough *passthrough);
 ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to);
 ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from);
+ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma);
 
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
index 24866c5fe7e2..284979f87747 100644
--- a/fs/fuse/passthrough.c
+++ b/fs/fuse/passthrough.c
@@ -135,6 +135,47 @@  ssize_t fuse_passthrough_write_iter(struct kiocb *iocb_fuse,
 	return ret;
 }
 
+ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int ret;
+	const struct cred *old_cred;
+	struct fuse_file *ff = file->private_data;
+	struct inode *fuse_inode = file_inode(file);
+	struct file *passthrough_filp = ff->passthrough.filp;
+	struct inode *passthrough_inode = file_inode(passthrough_filp);
+
+	if (!passthrough_filp->f_op->mmap)
+		return -ENODEV;
+
+	if (WARN_ON(file != vma->vm_file))
+		return -EIO;
+
+	vma->vm_file = get_file(passthrough_filp);
+
+	old_cred = override_creds(ff->passthrough.cred);
+	ret = call_mmap(vma->vm_file, vma);
+	revert_creds(old_cred);
+
+	if (ret)
+		fput(passthrough_filp);
+	else
+		fput(file);
+
+	if (file->f_flags & O_NOATIME)
+		return ret;
+
+	if ((!timespec64_equal(&fuse_inode->i_mtime,
+			       &passthrough_inode->i_mtime) ||
+	     !timespec64_equal(&fuse_inode->i_ctime,
+			       &passthrough_inode->i_ctime))) {
+		fuse_inode->i_mtime = passthrough_inode->i_mtime;
+		fuse_inode->i_ctime = passthrough_inode->i_ctime;
+	}
+	touch_atime(&file->f_path);
+
+	return ret;
+}
+
 int fuse_passthrough_open(struct fuse_dev *fud,
 			  struct fuse_passthrough_out *pto)
 {