diff mbox series

[V10,2/5] fuse: Passthrough initialization and release

Message ID 20201026125016.1905945-3-balsini@android.com (mailing list archive)
State New, archived
Headers show
Series fuse: Add support for passthrough read/write | expand

Commit Message

Alessio Balsini Oct. 26, 2020, 12:50 p.m. UTC
Implement the FUSE passthrough ioctl() that associates the lower
(passthrough) file system file with the fuse_file.

The file descriptor passed to the ioctl() by the FUSE daemon is used to
access the relative file pointer, that will be copied to the fuse_file data
structure to consolidate the link between the FUSE and lower file system.

To enable the passthrough mode, userspace triggers the
FUSE_DEV_IOC_PASSTHROUGH_OPEN ioctl() and, if the call succeeds,
receives back an identifier that will be used at open/create response
time in the fuse_open_out field to associate the FUSE file to the lower
file system file.
The value returned by the ioctl() to userspace can be:
- > 0: success, the identifier can be used as part of an open/create
  reply.
- < 0: an error occurred.
The value 0 has been left unused for backward compatibility: the
fuse_open_out field that is used to pass the passthrough_fh back to the
kernel uses the same bits that were previously as struct padding,
zero-initialized in the common libfuse implementation. Removing the 0
value fixes the ambiguity between the case in which 0 corresponds to a
real passthrough_fh or a missing implementation, simplifying the
userspace implementation.

For the passthrough mode to be successfully activated, the lower file
system file must implement both read_ and write_iter file operations.
This extra check avoids special pseudo files to be targeted for this
feature.
Passthrough comes with another limitation: no further file system stacking
is allowed for those FUSE file systems using passthrough.

Signed-off-by: Alessio Balsini <balsini@android.com>
---
 fs/fuse/inode.c       |  5 +++
 fs/fuse/passthrough.c | 80 +++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 83 insertions(+), 2 deletions(-)

Comments

Peng Tao Nov. 26, 2020, 1:33 p.m. UTC | #1
On Tue, Oct 27, 2020 at 12:19 AM Alessio Balsini <balsini@android.com> wrote:
>
> Implement the FUSE passthrough ioctl() that associates the lower
> (passthrough) file system file with the fuse_file.
>
> The file descriptor passed to the ioctl() by the FUSE daemon is used to
> access the relative file pointer, that will be copied to the fuse_file data
> structure to consolidate the link between the FUSE and lower file system.
>
> To enable the passthrough mode, userspace triggers the
> FUSE_DEV_IOC_PASSTHROUGH_OPEN ioctl() and, if the call succeeds,
> receives back an identifier that will be used at open/create response
> time in the fuse_open_out field to associate the FUSE file to the lower
> file system file.
> The value returned by the ioctl() to userspace can be:
> - > 0: success, the identifier can be used as part of an open/create
>   reply.
> - < 0: an error occurred.
> The value 0 has been left unused for backward compatibility: the
> fuse_open_out field that is used to pass the passthrough_fh back to the
> kernel uses the same bits that were previously as struct padding,
> zero-initialized in the common libfuse implementation. Removing the 0
> value fixes the ambiguity between the case in which 0 corresponds to a
> real passthrough_fh or a missing implementation, simplifying the
> userspace implementation.
>
> For the passthrough mode to be successfully activated, the lower file
> system file must implement both read_ and write_iter file operations.
> This extra check avoids special pseudo files to be targeted for this
> feature.
> Passthrough comes with another limitation: no further file system stacking
> is allowed for those FUSE file systems using passthrough.
>
> Signed-off-by: Alessio Balsini <balsini@android.com>
> ---
>  fs/fuse/inode.c       |  5 +++
>  fs/fuse/passthrough.c | 80 +++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 83 insertions(+), 2 deletions(-)
>
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 6738dd5ff5d2..1e94c54d1455 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -1034,6 +1034,11 @@ EXPORT_SYMBOL_GPL(fuse_send_init);
>
>  static int free_fuse_passthrough(int id, void *p, void *data)
>  {
> +       struct fuse_passthrough *passthrough = (struct fuse_passthrough *)p;
> +
> +       fuse_passthrough_release(passthrough);
> +       kfree(p);
> +
>         return 0;
>  }
>
> diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
> index 594060c654f8..a135c955cc33 100644
> --- a/fs/fuse/passthrough.c
> +++ b/fs/fuse/passthrough.c
> @@ -3,19 +3,95 @@
>  #include "fuse_i.h"
>
>  #include <linux/fuse.h>
> +#include <linux/idr.h>
>
>  int fuse_passthrough_open(struct fuse_dev *fud,
>                           struct fuse_passthrough_out *pto)
>  {
> -       return -EINVAL;
> +       int res;
> +       struct file *passthrough_filp;
> +       struct fuse_conn *fc = fud->fc;
> +       struct fuse_passthrough *passthrough;
> +
> +       if (!fc->passthrough)
> +               return -EPERM;
> +
> +       /* This field is reserved for future implementation */
> +       if (pto->len != 0)
> +               return -EINVAL;
> +
> +       passthrough_filp = fget(pto->fd);
> +       if (!passthrough_filp) {
> +               pr_err("FUSE: invalid file descriptor for passthrough.\n");
> +               return -EBADF;
> +       }
> +
> +       if (!passthrough_filp->f_op->read_iter ||
> +           !passthrough_filp->f_op->write_iter) {
> +               pr_err("FUSE: passthrough file misses file operations.\n");
> +               return -EBADF;
> +       }
> +
> +       passthrough = kmalloc(sizeof(struct fuse_passthrough), GFP_KERNEL);
> +       if (!passthrough)
> +               return -ENOMEM;
> +
> +       passthrough->filp = passthrough_filp;
> +
> +       idr_preload(GFP_KERNEL);
> +       spin_lock(&fc->passthrough_req_lock);
> +       res = idr_alloc(&fc->passthrough_req, passthrough, 1, 0, GFP_ATOMIC);
> +       spin_unlock(&fc->passthrough_req_lock);
> +       idr_preload_end();
> +       if (res <= 0) {
> +               fuse_passthrough_release(passthrough);
> +               kfree(passthrough);
> +       }
> +
> +       return res;
>  }
>
>  int fuse_passthrough_setup(struct fuse_conn *fc, struct fuse_file *ff,
>                            struct fuse_open_out *openarg)
>  {
> -       return -EINVAL;
> +       struct inode *passthrough_inode;
> +       struct super_block *passthrough_sb;
> +       struct fuse_passthrough *passthrough;
> +       int passthrough_fh = openarg->passthrough_fh;
> +
> +       if (!fc->passthrough)
> +               return -EPERM;
> +
> +       /* Default case, passthrough is not requested */
> +       if (passthrough_fh <= 0)
> +               return -EINVAL;
> +
> +       spin_lock(&fc->passthrough_req_lock);
> +       passthrough = idr_remove(&fc->passthrough_req, passthrough_fh);
> +       spin_unlock(&fc->passthrough_req_lock);
> +
> +       if (!passthrough)
> +               return -EINVAL;
> +
> +       passthrough_inode = file_inode(passthrough->filp);
> +       passthrough_sb = passthrough_inode->i_sb;
> +       if (passthrough_sb->s_stack_depth >= FILESYSTEM_MAX_STACK_DEPTH) {
Hi Alessio,

passthrough_sb is the underlying filesystem superblock, right? It
seems to prevent fuse passthrough fs from stacking on another fully
stacked file system, instead of preventing other file systems from
stacking on this fuse passthrough file system. Am I misunderstanding
it?

Cheers,
Tao
--
Into Sth. Rich & Strange
Alessio Balsini Nov. 27, 2020, 1:41 p.m. UTC | #2
Hi Peng,

Thanks for the heads up!

On Thu, Nov 26, 2020 at 09:33:34PM +0800, Peng Tao wrote:
> On Tue, Oct 27, 2020 at 12:19 AM Alessio Balsini <balsini@android.com> wrote:
> > [...]
> >  int fuse_passthrough_setup(struct fuse_conn *fc, struct fuse_file *ff,
> >                            struct fuse_open_out *openarg)
> >  {
> > -       return -EINVAL;
> > +       struct inode *passthrough_inode;
> > +       struct super_block *passthrough_sb;
> > +       struct fuse_passthrough *passthrough;
> > +       int passthrough_fh = openarg->passthrough_fh;
> > +
> > +       if (!fc->passthrough)
> > +               return -EPERM;
> > +
> > +       /* Default case, passthrough is not requested */
> > +       if (passthrough_fh <= 0)
> > +               return -EINVAL;
> > +
> > +       spin_lock(&fc->passthrough_req_lock);
> > +       passthrough = idr_remove(&fc->passthrough_req, passthrough_fh);
> > +       spin_unlock(&fc->passthrough_req_lock);
> > +
> > +       if (!passthrough)
> > +               return -EINVAL;
> > +
> > +       passthrough_inode = file_inode(passthrough->filp);
> > +       passthrough_sb = passthrough_inode->i_sb;
> > +       if (passthrough_sb->s_stack_depth >= FILESYSTEM_MAX_STACK_DEPTH) {
> Hi Alessio,
> 
> passthrough_sb is the underlying filesystem superblock, right? It
> seems to prevent fuse passthrough fs from stacking on another fully
> stacked file system, instead of preventing other file systems from
> stacking on this fuse passthrough file system. Am I misunderstanding
> it?

Correct, this checks the stacking depth on the lower filesystem.
This is an intended behavior to avoid the stacking of multiple FUSE
passthrough filesystems, and works because when a FUSE connection has
the passthrough feature activated, the file system updates its
s_stack_depth to FILESYSTEM_MAX_STACK_DEPTH in process_init_reply()
(PATCH 1/5), avoiding further stacking.

Do you see issues with that?

What I'm now thinking is that fuse_passthrough_open would probably be a
better place for this check, so that the ioctl() would fail and the user
space daemon may decide to skip passthrough and use legacy FUSE access
for that file (or, at least, be aware that something went wrong).

A more aggressive approach would be instead to move the stacking depth
check to fuse_fill_super_common, where we can update s_stack_depth to
lower-fs depth+1 and fail if passthrough is active and s_stack_depth is
greater than FILESYSTEM_MAX_STACK_DEPTH.

What do you think?

Thanks,
Alessio
Peng Tao Nov. 28, 2020, 1:57 a.m. UTC | #3
On Fri, Nov 27, 2020 at 9:41 PM Alessio Balsini <balsini@android.com> wrote:
>
> Hi Peng,
>
> Thanks for the heads up!
>
> On Thu, Nov 26, 2020 at 09:33:34PM +0800, Peng Tao wrote:
> > On Tue, Oct 27, 2020 at 12:19 AM Alessio Balsini <balsini@android.com> wrote:
> > > [...]
> > >  int fuse_passthrough_setup(struct fuse_conn *fc, struct fuse_file *ff,
> > >                            struct fuse_open_out *openarg)
> > >  {
> > > -       return -EINVAL;
> > > +       struct inode *passthrough_inode;
> > > +       struct super_block *passthrough_sb;
> > > +       struct fuse_passthrough *passthrough;
> > > +       int passthrough_fh = openarg->passthrough_fh;
> > > +
> > > +       if (!fc->passthrough)
> > > +               return -EPERM;
> > > +
> > > +       /* Default case, passthrough is not requested */
> > > +       if (passthrough_fh <= 0)
> > > +               return -EINVAL;
> > > +
> > > +       spin_lock(&fc->passthrough_req_lock);
> > > +       passthrough = idr_remove(&fc->passthrough_req, passthrough_fh);
> > > +       spin_unlock(&fc->passthrough_req_lock);
> > > +
> > > +       if (!passthrough)
> > > +               return -EINVAL;
> > > +
> > > +       passthrough_inode = file_inode(passthrough->filp);
> > > +       passthrough_sb = passthrough_inode->i_sb;
> > > +       if (passthrough_sb->s_stack_depth >= FILESYSTEM_MAX_STACK_DEPTH) {
> > Hi Alessio,
> >
> > passthrough_sb is the underlying filesystem superblock, right? It
> > seems to prevent fuse passthrough fs from stacking on another fully
> > stacked file system, instead of preventing other file systems from
> > stacking on this fuse passthrough file system. Am I misunderstanding
> > it?
>
> Correct, this checks the stacking depth on the lower filesystem.
> This is an intended behavior to avoid the stacking of multiple FUSE
> passthrough filesystems, and works because when a FUSE connection has
> the passthrough feature activated, the file system updates its
> s_stack_depth to FILESYSTEM_MAX_STACK_DEPTH in process_init_reply()
> (PATCH 1/5), avoiding further stacking.
>
> Do you see issues with that?
I'm considering a use case where a fuse passthrough file system is
stacked on top of an overlayfs and/or an ecryptfs. The underlying
s_stack_depth FILESYSTEM_MAX_STACK_DEPTH is rejected here so it is
possible to have an overlayfs or an ecryptfs underneath but not both
(with current FILESYSTEM_MAX_STACK_DEPTH being 2). How about changing
passthrough fuse sb s_stack_depth to FILESYSTEM_MAX_STACK_DEPTH + 1 in
PATCH 1/5, and allow passthrough_sb->s_stack_depth to be
FILESYSTEM_MAX_STACK_DEPTH here? So that existing kernel stacking file
system setups can all work as the underlying file system, while the
stacking of multiple FUSE passthrough filesystems is still blocked.

>
> What I'm now thinking is that fuse_passthrough_open would probably be a
> better place for this check, so that the ioctl() would fail and the user
> space daemon may decide to skip passthrough and use legacy FUSE access
> for that file (or, at least, be aware that something went wrong).
>
+1, fuse_passthrough_open seems to be a better place for the check.

> A more aggressive approach would be instead to move the stacking depth
> check to fuse_fill_super_common, where we can update s_stack_depth to
> lower-fs depth+1 and fail if passthrough is active and s_stack_depth is
> greater than FILESYSTEM_MAX_STACK_DEPTH.
>
The lower layer files/directories might actually spread on different
file systems. I'm not sure if s_stack_depth check is still possible at
mount time. Even if we can calculate the substree s_stack_depth, it is
still more flexible to determine on a per inode basis, right?

Cheers,
Tao
--
Into Sth. Rich & Strange
Alessio Balsini Dec. 16, 2020, 4:46 p.m. UTC | #4
Hi Tao,

On Sat, Nov 28, 2020 at 09:57:31AM +0800, Peng Tao wrote:
> On Fri, Nov 27, 2020 at 9:41 PM Alessio Balsini <balsini@android.com> wrote:
> >
> > Hi Peng,
> >
> > Thanks for the heads up!
> >
> > On Thu, Nov 26, 2020 at 09:33:34PM +0800, Peng Tao wrote:
> > > On Tue, Oct 27, 2020 at 12:19 AM Alessio Balsini <balsini@android.com> wrote:
> > > > [...]
> > > >  int fuse_passthrough_setup(struct fuse_conn *fc, struct fuse_file *ff,
> > > >                            struct fuse_open_out *openarg)
> > > >  {
> > > > -       return -EINVAL;
> > > > +       struct inode *passthrough_inode;
> > > > +       struct super_block *passthrough_sb;
> > > > +       struct fuse_passthrough *passthrough;
> > > > +       int passthrough_fh = openarg->passthrough_fh;
> > > > +
> > > > +       if (!fc->passthrough)
> > > > +               return -EPERM;
> > > > +
> > > > +       /* Default case, passthrough is not requested */
> > > > +       if (passthrough_fh <= 0)
> > > > +               return -EINVAL;
> > > > +
> > > > +       spin_lock(&fc->passthrough_req_lock);
> > > > +       passthrough = idr_remove(&fc->passthrough_req, passthrough_fh);
> > > > +       spin_unlock(&fc->passthrough_req_lock);
> > > > +
> > > > +       if (!passthrough)
> > > > +               return -EINVAL;
> > > > +
> > > > +       passthrough_inode = file_inode(passthrough->filp);
> > > > +       passthrough_sb = passthrough_inode->i_sb;
> > > > +       if (passthrough_sb->s_stack_depth >= FILESYSTEM_MAX_STACK_DEPTH) {
> > > Hi Alessio,
> > >
> > > passthrough_sb is the underlying filesystem superblock, right? It
> > > seems to prevent fuse passthrough fs from stacking on another fully
> > > stacked file system, instead of preventing other file systems from
> > > stacking on this fuse passthrough file system. Am I misunderstanding
> > > it?
> >
> > Correct, this checks the stacking depth on the lower filesystem.
> > This is an intended behavior to avoid the stacking of multiple FUSE
> > passthrough filesystems, and works because when a FUSE connection has
> > the passthrough feature activated, the file system updates its
> > s_stack_depth to FILESYSTEM_MAX_STACK_DEPTH in process_init_reply()
> > (PATCH 1/5), avoiding further stacking.
> >
> > Do you see issues with that?
> I'm considering a use case where a fuse passthrough file system is
> stacked on top of an overlayfs and/or an ecryptfs. The underlying
> s_stack_depth FILESYSTEM_MAX_STACK_DEPTH is rejected here so it is
> possible to have an overlayfs or an ecryptfs underneath but not both
> (with current FILESYSTEM_MAX_STACK_DEPTH being 2). How about changing
> passthrough fuse sb s_stack_depth to FILESYSTEM_MAX_STACK_DEPTH + 1 in
> PATCH 1/5, and allow passthrough_sb->s_stack_depth to be
> FILESYSTEM_MAX_STACK_DEPTH here? So that existing kernel stacking file
> system setups can all work as the underlying file system, while the
> stacking of multiple FUSE passthrough filesystems is still blocked.
> 


Sounds like a good idea, I'll think about it a bit more and if
everything's all right I'll post the new patchset.


> >
> > What I'm now thinking is that fuse_passthrough_open would probably be a
> > better place for this check, so that the ioctl() would fail and the user
> > space daemon may decide to skip passthrough and use legacy FUSE access
> > for that file (or, at least, be aware that something went wrong).
> >
> +1, fuse_passthrough_open seems to be a better place for the check.
> 
> > A more aggressive approach would be instead to move the stacking depth
> > check to fuse_fill_super_common, where we can update s_stack_depth to
> > lower-fs depth+1 and fail if passthrough is active and s_stack_depth is
> > greater than FILESYSTEM_MAX_STACK_DEPTH.
> >
> The lower layer files/directories might actually spread on different
> file systems. I'm not sure if s_stack_depth check is still possible at
> mount time. Even if we can calculate the substree s_stack_depth, it is
> still more flexible to determine on a per inode basis, right?
> 
> Cheers,
> Tao
> --
> Into Sth. Rich & Strange


Fair enough. The per-inode check is definitely the right way to proceed.

Thanks a lot for you feedback!
Alessio
Alessio Balsini Dec. 16, 2020, 4:55 p.m. UTC | #5
On Wed, Dec 16, 2020 at 09:32:51PM +0800, wu-yan@tcl.com wrote:
> Hi Alessio,
> 
> It may cause file reference counter leak in fuse_passthrough_open. If the
> passthrough_filp
> 
> not implement read_iter/write_iter or passthrough struct allocated failed,
> the reference counter get in fget(pro->fd) not released and cause leak.
> 
> Cheers,
> 
> yanwu
> 


Hi yanwu,

Nice catch, this bug was introduced in v10 and will be fixed in the next
patch set.

Cheers,
Alessio
diff mbox series

Patch

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6738dd5ff5d2..1e94c54d1455 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1034,6 +1034,11 @@  EXPORT_SYMBOL_GPL(fuse_send_init);
 
 static int free_fuse_passthrough(int id, void *p, void *data)
 {
+	struct fuse_passthrough *passthrough = (struct fuse_passthrough *)p;
+
+	fuse_passthrough_release(passthrough);
+	kfree(p);
+
 	return 0;
 }
 
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
index 594060c654f8..a135c955cc33 100644
--- a/fs/fuse/passthrough.c
+++ b/fs/fuse/passthrough.c
@@ -3,19 +3,95 @@ 
 #include "fuse_i.h"
 
 #include <linux/fuse.h>
+#include <linux/idr.h>
 
 int fuse_passthrough_open(struct fuse_dev *fud,
 			  struct fuse_passthrough_out *pto)
 {
-	return -EINVAL;
+	int res;
+	struct file *passthrough_filp;
+	struct fuse_conn *fc = fud->fc;
+	struct fuse_passthrough *passthrough;
+
+	if (!fc->passthrough)
+		return -EPERM;
+
+	/* This field is reserved for future implementation */
+	if (pto->len != 0)
+		return -EINVAL;
+
+	passthrough_filp = fget(pto->fd);
+	if (!passthrough_filp) {
+		pr_err("FUSE: invalid file descriptor for passthrough.\n");
+		return -EBADF;
+	}
+
+	if (!passthrough_filp->f_op->read_iter ||
+	    !passthrough_filp->f_op->write_iter) {
+		pr_err("FUSE: passthrough file misses file operations.\n");
+		return -EBADF;
+	}
+
+	passthrough = kmalloc(sizeof(struct fuse_passthrough), GFP_KERNEL);
+	if (!passthrough)
+		return -ENOMEM;
+
+	passthrough->filp = passthrough_filp;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&fc->passthrough_req_lock);
+	res = idr_alloc(&fc->passthrough_req, passthrough, 1, 0, GFP_ATOMIC);
+	spin_unlock(&fc->passthrough_req_lock);
+	idr_preload_end();
+	if (res <= 0) {
+		fuse_passthrough_release(passthrough);
+		kfree(passthrough);
+	}
+
+	return res;
 }
 
 int fuse_passthrough_setup(struct fuse_conn *fc, struct fuse_file *ff,
 			   struct fuse_open_out *openarg)
 {
-	return -EINVAL;
+	struct inode *passthrough_inode;
+	struct super_block *passthrough_sb;
+	struct fuse_passthrough *passthrough;
+	int passthrough_fh = openarg->passthrough_fh;
+
+	if (!fc->passthrough)
+		return -EPERM;
+
+	/* Default case, passthrough is not requested */
+	if (passthrough_fh <= 0)
+		return -EINVAL;
+
+	spin_lock(&fc->passthrough_req_lock);
+	passthrough = idr_remove(&fc->passthrough_req, passthrough_fh);
+	spin_unlock(&fc->passthrough_req_lock);
+
+	if (!passthrough)
+		return -EINVAL;
+
+	passthrough_inode = file_inode(passthrough->filp);
+	passthrough_sb = passthrough_inode->i_sb;
+	if (passthrough_sb->s_stack_depth >= FILESYSTEM_MAX_STACK_DEPTH) {
+		pr_err("FUSE: fs stacking depth exceeded for passthrough\n");
+		fuse_passthrough_release(passthrough);
+		kfree(passthrough);
+		return -EINVAL;
+	}
+
+	ff->passthrough = *passthrough;
+	kfree(passthrough);
+
+	return 0;
 }
 
 void fuse_passthrough_release(struct fuse_passthrough *passthrough)
 {
+	if (passthrough->filp) {
+		fput(passthrough->filp);
+		passthrough->filp = NULL;
+	}
 }