diff mbox series

[v2,9/9] fs/fuse: allow idmapped mounts

Message ID 20240814114034.113953-10-aleksandr.mikhalitsyn@canonical.com (mailing list archive)
State New
Headers show
Series fuse: basic support for idmapped mounts | expand

Commit Message

Aleksandr Mikhalitsyn Aug. 14, 2024, 11:40 a.m. UTC
Now we have everything in place and we can allow idmapped mounts
by setting the FS_ALLOW_IDMAP flag. Notice that real availability
of idmapped mounts will depend on the fuse daemon. The fuse daemon
has to set the FUSE_ALLOW_IDMAP flag in the FUSE_INIT reply.

To discuss:
- we enable idmapped mounts support only if "default_permissions" mode is enabled,
because otherwise we would need to deal with UID/GID mappings on the userspace side OR
provide userspace with idmapped req->in.h.uid/req->in.h.gid values, which is probably not
something that we want to do. The idmapped mounts philosophy is not about faking
the caller's uid/gid.

- We had a small offlist discussion with Christian about adding an fs_type->allow_idmap
hook. Christian pointed out that it would be nicer to have a superblock flag instead, like
SB_I_NOIDMAP, which we can set at mount time if we see that the filesystem does not
support idmappings. But, unfortunately, I didn't succeed here because the kernel only
knows whether the filesystem supports idmapping after the FUSE_INIT request, and the FUSE_INIT
request is sent at the end of the mounting process, so the mount and superblock will already
exist and be visible to userspace at that time. It seems like setting the SB_I_NOIDMAP flag in this
case is too late, as a user may do the trick of creating an idmapped mount while it wasn't yet
restricted by SB_I_NOIDMAP. Alternatively, we can introduce a "positive" version, SB_I_ALLOWIDMAP,
and a "weak" version of FS_ALLOW_IDMAP like FS_MAY_ALLOW_IDMAP. So if FS_MAY_ALLOW_IDMAP is set,
then SB_I_ALLOWIDMAP has to be set on the superblock to allow creation of an idmapped mount.
But that's a matter for our discussion.

Some extra links and examples:

- libfuse support
https://github.com/mihalicyn/libfuse/commits/idmap_support

- fuse-overlayfs support:
https://github.com/mihalicyn/fuse-overlayfs/commits/idmap_support

- cephfs-fuse conversion example
https://github.com/mihalicyn/ceph/commits/fuse_idmap

- glusterfs conversion example
https://github.com/mihalicyn/glusterfs/commits/fuse_idmap

Cc: Christian Brauner <brauner@kernel.org>
Cc: Seth Forshee <sforshee@kernel.org>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Bernd Schubert <bschubert@ddn.com>
Cc: <linux-fsdevel@vger.kernel.org>
Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
---
 fs/fuse/fuse_i.h          |  3 +++
 fs/fuse/inode.c           | 13 ++++++++++---
 include/uapi/linux/fuse.h |  5 ++++-
 3 files changed, 17 insertions(+), 4 deletions(-)

Comments

Christian Brauner Aug. 14, 2024, 2:19 p.m. UTC | #1
On Wed, Aug 14, 2024 at 01:40:34PM GMT, Alexander Mikhalitsyn wrote:
> Now we have everything in place and we can allow idmapped mounts
> by setting the FS_ALLOW_IDMAP flag. Notice that real availability
> of idmapped mounts will depend on the fuse daemon. Fuse daemon
> have to set FUSE_ALLOW_IDMAP flag in the FUSE_INIT reply.
> 
> To discuss:
> - we enable idmapped mounts support only if "default_permissions" mode is enabled,
> because otherwise we would need to deal with UID/GID mappings in the userspace side OR
> provide the userspace with idmapped req->in.h.uid/req->in.h.gid values which is not
> something that we probably want to. Idmapped mounts phylosophy is not about faking
> caller uid/gid.
> 
> - We have a small offlist discussion with Christian around adding fs_type->allow_idmap
> hook. Christian pointed that it would be nice to have a superblock flag instead like
> SB_I_NOIDMAP and we can set this flag during mount time if we see that filesystem does not
> support idmappings. But, unfortunately I didn't succeed here because the kernel will
> know if the filesystem supports idmapping or not after FUSE_INIT request, but FUSE_INIT request
> is being sent at the end of mounting process, so mount and superblock will exist and
> visible by the userspace in that time. It seems like setting SB_I_NOIDMAP flag in this
> case is too late as user may do the trick with creating a idmapped mount while it wasn't
> restricted by SB_I_NOIDMAP. Alternatively, we can introduce a "positive" version SB_I_ALLOWIDMAP

Hm, I'm confused: why won't the following (uncompiled) work?

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ed4c2688047f..8ead1cacdd2f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1346,10 +1346,12 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
                        if (flags & FUSE_OWNER_UID_GID_EXT)
                                fc->owner_uid_gid_ext = 1;
                        if (flags & FUSE_ALLOW_IDMAP) {
-                               if (fc->owner_uid_gid_ext && fc->default_permissions)
+                               if (fc->owner_uid_gid_ext && fc->default_permissions) {
                                        fc->allow_idmap = 1;
-                               else
+                                       fm->sb->s_iflags &= ~SB_I_NOIDMAP;
+                               } else {
                                        ok = false;
+                               }
                        }
                } else {
                        ra_pages = fc->max_read / PAGE_SIZE;
@@ -1576,6 +1578,7 @@ static void fuse_sb_defaults(struct super_block *sb)
        sb->s_time_gran = 1;
        sb->s_export_op = &fuse_export_operations;
        sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
+       sb->s_iflags |= SB_I_NOIDMAP;
        if (sb->s_user_ns != &init_user_ns)
                sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
        sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
diff --git a/fs/namespace.c b/fs/namespace.c
index 328087a4df8a..d1702285c915 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4436,6 +4436,10 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
        if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
                return -EINVAL;

+       /* The filesystem has turned off idmapped mounts. */
+       if (m->mnt_sb->s_iflags & SB_I_NOIDMAP)
+               return -EINVAL;
+
        /* We're not controlling the superblock. */
        if (!ns_capable(fs_userns, CAP_SYS_ADMIN))
                return -EPERM;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fd34b5755c0b..185004c41a5e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1189,6 +1189,7 @@ extern int send_sigurg(struct fown_struct *fown);
 #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
 #define SB_I_RETIRED   0x00000800      /* superblock shouldn't be reused */
 #define SB_I_NOUMASK   0x00001000      /* VFS does not apply umask */
+#define SB_I_NOIDMAP   0x00002000      /* No idmapped mounts on this superblock */

 /* Possible states of 'frozen' field */
 enum {
Aleksandr Mikhalitsyn Aug. 15, 2024, 8:08 a.m. UTC | #2
On Wed, Aug 14, 2024 at 4:19 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Wed, Aug 14, 2024 at 01:40:34PM GMT, Alexander Mikhalitsyn wrote:
> > Now we have everything in place and we can allow idmapped mounts
> > by setting the FS_ALLOW_IDMAP flag. Notice that real availability
> > of idmapped mounts will depend on the fuse daemon. Fuse daemon
> > have to set FUSE_ALLOW_IDMAP flag in the FUSE_INIT reply.
> >
> > To discuss:
> > - we enable idmapped mounts support only if "default_permissions" mode is enabled,
> > because otherwise we would need to deal with UID/GID mappings in the userspace side OR
> > provide the userspace with idmapped req->in.h.uid/req->in.h.gid values which is not
> > something that we probably want to. Idmapped mounts phylosophy is not about faking
> > caller uid/gid.
> >
> > - We have a small offlist discussion with Christian around adding fs_type->allow_idmap
> > hook. Christian pointed that it would be nice to have a superblock flag instead like
> > SB_I_NOIDMAP and we can set this flag during mount time if we see that filesystem does not
> > support idmappings. But, unfortunately I didn't succeed here because the kernel will
> > know if the filesystem supports idmapping or not after FUSE_INIT request, but FUSE_INIT request
> > is being sent at the end of mounting process, so mount and superblock will exist and
> > visible by the userspace in that time. It seems like setting SB_I_NOIDMAP flag in this
> > case is too late as user may do the trick with creating a idmapped mount while it wasn't
> > restricted by SB_I_NOIDMAP. Alternatively, we can introduce a "positive" version SB_I_ALLOWIDMAP

Hi Christian,

>
> Hm, I'm confused why won't the following (uncompiled) work?

I believe that your approach should work. Sorry about that. It's my bad that I
didn't consider setting SB_I_NOIDMAP in fill_super and unsetting it later on, once
we had enough information.

Huge thanks for pointing this out!

I'll send v3 soon and also add support for virtiofs in the same series.

Kind regards,
Alex

>
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index ed4c2688047f..8ead1cacdd2f 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -1346,10 +1346,12 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
>                         if (flags & FUSE_OWNER_UID_GID_EXT)
>                                 fc->owner_uid_gid_ext = 1;
>                         if (flags & FUSE_ALLOW_IDMAP) {
> -                               if (fc->owner_uid_gid_ext && fc->default_permissions)
> +                               if (fc->owner_uid_gid_ext && fc->default_permissions) {
>                                         fc->allow_idmap = 1;
> -                               else
> +                                       fm->sb->s_iflags &= ~SB_I_NOIDMAP;
> +                               } else {
>                                         ok = false;
> +                               }
>                         }
>                 } else {
>                         ra_pages = fc->max_read / PAGE_SIZE;
> @@ -1576,6 +1578,7 @@ static void fuse_sb_defaults(struct super_block *sb)
>         sb->s_time_gran = 1;
>         sb->s_export_op = &fuse_export_operations;
>         sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
> +       sb->s_iflags |= SB_I_NOIDMAP;
>         if (sb->s_user_ns != &init_user_ns)
>                 sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
>         sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 328087a4df8a..d1702285c915 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -4436,6 +4436,10 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
>         if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
>                 return -EINVAL;
>
> +       /* The filesystem has turned off idmapped mounts. */
> +       if (m->mnt_sb->s_iflags & SB_I_NOIDMAP)
> +               return -EINVAL;
> +
>         /* We're not controlling the superblock. */
>         if (!ns_capable(fs_userns, CAP_SYS_ADMIN))
>                 return -EPERM;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index fd34b5755c0b..185004c41a5e 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1189,6 +1189,7 @@ extern int send_sigurg(struct fown_struct *fown);
>  #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
>  #define SB_I_RETIRED   0x00000800      /* superblock shouldn't be reused */
>  #define SB_I_NOUMASK   0x00001000      /* VFS does not apply umask */
> +#define SB_I_NOIDMAP   0x00002000      /* No idmapped mounts on this superblock */
>
>  /* Possible states of 'frozen' field */
>  enum {
Christian Brauner Aug. 15, 2024, 1:20 p.m. UTC | #3
> didn't consider setting SB_I_NOIDMAP in fill_super and unsetting it
> later on once
> we had enough information.

No worries, I probably just didn't clarify this.
diff mbox series

Patch

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 883151a44d72..b2780ab59069 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -848,6 +848,9 @@  struct fuse_conn {
 	/* Add owner_{u,g}id info when creating a new inode */
 	unsigned int owner_uid_gid_ext:1;
 
+	/* Allow creation of idmapped mounts */
+	unsigned int allow_idmap:1;
+
 	/* Does the filesystem support per inode DAX? */
 	unsigned int inode_dax:1;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6c205731c844..ed4c2688047f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1345,6 +1345,12 @@  static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
 				fm->sb->s_export_op = &fuse_export_fid_operations;
 			if (flags & FUSE_OWNER_UID_GID_EXT)
 				fc->owner_uid_gid_ext = 1;
+			if (flags & FUSE_ALLOW_IDMAP) {
+				if (fc->owner_uid_gid_ext && fc->default_permissions)
+					fc->allow_idmap = 1;
+				else
+					ok = false;
+			}
 		} else {
 			ra_pages = fc->max_read / PAGE_SIZE;
 			fc->no_lock = 1;
@@ -1392,7 +1398,8 @@  void fuse_send_init(struct fuse_mount *fm)
 		FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
 		FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
 		FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP |
-		FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_OWNER_UID_GID_EXT;
+		FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_OWNER_UID_GID_EXT |
+		FUSE_ALLOW_IDMAP;
 #ifdef CONFIG_FUSE_DAX
 	if (fm->fc->dax)
 		flags |= FUSE_MAP_ALIGNMENT;
@@ -1981,7 +1988,7 @@  static void fuse_kill_sb_anon(struct super_block *sb)
 static struct file_system_type fuse_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "fuse",
-	.fs_flags	= FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
+	.fs_flags	= FS_HAS_SUBTYPE | FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
 	.init_fs_context = fuse_init_fs_context,
 	.parameters	= fuse_fs_parameters,
 	.kill_sb	= fuse_kill_sb_anon,
@@ -2002,7 +2009,7 @@  static struct file_system_type fuseblk_fs_type = {
 	.init_fs_context = fuse_init_fs_context,
 	.parameters	= fuse_fs_parameters,
 	.kill_sb	= fuse_kill_sb_blk,
-	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
+	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_ALLOW_IDMAP,
 };
 MODULE_ALIAS_FS("fuseblk");
 
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index d9ecc17fd13b..b23e8247ce43 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -221,6 +221,7 @@ 
  *  7.41
  *  - add FUSE_EXT_OWNER_UID_GID
  *  - add FUSE_OWNER_UID_GID_EXT
+ *  - add FUSE_ALLOW_IDMAP
  */
 
 #ifndef _LINUX_FUSE_H
@@ -256,7 +257,7 @@ 
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 40
+#define FUSE_KERNEL_MINOR_VERSION 41
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -427,6 +428,7 @@  struct fuse_file_lock {
  *		    of the request ID indicates resend requests
  * FUSE_OWNER_UID_GID_EXT: add inode owner UID/GID info to create, mkdir,
  *			   symlink and mknod
+ * FUSE_ALLOW_IDMAP: allow creation of idmapped mounts
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -473,6 +475,7 @@  struct fuse_file_lock {
 /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */
 #define FUSE_DIRECT_IO_RELAX	FUSE_DIRECT_IO_ALLOW_MMAP
 #define FUSE_OWNER_UID_GID_EXT	(1ULL << 40)
+#define FUSE_ALLOW_IDMAP	(1ULL << 41)
 
 /**
  * CUSE INIT request/reply flags