diff mbox series

[RFC,6/6] pidfs: implement file handle support

Message ID 20241129-work-pidfs-file_handle-v1-6-87d803a42495@kernel.org (mailing list archive)
State New
Headers show
Series [RFC,1/6] pseudofs: add support for export_ops | expand

Commit Message

Christian Brauner Nov. 29, 2024, 1:38 p.m. UTC
On 64-bit platforms, userspace can read the pidfd's inode in order to
get a never-repeated PID identifier. On 32-bit platforms this identifier
is not exposed, as inodes are limited to 32 bits. Instead expose the
identifier via export_fh, which makes it available to userspace via
name_to_handle_at.

In addition we implement fh_to_dentry, which allows userspace to
recover a pidfd from a pidfs file handle.

Signed-off-by: Erin Shepherd <erin.shepherd@e43.eu>
[brauner: patch heavily rewritten]
Co-Developed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/fhandle.c | 34 +++++++++++----------
 fs/pidfs.c   | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 115 insertions(+), 15 deletions(-)

Comments

Amir Goldstein Nov. 29, 2024, 2:52 p.m. UTC | #1
On Fri, Nov 29, 2024 at 2:39 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On 64-bit platforms, userspace can read the pidfd's inode in order to
> get a never-repeated PID identifier. On 32-bit platforms this identifier
> is not exposed, as inodes are limited to 32 bits. Instead expose the
> identifier via export_fh, which makes it available to userspace via
> name_to_handle_at.
>
> In addition we implement fh_to_dentry, which allows userspace to
> recover a pidfd from a pidfs file handle.
>
> Signed-off-by: Erin Shepherd <erin.shepherd@e43.eu>
> [brauner: patch heavily rewritten]
> Co-Developed-by: Christian Brauner <brauner@kernel.org>
> Signed-off-by: Christian Brauner <brauner@kernel.org>
> ---
>  fs/fhandle.c | 34 +++++++++++----------
>  fs/pidfs.c   | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 115 insertions(+), 15 deletions(-)
>
> diff --git a/fs/fhandle.c b/fs/fhandle.c
> index 23491094032ec037066a271873ea8ff794616bee..4c847ca16fabe31d51ff5698b0c9c355c3e2fb67 100644
> --- a/fs/fhandle.c
> +++ b/fs/fhandle.c
> @@ -268,20 +268,6 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path,
>         return 0;
>  }
>
> -/*
> - * Allow relaxed permissions of file handles if the caller has the
> - * ability to mount the filesystem or create a bind-mount of the
> - * provided @mountdirfd.
> - *
> - * In both cases the caller may be able to get an unobstructed way to
> - * the encoded file handle. If the caller is only able to create a
> - * bind-mount we need to verify that there are no locked mounts on top
> - * of it that could prevent us from getting to the encoded file.
> - *
> - * In principle, locked mounts can prevent the caller from mounting the
> - * filesystem but that only applies to procfs and sysfs neither of which
> - * support decoding file handles.
> - */
>  static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
>                                  unsigned int o_flags)
>  {
> @@ -291,6 +277,19 @@ static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
>                 return true;
>
>         /*
> +        * Allow relaxed permissions of file handles if the caller has the
> +        * ability to mount the filesystem or create a bind-mount of the
> +        * provided @mountdirfd.
> +        *
> +        * In both cases the caller may be able to get an unobstructed way to
> +        * the encoded file handle. If the caller is only able to create a
> +        * bind-mount we need to verify that there are no locked mounts on top
> +        * of it that could prevent us from getting to the encoded file.
> +        *
> +        * In principle, locked mounts can prevent the caller from mounting the
> +        * filesystem but that only applies to procfs and sysfs neither of which
> +        * support decoding file handles.
> +        *

Belongs in patch 4

>          * Restrict to O_DIRECTORY to provide a deterministic API that avoids a
>          * confusing api in the face of disconnected non-dir dentries.
>          *
> @@ -397,6 +396,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
>         long retval = 0;
>         struct path path __free(path_put) = {};
>         struct file *file;
> +       const struct export_operations *eops;
>
>         retval = handle_to_path(mountdirfd, ufh, &path, open_flag);
>         if (retval)
> @@ -406,7 +406,11 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
>         if (fd < 0)
>                 return fd;
>
> -       file = file_open_root(&path, "", open_flag, 0);
> +       eops = path.mnt->mnt_sb->s_export_op;
> +       if (eops->open)
> +               file = eops->open(&path, open_flag);
> +       else
> +               file = file_open_root(&path, "", open_flag, 0);

Belongs in patch 3

>         if (IS_ERR(file))
>                 return PTR_ERR(file);
>
> diff --git a/fs/pidfs.c b/fs/pidfs.c
> index f73a47e1d8379df886a90a044fb887f8d06f7c0b..f09af08a4abe4a9100ed972bee8f5c5d7ab33d84 100644
> --- a/fs/pidfs.c
> +++ b/fs/pidfs.c
> @@ -1,5 +1,6 @@
>  // SPDX-License-Identifier: GPL-2.0
>  #include <linux/anon_inodes.h>
> +#include <linux/exportfs.h>
>  #include <linux/file.h>
>  #include <linux/fs.h>
>  #include <linux/cgroup.h>
> @@ -454,6 +455,100 @@ static const struct dentry_operations pidfs_dentry_operations = {
>         .d_prune        = stashed_dentry_prune,
>  };
>
> +static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
> +                          struct inode *parent)
> +{
> +       struct pid *pid = inode->i_private;
> +
> +       if (*max_len < 2) {
> +               *max_len = 2;
> +               return FILEID_INVALID;
> +       }
> +
> +       *max_len = 2;
> +       *(u64 *)fh = pid->ino;
> +       return FILEID_KERNFS;
> +}
> +
> +/* Find a struct pid based on the inode number. */
> +static struct pid *pidfs_ino_get_pid(u64 ino)
> +{
> +       ino_t pid_ino = pidfs_ino(ino);
> +       u32 gen = pidfs_gen(ino);
> +       struct pid *pid;
> +
> +       guard(rcu)();
> +
> +       /* Handle @pid lookup carefully so there's no risk of UAF. */
> +       pid = idr_find(&pidfs_ino_idr, (u32)pid_ino);
> +       if (!pid)
> +               return NULL;
> +
> +       if (sizeof(ino_t) < sizeof(u64)) {

Not sure why the two cases are needed. Isn't this enough?

  if (pidfs_ino(pid->ino) != pid_ino || pidfs_gen(pid->ino) != gen)
         pid = NULL;


> +               if (gen && pidfs_gen(pid->ino) != gen)
> +                       pid = NULL;
> +       } else {
> +               if (pidfs_ino(pid->ino) != pid_ino)
> +                       pid = NULL;
> +       }
> +
> +       /* Within our pid namespace hierarchy? */
> +       if (pid_vnr(pid) == 0)
> +               pid = NULL;
> +
> +       return get_pid(pid);
> +}
> +
> +static struct dentry *pidfs_fh_to_dentry(struct super_block *sb,
> +                                        struct fid *fid, int fh_len,
> +                                        int fh_type)
> +{
> +       int ret;
> +       u64 pid_ino;
> +       struct path path;
> +       struct pid *pid;
> +
> +       if (fh_len < 2)
> +               return NULL;
> +
> +       switch (fh_type) {
> +       case FILEID_KERNFS:
> +               pid_ino = *(u64 *)fid;
> +               break;
> +       default:
> +               return NULL;
> +       }
> +
> +       pid = pidfs_ino_get_pid(pid_ino);
> +       if (!pid)
> +               return NULL;
> +
> +       ret = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path);
> +       if (ret < 0)
> +               return ERR_PTR(ret);
> +
> +       mntput(path.mnt);
> +       return path.dentry;
> +}
> +
> +static int pidfs_export_permission(struct handle_to_path_ctx *ctx,
> +                                  unsigned int oflags)
> +{

This deserves a comment to explain why no permissions are required.

> +       return 0;
> +}
> +
> +static struct file *pidfs_export_open(struct path *path, unsigned int oflags)
> +{
> +       return dentry_open(path, oflags | O_RDWR, current_cred());

Why is O_RDWR needed here? perhaps a comment to explain.

Thanks,
Amir.
diff mbox series

Patch

diff --git a/fs/fhandle.c b/fs/fhandle.c
index 23491094032ec037066a271873ea8ff794616bee..4c847ca16fabe31d51ff5698b0c9c355c3e2fb67 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -268,20 +268,6 @@  static int do_handle_to_path(struct file_handle *handle, struct path *path,
 	return 0;
 }
 
-/*
- * Allow relaxed permissions of file handles if the caller has the
- * ability to mount the filesystem or create a bind-mount of the
- * provided @mountdirfd.
- *
- * In both cases the caller may be able to get an unobstructed way to
- * the encoded file handle. If the caller is only able to create a
- * bind-mount we need to verify that there are no locked mounts on top
- * of it that could prevent us from getting to the encoded file.
- *
- * In principle, locked mounts can prevent the caller from mounting the
- * filesystem but that only applies to procfs and sysfs neither of which
- * support decoding file handles.
- */
 static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
 				 unsigned int o_flags)
 {
@@ -291,6 +277,19 @@  static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
 		return true;
 
 	/*
+	 * Allow relaxed permissions of file handles if the caller has the
+	 * ability to mount the filesystem or create a bind-mount of the
+	 * provided @mountdirfd.
+	 *
+	 * In both cases the caller may be able to get an unobstructed way to
+	 * the encoded file handle. If the caller is only able to create a
+	 * bind-mount we need to verify that there are no locked mounts on top
+	 * of it that could prevent us from getting to the encoded file.
+	 *
+	 * In principle, locked mounts can prevent the caller from mounting the
+	 * filesystem but that only applies to procfs and sysfs neither of which
+	 * support decoding file handles.
+	 *
 	 * Restrict to O_DIRECTORY to provide a deterministic API that avoids a
 	 * confusing api in the face of disconnected non-dir dentries.
 	 *
@@ -397,6 +396,7 @@  static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
 	long retval = 0;
 	struct path path __free(path_put) = {};
 	struct file *file;
+	const struct export_operations *eops;
 
 	retval = handle_to_path(mountdirfd, ufh, &path, open_flag);
 	if (retval)
@@ -406,7 +406,11 @@  static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
 	if (fd < 0)
 		return fd;
 
-	file = file_open_root(&path, "", open_flag, 0);
+	eops = path.mnt->mnt_sb->s_export_op;
+	if (eops->open)
+		file = eops->open(&path, open_flag);
+	else
+		file = file_open_root(&path, "", open_flag, 0);
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
diff --git a/fs/pidfs.c b/fs/pidfs.c
index f73a47e1d8379df886a90a044fb887f8d06f7c0b..f09af08a4abe4a9100ed972bee8f5c5d7ab33d84 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -1,5 +1,6 @@ 
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/anon_inodes.h>
+#include <linux/exportfs.h>
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/cgroup.h>
@@ -454,6 +455,100 @@  static const struct dentry_operations pidfs_dentry_operations = {
 	.d_prune	= stashed_dentry_prune,
 };
 
+static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
+			   struct inode *parent)
+{
+	struct pid *pid = inode->i_private;
+
+	if (*max_len < 2) {
+		*max_len = 2;
+		return FILEID_INVALID;
+	}
+
+	*max_len = 2;
+	*(u64 *)fh = pid->ino;
+	return FILEID_KERNFS;
+}
+
+/* Find a struct pid based on the inode number. */
+static struct pid *pidfs_ino_get_pid(u64 ino)
+{
+	ino_t pid_ino = pidfs_ino(ino);
+	u32 gen = pidfs_gen(ino);
+	struct pid *pid;
+
+	guard(rcu)();
+
+	/* Handle @pid lookup carefully so there's no risk of UAF. */
+	pid = idr_find(&pidfs_ino_idr, (u32)pid_ino);
+	if (!pid)
+		return NULL;
+
+	if (sizeof(ino_t) < sizeof(u64)) {
+		if (gen && pidfs_gen(pid->ino) != gen)
+			pid = NULL;
+	} else {
+		if (pidfs_ino(pid->ino) != pid_ino)
+			pid = NULL;
+	}
+
+	/* Within our pid namespace hierarchy? */
+	if (pid_vnr(pid) == 0)
+		pid = NULL;
+
+	return get_pid(pid);
+}
+
+static struct dentry *pidfs_fh_to_dentry(struct super_block *sb,
+					 struct fid *fid, int fh_len,
+					 int fh_type)
+{
+	int ret;
+	u64 pid_ino;
+	struct path path;
+	struct pid *pid;
+
+	if (fh_len < 2)
+		return NULL;
+
+	switch (fh_type) {
+	case FILEID_KERNFS:
+		pid_ino = *(u64 *)fid;
+		break;
+	default:
+		return NULL;
+	}
+
+	pid = pidfs_ino_get_pid(pid_ino);
+	if (!pid)
+		return NULL;
+
+	ret = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	mntput(path.mnt);
+	return path.dentry;
+}
+
+static int pidfs_export_permission(struct handle_to_path_ctx *ctx,
+				   unsigned int oflags)
+{
+	return 0;
+}
+
+static struct file *pidfs_export_open(struct path *path, unsigned int oflags)
+{
+	return dentry_open(path, oflags | O_RDWR, current_cred());
+}
+
+static const struct export_operations pidfs_export_operations = {
+	.encode_fh	= pidfs_encode_fh,
+	.fh_to_dentry	= pidfs_fh_to_dentry,
+	.open		= pidfs_export_open,
+	.permission	= pidfs_export_permission,
+};
+
 static int pidfs_init_inode(struct inode *inode, void *data)
 {
 	struct pid *pid = data;
@@ -488,6 +583,7 @@  static int pidfs_init_fs_context(struct fs_context *fc)
 		return -ENOMEM;
 
 	ctx->ops = &pidfs_sops;
+	ctx->eops = &pidfs_export_operations;
 	ctx->dops = &pidfs_dentry_operations;
 	fc->s_fs_info = (void *)&pidfs_stashed_ops;
 	return 0;