Message ID | 20241129-work-pidfs-file_handle-v1-6-87d803a42495@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [RFC,1/6] pseudofs: add support for export_ops | expand |
On Fri, Nov 29, 2024 at 2:39 PM Christian Brauner <brauner@kernel.org> wrote: > > On 64-bit platforms, userspace can read the pidfd's inode in order to > get a never-repeated PID identifier. On 32-bit platforms this identifier > is not exposed, as inodes are limited to 32 bits. Instead expose the > identifier via export_fh, which makes it available to userspace via > name_to_handle_at. > > In addition we implement fh_to_dentry, which allows userspace to > recover a pidfd from a pidfs file handle. > > Signed-off-by: Erin Shepherd <erin.shepherd@e43.eu> > [brauner: patch heavily rewritten] > Co-Developed-by: Christian Brauner <brauner@kernel.org> > Signed-off-by: Christian Brauner <brauner@kernel.org> > --- > fs/fhandle.c | 34 +++++++++++---------- > fs/pidfs.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 115 insertions(+), 15 deletions(-) > > diff --git a/fs/fhandle.c b/fs/fhandle.c > index 23491094032ec037066a271873ea8ff794616bee..4c847ca16fabe31d51ff5698b0c9c355c3e2fb67 100644 > --- a/fs/fhandle.c > +++ b/fs/fhandle.c > @@ -268,20 +268,6 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path, > return 0; > } > > -/* > - * Allow relaxed permissions of file handles if the caller has the > - * ability to mount the filesystem or create a bind-mount of the > - * provided @mountdirfd. > - * > - * In both cases the caller may be able to get an unobstructed way to > - * the encoded file handle. If the caller is only able to create a > - * bind-mount we need to verify that there are no locked mounts on top > - * of it that could prevent us from getting to the encoded file. > - * > - * In principle, locked mounts can prevent the caller from mounting the > - * filesystem but that only applies to procfs and sysfs neither of which > - * support decoding file handles. > - */ > static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, > unsigned int o_flags) > { > @@ -291,6 +277,19 @@ static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, > return true; > > /* > + * Allow relaxed permissions of file handles if the caller has the > + * ability to mount the filesystem or create a bind-mount of the > + * provided @mountdirfd. > + * > + * In both cases the caller may be able to get an unobstructed way to > + * the encoded file handle. If the caller is only able to create a > + * bind-mount we need to verify that there are no locked mounts on top > + * of it that could prevent us from getting to the encoded file. > + * > + * In principle, locked mounts can prevent the caller from mounting the > + * filesystem but that only applies to procfs and sysfs neither of which > + * support decoding file handles. > + * Belongs in patch 4 > * Restrict to O_DIRECTORY to provide a deterministic API that avoids a > * confusing api in the face of disconnected non-dir dentries. > * > @@ -397,6 +396,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, > long retval = 0; > struct path path __free(path_put) = {}; > struct file *file; > + const struct export_operations *eops; > > retval = handle_to_path(mountdirfd, ufh, &path, open_flag); > if (retval) > @@ -406,7 +406,11 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, > if (fd < 0) > return fd; > > - file = file_open_root(&path, "", open_flag, 0); > + eops = path.mnt->mnt_sb->s_export_op; > + if (eops->open) > + file = eops->open(&path, open_flag); > + else > + file = file_open_root(&path, "", open_flag, 0); Belongs in patch 3 > if (IS_ERR(file)) > return PTR_ERR(file); > > diff --git a/fs/pidfs.c b/fs/pidfs.c > index f73a47e1d8379df886a90a044fb887f8d06f7c0b..f09af08a4abe4a9100ed972bee8f5c5d7ab33d84 100644 > --- a/fs/pidfs.c > +++ b/fs/pidfs.c > @@ -1,5 +1,6 @@ > // SPDX-License-Identifier: GPL-2.0 > #include <linux/anon_inodes.h> > +#include <linux/exportfs.h> > #include <linux/file.h> > #include <linux/fs.h> > #include <linux/cgroup.h> > @@ -454,6 +455,100 @@ static const struct dentry_operations pidfs_dentry_operations = { > .d_prune = stashed_dentry_prune, > }; > > +static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, > + struct inode *parent) > +{ > + struct pid *pid = inode->i_private; > + > + if (*max_len < 2) { > + *max_len = 2; > + return FILEID_INVALID; > + } > + > + *max_len = 2; > + *(u64 *)fh = pid->ino; > + return FILEID_KERNFS; > +} > + > +/* Find a struct pid based on the inode number. */ > +static struct pid *pidfs_ino_get_pid(u64 ino) > +{ > + ino_t pid_ino = pidfs_ino(ino); > + u32 gen = pidfs_gen(ino); > + struct pid *pid; > + > + guard(rcu)(); > + > + /* Handle @pid lookup carefully so there's no risk of UAF. */ > + pid = idr_find(&pidfs_ino_idr, (u32)pid_ino); > + if (!pid) > + return NULL; > + > + if (sizeof(ino_t) < sizeof(u64)) { Not sure why the two cases are needed. Isn't this enough? if (pidfs_ino(pid->ino) != pid_ino || pidfs_gen(pid->ino) != gen) pid = NULL; > + if (gen && pidfs_gen(pid->ino) != gen) > + pid = NULL; > + } else { > + if (pidfs_ino(pid->ino) != pid_ino) > + pid = NULL; > + } > + > + /* Within our pid namespace hierarchy? */ > + if (pid_vnr(pid) == 0) > + pid = NULL; > + > + return get_pid(pid); > +} > + > +static struct dentry *pidfs_fh_to_dentry(struct super_block *sb, > + struct fid *fid, int fh_len, > + int fh_type) > +{ > + int ret; > + u64 pid_ino; > + struct path path; > + struct pid *pid; > + > + if (fh_len < 2) > + return NULL; > + > + switch (fh_type) { > + case FILEID_KERNFS: > + pid_ino = *(u64 *)fid; > + break; > + default: > + return NULL; > + } > + > + pid = pidfs_ino_get_pid(pid_ino); > + if (!pid) > + return NULL; > + > + ret = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path); > + if (ret < 0) > + return ERR_PTR(ret); > + > + mntput(path.mnt); > + return path.dentry; > +} > + > +static int pidfs_export_permission(struct handle_to_path_ctx *ctx, > + unsigned int oflags) > +{ This deserves a comment to explain why no permissions are required. > + return 0; > +} > + > +static struct file *pidfs_export_open(struct path *path, unsigned int oflags) > +{ > + return dentry_open(path, oflags | O_RDWR, current_cred()); Why is O_RDWR needed here? perhaps a comment to explain. Thanks, Amir.
diff --git a/fs/fhandle.c b/fs/fhandle.c index 23491094032ec037066a271873ea8ff794616bee..4c847ca16fabe31d51ff5698b0c9c355c3e2fb67 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -268,20 +268,6 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path, return 0; } -/* - * Allow relaxed permissions of file handles if the caller has the - * ability to mount the filesystem or create a bind-mount of the - * provided @mountdirfd. - * - * In both cases the caller may be able to get an unobstructed way to - * the encoded file handle. If the caller is only able to create a - * bind-mount we need to verify that there are no locked mounts on top - * of it that could prevent us from getting to the encoded file. - * - * In principle, locked mounts can prevent the caller from mounting the - * filesystem but that only applies to procfs and sysfs neither of which - * support decoding file handles. - */ static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, unsigned int o_flags) { @@ -291,6 +277,19 @@ static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, return true; /* + * Allow relaxed permissions of file handles if the caller has the + * ability to mount the filesystem or create a bind-mount of the + * provided @mountdirfd. + * + * In both cases the caller may be able to get an unobstructed way to + * the encoded file handle. If the caller is only able to create a + * bind-mount we need to verify that there are no locked mounts on top + * of it that could prevent us from getting to the encoded file. + * + * In principle, locked mounts can prevent the caller from mounting the + * filesystem but that only applies to procfs and sysfs neither of which + * support decoding file handles. + * * Restrict to O_DIRECTORY to provide a deterministic API that avoids a * confusing api in the face of disconnected non-dir dentries. * @@ -397,6 +396,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, long retval = 0; struct path path __free(path_put) = {}; struct file *file; + const struct export_operations *eops; retval = handle_to_path(mountdirfd, ufh, &path, open_flag); if (retval) @@ -406,7 +406,11 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, if (fd < 0) return fd; - file = file_open_root(&path, "", open_flag, 0); + eops = path.mnt->mnt_sb->s_export_op; + if (eops->open) + file = eops->open(&path, open_flag); + else + file = file_open_root(&path, "", open_flag, 0); if (IS_ERR(file)) return PTR_ERR(file); diff --git a/fs/pidfs.c b/fs/pidfs.c index f73a47e1d8379df886a90a044fb887f8d06f7c0b..f09af08a4abe4a9100ed972bee8f5c5d7ab33d84 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/anon_inodes.h> +#include <linux/exportfs.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/cgroup.h> @@ -454,6 +455,100 @@ static const struct dentry_operations pidfs_dentry_operations = { .d_prune = stashed_dentry_prune, }; +static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, + struct inode *parent) +{ + struct pid *pid = inode->i_private; + + if (*max_len < 2) { + *max_len = 2; + return FILEID_INVALID; + } + + *max_len = 2; + *(u64 *)fh = pid->ino; + return FILEID_KERNFS; +} + +/* Find a struct pid based on the inode number. */ +static struct pid *pidfs_ino_get_pid(u64 ino) +{ + ino_t pid_ino = pidfs_ino(ino); + u32 gen = pidfs_gen(ino); + struct pid *pid; + + guard(rcu)(); + + /* Handle @pid lookup carefully so there's no risk of UAF. */ + pid = idr_find(&pidfs_ino_idr, (u32)pid_ino); + if (!pid) + return NULL; + + if (sizeof(ino_t) < sizeof(u64)) { + if (gen && pidfs_gen(pid->ino) != gen) + pid = NULL; + } else { + if (pidfs_ino(pid->ino) != pid_ino) + pid = NULL; + } + + /* Within our pid namespace hierarchy? */ + if (pid_vnr(pid) == 0) + pid = NULL; + + return get_pid(pid); +} + +static struct dentry *pidfs_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, + int fh_type) +{ + int ret; + u64 pid_ino; + struct path path; + struct pid *pid; + + if (fh_len < 2) + return NULL; + + switch (fh_type) { + case FILEID_KERNFS: + pid_ino = *(u64 *)fid; + break; + default: + return NULL; + } + + pid = pidfs_ino_get_pid(pid_ino); + if (!pid) + return NULL; + + ret = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path); + if (ret < 0) + return ERR_PTR(ret); + + mntput(path.mnt); + return path.dentry; +} + +static int pidfs_export_permission(struct handle_to_path_ctx *ctx, + unsigned int oflags) +{ + return 0; +} + +static struct file *pidfs_export_open(struct path *path, unsigned int oflags) +{ + return dentry_open(path, oflags | O_RDWR, current_cred()); +} + +static const struct export_operations pidfs_export_operations = { + .encode_fh = pidfs_encode_fh, + .fh_to_dentry = pidfs_fh_to_dentry, + .open = pidfs_export_open, + .permission = pidfs_export_permission, +}; + static int pidfs_init_inode(struct inode *inode, void *data) { struct pid *pid = data; @@ -488,6 +583,7 @@ static int pidfs_init_fs_context(struct fs_context *fc) return -ENOMEM; ctx->ops = &pidfs_sops; + ctx->eops = &pidfs_export_operations; ctx->dops = &pidfs_dentry_operations; fc->s_fs_info = (void *)&pidfs_stashed_ops; return 0;