Message ID | 20241128-work-pidfs-v1-0-80f267639d98@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
On Thu, Nov 28, 2024 at 1:34 PM Christian Brauner <brauner@kernel.org> wrote: > > Hey, > > This reworks the inode number allocation for pidfs in order to support > file handles properly. > > Recently we received a patchset that aims to enable file handle encoding > and decoding via name_to_handle_at(2) and open_by_handle_at(2). > > A crucial step in the patch series is how to go from inode number to > struct pid without leaking information into unprivileged contexts. The > issue is that in order to find a struct pid the pid number in the > initial pid namespace must be encoded into the file handle via > name_to_handle_at(2). This can be used by containers using a separate > pid namespace to learn what the pid number of a given process in the > initial pid namespace is. While this is a weak information leak it could > be used in various exploits and in general is an ugly wart in the > design. > > To solve this problem a new way is needed to look up a struct pid based > on the inode number allocated for that struct pid. The other part is to > remove the custom inode number allocation on 32bit systems that is also > an ugly wart that should go away. > > So, a new scheme is used that I was discussing with Tejun some time > back. A cyclic ida is used for the lower 32 bits and the high 32 bits > are used for the generation number. This gives a 64 bit inode number > that is unique on both 32 bit and 64 bit. The lower 32 bit number is > recycled slowly and can be used to look up struct pids. > > So after applying the pidfs file handle series at > https://lore.kernel.org/r/20241101135452.19359-1-erin.shepherd@e43.eu on > top of the patches here we should be able to simplify encoding and > decoding to something like: > > diff --git a/fs/pidfs.c b/fs/pidfs.c > index e71294d3d607..a38b833a2d38 100644 > --- a/fs/pidfs.c > +++ b/fs/pidfs.c > @@ -78,7 +78,7 @@ void pidfs_remove_pid(struct pid *pid) > } > > /* Find a struct pid based on the inode number. 
*/ > -static __maybe_unused struct pid *pidfs_ino_get_pid(u64 ino) > +static struct pid *pidfs_ino_get_pid(u64 ino) > { > ino_t pid_ino = pidfs_ino(ino); > u32 gen = pidfs_gen(ino); > @@ -475,49 +475,37 @@ static const struct dentry_operations pidfs_dentry_operations = { > .d_prune = stashed_dentry_prune, > }; > > -#define PIDFD_FID_LEN 3 > - > -struct pidfd_fid { > - u64 ino; > - s32 pid; > -} __packed; > - > -static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, > +static int pidfs_encode_fh(struct inode *inode, __u32 *fh, int *max_len, > struct inode *parent) > { > struct pid *pid = inode->i_private; > - struct pidfd_fid *fid = (struct pidfd_fid *)fh; > > - if (*max_len < PIDFD_FID_LEN) { > - *max_len = PIDFD_FID_LEN; > + if (*max_len < 2) { > + *max_len = 2; > return FILEID_INVALID; > } > > - fid->ino = pid->ino; > - fid->pid = pid_nr(pid); > - *max_len = PIDFD_FID_LEN; > + *max_len = 2; > + *(u64 *)fh = pid->ino; > return FILEID_INO64_GEN; Semantic remark: /* * 64 bit inode number, 32 bit generation number. */ FILEID_INO64_GEN = 0x81, filesystems are free to abuse the constants and return whatever id they want (e.g. shmem_encode_fh()), but if you want to play by the rules, this would be either: /* * 64 bit unique kernfs id */ FILEID_KERNFS = 0xfe, or: /* * 32bit inode number, 32 bit generation number. */ FILEID_INO32_GEN = 1, which is at least sometimes correct. or define: /* * 64 bit inode number. */ FILEID_INO64 = 0x80, Thanks, Amir.
diff --git a/fs/pidfs.c b/fs/pidfs.c index e71294d3d607..a38b833a2d38 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -78,7 +78,7 @@ void pidfs_remove_pid(struct pid *pid) } /* Find a struct pid based on the inode number. */ -static __maybe_unused struct pid *pidfs_ino_get_pid(u64 ino) +static struct pid *pidfs_ino_get_pid(u64 ino) { ino_t pid_ino = pidfs_ino(ino); u32 gen = pidfs_gen(ino); @@ -475,49 +475,37 @@ static const struct dentry_operations pidfs_dentry_operations = { .d_prune = stashed_dentry_prune, }; -#define PIDFD_FID_LEN 3 - -struct pidfd_fid { - u64 ino; - s32 pid; -} __packed; - -static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, +static int pidfs_encode_fh(struct inode *inode, __u32 *fh, int *max_len, struct inode *parent) { struct pid *pid = inode->i_private; - struct pidfd_fid *fid = (struct pidfd_fid *)fh; - if (*max_len < PIDFD_FID_LEN) { - *max_len = PIDFD_FID_LEN; + if (*max_len < 2) { + *max_len = 2; return FILEID_INVALID; } - fid->ino = pid->ino; - fid->pid = pid_nr(pid); - *max_len = PIDFD_FID_LEN; + *max_len = 2; + *(u64 *)fh = pid->ino; return FILEID_INO64_GEN; } static struct dentry *pidfs_fh_to_dentry(struct super_block *sb, - struct fid *gen_fid, + struct fid *fid, int fh_len, int fh_type) { int ret; struct path path; - struct pidfd_fid *fid = (struct pidfd_fid *)gen_fid; struct pid *pid; + u64 pid_ino; - if (fh_type != FILEID_INO64_GEN || fh_len < PIDFD_FID_LEN) + if (fh_type != FILEID_INO64_GEN || fh_len < 2) return NULL; - scoped_guard(rcu) { - pid = find_pid_ns(fid->pid, &init_pid_ns); - if (!pid || pid->ino != fid->ino || pid_vnr(pid) == 0) - return NULL; - - pid = get_pid(pid); - } + pid_ino = *(u64 *)fid; + pid = pidfs_ino_get_pid(pid_ino); + if (!pid) + return NULL; ret = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path); if (ret < 0)