diff mbox series

[v3,2/4] pid: Add PIDFD_IOCTL_GETFD to fetch file descriptors from processes

Message ID 20191217010001.GA14461@ircssh-2.c.rugged-nimbus-611.internal (mailing list archive)
State New, archived
Headers show
Series Add pidfd getfd ioctl (Was Add ptrace get_fd request) | expand

Commit Message

Sargun Dhillon Dec. 17, 2019, 1 a.m. UTC
This adds an ioctl which allows file descriptors to be extracted
from processes based on their pidfd.

One reason to use this is to allow sandboxers to take actions on file
descriptors on behalf of another process. For example, this can be
combined with seccomp-bpf's user notification to do on-demand fd
extraction and take privileged actions, such as binding a socket to a
privileged port. This is similar to using ptrace parasitic code
injection to extract a file descriptor from a process, but without
breaking debuggers or paying the ptrace overhead cost.

You must have the ability to ptrace the process in order to extract any
file descriptors from it. ptrace can already be used to extract file
descriptors based on parasitic code injections, so the permissions
model is aligned.

The ioctl takes a pointer to pidfd_getfd_args. pidfd_getfd_args contains
a size, which allows for gradual evolution of the API. There is an options
field, which can be used to state whether the fd should be opened with
CLOEXEC, or not. An additional options field may be added in the future
to include the ability to clear cgroup information about the file
descriptor at a later point. If the structure is from a newer kernel, and
includes members which make it larger than the structure that's known to
this kernel version, E2BIG will be returned if any of those additional
members are non-zero.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
---
 Documentation/ioctl/ioctl-number.rst |  1 +
 include/linux/pid.h                  |  1 +
 include/uapi/linux/pid.h             | 26 ++++++++++
 kernel/fork.c                        | 72 ++++++++++++++++++++++++++++
 4 files changed, 100 insertions(+)
 create mode 100644 include/uapi/linux/pid.h

Comments

Jann Horn Dec. 17, 2019, 1:13 a.m. UTC | #1
On Tue, Dec 17, 2019 at 2:00 AM Sargun Dhillon <sargun@sargun.me> wrote:
> This adds an ioctl which allows file descriptors to be extracted
> from processes based on their pidfd.
[...]
> You must have the ability to ptrace the process in order to extract any
> file descriptors from it. ptrace can already be used to extract file
> descriptors based on parasitic code injections, so the permissions
> model is aligned.
[...]
> +       task = get_pid_task(pid, PIDTYPE_PID);
> +       if (!task)
> +               return -ESRCH;
> +       ret = -EPERM;

Please add something like

if (mutex_lock_killable(&task->signal->cred_guard_mutex))
  goto out;

here, and drop the mutex after fget_task().

> +       if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
> +               goto out;
> +       ret = -EBADF;
> +       file = fget_task(task, args.fd);
> +       if (!file)
> +               goto out;
> +
> +       fd = get_unused_fd_flags(fd_flags);
> +       if (fd < 0) {
> +               ret = fd;
> +               goto out_put_file;
> +       }
> +       /*
> +        * security_file_receive must come last since it may have side effects
> +        * and cannot be reversed.
> +        */
> +       ret = security_file_receive(file);
> +       if (ret)
> +               goto out_put_fd;
> +
> +       fd_install(fd, file);
> +       put_task_struct(task);
> +       return fd;
> +
> +out_put_fd:
> +       put_unused_fd(fd);
> +out_put_file:
> +       fput(file);
> +out:
> +       put_task_struct(task);
> +       return ret;
> +}
Christian Brauner Dec. 17, 2019, 1:50 a.m. UTC | #2
[Cc Arnd since he fiddled with ioctl()s quite a bit recently.]

On Tue, Dec 17, 2019 at 01:00:04AM +0000, Sargun Dhillon wrote:
> This adds an ioctl which allows file descriptors to be extracted
> from processes based on their pidfd.
> 
> One reason to use this is to allow sandboxers to take actions on file
> descriptors on the behalf of another process. For example, this can be
> combined with seccomp-bpf's user notification to do on-demand fd
> extraction and take privileged actions. For example, it can be used
> to bind a socket to a privileged port. This is similar to ptrace, and
> using ptrace parasitic code injection to extract a file descriptor from a
> process, but without breaking debuggers, or paying the ptrace overhead
> cost.
> 
> You must have the ability to ptrace the process in order to extract any
> file descriptors from it. ptrace can already be used to extract file
> descriptors based on parasitic code injections, so the permissions
> model is aligned.
> 
> The ioctl takes a pointer to pidfd_getfd_args. pidfd_getfd_args contains
> a size, which allows for gradual evolution of the API. There is an options
> field, which can be used to state whether the fd should be opened with
> CLOEXEC, or not. An additional options field may be added in the future
> to include the ability to clear cgroup information about the file
> descriptor at a later point. If the structure is from a newer kernel, and
> includes members which make it larger than the structure that's known to
> this kernel version, E2BIG will be returned.
> 
> Signed-off-by: Sargun Dhillon <sargun@sargun.me>
> ---
>  Documentation/ioctl/ioctl-number.rst |  1 +
>  include/linux/pid.h                  |  1 +
>  include/uapi/linux/pid.h             | 26 ++++++++++
>  kernel/fork.c                        | 72 ++++++++++++++++++++++++++++
>  4 files changed, 100 insertions(+)
>  create mode 100644 include/uapi/linux/pid.h
> 
> diff --git a/Documentation/ioctl/ioctl-number.rst b/Documentation/ioctl/ioctl-number.rst
> index bef79cd4c6b4..be2efb93acd1 100644
> --- a/Documentation/ioctl/ioctl-number.rst
> +++ b/Documentation/ioctl/ioctl-number.rst
> @@ -272,6 +272,7 @@ Code  Seq#    Include File                                           Comments
>                                                                       <mailto:tim@cyberelk.net>
>  'p'   A1-A5  linux/pps.h                                             LinuxPPS
>                                                                       <mailto:giometti@linux.it>
> +'p'   B0-CF  uapi/linux/pid.h
>  'q'   00-1F  linux/serio.h
>  'q'   80-FF  linux/telephony.h                                       Internet PhoneJACK, Internet LineJACK
>               linux/ixjuser.h                                         <http://web.archive.org/web/%2A/http://www.quicknet.net>
> diff --git a/include/linux/pid.h b/include/linux/pid.h
> index 9645b1194c98..65f1a73040c9 100644
> --- a/include/linux/pid.h
> +++ b/include/linux/pid.h
> @@ -5,6 +5,7 @@
>  #include <linux/rculist.h>
>  #include <linux/wait.h>
>  #include <linux/refcount.h>
> +#include <uapi/linux/pid.h>

That should be pidfd.h and the resulting new file be placed under the
pidfd entry in maintainers:
+F:     include/uapi/linux/pidfd.h

>  
>  enum pid_type
>  {
> diff --git a/include/uapi/linux/pid.h b/include/uapi/linux/pid.h
> new file mode 100644
> index 000000000000..4ec02ed8b39a
> --- /dev/null
> +++ b/include/uapi/linux/pid.h
> @@ -0,0 +1,26 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +#ifndef _UAPI_LINUX_PID_H
> +#define _UAPI_LINUX_PID_H
> +
> +#include <linux/types.h>
> +#include <linux/ioctl.h>
> +
> +/* options to pass in to pidfd_getfd_args flags */
> +#define PIDFD_GETFD_CLOEXEC (1 << 0)	/* open the fd with cloexec */

Please, make them cloexec by default unless there's a very good reason
not to.

> +
> +struct pidfd_getfd_args {
> +	__u32 size;		/* sizeof(pidfd_getfd_args) */
> +	__u32 fd;       /* the tracee's file descriptor to get */
> +	__u32 flags;
> +};

I think you either want to pad this

+struct pidfd_getfd_args {
+	__u32 size;		/* sizeof(pidfd_getfd_args) */
+	__u32 fd;       /* the tracee's file descriptor to get */
+	__u32 flags;
	__u32 reserved;
+};

or use __aligned_u64 everywhere which I'd personally prefer instead of
this manual padding everywhere.

> +
> +#define PIDFD_IOC_MAGIC			'p'
> +#define PIDFD_IO(nr)			_IO(PIDFD_IOC_MAGIC, nr)
> +#define PIDFD_IOR(nr, type)		_IOR(PIDFD_IOC_MAGIC, nr, type)
> +#define PIDFD_IOW(nr, type)		_IOW(PIDFD_IOC_MAGIC, nr, type)
> +#define PIDFD_IOWR(nr, type)		_IOWR(PIDFD_IOC_MAGIC, nr, type)
> +
> +#define PIDFD_IOCTL_GETFD		PIDFD_IOWR(0xb0, \
> +						struct pidfd_getfd_args)
> +
> +#endif /* _UAPI_LINUX_PID_H */
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 6cabc124378c..d9971e664e82 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1726,9 +1726,81 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
>  	return poll_flags;
>  }
>  
> +static long pidfd_getfd(struct pid *pid, struct pidfd_getfd_args __user *buf)
> +{
> +	struct pidfd_getfd_args args;
> +	unsigned int fd_flags = 0;
> +	struct task_struct *task;
> +	struct file *file;
> +	u32 user_size;
> +	int ret, fd;
> +
> +	ret = get_user(user_size, &buf->size);
> +	if (ret)
> +		return ret;
> +
> +	ret = copy_struct_from_user(&args, sizeof(args), buf, user_size);
> +	if (ret)
> +		return ret;
> +	if ((args.flags & ~(PIDFD_GETFD_CLOEXEC)) != 0)
> +		return -EINVAL;

Nit: It's more common - especially in this file - to do

if (args.flags & ~PIDFD_GETFD_CLOEXEC)
	return -EINVAL;

> +	if (args.flags & PIDFD_GETFD_CLOEXEC)
> +		fd_flags |= O_CLOEXEC;
> +
> +	task = get_pid_task(pid, PIDTYPE_PID);
> +	if (!task)
> +		return -ESRCH;

\n

> +	ret = -EPERM;
> +	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
> +		goto out;

\n

Please don't pre-set errors unless they are used by multiple exit paths.
if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
	ret = -EPERM;
	goto out;
}

> +	ret = -EBADF;
> +	file = fget_task(task, args.fd);
> +	if (!file)
> +		goto out;

Same.

> +
> +	fd = get_unused_fd_flags(fd_flags);
> +	if (fd < 0) {
> +		ret = fd;
> +		goto out_put_file;
> +	}

\n

> +	/*
> +	 * security_file_receive must come last since it may have side effects
> +	 * and cannot be reversed.
> +	 */
> +	ret = security_file_receive(file);
> +	if (ret)
> +		goto out_put_fd;
> +
> +	fd_install(fd, file);
> +	put_task_struct(task);
> +	return fd;
> +
> +out_put_fd:
> +	put_unused_fd(fd);
> +out_put_file:
> +	fput(file);
> +out:
> +	put_task_struct(task);
> +	return ret;
> +}
> +
> +static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> +{
> +	struct pid *pid = file->private_data;
> +	void __user *buf = (void __user *)arg;
> +
> +	switch (cmd) {
> +	case PIDFD_IOCTL_GETFD:
> +		return pidfd_getfd(pid, buf);
> +	default:
> +		return -EINVAL;
> +	}
> +}
> +
>  const struct file_operations pidfd_fops = {
>  	.release = pidfd_release,
>  	.poll = pidfd_poll,
> +	.unlocked_ioctl = pidfd_ioctl,
>  #ifdef CONFIG_PROC_FS
>  	.show_fdinfo = pidfd_show_fdinfo,
>  #endif
> -- 
> 2.20.1
>
Sargun Dhillon Dec. 17, 2019, 2:49 a.m. UTC | #3
On Mon, Dec 16, 2019 at 5:50 PM Christian Brauner
<christian.brauner@ubuntu.com> wrote:
>
> [Cc Arnd since he fiddled with ioctl()s quite a bit recently.]
>
>
> That should be pidfd.h and the resulting new file be placed under the
> pidfd entry in maintainers:
> +F:     include/uapi/linux/pidfd.h
>
> >
> >  enum pid_type
> >  {
> > diff --git a/include/uapi/linux/pid.h b/include/uapi/linux/pid.h
> > new file mode 100644
> > index 000000000000..4ec02ed8b39a
> > --- /dev/null
> > +++ b/include/uapi/linux/pid.h
> > @@ -0,0 +1,26 @@
> > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> > +#ifndef _UAPI_LINUX_PID_H
> > +#define _UAPI_LINUX_PID_H
> > +
> > +#include <linux/types.h>
> > +#include <linux/ioctl.h>
> > +
> > +/* options to pass in to pidfd_getfd_args flags */
> > +#define PIDFD_GETFD_CLOEXEC (1 << 0) /* open the fd with cloexec */
>
> Please, make them cloexec by default unless there's a very good reason
> not to.
>
For now then, should I have flags, and just say "reserved for future usage",
or would you prefer that I drop flags entirely?

> > +
> > +struct pidfd_getfd_args {
> > +     __u32 size;             /* sizeof(pidfd_getfd_args) */
> > +     __u32 fd;       /* the tracee's file descriptor to get */
> > +     __u32 flags;
> > +};
>
> I think you want to either want to pad this
>
> +struct pidfd_getfd_args {
> +       __u32 size;             /* sizeof(pidfd_getfd_args) */
> +       __u32 fd;       /* the tracee's file descriptor to get */
> +       __u32 flags;
>         __u32 reserved;
> +};
>
> or use __aligned_u64 everywhere which I'd personally prefer instead of
> this manual padding everywhere.
>
Wouldn't __attribute__((packed)) achieve a similar thing of making sure
the struct is a constant size across all compilers?

I'll go with __aligned_u64 instead of packed, if you don't want to use packed.

> > +
> > +#define PIDFD_IOC_MAGIC                      'p'
> > +#define PIDFD_IO(nr)                 _IO(PIDFD_IOC_MAGIC, nr)
> > +#define PIDFD_IOR(nr, type)          _IOR(PIDFD_IOC_MAGIC, nr, type)
> > +#define PIDFD_IOW(nr, type)          _IOW(PIDFD_IOC_MAGIC, nr, type)
> > +#define PIDFD_IOWR(nr, type)         _IOWR(PIDFD_IOC_MAGIC, nr, type)
> > +
> > +#define PIDFD_IOCTL_GETFD            PIDFD_IOWR(0xb0, \
> > +                                             struct pidfd_getfd_args)
> > +
> > +#endif /* _UAPI_LINUX_PID_H */
> > diff --git a/kernel/fork.c b/kernel/fork.c
> > index 6cabc124378c..d9971e664e82 100644
> > --- a/kernel/fork.c
> > +++ b/kernel/fork.c
> > @@ -1726,9 +1726,81 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
> >       return poll_flags;
> >  }
> >
> > +static long pidfd_getfd(struct pid *pid, struct pidfd_getfd_args __user *buf)
> > +{
> > +     struct pidfd_getfd_args args;
> > +     unsigned int fd_flags = 0;
> > +     struct task_struct *task;
> > +     struct file *file;
> > +     u32 user_size;
> > +     int ret, fd;
> > +
> > +     ret = get_user(user_size, &buf->size);
> > +     if (ret)
> > +             return ret;
> > +
> > +     ret = copy_struct_from_user(&args, sizeof(args), buf, user_size);
> > +     if (ret)
> > +             return ret;
> > +     if ((args.flags & ~(PIDFD_GETFD_CLOEXEC)) != 0)
> > +             return -EINVAL;
>
> Nit: It's more common - especially in this file - to do
>
> if (args.flags & ~PIDFD_GETFD_CLOEXEC)
>         return -EINVAL;
>
> > +     if (args.flags & PIDFD_GETFD_CLOEXEC)
> > +             fd_flags |= O_CLOEXEC;
> > +
I'll drop this bit, and just make it CLOEXEC by default.

> > +     task = get_pid_task(pid, PIDTYPE_PID);
> > +     if (!task)
> > +             return -ESRCH;
>
> \n
>
> > +     ret = -EPERM;
> > +     if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
> > +             goto out;
>
> \n
>
> Please don't pre-set errors unless they are used by multiple exit paths.
> if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
>         ret = -EPERM;
>         goto out;
> }
>
> > +     ret = -EBADF;
> > +     file = fget_task(task, args.fd);
> > +     if (!file)
> > +             goto out;
>
> Same.
>
> > +
> > +     fd = get_unused_fd_flags(fd_flags);
> > +     if (fd < 0) {
> > +             ret = fd;
> > +             goto out_put_file;
> > +     }
>
> \n
>
> > +     /*
> > +      * security_file_receive must come last since it may have side effects
> > +      * and cannot be reversed.
> > +      */
> > +     ret = security_file_receive(file);
> > +     if (ret)
> > +             goto out_put_fd;
> > +
> > +     fd_install(fd, file);
> > +     put_task_struct(task);
> > +     return fd;
> > +
> > +out_put_fd:
> > +     put_unused_fd(fd);
> > +out_put_file:
> > +     fput(file);
> > +out:
> > +     put_task_struct(task);
> > +     return ret;
> > +}
> > +
> > +static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> > +{
> > +     struct pid *pid = file->private_data;
> > +     void __user *buf = (void __user *)arg;
> > +
> > +     switch (cmd) {
> > +     case PIDFD_IOCTL_GETFD:
> > +             return pidfd_getfd(pid, buf);
> > +     default:
> > +             return -EINVAL;
> > +     }
> > +}
> > +
> >  const struct file_operations pidfd_fops = {
> >       .release = pidfd_release,
> >       .poll = pidfd_poll,
> > +     .unlocked_ioctl = pidfd_ioctl,
> >  #ifdef CONFIG_PROC_FS
> >       .show_fdinfo = pidfd_show_fdinfo,
> >  #endif
> > --
> > 2.20.1
> >
Christian Brauner Dec. 17, 2019, 3 a.m. UTC | #4
On Mon, Dec 16, 2019 at 06:49:37PM -0800, Sargun Dhillon wrote:
> On Mon, Dec 16, 2019 at 5:50 PM Christian Brauner
> <christian.brauner@ubuntu.com> wrote:
> >
> > [Cc Arnd since he fiddled with ioctl()s quite a bit recently.]
> >
> >
> > That should be pidfd.h and the resulting new file be placed under the
> > pidfd entry in maintainers:
> > +F:     include/uapi/linux/pidfd.h
> >
> > >
> > >  enum pid_type
> > >  {
> > > diff --git a/include/uapi/linux/pid.h b/include/uapi/linux/pid.h
> > > new file mode 100644
> > > index 000000000000..4ec02ed8b39a
> > > --- /dev/null
> > > +++ b/include/uapi/linux/pid.h
> > > @@ -0,0 +1,26 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> > > +#ifndef _UAPI_LINUX_PID_H
> > > +#define _UAPI_LINUX_PID_H
> > > +
> > > +#include <linux/types.h>
> > > +#include <linux/ioctl.h>
> > > +
> > > +/* options to pass in to pidfd_getfd_args flags */
> > > +#define PIDFD_GETFD_CLOEXEC (1 << 0) /* open the fd with cloexec */
> >
> > Please, make them cloexec by default unless there's a very good reason
> > not to.
> >
> For now then, should I have flags, and just say "reserved for future usage",
> or would you prefer that I drop flags entirely?

Hm, you can leave the flags argument imho but maybe someone else has
stronger opinions about this.

> 
> > > +
> > > +struct pidfd_getfd_args {
> > > +     __u32 size;             /* sizeof(pidfd_getfd_args) */
> > > +     __u32 fd;       /* the tracee's file descriptor to get */
> > > +     __u32 flags;
> > > +};
> >
> > I think you want to either want to pad this
> >
> > +struct pidfd_getfd_args {
> > +       __u32 size;             /* sizeof(pidfd_getfd_args) */
> > +       __u32 fd;       /* the tracee's file descriptor to get */
> > +       __u32 flags;
> >         __u32 reserved;
> > +};
> >
> > or use __aligned_u64 everywhere which I'd personally prefer instead of
> > this manual padding everywhere.
> >
> Wouldn't __attribute__((packed)) achieve a similar thing of making sure
> the struct is a constant size across all compilers?
> 
> I'll go with __aligned_u64 instead of packed, if you don't want to use packed.

We had a discussion about this in relation to the openat2()
patchset just recently. Florian and a few others raised good points why
we might not want to use packed:
https://lore.kernel.org/lkml/87o8w9bcaf.fsf@mid.deneb.enyo.de/
https://lore.kernel.org/lkml/a328b91d-fd8f-4f27-b3c2-91a9c45f18c0@rasmusvillemoes.dk/

Christian
Arnd Bergmann Dec. 17, 2019, 8:54 a.m. UTC | #5
On Tue, Dec 17, 2019 at 3:50 AM Sargun Dhillon <sargun@sargun.me> wrote:
> On Mon, Dec 16, 2019 at 5:50 PM Christian Brauner <christian.brauner@ubuntu.com> wrote:
> > > +
> > > +#include <linux/types.h>
> > > +#include <linux/ioctl.h>
> > > +
> > > +/* options to pass in to pidfd_getfd_args flags */
> > > +#define PIDFD_GETFD_CLOEXEC (1 << 0) /* open the fd with cloexec */
> >
> > Please, make them cloexec by default unless there's a very good reason
> > not to.
> >
> For now then, should I have flags, and just say "reserved for future usage",
> or would you prefer that I drop flags entirely?

There is no need for adding reserved fields in an ioctl, just add a new ioctl
number if you need it later.

> > > +
> > > +struct pidfd_getfd_args {
> > > +     __u32 size;             /* sizeof(pidfd_getfd_args) */
> > > +     __u32 fd;       /* the tracee's file descriptor to get */
> > > +     __u32 flags;
> > > +};
> >
> > I think you want to either want to pad this
> >
> > +struct pidfd_getfd_args {
> > +       __u32 size;             /* sizeof(pidfd_getfd_args) */
> > +       __u32 fd;       /* the tracee's file descriptor to get */
> > +       __u32 flags;
> >         __u32 reserved;
> > +};
> >
> > or use __aligned_u64 everywhere which I'd personally prefer instead of
> > this manual padding everywhere.

No, don't make ioctl structures extensible. If there is no 64-bit member
in it, 32-bit alignment is sufficient.

Also, having implicit padding is dangerous because it makes it easier to
leave it uninitialized, leaking kernel stack information on the copy_to_user().

Please drop the '__u32 size' argument, too: the size is fixed by definition
(through the _IOWR macro) and if you need to extend it you get a new
command anyway.

> Wouldn't __attribute__((packed)) achieve a similar thing of making sure
> the struct is a constant size across all compilers?
>
> I'll go with __aligned_u64 instead of packed, if you don't want to use packed.

__attribute__((packed)) is worse because it forces compilers to use byte
access on architectures that have no fast unaligned 32-bit load/store.
Basically you should never put __packed on a structure, but instead add
it to members that need to be unaligned within a struct for compatibility
reasons.

> > > +
> > > +#define PIDFD_IOC_MAGIC                      'p'
> > > +#define PIDFD_IO(nr)                 _IO(PIDFD_IOC_MAGIC, nr)
> > > +#define PIDFD_IOR(nr, type)          _IOR(PIDFD_IOC_MAGIC, nr, type)
> > > +#define PIDFD_IOW(nr, type)          _IOW(PIDFD_IOC_MAGIC, nr, type)
> > > +#define PIDFD_IOWR(nr, type)         _IOWR(PIDFD_IOC_MAGIC, nr, type)

Drop these macros, they just make it harder to grep or script around the use
of _IOWR/_IOR/_IOW

> > > +#define PIDFD_IOCTL_GETFD            PIDFD_IOWR(0xb0, \
> > > +                                             struct pidfd_getfd_args)

Without the size and flag members, this can become the simpler

#define PIDFD_IOCTL_GETFD  _IOWR('p', 0xb0, __u32)

> > > +
> > >  const struct file_operations pidfd_fops = {
> > >       .release = pidfd_release,
> > >       .poll = pidfd_poll,
> > > +     .unlocked_ioctl = pidfd_ioctl,

This needs

+    .compat_ioctl = compat_ptr_ioctl,

To work on compat tasks.

Finally, there is the question of whether this should be an ioctl
operation at all, or if it would be better done as a proper syscall.
Functionally the two are the same here, but doing such a fundamental
operation as an ioctl doesn't feel quite right to me. As a system call,
this could be something like

int pidfd_get_fd(int pidfd, int their_fd, int flags);

along the lines of dup3().

        Arnd
Christian Brauner Dec. 17, 2019, 11:19 a.m. UTC | #6
On Tue, Dec 17, 2019 at 09:54:40AM +0100, Arnd Bergmann wrote:
> On Tue, Dec 17, 2019 at 3:50 AM Sargun Dhillon <sargun@sargun.me> wrote:
> > On Mon, Dec 16, 2019 at 5:50 PM Christian Brauner <christian.brauner@ubuntu.com> wrote:
> Finally, there is the question whether this should be an ioctl
> operation at all, or
> if it would better be done as a proper syscall. Functionally the two
> are the same
> here, but doing such a fundamental operation as an ioctl doesn't feel
> quite right
> to me. As a system call, this could be something like
> 
> int pidfd_get_fd(int pidfd, int their_fd, int flags);
> 
> along the lines of dup3().

Thanks for taking a look, Arnd!

Yeah, Oleg hinted at this in the first version as well. I originally
disagreed, but we can certainly also do this as a separate syscall.
What we should keep in mind is that people have already brought up adding
new fds to a task. That is not a problem, just something to remember, as
it might potentially mean another syscall.

Christian
diff mbox series

Patch

diff --git a/Documentation/ioctl/ioctl-number.rst b/Documentation/ioctl/ioctl-number.rst
index bef79cd4c6b4..be2efb93acd1 100644
--- a/Documentation/ioctl/ioctl-number.rst
+++ b/Documentation/ioctl/ioctl-number.rst
@@ -272,6 +272,7 @@  Code  Seq#    Include File                                           Comments
                                                                      <mailto:tim@cyberelk.net>
 'p'   A1-A5  linux/pps.h                                             LinuxPPS
                                                                      <mailto:giometti@linux.it>
+'p'   B0-CF  uapi/linux/pid.h
 'q'   00-1F  linux/serio.h
 'q'   80-FF  linux/telephony.h                                       Internet PhoneJACK, Internet LineJACK
              linux/ixjuser.h                                         <http://web.archive.org/web/%2A/http://www.quicknet.net>
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 9645b1194c98..65f1a73040c9 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -5,6 +5,7 @@ 
 #include <linux/rculist.h>
 #include <linux/wait.h>
 #include <linux/refcount.h>
+#include <uapi/linux/pid.h>
 
 enum pid_type
 {
diff --git a/include/uapi/linux/pid.h b/include/uapi/linux/pid.h
new file mode 100644
index 000000000000..4ec02ed8b39a
--- /dev/null
+++ b/include/uapi/linux/pid.h
@@ -0,0 +1,26 @@ 
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_PID_H
+#define _UAPI_LINUX_PID_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/* options to pass in to pidfd_getfd_args flags */
+#define PIDFD_GETFD_CLOEXEC (1 << 0)	/* open the fd with cloexec */
+
+struct pidfd_getfd_args {
+	__u32 size;		/* sizeof(pidfd_getfd_args) */
+	__u32 fd;       /* the tracee's file descriptor to get */
+	__u32 flags;
+};
+
+#define PIDFD_IOC_MAGIC			'p'
+#define PIDFD_IO(nr)			_IO(PIDFD_IOC_MAGIC, nr)
+#define PIDFD_IOR(nr, type)		_IOR(PIDFD_IOC_MAGIC, nr, type)
+#define PIDFD_IOW(nr, type)		_IOW(PIDFD_IOC_MAGIC, nr, type)
+#define PIDFD_IOWR(nr, type)		_IOWR(PIDFD_IOC_MAGIC, nr, type)
+
+#define PIDFD_IOCTL_GETFD		PIDFD_IOWR(0xb0, \
+						struct pidfd_getfd_args)
+
+#endif /* _UAPI_LINUX_PID_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 6cabc124378c..d9971e664e82 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1726,9 +1726,81 @@  static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
 	return poll_flags;
 }
 
+static long pidfd_getfd(struct pid *pid, struct pidfd_getfd_args __user *buf)
+{
+	struct pidfd_getfd_args args;
+	unsigned int fd_flags = 0;
+	struct task_struct *task;
+	struct file *file;
+	u32 user_size;
+	int ret, fd;
+
+	ret = get_user(user_size, &buf->size);
+	if (ret)
+		return ret;
+
+	ret = copy_struct_from_user(&args, sizeof(args), buf, user_size);
+	if (ret)
+		return ret;
+	if ((args.flags & ~(PIDFD_GETFD_CLOEXEC)) != 0)
+		return -EINVAL;
+	if (args.flags & PIDFD_GETFD_CLOEXEC)
+		fd_flags |= O_CLOEXEC;
+
+	task = get_pid_task(pid, PIDTYPE_PID);
+	if (!task)
+		return -ESRCH;
+	ret = -EPERM;
+	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
+		goto out;
+	ret = -EBADF;
+	file = fget_task(task, args.fd);
+	if (!file)
+		goto out;
+
+	fd = get_unused_fd_flags(fd_flags);
+	if (fd < 0) {
+		ret = fd;
+		goto out_put_file;
+	}
+	/*
+	 * security_file_receive must come last since it may have side effects
+	 * and cannot be reversed.
+	 */
+	ret = security_file_receive(file);
+	if (ret)
+		goto out_put_fd;
+
+	fd_install(fd, file);
+	put_task_struct(task);
+	return fd;
+
+out_put_fd:
+	put_unused_fd(fd);
+out_put_file:
+	fput(file);
+out:
+	put_task_struct(task);
+	return ret;
+}
+
+static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct pid *pid = file->private_data;
+	void __user *buf = (void __user *)arg;
+
+	switch (cmd) {
+	case PIDFD_IOCTL_GETFD:
+		return pidfd_getfd(pid, buf);
+	default:
+		return -EINVAL;
+	}
+}
+
 const struct file_operations pidfd_fops = {
 	.release = pidfd_release,
 	.poll = pidfd_poll,
+	.unlocked_ioctl = pidfd_ioctl,
 #ifdef CONFIG_PROC_FS
 	.show_fdinfo = pidfd_show_fdinfo,
 #endif