diff mbox series

[3/4] seccomp: Add SECCOMP_USER_NOTIF_FLAG_PIDFD to get pidfd on listener trap

Message ID 20200124091743.3357-4-sargun@sargun.me (mailing list archive)
State New, archived
Headers show
Series Add the ability to get a pidfd on seccomp user notifications | expand

Commit Message

Sargun Dhillon Jan. 24, 2020, 9:17 a.m. UTC
This introduces the capability for users of seccomp's listener behaviour
to be able to receive the pidfd of the process that triggered the event.
Currently, this just opens the group leader of the thread that triggere
the event, as pidfds (currently) are limited to group leaders.

For actions which do not act on the process outside of the pidfd, there
is then no need to check the cookie to ensure validity of the request
throughout the listener's handling of it.

This can be extended later on as well when pidfd capabilities are added
to be able to have the listener imbue the pidfd with certain capabilities
when it is delivered to userspace.

It is the responsibility of the user to close the pidfd.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
---
 include/uapi/linux/seccomp.h |  4 +++
 kernel/seccomp.c             | 68 ++++++++++++++++++++++++++++++++----
 2 files changed, 66 insertions(+), 6 deletions(-)

Comments

Tycho Andersen Jan. 24, 2020, 6:03 p.m. UTC | #1
On Fri, Jan 24, 2020 at 01:17:42AM -0800, Sargun Dhillon wrote:
> Currently, this just opens the group leader of the thread that triggere
> the event, as pidfds (currently) are limited to group leaders.

I don't love the semantics of this; when they're not limited to thread
group leaders any more, we won't be able to change this. Is that work
far off?

Tycho
Sargun Dhillon Jan. 24, 2020, 8:09 p.m. UTC | #2
On Fri, Jan 24, 2020 at 10:03 AM Tycho Andersen <tycho@tycho.ws> wrote:
>
> On Fri, Jan 24, 2020 at 01:17:42AM -0800, Sargun Dhillon wrote:
> > Currently, this just opens the group leader of the thread that triggere
> > the event, as pidfds (currently) are limited to group leaders.
>
> I don't love the semantics of this; when they're not limited to thread
> group leaders any more, we won't be able to change this. Is that work
> far off?
>
> Tycho

We would be able to change this in the future if we introduced a flag like
SECCOMP_USER_NOTIF_FLAG_PIDFD_THREAD which would send a
pidfd that's for the thread, and not just the group leader. The flag could
either be XOR with SECCOMP_USER_NOTIF_FLAG_PIDFD, or
could require both. Alternatively, we can rename
SECCOMP_USER_NOTIF_FLAG_PIDFD to
SECCOMP_USER_NOTIF_FLAG_GROUP_LEADER_PIDFD.
Aleksa Sarai Jan. 26, 2020, 4:03 a.m. UTC | #3
On 2020-01-24, Sargun Dhillon <sargun@sargun.me> wrote:
> This introduces the capability for users of seccomp's listener behaviour
> to be able to receive the pidfd of the process that triggered the event.
> Currently, this just opens the group leader of the thread that triggere
> the event, as pidfds (currently) are limited to group leaders.
> 
> For actions which do not act on the process outside of the pidfd, there
> is then no need to check the cookie to ensure validity of the request
> throughout the listener's handling of it.
> 
> This can be extended later on as well when pidfd capabilities are added
> to be able to have the listener imbue the pidfd with certain capabilities
> when it is delivered to userspace.
> 
> It is the responsibility of the user to close the pidfd.
> 
> Signed-off-by: Sargun Dhillon <sargun@sargun.me>
> ---
>  include/uapi/linux/seccomp.h |  4 +++
>  kernel/seccomp.c             | 68 ++++++++++++++++++++++++++++++++----
>  2 files changed, 66 insertions(+), 6 deletions(-)
> 
> diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
> index be84d87f1f46..64f6fc5c95f1 100644
> --- a/include/uapi/linux/seccomp.h
> +++ b/include/uapi/linux/seccomp.h
> @@ -69,11 +69,15 @@ struct seccomp_notif_sizes {
>  	__u16 seccomp_data;
>  };
>  
> +/* Valid flags for struct seccomp_notif */
> +#define SECCOMP_USER_NOTIF_FLAG_PIDFD	(1UL << 0) /* populate pidfd */
> +
>  struct seccomp_notif {
>  	__u64 id;
>  	__u32 pid;
>  	__u32 flags;
>  	struct seccomp_data data;
> +	__u32 pidfd;
>  };
>  
>  /*
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index b6ea3dcb57bf..93f9cf45ce07 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -1019,21 +1019,61 @@ static int seccomp_notify_release(struct inode *inode, struct file *file)
>  	return 0;
>  }
>  
> +
> +static long __seccomp_notify_recv_pidfd(void __user *buf,
> +					struct seccomp_notif *unotif,
> +					struct task_struct *group_leader)
> +{
> +	struct file *pidfd_file;
> +	struct pid *pid;
> +	int fd;
> +
> +	pid = get_task_pid(group_leader, PIDTYPE_PID);
> +	pidfd_file = pidfd_create_file(pid);
> +	put_pid(pid);
> +	if (IS_ERR(pidfd_file))
> +		return PTR_ERR(pidfd_file);
> +
> +	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);

You don't need to pass O_RDWR -- only O_CLOEXEC is checked by
get_unused_fd_flags().

> +	if (fd < 0) {
> +		fput(pidfd_file);
> +		return fd;
> +	}
> +
> +	unotif->pidfd = fd;
> +
> +	if (copy_to_user(buf, unotif, sizeof(*unotif))) {
> +		put_unused_fd(fd);
> +		fput(pidfd_file);
> +		return -EFAULT;
> +	}
> +
> +	fd_install(fd, pidfd_file);
> +
> +	return 0;
> +}
> +
>  static long seccomp_notify_recv(struct seccomp_filter *filter,
>  				void __user *buf)
>  {
>  	struct seccomp_knotif *knotif = NULL, *cur;
>  	struct seccomp_notif unotif;
> +	struct task_struct *group_leader;
> +	bool send_pidfd;
>  	ssize_t ret;
>  
> +	if (copy_from_user(&unotif, buf, sizeof(unotif)))
> +		return -EFAULT;
>  	/* Verify that we're not given garbage to keep struct extensible. */
> -	ret = check_zeroed_user(buf, sizeof(unotif));
> -	if (ret < 0)
> -		return ret;
> -	if (!ret)
> +	if (unotif.id ||
> +	    unotif.pid ||
> +	    memchr_inv(&unotif.data, 0, sizeof(unotif.data)) ||
> +	    unotif.pidfd)
> +		return -EINVAL;

IMHO this check is more confusing than the original check_zeroed_user().
Something like the following is simpler and less prone to forgetting to
add a new field in the future:

	if (memchr_inv(&unotif, 0, sizeof(unotif)))
		return -EINVAL;

> +	if (unotif.flags & ~(SECCOMP_USER_NOTIF_FLAG_PIDFD))
>  		return -EINVAL;
>  
> -	memset(&unotif, 0, sizeof(unotif));
> +	send_pidfd = unotif.flags & SECCOMP_USER_NOTIF_FLAG_PIDFD;
>  
>  	ret = down_interruptible(&filter->notif->request);
>  	if (ret < 0)
> @@ -1057,9 +1097,13 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
>  		goto out;
>  	}
>  
> +	memset(&unotif, 0, sizeof(unotif));
> +
>  	unotif.id = knotif->id;
>  	unotif.pid = task_pid_vnr(knotif->task);
>  	unotif.data = *(knotif->data);
> +	if (send_pidfd)
> +		group_leader = get_task_struct(knotif->task->group_leader);
>  
>  	knotif->state = SECCOMP_NOTIFY_SENT;
>  	wake_up_poll(&filter->notif->wqh, EPOLLOUT | EPOLLWRNORM);
> @@ -1067,9 +1111,21 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
>  out:
>  	mutex_unlock(&filter->notify_lock);
>  
> -	if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) {
> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * We've successfully received a notification, let's try to copy it to
> +	 * userspace.
> +	 */
> +	if (send_pidfd) {
> +		ret = __seccomp_notify_recv_pidfd(buf, &unotif, group_leader);
> +		put_task_struct(group_leader);
> +	} else if (copy_to_user(buf, &unotif, sizeof(unotif))) {
>  		ret = -EFAULT;
> +	}

To my eye, the way this helper is used is a bit ugly -- my first
question when reading this was "why aren't we doing a copy_to_user() for
pidfds?".

Something like the following might be a bit cleaner I think:

	struct file *pidfd_file = NULL;

	if (send_pidfd) {
		// helper allocates the pidfd_file and sets unotify->fd
		ret = __seccomp_notify_recv_pidfd(&unotify, &pidfd_file)
		if (ret)
			goto err; // or whatever
	}

	if (copy_to_user(buf, &unotif, sizeof(unotif))) {
		ret = -EFAULT;
		goto err; // or whatever
	}

	if (send_pidfd)
		fd_install(unotif.fd, pidfd_file)

But to be fair, this is also somewhat ugly too.
Aleksa Sarai Jan. 26, 2020, 4:10 a.m. UTC | #4
On 2020-01-24, Sargun Dhillon <sargun@sargun.me> wrote:
> On Fri, Jan 24, 2020 at 10:03 AM Tycho Andersen <tycho@tycho.ws> wrote:
> >
> > On Fri, Jan 24, 2020 at 01:17:42AM -0800, Sargun Dhillon wrote:
> > > Currently, this just opens the group leader of the thread that triggere
> > > the event, as pidfds (currently) are limited to group leaders.
> >
> > I don't love the semantics of this; when they're not limited to thread
> > group leaders any more, we won't be able to change this. Is that work
> > far off?
> >
> > Tycho
> 
> We would be able to change this in the future if we introduced a flag like
> SECCOMP_USER_NOTIF_FLAG_PIDFD_THREAD which would send a
> pidfd that's for the thread, and not just the group leader. The flag could
> either be XOR with SECCOMP_USER_NOTIF_FLAG_PIDFD, or
> could require both. Alternatively, we can rename
> SECCOMP_USER_NOTIF_FLAG_PIDFD to
> SECCOMP_USER_NOTIF_FLAG_GROUP_LEADER_PIDFD.

Possibly unpopular proposal -- would it make sense to just store the
pidfd_open(2) flags rather than coming up with our own set for
SECCOMP_USER_NOTIF? If/when pidfds are expanded to include non-leaders
there will be a corresponding flag for pidfd_open(2). Something like:

	struct seccomp_notif {
		__u64 id;
		__u32 pid;
		__u32 flags;
		struct seccomp_data data;
		__u64 pidfd_flags; // or __u32 -- not sure what Christian plans
		__u32 pidfd;
		__u32 __padding;
	};

This does mean there'll be an additional flags field, but I think it's a
slightly more consistent way to indicate "SECCOMP_USER_NOTIF_FLAG_PIDFD
implies a pidfd_open(2) on the traced task".
Aleksa Sarai Jan. 26, 2020, 4:14 a.m. UTC | #5
On 2020-01-26, Aleksa Sarai <cyphar@cyphar.com> wrote:
> On 2020-01-24, Sargun Dhillon <sargun@sargun.me> wrote:
> >  static long seccomp_notify_recv(struct seccomp_filter *filter,
> >  				void __user *buf)
> >  {
> >  	struct seccomp_knotif *knotif = NULL, *cur;
> >  	struct seccomp_notif unotif;
> > +	struct task_struct *group_leader;
> > +	bool send_pidfd;
> >  	ssize_t ret;
> >  
> > +	if (copy_from_user(&unotif, buf, sizeof(unotif)))
> > +		return -EFAULT;
> >  	/* Verify that we're not given garbage to keep struct extensible. */
> > -	ret = check_zeroed_user(buf, sizeof(unotif));
> > -	if (ret < 0)
> > -		return ret;
> > -	if (!ret)
> > +	if (unotif.id ||
> > +	    unotif.pid ||
> > +	    memchr_inv(&unotif.data, 0, sizeof(unotif.data)) ||
> > +	    unotif.pidfd)
> > +		return -EINVAL;
> 
> IMHO this check is more confusing than the original check_zeroed_user().
> Something like the following is simpler and less prone to forgetting to
> add a new field in the future:
> 
> 	if (memchr_inv(&unotif, 0, sizeof(unotif)))
> 		return -EINVAL;

Also the check in the patch doesn't ensure that any unnamed padding is
zeroed -- memchr_inv(&unotif, 0, sizeof(unotif)) does.
Tycho Andersen Jan. 26, 2020, 5:42 a.m. UTC | #6
On Fri, Jan 24, 2020 at 12:09:37PM -0800, Sargun Dhillon wrote:
> On Fri, Jan 24, 2020 at 10:03 AM Tycho Andersen <tycho@tycho.ws> wrote:
> >
> > On Fri, Jan 24, 2020 at 01:17:42AM -0800, Sargun Dhillon wrote:
> > > Currently, this just opens the group leader of the thread that triggere
> > > the event, as pidfds (currently) are limited to group leaders.
> >
> > I don't love the semantics of this; when they're not limited to thread
> > group leaders any more, we won't be able to change this. Is that work
> > far off?
> >
> > Tycho
> 
> We would be able to change this in the future if we introduced a flag like
> SECCOMP_USER_NOTIF_FLAG_PIDFD_THREAD which would send a
> pidfd that's for the thread, and not just the group leader. The flag could
> either be XOR with SECCOMP_USER_NOTIF_FLAG_PIDFD, or
> could require both. Alternatively, we can rename
> SECCOMP_USER_NOTIF_FLAG_PIDFD to
> SECCOMP_USER_NOTIF_FLAG_GROUP_LEADER_PIDFD.

Ok, but then isn't this just another temporary API? Seems like it's
worth waiting until the Right Way exists.

Tycho
Sargun Dhillon Jan. 27, 2020, 5:06 a.m. UTC | #7
On Sun, Jan 26, 2020 at 03:14:39PM +1100, Aleksa Sarai wrote:
> On 2020-01-26, Aleksa Sarai <cyphar@cyphar.com> wrote:
> > On 2020-01-24, Sargun Dhillon <sargun@sargun.me> wrote:
> > >  static long seccomp_notify_recv(struct seccomp_filter *filter,
> > >  				void __user *buf)
> > >  {
> > >  	struct seccomp_knotif *knotif = NULL, *cur;
> > >  	struct seccomp_notif unotif;
> > > +	struct task_struct *group_leader;
> > > +	bool send_pidfd;
> > >  	ssize_t ret;
> > >  
> > > +	if (copy_from_user(&unotif, buf, sizeof(unotif)))
> > > +		return -EFAULT;
> > >  	/* Verify that we're not given garbage to keep struct extensible. */
> > > -	ret = check_zeroed_user(buf, sizeof(unotif));
> > > -	if (ret < 0)
> > > -		return ret;
> > > -	if (!ret)
> > > +	if (unotif.id ||
> > > +	    unotif.pid ||
> > > +	    memchr_inv(&unotif.data, 0, sizeof(unotif.data)) ||
> > > +	    unotif.pidfd)
> > > +		return -EINVAL;
> > 
> > IMHO this check is more confusing than the original check_zeroed_user().
> > Something like the following is simpler and less prone to forgetting to
> > add a new field in the future:
> > 
I'm all for this, originally my patch read:

__u32 flags = 0;
swap(unotif.flags, flags);
if (memchr(&unotif, 0, sizeof(unotif))
	return -EINVAL;

--- And then check flags appropriately. I'm not sure if this is "better",
as I didn't see any other implementations that look like this in the
kernel. What do you think? It could even look "simpler", as in:

__u32 flags;

if (copy_from_user(....))
	return -EFAULT;
flags = unotif.flags;
unotif.flags = 0;
if (memchr_inv(&unotif, 0, sizeof(unotif)))
	return -EINVAL;


Are either of those preferential, reasonable, or at a minimum inoffensive?
> > 	if (memchr_inv(&unotif, 0, sizeof(unotif)))
> > 		return -EINVAL;
> 
Wouldn't this fail if flags was set to any value? We either need to zero
out flags prior to checking, or split it into range checks that exclude
flags.

> Also the check in the patch doesn't ensure that any unnamed padding is
> zeroed -- memchr_inv(&unotif, 0, sizeof(unotif)) does.
> 
> -- 
> Aleksa Sarai
> Senior Software Engineer (Containers)
> SUSE Linux GmbH
> <https://www.cyphar.com/>
Christian Brauner May 15, 2020, 11:58 a.m. UTC | #8
On Fri, May 15, 2020 at 04:49:14AM -0700, Sargun Dhillon wrote:
> On Sat, Jan 25, 2020 at 9:42 PM Tycho Andersen <tycho@tycho.ws> wrote:
> 
> > On Fri, Jan 24, 2020 at 12:09:37PM -0800, Sargun Dhillon wrote:
> > > On Fri, Jan 24, 2020 at 10:03 AM Tycho Andersen <tycho@tycho.ws> wrote:
> > > >
> > > > On Fri, Jan 24, 2020 at 01:17:42AM -0800, Sargun Dhillon wrote:
> > > > > Currently, this just opens the group leader of the thread that
> > triggere
> > > > > the event, as pidfds (currently) are limited to group leaders.
> > > >
> > > > I don't love the semantics of this; when they're not limited to thread
> > > > group leaders any more, we won't be able to change this. Is that work
> > > > far off?
> > > >
> > > > Tycho
> > >
> > > We would be able to change this in the future if we introduced a flag
> > like
> > > SECCOMP_USER_NOTIF_FLAG_PIDFD_THREAD which would send a
> > > pidfd that's for the thread, and not just the group leader. The flag
> > could
> > > either be XOR with SECCOMP_USER_NOTIF_FLAG_PIDFD, or
> > > could require both. Alternatively, we can rename
> > > SECCOMP_USER_NOTIF_FLAG_PIDFD to
> > > SECCOMP_USER_NOTIF_FLAG_GROUP_LEADER_PIDFD.
> >
> > Ok, but then isn't this just another temporary API? Seems like it's
> > worth waiting until the Right Way exists.
> >
> > Tycho
> >
> 
> It's been a few months. It does not appear like much progress has been made
> moving away from
> pidfd being only useful for leaders.
> 
> I would either like to respin this patch, or at a minimum, include the
> process group leader pid number
> in the seccomp notification, to simplify things for tracers.

I'd prefer if you went with the second option where you include the
process group leader pid number.
I'm against adding countless ways of producing pidfds through various
unrelated apis. The api is still quite fresh so I'd like to not overdo
it.

Christian
diff mbox series

Patch

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index be84d87f1f46..64f6fc5c95f1 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -69,11 +69,15 @@  struct seccomp_notif_sizes {
 	__u16 seccomp_data;
 };
 
+/* Valid flags for struct seccomp_notif */
+#define SECCOMP_USER_NOTIF_FLAG_PIDFD	(1UL << 0) /* populate pidfd */
+
 struct seccomp_notif {
 	__u64 id;
 	__u32 pid;
 	__u32 flags;
 	struct seccomp_data data;
+	__u32 pidfd;
 };
 
 /*
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index b6ea3dcb57bf..93f9cf45ce07 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -1019,21 +1019,61 @@  static int seccomp_notify_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+
+static long __seccomp_notify_recv_pidfd(void __user *buf,
+					struct seccomp_notif *unotif,
+					struct task_struct *group_leader)
+{
+	struct file *pidfd_file;
+	struct pid *pid;
+	int fd;
+
+	pid = get_task_pid(group_leader, PIDTYPE_PID);
+	pidfd_file = pidfd_create_file(pid);
+	put_pid(pid);
+	if (IS_ERR(pidfd_file))
+		return PTR_ERR(pidfd_file);
+
+	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		fput(pidfd_file);
+		return fd;
+	}
+
+	unotif->pidfd = fd;
+
+	if (copy_to_user(buf, unotif, sizeof(*unotif))) {
+		put_unused_fd(fd);
+		fput(pidfd_file);
+		return -EFAULT;
+	}
+
+	fd_install(fd, pidfd_file);
+
+	return 0;
+}
+
 static long seccomp_notify_recv(struct seccomp_filter *filter,
 				void __user *buf)
 {
 	struct seccomp_knotif *knotif = NULL, *cur;
 	struct seccomp_notif unotif;
+	struct task_struct *group_leader;
+	bool send_pidfd;
 	ssize_t ret;
 
+	if (copy_from_user(&unotif, buf, sizeof(unotif)))
+		return -EFAULT;
 	/* Verify that we're not given garbage to keep struct extensible. */
-	ret = check_zeroed_user(buf, sizeof(unotif));
-	if (ret < 0)
-		return ret;
-	if (!ret)
+	if (unotif.id ||
+	    unotif.pid ||
+	    memchr_inv(&unotif.data, 0, sizeof(unotif.data)) ||
+	    unotif.pidfd)
+		return -EINVAL;
+	if (unotif.flags & ~(SECCOMP_USER_NOTIF_FLAG_PIDFD))
 		return -EINVAL;
 
-	memset(&unotif, 0, sizeof(unotif));
+	send_pidfd = unotif.flags & SECCOMP_USER_NOTIF_FLAG_PIDFD;
 
 	ret = down_interruptible(&filter->notif->request);
 	if (ret < 0)
@@ -1057,9 +1097,13 @@  static long seccomp_notify_recv(struct seccomp_filter *filter,
 		goto out;
 	}
 
+	memset(&unotif, 0, sizeof(unotif));
+
 	unotif.id = knotif->id;
 	unotif.pid = task_pid_vnr(knotif->task);
 	unotif.data = *(knotif->data);
+	if (send_pidfd)
+		group_leader = get_task_struct(knotif->task->group_leader);
 
 	knotif->state = SECCOMP_NOTIFY_SENT;
 	wake_up_poll(&filter->notif->wqh, EPOLLOUT | EPOLLWRNORM);
@@ -1067,9 +1111,21 @@  static long seccomp_notify_recv(struct seccomp_filter *filter,
 out:
 	mutex_unlock(&filter->notify_lock);
 
-	if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) {
+	if (ret)
+		return ret;
+
+	/*
+	 * We've successfully received a notification, let's try to copy it to
+	 * userspace.
+	 */
+	if (send_pidfd) {
+		ret = __seccomp_notify_recv_pidfd(buf, &unotif, group_leader);
+		put_task_struct(group_leader);
+	} else if (copy_to_user(buf, &unotif, sizeof(unotif))) {
 		ret = -EFAULT;
+	}
 
+	if (ret) {
 		/*
 		 * Userspace screwed up. To make sure that we keep this
 		 * notification alive, let's reset it back to INIT. It