Message ID | 20230413133355.350571-2-aleksandr.mikhalitsyn@canonical.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Add SCM_PIDFD and SO_PEERPIDFD | expand |
On Thu, Apr 13, 2023 at 03:33:52PM +0200, Alexander Mikhalitsyn wrote: > Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS, > but it contains pidfd instead of plain pid, which allows programmers not > to care about PID reuse problem. > > Idea comes from UAPI kernel group: > https://uapi-group.org/kernel-features/ > > Big thanks to Christian Brauner and Lennart Poettering for productive > discussions about this. > > Cc: "David S. Miller" <davem@davemloft.net> > Cc: Eric Dumazet <edumazet@google.com> > Cc: Jakub Kicinski <kuba@kernel.org> > Cc: Paolo Abeni <pabeni@redhat.com> > Cc: Leon Romanovsky <leon@kernel.org> > Cc: David Ahern <dsahern@kernel.org> > Cc: Arnd Bergmann <arnd@arndb.de> > Cc: Kees Cook <keescook@chromium.org> > Cc: Christian Brauner <brauner@kernel.org> > Cc: Kuniyuki Iwashima <kuniyu@amazon.com> > Cc: Lennart Poettering <mzxreary@0pointer.de> > Cc: Luca Boccassi <bluca@debian.org> > Cc: linux-kernel@vger.kernel.org > Cc: netdev@vger.kernel.org > Cc: linux-arch@vger.kernel.org > Tested-by: Luca Boccassi <bluca@debian.org> > Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com> > Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com> > --- > v4: > - fixed silent fd_install if writting of CMSG to the userspace fails (pointed by Christian) > v2: > According to review comments from Kuniyuki Iwashima and Christian Brauner: > - use pidfd_create(..) 
retval as a result > - whitespace change > --- > arch/alpha/include/uapi/asm/socket.h | 2 ++ > arch/mips/include/uapi/asm/socket.h | 2 ++ > arch/parisc/include/uapi/asm/socket.h | 2 ++ > arch/sparc/include/uapi/asm/socket.h | 2 ++ > include/linux/net.h | 1 + > include/linux/socket.h | 1 + > include/net/scm.h | 39 +++++++++++++++++++++++-- > include/uapi/asm-generic/socket.h | 2 ++ > net/core/sock.c | 11 +++++++ > net/mptcp/sockopt.c | 1 + > net/unix/af_unix.c | 18 ++++++++---- > tools/include/uapi/asm-generic/socket.h | 2 ++ > 12 files changed, 76 insertions(+), 7 deletions(-) > > diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h > index 739891b94136..ff310613ae64 100644 > --- a/arch/alpha/include/uapi/asm/socket.h > +++ b/arch/alpha/include/uapi/asm/socket.h > @@ -137,6 +137,8 @@ > > #define SO_RCVMARK 75 > > +#define SO_PASSPIDFD 76 > + > #if !defined(__KERNEL__) > > #if __BITS_PER_LONG == 64 > diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h > index 18f3d95ecfec..762dcb80e4ec 100644 > --- a/arch/mips/include/uapi/asm/socket.h > +++ b/arch/mips/include/uapi/asm/socket.h > @@ -148,6 +148,8 @@ > > #define SO_RCVMARK 75 > > +#define SO_PASSPIDFD 76 > + > #if !defined(__KERNEL__) > > #if __BITS_PER_LONG == 64 > diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h > index f486d3dfb6bb..df16a3e16d64 100644 > --- a/arch/parisc/include/uapi/asm/socket.h > +++ b/arch/parisc/include/uapi/asm/socket.h > @@ -129,6 +129,8 @@ > > #define SO_RCVMARK 0x4049 > > +#define SO_PASSPIDFD 0x404A > + > #if !defined(__KERNEL__) > > #if __BITS_PER_LONG == 64 > diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h > index 2fda57a3ea86..6e2847804fea 100644 > --- a/arch/sparc/include/uapi/asm/socket.h > +++ b/arch/sparc/include/uapi/asm/socket.h > @@ -130,6 +130,8 @@ > > #define SO_RCVMARK 0x0054 > > +#define SO_PASSPIDFD 0x0055 > + > #if 
!defined(__KERNEL__) > > > diff --git a/include/linux/net.h b/include/linux/net.h > index b73ad8e3c212..c234dfbe7a30 100644 > --- a/include/linux/net.h > +++ b/include/linux/net.h > @@ -43,6 +43,7 @@ struct net; > #define SOCK_PASSSEC 4 > #define SOCK_SUPPORT_ZC 5 > #define SOCK_CUSTOM_SOCKOPT 6 > +#define SOCK_PASSPIDFD 7 > > #ifndef ARCH_HAS_SOCKET_TYPES > /** > diff --git a/include/linux/socket.h b/include/linux/socket.h > index 13c3a237b9c9..6bf90f251910 100644 > --- a/include/linux/socket.h > +++ b/include/linux/socket.h > @@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg) > #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */ > #define SCM_CREDENTIALS 0x02 /* rw: struct ucred */ > #define SCM_SECURITY 0x03 /* rw: security label */ > +#define SCM_PIDFD 0x04 /* ro: pidfd (int) */ > > struct ucred { > __u32 pid; > diff --git a/include/net/scm.h b/include/net/scm.h > index 585adc1346bd..c67f765a165b 100644 > --- a/include/net/scm.h > +++ b/include/net/scm.h > @@ -120,12 +120,44 @@ static inline bool scm_has_secdata(struct socket *sock) > } > #endif /* CONFIG_SECURITY_NETWORK */ > > +static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm) > +{ > + struct file *pidfd_file = NULL; > + int pidfd; > + > + /* > + * put_cmsg() doesn't return an error if CMSG is truncated, > + * that's why we need to opencode these checks here. > + */ > + if ((msg->msg_controllen <= sizeof(struct cmsghdr)) || > + (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) { > + msg->msg_flags |= MSG_CTRUNC; > + return; Hm, curious about this: We mark the message as truncated for SCM_PIDFD but if the same conditions were to apply for SCM_PASSCRED we don't mark the message as truncated. Am I reading this correctly? And if so, could you please briefly explain this difference?
> + } > + > + WARN_ON_ONCE(!scm->pid); > + pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file); > + > + if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) { If the put_cmsg() of the pidfd fails, userspace needs to be able to detect this. Otherwise they can't distinguish between the SCM_PIDFD value being zero because the put_cmsg() failed or put_cmsg() succeeded and the allocated fd nr was 0. Looking at put_cmsg(), it looks to me that userspace will receive a SCM_PIDFD message only if the put_cmsg() is completely successful. IIUC, then this change is fine.
On Mon, Apr 17, 2023 at 5:18 PM Christian Brauner <brauner@kernel.org> wrote: > > On Thu, Apr 13, 2023 at 03:33:52PM +0200, Alexander Mikhalitsyn wrote: > > Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS, > > but it contains pidfd instead of plain pid, which allows programmers not > > to care about PID reuse problem. > > > > Idea comes from UAPI kernel group: > > https://uapi-group.org/kernel-features/ > > > > Big thanks to Christian Brauner and Lennart Poettering for productive > > discussions about this. > > > > Cc: "David S. Miller" <davem@davemloft.net> > > Cc: Eric Dumazet <edumazet@google.com> > > Cc: Jakub Kicinski <kuba@kernel.org> > > Cc: Paolo Abeni <pabeni@redhat.com> > > Cc: Leon Romanovsky <leon@kernel.org> > > Cc: David Ahern <dsahern@kernel.org> > > Cc: Arnd Bergmann <arnd@arndb.de> > > Cc: Kees Cook <keescook@chromium.org> > > Cc: Christian Brauner <brauner@kernel.org> > > Cc: Kuniyuki Iwashima <kuniyu@amazon.com> > > Cc: Lennart Poettering <mzxreary@0pointer.de> > > Cc: Luca Boccassi <bluca@debian.org> > > Cc: linux-kernel@vger.kernel.org > > Cc: netdev@vger.kernel.org > > Cc: linux-arch@vger.kernel.org > > Tested-by: Luca Boccassi <bluca@debian.org> > > Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com> > > Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com> > > --- > > v4: > > - fixed silent fd_install if writting of CMSG to the userspace fails (pointed by Christian) > > v2: > > According to review comments from Kuniyuki Iwashima and Christian Brauner: > > - use pidfd_create(..) 
retval as a result > > - whitespace change > > --- > > arch/alpha/include/uapi/asm/socket.h | 2 ++ > > arch/mips/include/uapi/asm/socket.h | 2 ++ > > arch/parisc/include/uapi/asm/socket.h | 2 ++ > > arch/sparc/include/uapi/asm/socket.h | 2 ++ > > include/linux/net.h | 1 + > > include/linux/socket.h | 1 + > > include/net/scm.h | 39 +++++++++++++++++++++++-- > > include/uapi/asm-generic/socket.h | 2 ++ > > net/core/sock.c | 11 +++++++ > > net/mptcp/sockopt.c | 1 + > > net/unix/af_unix.c | 18 ++++++++---- > > tools/include/uapi/asm-generic/socket.h | 2 ++ > > 12 files changed, 76 insertions(+), 7 deletions(-) > > > > diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h > > index 739891b94136..ff310613ae64 100644 > > --- a/arch/alpha/include/uapi/asm/socket.h > > +++ b/arch/alpha/include/uapi/asm/socket.h > > @@ -137,6 +137,8 @@ > > > > #define SO_RCVMARK 75 > > > > +#define SO_PASSPIDFD 76 > > + > > #if !defined(__KERNEL__) > > > > #if __BITS_PER_LONG == 64 > > diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h > > index 18f3d95ecfec..762dcb80e4ec 100644 > > --- a/arch/mips/include/uapi/asm/socket.h > > +++ b/arch/mips/include/uapi/asm/socket.h > > @@ -148,6 +148,8 @@ > > > > #define SO_RCVMARK 75 > > > > +#define SO_PASSPIDFD 76 > > + > > #if !defined(__KERNEL__) > > > > #if __BITS_PER_LONG == 64 > > diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h > > index f486d3dfb6bb..df16a3e16d64 100644 > > --- a/arch/parisc/include/uapi/asm/socket.h > > +++ b/arch/parisc/include/uapi/asm/socket.h > > @@ -129,6 +129,8 @@ > > > > #define SO_RCVMARK 0x4049 > > > > +#define SO_PASSPIDFD 0x404A > > + > > #if !defined(__KERNEL__) > > > > #if __BITS_PER_LONG == 64 > > diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h > > index 2fda57a3ea86..6e2847804fea 100644 > > --- a/arch/sparc/include/uapi/asm/socket.h > > +++ 
b/arch/sparc/include/uapi/asm/socket.h > > @@ -130,6 +130,8 @@ > > > > #define SO_RCVMARK 0x0054 > > > > +#define SO_PASSPIDFD 0x0055 > > + > > #if !defined(__KERNEL__) > > > > > > diff --git a/include/linux/net.h b/include/linux/net.h > > index b73ad8e3c212..c234dfbe7a30 100644 > > --- a/include/linux/net.h > > +++ b/include/linux/net.h > > @@ -43,6 +43,7 @@ struct net; > > #define SOCK_PASSSEC 4 > > #define SOCK_SUPPORT_ZC 5 > > #define SOCK_CUSTOM_SOCKOPT 6 > > +#define SOCK_PASSPIDFD 7 > > > > #ifndef ARCH_HAS_SOCKET_TYPES > > /** > > diff --git a/include/linux/socket.h b/include/linux/socket.h > > index 13c3a237b9c9..6bf90f251910 100644 > > --- a/include/linux/socket.h > > +++ b/include/linux/socket.h > > @@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg) > > #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */ > > #define SCM_CREDENTIALS 0x02 /* rw: struct ucred */ > > #define SCM_SECURITY 0x03 /* rw: security label */ > > +#define SCM_PIDFD 0x04 /* ro: pidfd (int) */ > > > > struct ucred { > > __u32 pid; > > diff --git a/include/net/scm.h b/include/net/scm.h > > index 585adc1346bd..c67f765a165b 100644 > > --- a/include/net/scm.h > > +++ b/include/net/scm.h > > @@ -120,12 +120,44 @@ static inline bool scm_has_secdata(struct socket *sock) > > } > > #endif /* CONFIG_SECURITY_NETWORK */ > > > > +static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm) > > +{ > > + struct file *pidfd_file = NULL; > > + int pidfd; > > + > > + /* > > + * put_cmsg() doesn't return an error if CMSG is truncated, > > + * that's why we need to opencode these checks here. 
> > + */ > > + if ((msg->msg_controllen <= sizeof(struct cmsghdr)) || > > + (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) { > > + msg->msg_flags |= MSG_CTRUNC; > > + return; > > Hm, curious about this: We mark the message as truncated for SCM_PIDFD > but if the same conditions were to apply for SCM_PASSCRED we don't mark > the message as truncated. Am I reading this correct? And is so, you > please briefly explain this difference? Hi, Christian! For SCM_CREDENTIALS we mark it too. Inside the put_cmsg function: https://github.com/torvalds/linux/blob/6a8f57ae2eb07ab39a6f0ccad60c760743051026/net/core/scm.c#L225 The reason why I'm open-coding these checks is that I want to know that the message doesn't fit into the userspace buffer before doing pidfd_prepare and other stuff and because put_cmsg is not returning an error when message doesn't fit in the userspace buffer and we won't be able to properly do pidfd cleanup (put struct pid and fd index). > > > + } > > + > > + WARN_ON_ONCE(!scm->pid); > > + pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file); > > + > > + if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) { > > If the put_cmsg() of the pidfd fails userspace needs to be able to > detect this. Otherwise they can't distinguish between the SCM_PIDFD > value being zero because the put_cmsg() failed or put_cmsg() succeeded > and the allocated fd nr was 0. If pidfd_prepare fails then userspace will receive SCM_PIDFD message with negative pidfd value. > > Looking at put_cmsg() it looks to me that userspace will receive a > SCM_PIDFD message only if the put_cmsg() is completely successful. IIUC, > then this change is fine. Kind regards, Alex
On Mon, Apr 17, 2023 at 06:01:16PM +0200, Aleksandr Mikhalitsyn wrote: > On Mon, Apr 17, 2023 at 5:18 PM Christian Brauner <brauner@kernel.org> wrote: > > > > On Thu, Apr 13, 2023 at 03:33:52PM +0200, Alexander Mikhalitsyn wrote: > > > Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS, > > > but it contains pidfd instead of plain pid, which allows programmers not > > > to care about PID reuse problem. > > > > > > Idea comes from UAPI kernel group: > > > https://uapi-group.org/kernel-features/ > > > > > > Big thanks to Christian Brauner and Lennart Poettering for productive > > > discussions about this. > > > > > > Cc: "David S. Miller" <davem@davemloft.net> > > > Cc: Eric Dumazet <edumazet@google.com> > > > Cc: Jakub Kicinski <kuba@kernel.org> > > > Cc: Paolo Abeni <pabeni@redhat.com> > > > Cc: Leon Romanovsky <leon@kernel.org> > > > Cc: David Ahern <dsahern@kernel.org> > > > Cc: Arnd Bergmann <arnd@arndb.de> > > > Cc: Kees Cook <keescook@chromium.org> > > > Cc: Christian Brauner <brauner@kernel.org> > > > Cc: Kuniyuki Iwashima <kuniyu@amazon.com> > > > Cc: Lennart Poettering <mzxreary@0pointer.de> > > > Cc: Luca Boccassi <bluca@debian.org> > > > Cc: linux-kernel@vger.kernel.org > > > Cc: netdev@vger.kernel.org > > > Cc: linux-arch@vger.kernel.org > > > Tested-by: Luca Boccassi <bluca@debian.org> > > > Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com> > > > Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com> > > > --- > > > v4: > > > - fixed silent fd_install if writting of CMSG to the userspace fails (pointed by Christian) > > > v2: > > > According to review comments from Kuniyuki Iwashima and Christian Brauner: > > > - use pidfd_create(..) 
retval as a result > > > - whitespace change > > > --- > > > arch/alpha/include/uapi/asm/socket.h | 2 ++ > > > arch/mips/include/uapi/asm/socket.h | 2 ++ > > > arch/parisc/include/uapi/asm/socket.h | 2 ++ > > > arch/sparc/include/uapi/asm/socket.h | 2 ++ > > > include/linux/net.h | 1 + > > > include/linux/socket.h | 1 + > > > include/net/scm.h | 39 +++++++++++++++++++++++-- > > > include/uapi/asm-generic/socket.h | 2 ++ > > > net/core/sock.c | 11 +++++++ > > > net/mptcp/sockopt.c | 1 + > > > net/unix/af_unix.c | 18 ++++++++---- > > > tools/include/uapi/asm-generic/socket.h | 2 ++ > > > 12 files changed, 76 insertions(+), 7 deletions(-) > > > > > > diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h > > > index 739891b94136..ff310613ae64 100644 > > > --- a/arch/alpha/include/uapi/asm/socket.h > > > +++ b/arch/alpha/include/uapi/asm/socket.h > > > @@ -137,6 +137,8 @@ > > > > > > #define SO_RCVMARK 75 > > > > > > +#define SO_PASSPIDFD 76 > > > + > > > #if !defined(__KERNEL__) > > > > > > #if __BITS_PER_LONG == 64 > > > diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h > > > index 18f3d95ecfec..762dcb80e4ec 100644 > > > --- a/arch/mips/include/uapi/asm/socket.h > > > +++ b/arch/mips/include/uapi/asm/socket.h > > > @@ -148,6 +148,8 @@ > > > > > > #define SO_RCVMARK 75 > > > > > > +#define SO_PASSPIDFD 76 > > > + > > > #if !defined(__KERNEL__) > > > > > > #if __BITS_PER_LONG == 64 > > > diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h > > > index f486d3dfb6bb..df16a3e16d64 100644 > > > --- a/arch/parisc/include/uapi/asm/socket.h > > > +++ b/arch/parisc/include/uapi/asm/socket.h > > > @@ -129,6 +129,8 @@ > > > > > > #define SO_RCVMARK 0x4049 > > > > > > +#define SO_PASSPIDFD 0x404A > > > + > > > #if !defined(__KERNEL__) > > > > > > #if __BITS_PER_LONG == 64 > > > diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h > > > 
index 2fda57a3ea86..6e2847804fea 100644 > > > --- a/arch/sparc/include/uapi/asm/socket.h > > > +++ b/arch/sparc/include/uapi/asm/socket.h > > > @@ -130,6 +130,8 @@ > > > > > > #define SO_RCVMARK 0x0054 > > > > > > +#define SO_PASSPIDFD 0x0055 > > > + > > > #if !defined(__KERNEL__) > > > > > > > > > diff --git a/include/linux/net.h b/include/linux/net.h > > > index b73ad8e3c212..c234dfbe7a30 100644 > > > --- a/include/linux/net.h > > > +++ b/include/linux/net.h > > > @@ -43,6 +43,7 @@ struct net; > > > #define SOCK_PASSSEC 4 > > > #define SOCK_SUPPORT_ZC 5 > > > #define SOCK_CUSTOM_SOCKOPT 6 > > > +#define SOCK_PASSPIDFD 7 > > > > > > #ifndef ARCH_HAS_SOCKET_TYPES > > > /** > > > diff --git a/include/linux/socket.h b/include/linux/socket.h > > > index 13c3a237b9c9..6bf90f251910 100644 > > > --- a/include/linux/socket.h > > > +++ b/include/linux/socket.h > > > @@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg) > > > #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */ > > > #define SCM_CREDENTIALS 0x02 /* rw: struct ucred */ > > > #define SCM_SECURITY 0x03 /* rw: security label */ > > > +#define SCM_PIDFD 0x04 /* ro: pidfd (int) */ > > > > > > struct ucred { > > > __u32 pid; > > > diff --git a/include/net/scm.h b/include/net/scm.h > > > index 585adc1346bd..c67f765a165b 100644 > > > --- a/include/net/scm.h > > > +++ b/include/net/scm.h > > > @@ -120,12 +120,44 @@ static inline bool scm_has_secdata(struct socket *sock) > > > } > > > #endif /* CONFIG_SECURITY_NETWORK */ > > > > > > +static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm) > > > +{ > > > + struct file *pidfd_file = NULL; > > > + int pidfd; > > > + > > > + /* > > > + * put_cmsg() doesn't return an error if CMSG is truncated, > > > + * that's why we need to opencode these checks here. 
> > > + */ > > > + if ((msg->msg_controllen <= sizeof(struct cmsghdr)) || > > > + (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) { > > > + msg->msg_flags |= MSG_CTRUNC; > > > + return; > > > > Hm, curious about this: We mark the message as truncated for SCM_PIDFD > > but if the same conditions were to apply for SCM_PASSCRED we don't mark > > the message as truncated. Am I reading this correct? And is so, you > > please briefly explain this difference? > > Hi, Christian! > > For SCM_CREDENTIALS we mark it too. Inside the put_cmsg function: > https://github.com/torvalds/linux/blob/6a8f57ae2eb07ab39a6f0ccad60c760743051026/net/core/scm.c#L225 > > The reason why I'm open-coding these checks is that I want to know > that the message > doesn't fit into the userspace buffer before doing pidfd_prepare and > other stuff and because > put_cmsg is not returning an error when message doesn't fit in the > userspace buffer and > we won't be able to properly do pidfd cleanup (put struct pid and fd index). > > > > > > + } > > > + > > > + WARN_ON_ONCE(!scm->pid); > > > + pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file); > > > + > > > + if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) { > > > > If the put_cmsg() of the pidfd fails userspace needs to be able to > > detect this. Otherwise they can't distinguish between the SCM_PIDFD > > value being zero because the put_cmsg() failed or put_cmsg() succeeded > > and the allocated fd nr was 0. > > If pidfd_prepare fails then userspace will receive SCM_PIDFD message > with negative pidfd value. So we discussed this a bit offline and I think there's still an issue. 
If put_cmsg() fails: if (msg->msg_control_is_user) { struct cmsghdr __user *cm = msg->msg_control_user; check_object_size(data, cmlen - sizeof(*cm), true); if (!user_write_access_begin(cm, cmlen)) goto efault; // This succeeds so cm->cmsg_len == CMSG_LEN(sizeof(int)) unsafe_put_user(cmlen, &cm->cmsg_len, efault_end); // This succeeds so cm->cmsg_level == SOL_SOCKET unsafe_put_user(level, &cm->cmsg_level, efault_end); // This succeeds so cm->cmsg_type == SCM_PIDFD unsafe_put_user(type, &cm->cmsg_type, efault_end); // This fails and leaves all bits set to 0 unsafe_copy_to_user(CMSG_USER_DATA(cm), data, cmlen - sizeof(*cm), efault_end); user_write_access_end(); so now we hit if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) { if (pidfd_file) { put_unused_fd(pidfd); fput(pidfd_file); } return; } and return early. Afaict, userspace would now receive: if (cmsg && cmsg->cmsg_len == CMSG_LEN(sizeof(int)) && cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_PIDFD) { memcpy(&pidfd, CMSG_DATA(cmsg), sizeof(int)); // pidfd is now 0 which is a valid fd number // it'll likely refer to /dev/stdin or whatever and so // will fail or, worst case, 0 refers to another pidfd :) pidfd_send_signal(pidfd, SIGKILL); so we need to address this. One way that I think would solve this is: diff --git a/net/core/scm.c b/net/core/scm.c index 3cd7dd377e53..d1f4cd135c5a 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -236,9 +236,9 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) unsafe_put_user(cmlen, &cm->cmsg_len, efault_end); unsafe_put_user(level, &cm->cmsg_level, efault_end); - unsafe_put_user(type, &cm->cmsg_type, efault_end); unsafe_copy_to_user(CMSG_USER_DATA(cm), data, cmlen - sizeof(*cm), efault_end); + unsafe_put_user(type, &cm->cmsg_type, efault_end); user_write_access_end(); } else { struct cmsghdr *cm = msg->msg_control; such that we only copy cm->cmsg_type after we have transferred the data.
On Mon, Apr 17, 2023 at 7:16 PM Christian Brauner <brauner@kernel.org> wrote: > > On Mon, Apr 17, 2023 at 06:01:16PM +0200, Aleksandr Mikhalitsyn wrote: > > On Mon, Apr 17, 2023 at 5:18 PM Christian Brauner <brauner@kernel.org> wrote: > > > > > > On Thu, Apr 13, 2023 at 03:33:52PM +0200, Alexander Mikhalitsyn wrote: > > > > Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS, > > > > but it contains pidfd instead of plain pid, which allows programmers not > > > > to care about PID reuse problem. > > > > > > > > Idea comes from UAPI kernel group: > > > > https://uapi-group.org/kernel-features/ > > > > > > > > Big thanks to Christian Brauner and Lennart Poettering for productive > > > > discussions about this. > > > > > > > > Cc: "David S. Miller" <davem@davemloft.net> > > > > Cc: Eric Dumazet <edumazet@google.com> > > > > Cc: Jakub Kicinski <kuba@kernel.org> > > > > Cc: Paolo Abeni <pabeni@redhat.com> > > > > Cc: Leon Romanovsky <leon@kernel.org> > > > > Cc: David Ahern <dsahern@kernel.org> > > > > Cc: Arnd Bergmann <arnd@arndb.de> > > > > Cc: Kees Cook <keescook@chromium.org> > > > > Cc: Christian Brauner <brauner@kernel.org> > > > > Cc: Kuniyuki Iwashima <kuniyu@amazon.com> > > > > Cc: Lennart Poettering <mzxreary@0pointer.de> > > > > Cc: Luca Boccassi <bluca@debian.org> > > > > Cc: linux-kernel@vger.kernel.org > > > > Cc: netdev@vger.kernel.org > > > > Cc: linux-arch@vger.kernel.org > > > > Tested-by: Luca Boccassi <bluca@debian.org> > > > > Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com> > > > > Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com> > > > > --- > > > > v4: > > > > - fixed silent fd_install if writting of CMSG to the userspace fails (pointed by Christian) > > > > v2: > > > > According to review comments from Kuniyuki Iwashima and Christian Brauner: > > > > - use pidfd_create(..) 
retval as a result > > > > - whitespace change > > > > --- > > > > arch/alpha/include/uapi/asm/socket.h | 2 ++ > > > > arch/mips/include/uapi/asm/socket.h | 2 ++ > > > > arch/parisc/include/uapi/asm/socket.h | 2 ++ > > > > arch/sparc/include/uapi/asm/socket.h | 2 ++ > > > > include/linux/net.h | 1 + > > > > include/linux/socket.h | 1 + > > > > include/net/scm.h | 39 +++++++++++++++++++++++-- > > > > include/uapi/asm-generic/socket.h | 2 ++ > > > > net/core/sock.c | 11 +++++++ > > > > net/mptcp/sockopt.c | 1 + > > > > net/unix/af_unix.c | 18 ++++++++---- > > > > tools/include/uapi/asm-generic/socket.h | 2 ++ > > > > 12 files changed, 76 insertions(+), 7 deletions(-) > > > > > > > > diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h > > > > index 739891b94136..ff310613ae64 100644 > > > > --- a/arch/alpha/include/uapi/asm/socket.h > > > > +++ b/arch/alpha/include/uapi/asm/socket.h > > > > @@ -137,6 +137,8 @@ > > > > > > > > #define SO_RCVMARK 75 > > > > > > > > +#define SO_PASSPIDFD 76 > > > > + > > > > #if !defined(__KERNEL__) > > > > > > > > #if __BITS_PER_LONG == 64 > > > > diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h > > > > index 18f3d95ecfec..762dcb80e4ec 100644 > > > > --- a/arch/mips/include/uapi/asm/socket.h > > > > +++ b/arch/mips/include/uapi/asm/socket.h > > > > @@ -148,6 +148,8 @@ > > > > > > > > #define SO_RCVMARK 75 > > > > > > > > +#define SO_PASSPIDFD 76 > > > > + > > > > #if !defined(__KERNEL__) > > > > > > > > #if __BITS_PER_LONG == 64 > > > > diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h > > > > index f486d3dfb6bb..df16a3e16d64 100644 > > > > --- a/arch/parisc/include/uapi/asm/socket.h > > > > +++ b/arch/parisc/include/uapi/asm/socket.h > > > > @@ -129,6 +129,8 @@ > > > > > > > > #define SO_RCVMARK 0x4049 > > > > > > > > +#define SO_PASSPIDFD 0x404A > > > > + > > > > #if !defined(__KERNEL__) > > > > > > > > #if 
__BITS_PER_LONG == 64 > > > > diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h > > > > index 2fda57a3ea86..6e2847804fea 100644 > > > > --- a/arch/sparc/include/uapi/asm/socket.h > > > > +++ b/arch/sparc/include/uapi/asm/socket.h > > > > @@ -130,6 +130,8 @@ > > > > > > > > #define SO_RCVMARK 0x0054 > > > > > > > > +#define SO_PASSPIDFD 0x0055 > > > > + > > > > #if !defined(__KERNEL__) > > > > > > > > > > > > diff --git a/include/linux/net.h b/include/linux/net.h > > > > index b73ad8e3c212..c234dfbe7a30 100644 > > > > --- a/include/linux/net.h > > > > +++ b/include/linux/net.h > > > > @@ -43,6 +43,7 @@ struct net; > > > > #define SOCK_PASSSEC 4 > > > > #define SOCK_SUPPORT_ZC 5 > > > > #define SOCK_CUSTOM_SOCKOPT 6 > > > > +#define SOCK_PASSPIDFD 7 > > > > > > > > #ifndef ARCH_HAS_SOCKET_TYPES > > > > /** > > > > diff --git a/include/linux/socket.h b/include/linux/socket.h > > > > index 13c3a237b9c9..6bf90f251910 100644 > > > > --- a/include/linux/socket.h > > > > +++ b/include/linux/socket.h > > > > @@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg) > > > > #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */ > > > > #define SCM_CREDENTIALS 0x02 /* rw: struct ucred */ > > > > #define SCM_SECURITY 0x03 /* rw: security label */ > > > > +#define SCM_PIDFD 0x04 /* ro: pidfd (int) */ > > > > > > > > struct ucred { > > > > __u32 pid; > > > > diff --git a/include/net/scm.h b/include/net/scm.h > > > > index 585adc1346bd..c67f765a165b 100644 > > > > --- a/include/net/scm.h > > > > +++ b/include/net/scm.h > > > > @@ -120,12 +120,44 @@ static inline bool scm_has_secdata(struct socket *sock) > > > > } > > > > #endif /* CONFIG_SECURITY_NETWORK */ > > > > > > > > +static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm) > > > > +{ > > > > + struct file *pidfd_file = NULL; > > > > + int pidfd; > > > > + > > > > + /* > > > > + * put_cmsg() doesn't return an error if CMSG is truncated, 
> > > > + * that's why we need to opencode these checks here. > > > > + */ > > > > + if ((msg->msg_controllen <= sizeof(struct cmsghdr)) || > > > > + (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) { > > > > + msg->msg_flags |= MSG_CTRUNC; > > > > + return; > > > > > > Hm, curious about this: We mark the message as truncated for SCM_PIDFD > > > but if the same conditions were to apply for SCM_PASSCRED we don't mark > > > the message as truncated. Am I reading this correct? And is so, you > > > please briefly explain this difference? > > > > Hi, Christian! > > > > For SCM_CREDENTIALS we mark it too. Inside the put_cmsg function: > > https://github.com/torvalds/linux/blob/6a8f57ae2eb07ab39a6f0ccad60c760743051026/net/core/scm.c#L225 > > > > The reason why I'm open-coding these checks is that I want to know > > that the message > > doesn't fit into the userspace buffer before doing pidfd_prepare and > > other stuff and because > > put_cmsg is not returning an error when message doesn't fit in the > > userspace buffer and > > we won't be able to properly do pidfd cleanup (put struct pid and fd index). > > > > > > > > > + } > > > > + > > > > + WARN_ON_ONCE(!scm->pid); > > > > + pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file); > > > > + > > > > + if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) { > > > > > > If the put_cmsg() of the pidfd fails userspace needs to be able to > > > detect this. Otherwise they can't distinguish between the SCM_PIDFD > > > value being zero because the put_cmsg() failed or put_cmsg() succeeded > > > and the allocated fd nr was 0. > > > > If pidfd_prepare fails then userspace will receive SCM_PIDFD message > > with negative pidfd value. > > So we discussed this a bit offline and I think there's still an issue. 
> If put_cmsg() fails > > if (msg->msg_control_is_user) { > struct cmsghdr __user *cm = msg->msg_control_user; > > check_object_size(data, cmlen - sizeof(*cm), true); > > if (!user_write_access_begin(cm, cmlen)) > goto efault; > > // This succeeds so cm->cmsg_len == sizeof(int) > unsafe_put_user(cmlen, &cm->cmsg_len, efault_end); > > // This succeeds so cm->cmsg_level == SOL_SOCKET > unsafe_put_user(level, &cm->cmsg_level, efault_end); > > // This succeeds so cm->cmsg_type == SCM_PIDFD > unsafe_put_user(type, &cm->cmsg_type, efault_end); > > // This fails and leaves all bits set to 0 > unsafe_copy_to_user(CMSG_USER_DATA(cm), data, > cmlen - sizeof(*cm), efault_end); > user_write_access_end(); > > so now we hit > > if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) { > if (pidfd_file) { > put_unused_fd(pidfd); > fput(pidfd_file); > } > > return; > } > > and return early. Afaict, userspace would now receive: > > if (cmsg && cmsg->cmsg_len == CMSG_LEN(sizeof(int)) && > cmsg->cmsg_level == SOL_SOCKET && > cmsg->cmsg_type == SCM_PIDFD) { > memcpy(&pidfd, CMSG_DATA(cmsg), sizeof(int)); > > // pidfd is now 0 which is a valid fd number > // it'll likely refer to /dev/stdin or whatever and so > // will fail or, worst case, 0 refers to another pidfd :) > pidfd_send_signal(pidfd, SIGKILL); > > so we need to address this. 
So one way I think that would solve this is: > > diff --git a/net/core/scm.c b/net/core/scm.c > index 3cd7dd377e53..d1f4cd135c5a 100644 > --- a/net/core/scm.c > +++ b/net/core/scm.c > @@ -236,9 +236,9 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) > > unsafe_put_user(cmlen, &cm->cmsg_len, efault_end); > unsafe_put_user(level, &cm->cmsg_level, efault_end); > - unsafe_put_user(type, &cm->cmsg_type, efault_end); > unsafe_copy_to_user(CMSG_USER_DATA(cm), data, > cmlen - sizeof(*cm), efault_end); > + unsafe_put_user(type, &cm->cmsg_type, efault_end); > user_write_access_end(); > } else { > struct cmsghdr *cm = msg->msg_control; > > such that we only copy cm->cmsg_type after we transfered the data. This looks wrong to me. If put_cmsg() returns -EFAULT, then msg->msg_control and msg->msg_controllen were not changed, so the user application should not attempt to read this part of the control buffer, as it could contain garbage.
On Mon, Apr 17, 2023 at 07:43:19PM +0200, Eric Dumazet wrote: > On Mon, Apr 17, 2023 at 7:16 PM Christian Brauner <brauner@kernel.org> wrote: > > > > On Mon, Apr 17, 2023 at 06:01:16PM +0200, Aleksandr Mikhalitsyn wrote: > > > On Mon, Apr 17, 2023 at 5:18 PM Christian Brauner <brauner@kernel.org> wrote: > > > > > > > > On Thu, Apr 13, 2023 at 03:33:52PM +0200, Alexander Mikhalitsyn wrote: > > > > > Implement SCM_PIDFD, a new type of CMSG type analogical to SCM_CREDENTIALS, > > > > > but it contains pidfd instead of plain pid, which allows programmers not > > > > > to care about PID reuse problem. > > > > > > > > > > Idea comes from UAPI kernel group: > > > > > https://uapi-group.org/kernel-features/ > > > > > > > > > > Big thanks to Christian Brauner and Lennart Poettering for productive > > > > > discussions about this. > > > > > > > > > > Cc: "David S. Miller" <davem@davemloft.net> > > > > > Cc: Eric Dumazet <edumazet@google.com> > > > > > Cc: Jakub Kicinski <kuba@kernel.org> > > > > > Cc: Paolo Abeni <pabeni@redhat.com> > > > > > Cc: Leon Romanovsky <leon@kernel.org> > > > > > Cc: David Ahern <dsahern@kernel.org> > > > > > Cc: Arnd Bergmann <arnd@arndb.de> > > > > > Cc: Kees Cook <keescook@chromium.org> > > > > > Cc: Christian Brauner <brauner@kernel.org> > > > > > Cc: Kuniyuki Iwashima <kuniyu@amazon.com> > > > > > Cc: Lennart Poettering <mzxreary@0pointer.de> > > > > > Cc: Luca Boccassi <bluca@debian.org> > > > > > Cc: linux-kernel@vger.kernel.org > > > > > Cc: netdev@vger.kernel.org > > > > > Cc: linux-arch@vger.kernel.org > > > > > Tested-by: Luca Boccassi <bluca@debian.org> > > > > > Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com> > > > > > Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com> > > > > > --- > > > > > v4: > > > > > - fixed silent fd_install if writting of CMSG to the userspace fails (pointed by Christian) > > > > > v2: > > > > > According to review comments from Kuniyuki Iwashima and Christian Brauner: > > > > > - use 
pidfd_create(..) retval as a result > > > > > - whitespace change > > > > > --- > > > > > arch/alpha/include/uapi/asm/socket.h | 2 ++ > > > > > arch/mips/include/uapi/asm/socket.h | 2 ++ > > > > > arch/parisc/include/uapi/asm/socket.h | 2 ++ > > > > > arch/sparc/include/uapi/asm/socket.h | 2 ++ > > > > > include/linux/net.h | 1 + > > > > > include/linux/socket.h | 1 + > > > > > include/net/scm.h | 39 +++++++++++++++++++++++-- > > > > > include/uapi/asm-generic/socket.h | 2 ++ > > > > > net/core/sock.c | 11 +++++++ > > > > > net/mptcp/sockopt.c | 1 + > > > > > net/unix/af_unix.c | 18 ++++++++---- > > > > > tools/include/uapi/asm-generic/socket.h | 2 ++ > > > > > 12 files changed, 76 insertions(+), 7 deletions(-) > > > > > > > > > > diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h > > > > > index 739891b94136..ff310613ae64 100644 > > > > > --- a/arch/alpha/include/uapi/asm/socket.h > > > > > +++ b/arch/alpha/include/uapi/asm/socket.h > > > > > @@ -137,6 +137,8 @@ > > > > > > > > > > #define SO_RCVMARK 75 > > > > > > > > > > +#define SO_PASSPIDFD 76 > > > > > + > > > > > #if !defined(__KERNEL__) > > > > > > > > > > #if __BITS_PER_LONG == 64 > > > > > diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h > > > > > index 18f3d95ecfec..762dcb80e4ec 100644 > > > > > --- a/arch/mips/include/uapi/asm/socket.h > > > > > +++ b/arch/mips/include/uapi/asm/socket.h > > > > > @@ -148,6 +148,8 @@ > > > > > > > > > > #define SO_RCVMARK 75 > > > > > > > > > > +#define SO_PASSPIDFD 76 > > > > > + > > > > > #if !defined(__KERNEL__) > > > > > > > > > > #if __BITS_PER_LONG == 64 > > > > > diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h > > > > > index f486d3dfb6bb..df16a3e16d64 100644 > > > > > --- a/arch/parisc/include/uapi/asm/socket.h > > > > > +++ b/arch/parisc/include/uapi/asm/socket.h > > > > > @@ -129,6 +129,8 @@ > > > > > > > > > > #define SO_RCVMARK 0x4049 > > > > 
> > > > > > +#define SO_PASSPIDFD 0x404A > > > > > + > > > > > #if !defined(__KERNEL__) > > > > > > > > > > #if __BITS_PER_LONG == 64 > > > > > diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h > > > > > index 2fda57a3ea86..6e2847804fea 100644 > > > > > --- a/arch/sparc/include/uapi/asm/socket.h > > > > > +++ b/arch/sparc/include/uapi/asm/socket.h > > > > > @@ -130,6 +130,8 @@ > > > > > > > > > > #define SO_RCVMARK 0x0054 > > > > > > > > > > +#define SO_PASSPIDFD 0x0055 > > > > > + > > > > > #if !defined(__KERNEL__) > > > > > > > > > > > > > > > diff --git a/include/linux/net.h b/include/linux/net.h > > > > > index b73ad8e3c212..c234dfbe7a30 100644 > > > > > --- a/include/linux/net.h > > > > > +++ b/include/linux/net.h > > > > > @@ -43,6 +43,7 @@ struct net; > > > > > #define SOCK_PASSSEC 4 > > > > > #define SOCK_SUPPORT_ZC 5 > > > > > #define SOCK_CUSTOM_SOCKOPT 6 > > > > > +#define SOCK_PASSPIDFD 7 > > > > > > > > > > #ifndef ARCH_HAS_SOCKET_TYPES > > > > > /** > > > > > diff --git a/include/linux/socket.h b/include/linux/socket.h > > > > > index 13c3a237b9c9..6bf90f251910 100644 > > > > > --- a/include/linux/socket.h > > > > > +++ b/include/linux/socket.h > > > > > @@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg) > > > > > #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */ > > > > > #define SCM_CREDENTIALS 0x02 /* rw: struct ucred */ > > > > > #define SCM_SECURITY 0x03 /* rw: security label */ > > > > > +#define SCM_PIDFD 0x04 /* ro: pidfd (int) */ > > > > > > > > > > struct ucred { > > > > > __u32 pid; > > > > > diff --git a/include/net/scm.h b/include/net/scm.h > > > > > index 585adc1346bd..c67f765a165b 100644 > > > > > --- a/include/net/scm.h > > > > > +++ b/include/net/scm.h > > > > > @@ -120,12 +120,44 @@ static inline bool scm_has_secdata(struct socket *sock) > > > > > } > > > > > #endif /* CONFIG_SECURITY_NETWORK */ > > > > > > > > > > +static __inline__ void 
scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm) > > > > > +{ > > > > > + struct file *pidfd_file = NULL; > > > > > + int pidfd; > > > > > + > > > > > + /* > > > > > + * put_cmsg() doesn't return an error if CMSG is truncated, > > > > > + * that's why we need to opencode these checks here. > > > > > + */ > > > > > + if ((msg->msg_controllen <= sizeof(struct cmsghdr)) || > > > > > + (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) { > > > > > + msg->msg_flags |= MSG_CTRUNC; > > > > > + return; > > > > > > > > Hm, curious about this: We mark the message as truncated for SCM_PIDFD > > > > but if the same conditions were to apply for SCM_PASSCRED we don't mark > > > > the message as truncated. Am I reading this correctly? And if so, could you > > > > please briefly explain this difference? > > > > > > Hi, Christian! > > > > > > For SCM_CREDENTIALS we mark it too. Inside the put_cmsg function: > > > https://github.com/torvalds/linux/blob/6a8f57ae2eb07ab39a6f0ccad60c760743051026/net/core/scm.c#L225 > > > > > > The reason why I'm open-coding these checks is that I want to know > > > that the message > > > doesn't fit into the userspace buffer before doing pidfd_prepare and > > > other stuff and because > > > put_cmsg is not returning an error when message doesn't fit in the > > > userspace buffer and > > > we won't be able to properly do pidfd cleanup (put struct pid and fd index). > > > > > > > > > > > > + } > > > > > + > > > > > + WARN_ON_ONCE(!scm->pid); > > > > > + pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file); > > > > > + > > > > > + if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) { > > > > > > > > If the put_cmsg() of the pidfd fails userspace needs to be able to > > > > detect this. Otherwise they can't distinguish between the SCM_PIDFD > > > > value being zero because the put_cmsg() failed or put_cmsg() succeeded > > > > and the allocated fd nr was 0.
> > > > > > If pidfd_prepare fails then userspace will receive SCM_PIDFD message > > > with negative pidfd value. > > > > So we discussed this a bit offline and I think there's still an issue. > > If put_cmsg() fails > > > > if (msg->msg_control_is_user) { > > struct cmsghdr __user *cm = msg->msg_control_user; > > > > check_object_size(data, cmlen - sizeof(*cm), true); > > > > if (!user_write_access_begin(cm, cmlen)) > > goto efault; > > > > // This succeeds so cm->cmsg_len == sizeof(int) > > unsafe_put_user(cmlen, &cm->cmsg_len, efault_end); > > > > // This succeeds so cm->cmsg_level == SOL_SOCKET > > unsafe_put_user(level, &cm->cmsg_level, efault_end); > > > > // This succeeds so cm->cmsg_type == SCM_PIDFD > > unsafe_put_user(type, &cm->cmsg_type, efault_end); > > > > // This fails and leaves all bits set to 0 > > unsafe_copy_to_user(CMSG_USER_DATA(cm), data, > > cmlen - sizeof(*cm), efault_end); > > user_write_access_end(); > > > > so now we hit > > > > if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) { > > if (pidfd_file) { > > put_unused_fd(pidfd); > > fput(pidfd_file); > > } > > > > return; > > } > > > > and return early. Afaict, userspace would now receive: > > > > if (cmsg && cmsg->cmsg_len == CMSG_LEN(sizeof(int)) && > > cmsg->cmsg_level == SOL_SOCKET && > > cmsg->cmsg_type == SCM_PIDFD) { > > memcpy(&pidfd, CMSG_DATA(cmsg), sizeof(int)); > > > > // pidfd is now 0 which is a valid fd number > > // it'll likely refer to /dev/stdin or whatever and so > > // will fail or, worst case, 0 refers to another pidfd :) > > pidfd_send_signal(pidfd, SIGKILL); > > > > so we need to address this. 
So one way that I think would solve this is: > > > > diff --git a/net/core/scm.c b/net/core/scm.c > > index 3cd7dd377e53..d1f4cd135c5a 100644 > > --- a/net/core/scm.c > > +++ b/net/core/scm.c > > @@ -236,9 +236,9 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) > > > > unsafe_put_user(cmlen, &cm->cmsg_len, efault_end); > > unsafe_put_user(level, &cm->cmsg_level, efault_end); > > - unsafe_put_user(type, &cm->cmsg_type, efault_end); > > unsafe_copy_to_user(CMSG_USER_DATA(cm), data, > > cmlen - sizeof(*cm), efault_end); > > + unsafe_put_user(type, &cm->cmsg_type, efault_end); > > user_write_access_end(); > > } else { > > struct cmsghdr *cm = msg->msg_control; > > > > such that we only copy cm->cmsg_type after we transferred the data. > > This looks wrong to me. > > If put_cmsg() returns -EFAULT, then msg->msg_control and > msg->msg_controllen were not changed. > > So the user application should not attempt to read this part of the > control buffer; it could contain garbage. Thanks for the review, Eric. That's reassuring. I've done a bit of container-related networking before but I'm fumbling my way through the reviews here. So any additional reviews here would be very helpful.
On Thu, Apr 13, 2023 at 03:33:52PM +0200, Alexander Mikhalitsyn wrote: > Implement SCM_PIDFD, a new CMSG type analogous to SCM_CREDENTIALS, > but it contains a pidfd instead of a plain pid, which allows programmers not > to care about the PID reuse problem. > > The idea comes from the UAPI kernel group: > https://uapi-group.org/kernel-features/ > > Big thanks to Christian Brauner and Lennart Poettering for productive > discussions about this. > > Cc: "David S. Miller" <davem@davemloft.net> > Cc: Eric Dumazet <edumazet@google.com> > Cc: Jakub Kicinski <kuba@kernel.org> > Cc: Paolo Abeni <pabeni@redhat.com> > Cc: Leon Romanovsky <leon@kernel.org> > Cc: David Ahern <dsahern@kernel.org> > Cc: Arnd Bergmann <arnd@arndb.de> > Cc: Kees Cook <keescook@chromium.org> > Cc: Christian Brauner <brauner@kernel.org> > Cc: Kuniyuki Iwashima <kuniyu@amazon.com> > Cc: Lennart Poettering <mzxreary@0pointer.de> > Cc: Luca Boccassi <bluca@debian.org> > Cc: linux-kernel@vger.kernel.org > Cc: netdev@vger.kernel.org > Cc: linux-arch@vger.kernel.org > Tested-by: Luca Boccassi <bluca@debian.org> > Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com> > Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com> > --- > v4: > - fixed silent fd_install if writing of CMSG to userspace fails (pointed out by Christian) I don't have a lot more to add to this, Reviewed-by: Christian Brauner <brauner@kernel.org>
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 739891b94136..ff310613ae64 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -137,6 +137,8 @@ #define SO_RCVMARK 75 +#define SO_PASSPIDFD 76 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 18f3d95ecfec..762dcb80e4ec 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -148,6 +148,8 @@ #define SO_RCVMARK 75 +#define SO_PASSPIDFD 76 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index f486d3dfb6bb..df16a3e16d64 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -129,6 +129,8 @@ #define SO_RCVMARK 0x4049 +#define SO_PASSPIDFD 0x404A + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 2fda57a3ea86..6e2847804fea 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -130,6 +130,8 @@ #define SO_RCVMARK 0x0054 +#define SO_PASSPIDFD 0x0055 + #if !defined(__KERNEL__) diff --git a/include/linux/net.h b/include/linux/net.h index b73ad8e3c212..c234dfbe7a30 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -43,6 +43,7 @@ struct net; #define SOCK_PASSSEC 4 #define SOCK_SUPPORT_ZC 5 #define SOCK_CUSTOM_SOCKOPT 6 +#define SOCK_PASSPIDFD 7 #ifndef ARCH_HAS_SOCKET_TYPES /** diff --git a/include/linux/socket.h b/include/linux/socket.h index 13c3a237b9c9..6bf90f251910 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg) #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */ #define SCM_CREDENTIALS 0x02 /* rw: struct ucred */ 
#define SCM_SECURITY 0x03 /* rw: security label */ +#define SCM_PIDFD 0x04 /* ro: pidfd (int) */ struct ucred { __u32 pid; diff --git a/include/net/scm.h b/include/net/scm.h index 585adc1346bd..c67f765a165b 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -120,12 +120,44 @@ static inline bool scm_has_secdata(struct socket *sock) } #endif /* CONFIG_SECURITY_NETWORK */ +static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm) +{ + struct file *pidfd_file = NULL; + int pidfd; + + /* + * put_cmsg() doesn't return an error if CMSG is truncated, + * that's why we need to opencode these checks here. + */ + if ((msg->msg_controllen <= sizeof(struct cmsghdr)) || + (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) { + msg->msg_flags |= MSG_CTRUNC; + return; + } + + WARN_ON_ONCE(!scm->pid); + pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file); + + if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) { + if (pidfd_file) { + put_unused_fd(pidfd); + fput(pidfd_file); + } + + return; + } + + if (pidfd_file) + fd_install(pidfd, pidfd_file); +} + static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm, int flags) { if (!msg->msg_control) { - if (test_bit(SOCK_PASSCRED, &sock->flags) || scm->fp || - scm_has_secdata(sock)) + if (test_bit(SOCK_PASSCRED, &sock->flags) || + test_bit(SOCK_PASSPIDFD, &sock->flags) || + scm->fp || scm_has_secdata(sock)) msg->msg_flags |= MSG_CTRUNC; scm_destroy(scm); return; @@ -141,6 +173,9 @@ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg, put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(ucreds), &ucreds); } + if (test_bit(SOCK_PASSPIDFD, &sock->flags)) + scm_pidfd_recv(msg, scm); + scm_destroy_cred(scm); scm_passec(sock, msg, scm); diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 638230899e98..b76169fdb80b 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ 
-132,6 +132,8 @@ #define SO_RCVMARK 75 +#define SO_PASSPIDFD 76 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/net/core/sock.c b/net/core/sock.c index c25888795390..3f974246ba3e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1246,6 +1246,13 @@ int sk_setsockopt(struct sock *sk, int level, int optname, clear_bit(SOCK_PASSCRED, &sock->flags); break; + case SO_PASSPIDFD: + if (valbool) + set_bit(SOCK_PASSPIDFD, &sock->flags); + else + clear_bit(SOCK_PASSPIDFD, &sock->flags); + break; + case SO_TIMESTAMP_OLD: case SO_TIMESTAMP_NEW: case SO_TIMESTAMPNS_OLD: @@ -1737,6 +1744,10 @@ int sk_getsockopt(struct sock *sk, int level, int optname, v.val = !!test_bit(SOCK_PASSCRED, &sock->flags); break; + case SO_PASSPIDFD: + v.val = !!test_bit(SOCK_PASSPIDFD, &sock->flags); + break; + case SO_PEERCRED: { struct ucred peercred; diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index b655cebda0f3..67be0558862f 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -355,6 +355,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, case SO_BROADCAST: case SO_BSDCOMPAT: case SO_PASSCRED: + case SO_PASSPIDFD: case SO_PASSSEC: case SO_RXQ_OVFL: case SO_WIFI_STATUS: diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index fb31e8a4409e..6d5dff4dfe83 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1361,7 +1361,8 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (err) goto out; - if (test_bit(SOCK_PASSCRED, &sock->flags) && + if ((test_bit(SOCK_PASSCRED, &sock->flags) || + test_bit(SOCK_PASSPIDFD, &sock->flags)) && !unix_sk(sk)->addr) { err = unix_autobind(sk); if (err) @@ -1469,7 +1470,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (err) goto out; - if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { + if ((test_bit(SOCK_PASSCRED, &sock->flags) || + test_bit(SOCK_PASSPIDFD, &sock->flags)) && 
!u->addr) { err = unix_autobind(sk); if (err) goto out; @@ -1670,6 +1672,8 @@ static void unix_sock_inherit_flags(const struct socket *old, { if (test_bit(SOCK_PASSCRED, &old->flags)) set_bit(SOCK_PASSCRED, &new->flags); + if (test_bit(SOCK_PASSPIDFD, &old->flags)) + set_bit(SOCK_PASSPIDFD, &new->flags); if (test_bit(SOCK_PASSSEC, &old->flags)) set_bit(SOCK_PASSSEC, &new->flags); } @@ -1819,8 +1823,10 @@ static bool unix_passcred_enabled(const struct socket *sock, const struct sock *other) { return test_bit(SOCK_PASSCRED, &sock->flags) || + test_bit(SOCK_PASSPIDFD, &sock->flags) || !other->sk_socket || - test_bit(SOCK_PASSCRED, &other->sk_socket->flags); + test_bit(SOCK_PASSCRED, &other->sk_socket->flags) || + test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags); } /* @@ -1922,7 +1928,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } - if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { + if ((test_bit(SOCK_PASSCRED, &sock->flags) || + test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { err = unix_autobind(sk); if (err) goto out; @@ -2824,7 +2831,8 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, /* Never glue messages from different writers */ if (!unix_skb_scm_eq(skb, &scm)) break; - } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { + } else if (test_bit(SOCK_PASSCRED, &sock->flags) || + test_bit(SOCK_PASSPIDFD, &sock->flags)) { /* Copy credentials */ scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); unix_set_secdata(&scm, skb); diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index 8756df13be50..fbbc4bf53ee3 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -121,6 +121,8 @@ #define SO_RCVMARK 75 +#define SO_PASSPIDFD 76 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))