Message ID | 87k12dakfx.fsf_-_@x220.int.ebiederm.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/2] signal: Factor copy_siginfo_to_external32 from copy_siginfo_to_user32 | expand |
Le 17/04/2020 à 23:09, Eric W. Biederman a écrit : > > To remove the use of set_fs in the coredump code there needs to be a > way to convert a kernel siginfo to a userspace compat siginfo. > > Call that function copy_siginfo_to_compat and factor it out of > copy_siginfo_to_user32. I find it a pitty to do that. The existing function could have been easily converted to using user_access_begin() + user_access_end() and use unsafe_put_user() to copy to userspace to avoid copying through a temporary structure on the stack. With your change, it becomes impossible to do that. Is that really an issue to use that set_fs() in the coredump code ? Christophe > > The existence of x32 complicates this code. On x32 SIGCHLD uses 64bit > times for utime and stime. As only SIGCHLD is affected and SIGCHLD > never causes a coredump I have avoided handling that case. > > Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> > --- > include/linux/compat.h | 1 + > kernel/signal.c | 108 +++++++++++++++++++++++------------------ > 2 files changed, 63 insertions(+), 46 deletions(-) > > diff --git a/include/linux/compat.h b/include/linux/compat.h > index 0480ba4db592..4962b254e550 100644 > --- a/include/linux/compat.h > +++ b/include/linux/compat.h > @@ -402,6 +402,7 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask, > unsigned long bitmap_size); > long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask, > unsigned long bitmap_size); > +void copy_siginfo_to_external32(struct compat_siginfo *to, const struct kernel_siginfo *from); > int copy_siginfo_from_user32(kernel_siginfo_t *to, const struct compat_siginfo __user *from); > int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginfo_t *from); > int get_compat_sigevent(struct sigevent *event, > diff --git a/kernel/signal.c b/kernel/signal.c > index e58a6c619824..578f196898cb 100644 > --- a/kernel/signal.c > +++ b/kernel/signal.c > @@ -3235,90 +3235,106 @@ int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from) > } > > #ifdef CONFIG_COMPAT > -int copy_siginfo_to_user32(struct compat_siginfo __user *to, > - const struct kernel_siginfo *from) > -#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) > +void copy_siginfo_to_external32(struct compat_siginfo *to, > + const struct kernel_siginfo *from) > { > - return __copy_siginfo_to_user32(to, from, in_x32_syscall()); > -} > -int __copy_siginfo_to_user32(struct compat_siginfo __user *to, > - const struct kernel_siginfo *from, bool x32_ABI) > -#endif > -{ > - struct compat_siginfo new; > - memset(&new, 0, sizeof(new)); > + /* > + * This function does not work properly for SIGCHLD on x32, > + * but it does not need to as SIGCHLD never causes a coredump. > + */ > + memset(to, 0, sizeof(*to)); > > - new.si_signo = from->si_signo; > - new.si_errno = from->si_errno; > - new.si_code = from->si_code; > + to->si_signo = from->si_signo; > + to->si_errno = from->si_errno; > + to->si_code = from->si_code; > switch(siginfo_layout(from->si_signo, from->si_code)) { > case SIL_KILL: > - new.si_pid = from->si_pid; > - new.si_uid = from->si_uid; > + to->si_pid = from->si_pid; > + to->si_uid = from->si_uid; > break; > case SIL_TIMER: > - new.si_tid = from->si_tid; > - new.si_overrun = from->si_overrun; > - new.si_int = from->si_int; > + to->si_tid = from->si_tid; > + to->si_overrun = from->si_overrun; > + to->si_int = from->si_int; > break; > case SIL_POLL: > - new.si_band = from->si_band; > - new.si_fd = from->si_fd; > + to->si_band = from->si_band; > + to->si_fd = from->si_fd; > break; > case SIL_FAULT: > - new.si_addr = ptr_to_compat(from->si_addr); > + to->si_addr = ptr_to_compat(from->si_addr); > #ifdef __ARCH_SI_TRAPNO > - new.si_trapno = from->si_trapno; > + to->si_trapno = from->si_trapno; > #endif > break; > case SIL_FAULT_MCEERR: > - new.si_addr = ptr_to_compat(from->si_addr); > + to->si_addr = ptr_to_compat(from->si_addr); > #ifdef __ARCH_SI_TRAPNO > - new.si_trapno = from->si_trapno; > + to->si_trapno = from->si_trapno; > #endif > - new.si_addr_lsb = from->si_addr_lsb; > + to->si_addr_lsb = from->si_addr_lsb; > break; > case SIL_FAULT_BNDERR: > - new.si_addr = ptr_to_compat(from->si_addr); > + to->si_addr = ptr_to_compat(from->si_addr); > #ifdef __ARCH_SI_TRAPNO > - new.si_trapno = from->si_trapno; > + to->si_trapno = from->si_trapno; > #endif > - new.si_lower = ptr_to_compat(from->si_lower); > - new.si_upper = ptr_to_compat(from->si_upper); > + to->si_lower = ptr_to_compat(from->si_lower); > + to->si_upper = ptr_to_compat(from->si_upper); > break; > case SIL_FAULT_PKUERR: > - new.si_addr = ptr_to_compat(from->si_addr); > + to->si_addr = ptr_to_compat(from->si_addr); > #ifdef __ARCH_SI_TRAPNO > - new.si_trapno = from->si_trapno; > + to->si_trapno = from->si_trapno; > #endif > - new.si_pkey = from->si_pkey; > + to->si_pkey = from->si_pkey; > break; > case SIL_CHLD: > - new.si_pid = from->si_pid; > - new.si_uid = from->si_uid; > - new.si_status = from->si_status; > + to->si_pid = from->si_pid; > + to->si_uid = from->si_uid; > + to->si_status = from->si_status; > + to->si_utime = from->si_utime; > + to->si_stime = from->si_stime; > #ifdef CONFIG_X86_X32_ABI > if (x32_ABI) { > - new._sifields._sigchld_x32._utime = from->si_utime; > - new._sifields._sigchld_x32._stime = from->si_stime; > + to->_sifields._sigchld_x32._utime = from->si_utime; > + to->_sifields._sigchld_x32._stime = from->si_stime; > } else > #endif > { > - new.si_utime = from->si_utime; > - new.si_stime = from->si_stime; > } > break; > case SIL_RT: > - new.si_pid = from->si_pid; > - new.si_uid = from->si_uid; > - new.si_int = from->si_int; > + to->si_pid = from->si_pid; > + to->si_uid = from->si_uid; > + to->si_int = from->si_int; > break; > case SIL_SYS: > - new.si_call_addr = ptr_to_compat(from->si_call_addr); > - new.si_syscall = from->si_syscall; > - new.si_arch = from->si_arch; > + to->si_call_addr = ptr_to_compat(from->si_call_addr); > + to->si_syscall = from->si_syscall; > + to->si_arch = from->si_arch; > break; > } > +} > + > +int copy_siginfo_to_user32(struct compat_siginfo __user *to, > + const struct kernel_siginfo *from) > +#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) > +{ > + return __copy_siginfo_to_user32(to, from, in_x32_syscall()); > +} > +int __copy_siginfo_to_user32(struct compat_siginfo __user *to, > + const struct kernel_siginfo *from, bool x32_ABI) > +#endif > +{ > + struct compat_siginfo new; > + copy_siginfo_to_external32(&new, from); > +#ifdef CONFIG_X86_X32_ABI > + if (x32_ABI && from->si_signo == SIGCHLD) { > + new._sifields._sigchld_x32._utime = from->si_utime; > + new._sifields._sigchld_x32._stime = from->si_stime; > + } > +#endif > > if (copy_to_user(to, &new, sizeof(struct compat_siginfo))) > return -EFAULT; >
Christophe Leroy <christophe.leroy@c-s.fr> writes: > Le 17/04/2020 à 23:09, Eric W. Biederman a écrit : >> >> To remove the use of set_fs in the coredump code there needs to be a >> way to convert a kernel siginfo to a userspace compat siginfo. >> >> Call that function copy_siginfo_to_compat and factor it out of >> copy_siginfo_to_user32. > > I find it a pitty to do that. > > The existing function could have been easily converted to using > user_access_begin() + user_access_end() and use unsafe_put_user() to copy to > userspace to avoid copying through a temporary structure on the stack. > > With your change, it becomes impossible to do that. I don't follow. You don't like temporary structures in the coredump code or temporary structures in copy_siginfo_to_user32? A temporary structure in copy_siginfo_to_user is pretty much required so that it can be zeroed to guarantee we don't pass a structure with holes to userspace. The implementation of copy_siginfo_to_user32 used to use the equivalent of user_access_begin() and user_access_end() and the code was a mess that was very difficult to reason about. I recall their being holes in the structure that were being copied to userspace. Meanwhile if you are going to set all of the bytes a cache hot temporary structure is quite cheap. > Is that really an issue to use that set_fs() in the coredump code ? Using set_fs() is pretty bad and something that we would like to remove from the kernel entirely. The fewer instances of set_fs() we have the better. I forget all of the details but set_fs() is both a type violation and an attack point when people are attacking the kernel. The existence of set_fs() requires somethings that should be constants to be variables. Something about that means that our current code is difficult to protect from spectre style vulnerabilities. There was a very good thread about it all in I think 2018 but unfortunately I can't find it now. Eric
On Sat, Apr 18, 2020 at 10:05:19AM +0200, Christophe Leroy wrote: > > > Le 17/04/2020 à 23:09, Eric W. Biederman a écrit : >> >> To remove the use of set_fs in the coredump code there needs to be a >> way to convert a kernel siginfo to a userspace compat siginfo. >> >> Call that function copy_siginfo_to_compat and factor it out of >> copy_siginfo_to_user32. > > I find it a pitty to do that. > > The existing function could have been easily converted to using > user_access_begin() + user_access_end() and use unsafe_put_user() to copy > to userspace to avoid copying through a temporary structure on the stack. > > With your change, it becomes impossible to do that. As Eric said we need a struct to clear all padding. Note that I though about converting to unsafe_copy_to_user in my variant as we can pretty easily do that if pre-filling the structure earlier. But I didn't want to throw in such unrelated changes for now - I'll volunteer to do it later, though.
On Sat, Apr 18, 2020 at 06:55:56AM -0500, Eric W. Biederman wrote: > > Is that really an issue to use that set_fs() in the coredump code ? > > Using set_fs() is pretty bad and something that we would like to remove > from the kernel entirely. The fewer instances of set_fs() we have the > better. > > I forget all of the details but set_fs() is both a type violation and an > attack point when people are attacking the kernel. The existence of > set_fs() requires somethings that should be constants to be variables. > Something about that means that our current code is difficult to protect > from spectre style vulnerabilities. Yes, set_fs requires variable based address checking in the uaccess routines for architectures with a shared address space, or even entirely different code for architectures with separate kernel and user address spaces. My plan is to hopefully kill set_fs in its current form a few merge windows down the road. We'll probably still need some form of it to e.g. mark a thread as kernel thread vs also being able to execute user code, but it will be much ore limited than before, called from very few places and actually be a no-op for many architectures.
Le 19/04/2020 à 10:13, Christoph Hellwig a écrit : > On Sat, Apr 18, 2020 at 06:55:56AM -0500, Eric W. Biederman wrote: >>> Is that really an issue to use that set_fs() in the coredump code ? >> >> Using set_fs() is pretty bad and something that we would like to remove >> from the kernel entirely. The fewer instances of set_fs() we have the >> better. >> >> I forget all of the details but set_fs() is both a type violation and an >> attack point when people are attacking the kernel. The existence of >> set_fs() requires somethings that should be constants to be variables. >> Something about that means that our current code is difficult to protect >> from spectre style vulnerabilities. > > Yes, set_fs requires variable based address checking in the uaccess > routines for architectures with a shared address space, or even entirely > different code for architectures with separate kernel and user address > spaces. My plan is to hopefully kill set_fs in its current form a few > merge windows down the road. We'll probably still need some form of > it to e.g. mark a thread as kernel thread vs also being able to execute > user code, but it will be much ore limited than before, called from very > few places and actually be a no-op for many architectures. > Oh nice. Some time ago I proposed a patch to change set_fs() to a flip/flop flag based logic, see https://patchwork.ozlabs.org/project/linuxppc-dev/patch/dd2876b808ea38eb7b7f760ecd6ce06096c61fb5.1580295551.git.christophe.leroy@c-s.fr/ But if we manage to get rid of it completely, that's even better.
Le 18/04/2020 à 13:55, Eric W. Biederman a écrit : > Christophe Leroy <christophe.leroy@c-s.fr> writes: > >> Le 17/04/2020 à 23:09, Eric W. Biederman a écrit : >>> >>> To remove the use of set_fs in the coredump code there needs to be a >>> way to convert a kernel siginfo to a userspace compat siginfo. >>> >>> Call that function copy_siginfo_to_compat and factor it out of >>> copy_siginfo_to_user32. >> >> I find it a pitty to do that. >> >> The existing function could have been easily converted to using >> user_access_begin() + user_access_end() and use unsafe_put_user() to copy to >> userspace to avoid copying through a temporary structure on the stack. >> >> With your change, it becomes impossible to do that. > > I don't follow. You don't like temporary structures in the coredump > code or temporary structures in copy_siginfo_to_user32? In copy_siginfo_to_user32() > > A temporary structure in copy_siginfo_to_user is pretty much required > so that it can be zeroed to guarantee we don't pass a structure with > holes to userspace. Why ? We can zeroize the user structure directly, either with clear_user() or with some not yet existing unsafe_clear_user() or equivalent. > > The implementation of copy_siginfo_to_user32 used to use the equivalent > of user_access_begin() and user_access_end() and the code was a mess > that was very difficult to reason about. I recall their being holes > in the structure that were being copied to userspace. > > Meanwhile if you are going to set all of the bytes a cache hot temporary > structure is quite cheap. But how can we be sure it is cache hot ? As we are using memset() to zeroize it, it won't be loaded from memory as it will use dcbz instruction, but at some point in time it will get flushed back to memory, that's consuming anyway. Unless we invalidate it after the copy, but that becomes complex. Christophe
diff --git a/include/linux/compat.h b/include/linux/compat.h index 0480ba4db592..4962b254e550 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -402,6 +402,7 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask, unsigned long bitmap_size); long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask, unsigned long bitmap_size); +void copy_siginfo_to_external32(struct compat_siginfo *to, const struct kernel_siginfo *from); int copy_siginfo_from_user32(kernel_siginfo_t *to, const struct compat_siginfo __user *from); int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginfo_t *from); int get_compat_sigevent(struct sigevent *event, diff --git a/kernel/signal.c b/kernel/signal.c index e58a6c619824..578f196898cb 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3235,90 +3235,106 @@ int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from) } #ifdef CONFIG_COMPAT -int copy_siginfo_to_user32(struct compat_siginfo __user *to, - const struct kernel_siginfo *from) -#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) +void copy_siginfo_to_external32(struct compat_siginfo *to, + const struct kernel_siginfo *from) { - return __copy_siginfo_to_user32(to, from, in_x32_syscall()); -} -int __copy_siginfo_to_user32(struct compat_siginfo __user *to, - const struct kernel_siginfo *from, bool x32_ABI) -#endif -{ - struct compat_siginfo new; - memset(&new, 0, sizeof(new)); + /* + * This function does not work properly for SIGCHLD on x32, + * but it does not need to as SIGCHLD never causes a coredump. + */ + memset(to, 0, sizeof(*to)); - new.si_signo = from->si_signo; - new.si_errno = from->si_errno; - new.si_code = from->si_code; + to->si_signo = from->si_signo; + to->si_errno = from->si_errno; + to->si_code = from->si_code; switch(siginfo_layout(from->si_signo, from->si_code)) { case SIL_KILL: - new.si_pid = from->si_pid; - new.si_uid = from->si_uid; + to->si_pid = from->si_pid; + to->si_uid = from->si_uid; break; case SIL_TIMER: - new.si_tid = from->si_tid; - new.si_overrun = from->si_overrun; - new.si_int = from->si_int; + to->si_tid = from->si_tid; + to->si_overrun = from->si_overrun; + to->si_int = from->si_int; break; case SIL_POLL: - new.si_band = from->si_band; - new.si_fd = from->si_fd; + to->si_band = from->si_band; + to->si_fd = from->si_fd; break; case SIL_FAULT: - new.si_addr = ptr_to_compat(from->si_addr); + to->si_addr = ptr_to_compat(from->si_addr); #ifdef __ARCH_SI_TRAPNO - new.si_trapno = from->si_trapno; + to->si_trapno = from->si_trapno; #endif break; case SIL_FAULT_MCEERR: - new.si_addr = ptr_to_compat(from->si_addr); + to->si_addr = ptr_to_compat(from->si_addr); #ifdef __ARCH_SI_TRAPNO - new.si_trapno = from->si_trapno; + to->si_trapno = from->si_trapno; #endif - new.si_addr_lsb = from->si_addr_lsb; + to->si_addr_lsb = from->si_addr_lsb; break; case SIL_FAULT_BNDERR: - new.si_addr = ptr_to_compat(from->si_addr); + to->si_addr = ptr_to_compat(from->si_addr); #ifdef __ARCH_SI_TRAPNO - new.si_trapno = from->si_trapno; + to->si_trapno = from->si_trapno; #endif - new.si_lower = ptr_to_compat(from->si_lower); - new.si_upper = ptr_to_compat(from->si_upper); + to->si_lower = ptr_to_compat(from->si_lower); + to->si_upper = ptr_to_compat(from->si_upper); break; case SIL_FAULT_PKUERR: - new.si_addr = ptr_to_compat(from->si_addr); + to->si_addr = ptr_to_compat(from->si_addr); #ifdef __ARCH_SI_TRAPNO - new.si_trapno = from->si_trapno; + to->si_trapno = from->si_trapno; #endif - new.si_pkey = from->si_pkey; + to->si_pkey = from->si_pkey; break; case SIL_CHLD: - new.si_pid = from->si_pid; - new.si_uid = from->si_uid; - new.si_status = from->si_status; + to->si_pid = from->si_pid; + to->si_uid = from->si_uid; + to->si_status = from->si_status; + to->si_utime = from->si_utime; + to->si_stime = from->si_stime; #ifdef CONFIG_X86_X32_ABI if (x32_ABI) { - new._sifields._sigchld_x32._utime = from->si_utime; - new._sifields._sigchld_x32._stime = from->si_stime; + to->_sifields._sigchld_x32._utime = from->si_utime; + to->_sifields._sigchld_x32._stime = from->si_stime; } else #endif { - new.si_utime = from->si_utime; - new.si_stime = from->si_stime; } break; case SIL_RT: - new.si_pid = from->si_pid; - new.si_uid = from->si_uid; - new.si_int = from->si_int; + to->si_pid = from->si_pid; + to->si_uid = from->si_uid; + to->si_int = from->si_int; break; case SIL_SYS: - new.si_call_addr = ptr_to_compat(from->si_call_addr); - new.si_syscall = from->si_syscall; - new.si_arch = from->si_arch; + to->si_call_addr = ptr_to_compat(from->si_call_addr); + to->si_syscall = from->si_syscall; + to->si_arch = from->si_arch; break; } +} + +int copy_siginfo_to_user32(struct compat_siginfo __user *to, + const struct kernel_siginfo *from) +#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) +{ + return __copy_siginfo_to_user32(to, from, in_x32_syscall()); +} +int __copy_siginfo_to_user32(struct compat_siginfo __user *to, + const struct kernel_siginfo *from, bool x32_ABI) +#endif +{ + struct compat_siginfo new; + copy_siginfo_to_external32(&new, from); +#ifdef CONFIG_X86_X32_ABI + if (x32_ABI && from->si_signo == SIGCHLD) { + new._sifields._sigchld_x32._utime = from->si_utime; + new._sifields._sigchld_x32._stime = from->si_stime; + } +#endif if (copy_to_user(to, &new, sizeof(struct compat_siginfo))) return -EFAULT;
To remove the use of set_fs in the coredump code there needs to be a way to convert a kernel siginfo to a userspace compat siginfo. Call that function copy_siginfo_to_compat and factor it out of copy_siginfo_to_user32. The existence of x32 complicates this code. On x32 SIGCHLD uses 64bit times for utime and stime. As only SIGCHLD is affected and SIGCHLD never causes a coredump I have avoided handling that case. Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> --- include/linux/compat.h | 1 + kernel/signal.c | 108 +++++++++++++++++++++++------------------ 2 files changed, 63 insertions(+), 46 deletions(-)