diff mbox series

[1/2] signal: Factor copy_siginfo_to_external32 from copy_siginfo_to_user32

Message ID 87k12dakfx.fsf_-_@x220.int.ebiederm.org (mailing list archive)
State New, archived
Headers show
Series [1/2] signal: Factor copy_siginfo_to_external32 from copy_siginfo_to_user32 | expand

Commit Message

Eric W. Biederman April 17, 2020, 9:09 p.m. UTC
To remove the use of set_fs in the coredump code there needs to be a
way to convert a kernel siginfo to a userspace compat siginfo.

Call that function copy_siginfo_to_compat and factor it out of
copy_siginfo_to_user32.

The existence of x32 complicates this code.  On x32 SIGCHLD uses 64bit
times for utime and stime.  As only SIGCHLD is affected and SIGCHLD
never causes a coredump I have avoided handling that case.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/compat.h |   1 +
 kernel/signal.c        | 108 +++++++++++++++++++++++------------------
 2 files changed, 63 insertions(+), 46 deletions(-)

Comments

Christophe Leroy April 18, 2020, 8:05 a.m. UTC | #1
Le 17/04/2020 à 23:09, Eric W. Biederman a écrit :
> 
> To remove the use of set_fs in the coredump code there needs to be a
> way to convert a kernel siginfo to a userspace compat siginfo.
> 
> Call that function copy_siginfo_to_compat and factor it out of
> copy_siginfo_to_user32.

I find it a pitty to do that.

The existing function could have been easily converted to using 
user_access_begin() + user_access_end() and use unsafe_put_user() to 
copy to userspace to avoid copying through a temporary structure on the 
stack.

With your change, it becomes impossible to do that.

Is that really an issue to use that set_fs() in the coredump code ?

Christophe

> 
> The existence of x32 complicates this code.  On x32 SIGCHLD uses 64bit
> times for utime and stime.  As only SIGCHLD is affected and SIGCHLD
> never causes a coredump I have avoided handling that case.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> ---
>   include/linux/compat.h |   1 +
>   kernel/signal.c        | 108 +++++++++++++++++++++++------------------
>   2 files changed, 63 insertions(+), 46 deletions(-)
> 
> diff --git a/include/linux/compat.h b/include/linux/compat.h
> index 0480ba4db592..4962b254e550 100644
> --- a/include/linux/compat.h
> +++ b/include/linux/compat.h
> @@ -402,6 +402,7 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
>   		       unsigned long bitmap_size);
>   long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
>   		       unsigned long bitmap_size);
> +void copy_siginfo_to_external32(struct compat_siginfo *to, const struct kernel_siginfo *from);
>   int copy_siginfo_from_user32(kernel_siginfo_t *to, const struct compat_siginfo __user *from);
>   int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginfo_t *from);
>   int get_compat_sigevent(struct sigevent *event,
> diff --git a/kernel/signal.c b/kernel/signal.c
> index e58a6c619824..578f196898cb 100644
> --- a/kernel/signal.c
> +++ b/kernel/signal.c
> @@ -3235,90 +3235,106 @@ int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from)
>   }
>   
>   #ifdef CONFIG_COMPAT
> -int copy_siginfo_to_user32(struct compat_siginfo __user *to,
> -			   const struct kernel_siginfo *from)
> -#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
> +void copy_siginfo_to_external32(struct compat_siginfo *to,
> +				const struct kernel_siginfo *from)
>   {
> -	return __copy_siginfo_to_user32(to, from, in_x32_syscall());
> -}
> -int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
> -			     const struct kernel_siginfo *from, bool x32_ABI)
> -#endif
> -{
> -	struct compat_siginfo new;
> -	memset(&new, 0, sizeof(new));
> +	/*
> +	 * This function does not work properly for SIGCHLD on x32,
> +	 * but it does not need to as SIGCHLD never causes a coredump.
> +	 */
> +	memset(to, 0, sizeof(*to));
>   
> -	new.si_signo = from->si_signo;
> -	new.si_errno = from->si_errno;
> -	new.si_code  = from->si_code;
> +	to->si_signo = from->si_signo;
> +	to->si_errno = from->si_errno;
> +	to->si_code  = from->si_code;
>   	switch(siginfo_layout(from->si_signo, from->si_code)) {
>   	case SIL_KILL:
> -		new.si_pid = from->si_pid;
> -		new.si_uid = from->si_uid;
> +		to->si_pid = from->si_pid;
> +		to->si_uid = from->si_uid;
>   		break;
>   	case SIL_TIMER:
> -		new.si_tid     = from->si_tid;
> -		new.si_overrun = from->si_overrun;
> -		new.si_int     = from->si_int;
> +		to->si_tid     = from->si_tid;
> +		to->si_overrun = from->si_overrun;
> +		to->si_int     = from->si_int;
>   		break;
>   	case SIL_POLL:
> -		new.si_band = from->si_band;
> -		new.si_fd   = from->si_fd;
> +		to->si_band = from->si_band;
> +		to->si_fd   = from->si_fd;
>   		break;
>   	case SIL_FAULT:
> -		new.si_addr = ptr_to_compat(from->si_addr);
> +		to->si_addr = ptr_to_compat(from->si_addr);
>   #ifdef __ARCH_SI_TRAPNO
> -		new.si_trapno = from->si_trapno;
> +		to->si_trapno = from->si_trapno;
>   #endif
>   		break;
>   	case SIL_FAULT_MCEERR:
> -		new.si_addr = ptr_to_compat(from->si_addr);
> +		to->si_addr = ptr_to_compat(from->si_addr);
>   #ifdef __ARCH_SI_TRAPNO
> -		new.si_trapno = from->si_trapno;
> +		to->si_trapno = from->si_trapno;
>   #endif
> -		new.si_addr_lsb = from->si_addr_lsb;
> +		to->si_addr_lsb = from->si_addr_lsb;
>   		break;
>   	case SIL_FAULT_BNDERR:
> -		new.si_addr = ptr_to_compat(from->si_addr);
> +		to->si_addr = ptr_to_compat(from->si_addr);
>   #ifdef __ARCH_SI_TRAPNO
> -		new.si_trapno = from->si_trapno;
> +		to->si_trapno = from->si_trapno;
>   #endif
> -		new.si_lower = ptr_to_compat(from->si_lower);
> -		new.si_upper = ptr_to_compat(from->si_upper);
> +		to->si_lower = ptr_to_compat(from->si_lower);
> +		to->si_upper = ptr_to_compat(from->si_upper);
>   		break;
>   	case SIL_FAULT_PKUERR:
> -		new.si_addr = ptr_to_compat(from->si_addr);
> +		to->si_addr = ptr_to_compat(from->si_addr);
>   #ifdef __ARCH_SI_TRAPNO
> -		new.si_trapno = from->si_trapno;
> +		to->si_trapno = from->si_trapno;
>   #endif
> -		new.si_pkey = from->si_pkey;
> +		to->si_pkey = from->si_pkey;
>   		break;
>   	case SIL_CHLD:
> -		new.si_pid    = from->si_pid;
> -		new.si_uid    = from->si_uid;
> -		new.si_status = from->si_status;
> +		to->si_pid    = from->si_pid;
> +		to->si_uid    = from->si_uid;
> +		to->si_status = from->si_status;
> +		to->si_utime = from->si_utime;
> +		to->si_stime = from->si_stime;
>   #ifdef CONFIG_X86_X32_ABI
>   		if (x32_ABI) {
> -			new._sifields._sigchld_x32._utime = from->si_utime;
> -			new._sifields._sigchld_x32._stime = from->si_stime;
> +			to->_sifields._sigchld_x32._utime = from->si_utime;
> +			to->_sifields._sigchld_x32._stime = from->si_stime;
>   		} else
>   #endif
>   		{
> -			new.si_utime = from->si_utime;
> -			new.si_stime = from->si_stime;
>   		}
>   		break;
>   	case SIL_RT:
> -		new.si_pid = from->si_pid;
> -		new.si_uid = from->si_uid;
> -		new.si_int = from->si_int;
> +		to->si_pid = from->si_pid;
> +		to->si_uid = from->si_uid;
> +		to->si_int = from->si_int;
>   		break;
>   	case SIL_SYS:
> -		new.si_call_addr = ptr_to_compat(from->si_call_addr);
> -		new.si_syscall   = from->si_syscall;
> -		new.si_arch      = from->si_arch;
> +		to->si_call_addr = ptr_to_compat(from->si_call_addr);
> +		to->si_syscall   = from->si_syscall;
> +		to->si_arch      = from->si_arch;
>   		break;
>   	}
> +}
> +
> +int copy_siginfo_to_user32(struct compat_siginfo __user *to,
> +			   const struct kernel_siginfo *from)
> +#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
> +{
> +	return __copy_siginfo_to_user32(to, from, in_x32_syscall());
> +}
> +int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
> +			     const struct kernel_siginfo *from, bool x32_ABI)
> +#endif
> +{
> +	struct compat_siginfo new;
> +	copy_siginfo_to_external32(&new, from);
> +#ifdef CONFIG_X86_X32_ABI
> +	if (x32_ABI && from->si_signo == SIGCHLD) {
> +		new._sifields._sigchld_x32._utime = from->si_utime;
> +		new._sifields._sigchld_x32._stime = from->si_stime;
> +	}
> +#endif
>   
>   	if (copy_to_user(to, &new, sizeof(struct compat_siginfo)))
>   		return -EFAULT;
>
Eric W. Biederman April 18, 2020, 11:55 a.m. UTC | #2
Christophe Leroy <christophe.leroy@c-s.fr> writes:

> Le 17/04/2020 à 23:09, Eric W. Biederman a écrit :
>>
>> To remove the use of set_fs in the coredump code there needs to be a
>> way to convert a kernel siginfo to a userspace compat siginfo.
>>
>> Call that function copy_siginfo_to_compat and factor it out of
>> copy_siginfo_to_user32.
>
> I find it a pitty to do that.
>
> The existing function could have been easily converted to using
> user_access_begin() + user_access_end() and use unsafe_put_user() to copy to
> userspace to avoid copying through a temporary structure on the stack.
>
> With your change, it becomes impossible to do that.

I don't follow.  You don't like temporary structures in the coredump
code or temporary structures in copy_siginfo_to_user32?

A temporary structure in copy_siginfo_to_user is pretty much required
so that it can be zeroed to guarantee we don't pass a structure with
holes to userspace.

The implementation of copy_siginfo_to_user32 used to use the equivalent
of user_access_begin() and user_access_end() and the code was a mess
that was very difficult to reason about.  I recall their being holes
in the structure that were being copied to userspace.

Meanwhile if you are going to set all of the bytes a cache hot temporary
structure is quite cheap.

> Is that really an issue to use that set_fs() in the coredump code ?

Using set_fs() is pretty bad and something that we would like to remove
from the kernel entirely.  The fewer instances of set_fs() we have the
better.

I forget all of the details but set_fs() is both a type violation and an
attack point when people are attacking the kernel.  The existence of
set_fs() requires somethings that should be constants to be variables.
Something about that means that our current code is difficult to protect
from spectre style vulnerabilities.

There was a very good thread about it all in I think 2018 but
unfortunately I can't find it now.

Eric
Christoph Hellwig April 19, 2020, 8:05 a.m. UTC | #3
On Sat, Apr 18, 2020 at 10:05:19AM +0200, Christophe Leroy wrote:
>
>
> Le 17/04/2020 à 23:09, Eric W. Biederman a écrit :
>>
>> To remove the use of set_fs in the coredump code there needs to be a
>> way to convert a kernel siginfo to a userspace compat siginfo.
>>
>> Call that function copy_siginfo_to_compat and factor it out of
>> copy_siginfo_to_user32.
>
> I find it a pitty to do that.
>
> The existing function could have been easily converted to using 
> user_access_begin() + user_access_end() and use unsafe_put_user() to copy 
> to userspace to avoid copying through a temporary structure on the stack.
>
> With your change, it becomes impossible to do that.

As Eric said we need a struct to clear all padding.  Note that I
though about converting to unsafe_copy_to_user in my variant as we
can pretty easily do that if pre-filling the structure earlier.  But
I didn't want to throw in such unrelated changes for now - I'll volunteer
to do it later, though.
Christoph Hellwig April 19, 2020, 8:13 a.m. UTC | #4
On Sat, Apr 18, 2020 at 06:55:56AM -0500, Eric W. Biederman wrote:
> > Is that really an issue to use that set_fs() in the coredump code ?
> 
> Using set_fs() is pretty bad and something that we would like to remove
> from the kernel entirely.  The fewer instances of set_fs() we have the
> better.
> 
> I forget all of the details but set_fs() is both a type violation and an
> attack point when people are attacking the kernel.  The existence of
> set_fs() requires somethings that should be constants to be variables.
> Something about that means that our current code is difficult to protect
> from spectre style vulnerabilities.

Yes, set_fs requires variable based address checking in the uaccess
routines for architectures with a shared address space, or even entirely
different code for architectures with separate kernel and user address
spaces.  My plan is to hopefully kill set_fs in its current form a few
merge windows down the road.  We'll probably still need some form of
it to e.g. mark a thread as kernel thread vs also being able to execute
user code, but it will be much ore limited than before, called from very
few places and actually be a no-op for many architectures.
Christophe Leroy April 19, 2020, 9:46 a.m. UTC | #5
Le 19/04/2020 à 10:13, Christoph Hellwig a écrit :
> On Sat, Apr 18, 2020 at 06:55:56AM -0500, Eric W. Biederman wrote:
>>> Is that really an issue to use that set_fs() in the coredump code ?
>>
>> Using set_fs() is pretty bad and something that we would like to remove
>> from the kernel entirely.  The fewer instances of set_fs() we have the
>> better.
>>
>> I forget all of the details but set_fs() is both a type violation and an
>> attack point when people are attacking the kernel.  The existence of
>> set_fs() requires somethings that should be constants to be variables.
>> Something about that means that our current code is difficult to protect
>> from spectre style vulnerabilities.
> 
> Yes, set_fs requires variable based address checking in the uaccess
> routines for architectures with a shared address space, or even entirely
> different code for architectures with separate kernel and user address
> spaces.  My plan is to hopefully kill set_fs in its current form a few
> merge windows down the road.  We'll probably still need some form of
> it to e.g. mark a thread as kernel thread vs also being able to execute
> user code, but it will be much ore limited than before, called from very
> few places and actually be a no-op for many architectures.
> 

Oh nice. Some time ago I proposed a patch to change set_fs() to a 
flip/flop flag based logic, see 
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/dd2876b808ea38eb7b7f760ecd6ce06096c61fb5.1580295551.git.christophe.leroy@c-s.fr/

But if we manage to get rid of it completely, that's even better.
Christophe Leroy April 19, 2020, 9:54 a.m. UTC | #6
Le 18/04/2020 à 13:55, Eric W. Biederman a écrit :
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
> 
>> Le 17/04/2020 à 23:09, Eric W. Biederman a écrit :
>>>
>>> To remove the use of set_fs in the coredump code there needs to be a
>>> way to convert a kernel siginfo to a userspace compat siginfo.
>>>
>>> Call that function copy_siginfo_to_compat and factor it out of
>>> copy_siginfo_to_user32.
>>
>> I find it a pitty to do that.
>>
>> The existing function could have been easily converted to using
>> user_access_begin() + user_access_end() and use unsafe_put_user() to copy to
>> userspace to avoid copying through a temporary structure on the stack.
>>
>> With your change, it becomes impossible to do that.
> 
> I don't follow.  You don't like temporary structures in the coredump
> code or temporary structures in copy_siginfo_to_user32?

In copy_siginfo_to_user32()

> 
> A temporary structure in copy_siginfo_to_user is pretty much required
> so that it can be zeroed to guarantee we don't pass a structure with
> holes to userspace.

Why ? We can zeroize the user structure directly, either with 
clear_user() or with some not yet existing unsafe_clear_user() or 
equivalent.

> 
> The implementation of copy_siginfo_to_user32 used to use the equivalent
> of user_access_begin() and user_access_end() and the code was a mess
> that was very difficult to reason about.  I recall their being holes
> in the structure that were being copied to userspace.
> 
> Meanwhile if you are going to set all of the bytes a cache hot temporary
> structure is quite cheap.

But how can we be sure it is cache hot ? As we are using memset() to 
zeroize it, it won't be loaded from memory as it will use dcbz 
instruction, but at some point in time it will get flushed back to 
memory, that's consuming anyway. Unless we invalidate it after the copy, 
but that becomes complex.

Christophe
diff mbox series

Patch

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 0480ba4db592..4962b254e550 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -402,6 +402,7 @@  long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
 		       unsigned long bitmap_size);
 long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
 		       unsigned long bitmap_size);
+void copy_siginfo_to_external32(struct compat_siginfo *to, const struct kernel_siginfo *from);
 int copy_siginfo_from_user32(kernel_siginfo_t *to, const struct compat_siginfo __user *from);
 int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginfo_t *from);
 int get_compat_sigevent(struct sigevent *event,
diff --git a/kernel/signal.c b/kernel/signal.c
index e58a6c619824..578f196898cb 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3235,90 +3235,106 @@  int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from)
 }
 
 #ifdef CONFIG_COMPAT
-int copy_siginfo_to_user32(struct compat_siginfo __user *to,
-			   const struct kernel_siginfo *from)
-#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
+void copy_siginfo_to_external32(struct compat_siginfo *to,
+				const struct kernel_siginfo *from)
 {
-	return __copy_siginfo_to_user32(to, from, in_x32_syscall());
-}
-int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
-			     const struct kernel_siginfo *from, bool x32_ABI)
-#endif
-{
-	struct compat_siginfo new;
-	memset(&new, 0, sizeof(new));
+	/*
+	 * This function does not work properly for SIGCHLD on x32,
+	 * but it does not need to as SIGCHLD never causes a coredump.
+	 */
+	memset(to, 0, sizeof(*to));
 
-	new.si_signo = from->si_signo;
-	new.si_errno = from->si_errno;
-	new.si_code  = from->si_code;
+	to->si_signo = from->si_signo;
+	to->si_errno = from->si_errno;
+	to->si_code  = from->si_code;
 	switch(siginfo_layout(from->si_signo, from->si_code)) {
 	case SIL_KILL:
-		new.si_pid = from->si_pid;
-		new.si_uid = from->si_uid;
+		to->si_pid = from->si_pid;
+		to->si_uid = from->si_uid;
 		break;
 	case SIL_TIMER:
-		new.si_tid     = from->si_tid;
-		new.si_overrun = from->si_overrun;
-		new.si_int     = from->si_int;
+		to->si_tid     = from->si_tid;
+		to->si_overrun = from->si_overrun;
+		to->si_int     = from->si_int;
 		break;
 	case SIL_POLL:
-		new.si_band = from->si_band;
-		new.si_fd   = from->si_fd;
+		to->si_band = from->si_band;
+		to->si_fd   = from->si_fd;
 		break;
 	case SIL_FAULT:
-		new.si_addr = ptr_to_compat(from->si_addr);
+		to->si_addr = ptr_to_compat(from->si_addr);
 #ifdef __ARCH_SI_TRAPNO
-		new.si_trapno = from->si_trapno;
+		to->si_trapno = from->si_trapno;
 #endif
 		break;
 	case SIL_FAULT_MCEERR:
-		new.si_addr = ptr_to_compat(from->si_addr);
+		to->si_addr = ptr_to_compat(from->si_addr);
 #ifdef __ARCH_SI_TRAPNO
-		new.si_trapno = from->si_trapno;
+		to->si_trapno = from->si_trapno;
 #endif
-		new.si_addr_lsb = from->si_addr_lsb;
+		to->si_addr_lsb = from->si_addr_lsb;
 		break;
 	case SIL_FAULT_BNDERR:
-		new.si_addr = ptr_to_compat(from->si_addr);
+		to->si_addr = ptr_to_compat(from->si_addr);
 #ifdef __ARCH_SI_TRAPNO
-		new.si_trapno = from->si_trapno;
+		to->si_trapno = from->si_trapno;
 #endif
-		new.si_lower = ptr_to_compat(from->si_lower);
-		new.si_upper = ptr_to_compat(from->si_upper);
+		to->si_lower = ptr_to_compat(from->si_lower);
+		to->si_upper = ptr_to_compat(from->si_upper);
 		break;
 	case SIL_FAULT_PKUERR:
-		new.si_addr = ptr_to_compat(from->si_addr);
+		to->si_addr = ptr_to_compat(from->si_addr);
 #ifdef __ARCH_SI_TRAPNO
-		new.si_trapno = from->si_trapno;
+		to->si_trapno = from->si_trapno;
 #endif
-		new.si_pkey = from->si_pkey;
+		to->si_pkey = from->si_pkey;
 		break;
 	case SIL_CHLD:
-		new.si_pid    = from->si_pid;
-		new.si_uid    = from->si_uid;
-		new.si_status = from->si_status;
+		to->si_pid    = from->si_pid;
+		to->si_uid    = from->si_uid;
+		to->si_status = from->si_status;
+		to->si_utime = from->si_utime;
+		to->si_stime = from->si_stime;
 #ifdef CONFIG_X86_X32_ABI
 		if (x32_ABI) {
-			new._sifields._sigchld_x32._utime = from->si_utime;
-			new._sifields._sigchld_x32._stime = from->si_stime;
+			to->_sifields._sigchld_x32._utime = from->si_utime;
+			to->_sifields._sigchld_x32._stime = from->si_stime;
 		} else
 #endif
 		{
-			new.si_utime = from->si_utime;
-			new.si_stime = from->si_stime;
 		}
 		break;
 	case SIL_RT:
-		new.si_pid = from->si_pid;
-		new.si_uid = from->si_uid;
-		new.si_int = from->si_int;
+		to->si_pid = from->si_pid;
+		to->si_uid = from->si_uid;
+		to->si_int = from->si_int;
 		break;
 	case SIL_SYS:
-		new.si_call_addr = ptr_to_compat(from->si_call_addr);
-		new.si_syscall   = from->si_syscall;
-		new.si_arch      = from->si_arch;
+		to->si_call_addr = ptr_to_compat(from->si_call_addr);
+		to->si_syscall   = from->si_syscall;
+		to->si_arch      = from->si_arch;
 		break;
 	}
+}
+
+int copy_siginfo_to_user32(struct compat_siginfo __user *to,
+			   const struct kernel_siginfo *from)
+#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
+{
+	return __copy_siginfo_to_user32(to, from, in_x32_syscall());
+}
+int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
+			     const struct kernel_siginfo *from, bool x32_ABI)
+#endif
+{
+	struct compat_siginfo new;
+	copy_siginfo_to_external32(&new, from);
+#ifdef CONFIG_X86_X32_ABI
+	if (x32_ABI && from->si_signo == SIGCHLD) {
+		new._sifields._sigchld_x32._utime = from->si_utime;
+		new._sifields._sigchld_x32._stime = from->si_stime;
+	}
+#endif
 
 	if (copy_to_user(to, &new, sizeof(struct compat_siginfo)))
 		return -EFAULT;