diff mbox

[v1,1/4] syscalls: Restore address limit after a syscall

Message ID 20170308213844.131877-1-thgarnie@google.com (mailing list archive)
State New, archived
Headers show

Commit Message

Thomas Garnier March 8, 2017, 9:38 p.m. UTC
This patch prevents a syscall from modifying the address limit of the
caller. The address limit is kept by the syscall wrapper and restored
just after the syscall ends.

For example, it would mitigate this bug:

- https://bugs.chromium.org/p/project-zero/issues/detail?id=990

By default, this change warns if the segment is incorrect while
returning to user-mode and fixes it. The
CONFIG_VERIFY_PRE_USERMODE_STATE_BUG option can be enabled to halt
instead if needed.

The CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE option is also
added so each architecture can optimize how the
verify_pre_usermode_state function is called.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170308
---
 include/linux/syscalls.h | 19 +++++++++++++++++++
 init/Kconfig             | 16 ++++++++++++++++
 kernel/sys.c             | 11 +++++++++++
 3 files changed, 46 insertions(+)

Comments

Kees Cook March 8, 2017, 9:57 p.m. UTC | #1
On Wed, Mar 8, 2017 at 1:38 PM, Thomas Garnier <thgarnie@google.com> wrote:
> This patch prevents a syscall to modify the address limit of the
> caller. The address limit is kept by the syscall wrapper and restored
> just after the syscall ends.
>
> For example, it would mitigation this bug:
>
> - https://bugs.chromium.org/p/project-zero/issues/detail?id=990
>
> By default, this change warns if the segment is incorrect while
> returning to user-mode and fix it. The
> CONFIG_VERIFY_PRE_USERMODE_STATE_BUG option can be enabled to halt
> instead if needed.

Instead of this new config, please reuse the CHECK_DATA_CORRUPTION
test instead, which already controls very similar WARN vs BUG
behavior. Example below...

>
> The CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE option is also
> added so each architecture can optimize how the
> verify_pre_usermode_state function is called.
>
> Signed-off-by: Thomas Garnier <thgarnie@google.com>
> ---
> Based on next-20170308
> ---
>  include/linux/syscalls.h | 19 +++++++++++++++++++
>  init/Kconfig             | 16 ++++++++++++++++
>  kernel/sys.c             | 11 +++++++++++
>  3 files changed, 46 insertions(+)
>
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index 980c3c9b06f8..78a2268ecd6e 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -191,6 +191,22 @@ extern struct trace_event_functions exit_syscall_print_funcs;
>         SYSCALL_METADATA(sname, x, __VA_ARGS__)                 \
>         __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
>
> +asmlinkage void verify_pre_usermode_state(void);
> +
> +#ifndef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
> +static inline bool has_user_ds(void) {
> +       bool ret = segment_eq(get_fs(), USER_DS);
> +       // Prevent re-ordering the call
> +       barrier();
> +       return ret;
> +}
> +#else
> +static inline bool has_user_ds(void) {
> +       return false;
> +}
> +#endif
> +
> +
>  #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__)
>  #define __SYSCALL_DEFINEx(x, name, ...)                                        \
>         asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))       \
> @@ -199,7 +215,10 @@ extern struct trace_event_functions exit_syscall_print_funcs;
>         asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));      \
>         asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))       \
>         {                                                               \
> +               bool user_caller = has_user_ds();                       \
>                 long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));  \
> +               if (user_caller)                                        \
> +                       verify_pre_usermode_state();                    \
>                 __MAP(x,__SC_TEST,__VA_ARGS__);                         \
>                 __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));       \
>                 return ret;                                             \
> diff --git a/init/Kconfig b/init/Kconfig
> index c859c993c26f..ab958b59063f 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1929,6 +1929,22 @@ config PROFILING
>  config TRACEPOINTS
>         bool
>
> +#
> +# Set by each architecture that want to optimize how verify_pre_usermode_state
> +# is called.
> +#
> +config ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
> +       bool
> +
> +config VERIFY_PRE_USERMODE_STATE_BUG
> +       bool "Halt on incorrect state on returning to user-mode"
> +       default n
> +       help
> +         By default a warning is logged and the state is fixed. This option
> +         crashes the kernel instead.
> +
> +         If unsure, say Y.
> +
>  source "arch/Kconfig"
>
>  endmenu                # General setup
> diff --git a/kernel/sys.c b/kernel/sys.c
> index 196c7134bee6..cc2ebf7fae55 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -2459,3 +2459,14 @@ COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
>         return 0;
>  }
>  #endif /* CONFIG_COMPAT */
> +
> +/* Called before coming back to user-mode */
> +asmlinkage void verify_pre_usermode_state(void)
> +{
> +#ifdef CONFIG_VERIFY_PRE_USERMODE_STATE_BUG
> +       BUG_ON(!segment_eq(get_fs(), USER_DS));
> +#else
> +       if (WARN_ON(!segment_eq(get_fs(), USER_DS)))
> +               set_fs(USER_DS);
> +#endif

I would just make this:

if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS)))
    set_fs(USER_DS);

-Kees
Russell King (Oracle) March 8, 2017, 9:58 p.m. UTC | #2
On Wed, Mar 08, 2017 at 01:38:41PM -0800, Thomas Garnier wrote:
> This patch prevents a syscall to modify the address limit of the
> caller. The address limit is kept by the syscall wrapper and restored
> just after the syscall ends.

I would much rather architectures were given the opportunity to code up
checks like this efficiently (iow, inline in the exit path assembly),
rather than having to unconditionally call an additional function on
every syscall, with its register saving overheads.
Andy Lutomirski March 8, 2017, 10:20 p.m. UTC | #3
On Wed, Mar 8, 2017 at 1:58 PM, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
> On Wed, Mar 08, 2017 at 01:38:41PM -0800, Thomas Garnier wrote:
>> This patch prevents a syscall to modify the address limit of the
>> caller. The address limit is kept by the syscall wrapper and restored
>> just after the syscall ends.
>
> I would much rather architectures were given the opportunity to code up
> checks like this efficiently (iow, inline in the exit path assembly),
> rather than having to unconditionally call an additional function on
> every syscall, with its register saving overheads.
>

Me too.  I think the two config choices should be:

(a) BUG_ON(!segment_eq(...));

(b) No generic check at all -- arch code will handle it

--Andy
Thomas Garnier March 8, 2017, 10:27 p.m. UTC | #4
That makes sense. I will optimize each architecture to not require a call.

On Wed, Mar 8, 2017 at 2:20 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Wed, Mar 8, 2017 at 1:58 PM, Russell King - ARM Linux
> <linux@armlinux.org.uk> wrote:
>> On Wed, Mar 08, 2017 at 01:38:41PM -0800, Thomas Garnier wrote:
>>> This patch prevents a syscall to modify the address limit of the
>>> caller. The address limit is kept by the syscall wrapper and restored
>>> just after the syscall ends.
>>
>> I would much rather architectures were given the opportunity to code up
>> checks like this efficiently (iow, inline in the exit path assembly),
>> rather than having to unconditionally call an additional function on
>> every syscall, with its register saving overheads.
>>
>
> Me too.  I think the two config choices should be:
>
> (a) BUG_ON(!segment_eq(...));
>
> (b) No generic check at all -- arch code will handle it
>
> --Andy
Thomas Garnier March 9, 2017, 1:13 a.m. UTC | #5
On Wed, Mar 8, 2017 at 1:57 PM, Kees Cook <keescook@chromium.org> wrote:
> On Wed, Mar 8, 2017 at 1:38 PM, Thomas Garnier <thgarnie@google.com> wrote:
>> This patch prevents a syscall to modify the address limit of the
>> caller. The address limit is kept by the syscall wrapper and restored
>> just after the syscall ends.
>>
>> For example, it would mitigation this bug:
>>
>> - https://bugs.chromium.org/p/project-zero/issues/detail?id=990
>>
>> By default, this change warns if the segment is incorrect while
>> returning to user-mode and fix it. The
>> CONFIG_VERIFY_PRE_USERMODE_STATE_BUG option can be enabled to halt
>> instead if needed.
>
> Instead of this new config, please reuse the CHECK_DATA_CORRUPTION
> test instead, which already controls very similar WARN vs BUG
> behavior. Example below...
>
>>
>> The CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE option is also
>> added so each architecture can optimize how the
>> verify_pre_usermode_state function is called.
>>
>> Signed-off-by: Thomas Garnier <thgarnie@google.com>
>> ---
>> Based on next-20170308
>> ---
>>  include/linux/syscalls.h | 19 +++++++++++++++++++
>>  init/Kconfig             | 16 ++++++++++++++++
>>  kernel/sys.c             | 11 +++++++++++
>>  3 files changed, 46 insertions(+)
>>
>> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
>> index 980c3c9b06f8..78a2268ecd6e 100644
>> --- a/include/linux/syscalls.h
>> +++ b/include/linux/syscalls.h
>> @@ -191,6 +191,22 @@ extern struct trace_event_functions exit_syscall_print_funcs;
>>         SYSCALL_METADATA(sname, x, __VA_ARGS__)                 \
>>         __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
>>
>> +asmlinkage void verify_pre_usermode_state(void);
>> +
>> +#ifndef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>> +static inline bool has_user_ds(void) {
>> +       bool ret = segment_eq(get_fs(), USER_DS);
>> +       // Prevent re-ordering the call
>> +       barrier();
>> +       return ret;
>> +}
>> +#else
>> +static inline bool has_user_ds(void) {
>> +       return false;
>> +}
>> +#endif
>> +
>> +
>>  #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__)
>>  #define __SYSCALL_DEFINEx(x, name, ...)                                        \
>>         asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))       \
>> @@ -199,7 +215,10 @@ extern struct trace_event_functions exit_syscall_print_funcs;
>>         asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));      \
>>         asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))       \
>>         {                                                               \
>> +               bool user_caller = has_user_ds();                       \
>>                 long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));  \
>> +               if (user_caller)                                        \
>> +                       verify_pre_usermode_state();                    \
>>                 __MAP(x,__SC_TEST,__VA_ARGS__);                         \
>>                 __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));       \
>>                 return ret;                                             \
>> diff --git a/init/Kconfig b/init/Kconfig
>> index c859c993c26f..ab958b59063f 100644
>> --- a/init/Kconfig
>> +++ b/init/Kconfig
>> @@ -1929,6 +1929,22 @@ config PROFILING
>>  config TRACEPOINTS
>>         bool
>>
>> +#
>> +# Set by each architecture that want to optimize how verify_pre_usermode_state
>> +# is called.
>> +#
>> +config ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>> +       bool
>> +
>> +config VERIFY_PRE_USERMODE_STATE_BUG
>> +       bool "Halt on incorrect state on returning to user-mode"
>> +       default n
>> +       help
>> +         By default a warning is logged and the state is fixed. This option
>> +         crashes the kernel instead.
>> +
>> +         If unsure, say Y.
>> +
>>  source "arch/Kconfig"
>>
>>  endmenu                # General setup
>> diff --git a/kernel/sys.c b/kernel/sys.c
>> index 196c7134bee6..cc2ebf7fae55 100644
>> --- a/kernel/sys.c
>> +++ b/kernel/sys.c
>> @@ -2459,3 +2459,14 @@ COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
>>         return 0;
>>  }
>>  #endif /* CONFIG_COMPAT */
>> +
>> +/* Called before coming back to user-mode */
>> +asmlinkage void verify_pre_usermode_state(void)
>> +{
>> +#ifdef CONFIG_VERIFY_PRE_USERMODE_STATE_BUG
>> +       BUG_ON(!segment_eq(get_fs(), USER_DS));
>> +#else
>> +       if (WARN_ON(!segment_eq(get_fs(), USER_DS)))
>> +               set_fs(USER_DS);
>> +#endif
>
> I would just make this:
>
> if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS))
>     set_fs(USER_DS);
>

Makes sense, I will remove my custom CONFIG and use that one instead
(still doing inline assembly if not set).

> -Kees
>
>
> --
> Kees Cook
> Pixel Security
diff mbox

Patch

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 980c3c9b06f8..78a2268ecd6e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -191,6 +191,22 @@  extern struct trace_event_functions exit_syscall_print_funcs;
 	SYSCALL_METADATA(sname, x, __VA_ARGS__)			\
 	__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
 
+asmlinkage void verify_pre_usermode_state(void);
+
+#ifndef CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
+static inline bool has_user_ds(void) {
+	bool ret = segment_eq(get_fs(), USER_DS);
+	// Prevent re-ordering the call
+	barrier();
+	return ret;
+}
+#else
+static inline bool has_user_ds(void) {
+	return false;
+}
+#endif
+
+
 #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__)
 #define __SYSCALL_DEFINEx(x, name, ...)					\
 	asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))	\
@@ -199,7 +215,10 @@  extern struct trace_event_functions exit_syscall_print_funcs;
 	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
 	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))	\
 	{								\
+		bool user_caller = has_user_ds();			\
 		long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
+		if (user_caller)					\
+			verify_pre_usermode_state();			\
 		__MAP(x,__SC_TEST,__VA_ARGS__);				\
 		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));	\
 		return ret;						\
diff --git a/init/Kconfig b/init/Kconfig
index c859c993c26f..ab958b59063f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1929,6 +1929,22 @@  config PROFILING
 config TRACEPOINTS
 	bool
 
+#
+# Set by each architecture that wants to optimize how verify_pre_usermode_state
+# is called.
+#
+config ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
+	bool
+
+config VERIFY_PRE_USERMODE_STATE_BUG
+	bool "Halt on incorrect state on returning to user-mode"
+	default n
+	help
+	  By default a warning is logged and the state is fixed. This option
+	  crashes the kernel instead.
+
+	  If unsure, say N.
+
 source "arch/Kconfig"
 
 endmenu		# General setup
diff --git a/kernel/sys.c b/kernel/sys.c
index 196c7134bee6..cc2ebf7fae55 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2459,3 +2459,14 @@  COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
 	return 0;
 }
 #endif /* CONFIG_COMPAT */
+
+/* Called before coming back to user-mode */
+asmlinkage void verify_pre_usermode_state(void)
+{
+#ifdef CONFIG_VERIFY_PRE_USERMODE_STATE_BUG
+	BUG_ON(!segment_eq(get_fs(), USER_DS));
+#else
+	if (WARN_ON(!segment_eq(get_fs(), USER_DS)))
+		set_fs(USER_DS);
+#endif
+}