diff mbox

Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state

Message ID CAJcbSZGrD2q7ymUSFNMtvwL+JUzbQ5WNA=MU6tpJ6XVnfJipcw@mail.gmail.com
State New, archived
Headers show

Commit Message

Thomas Garnier March 15, 2017, 5:43 p.m. UTC
Thanks for the feedback. I will look into inlining by default (looking
at code size on different arch), the updated patch for x86 in the
meantime:

Comments

Thomas Garnier March 22, 2017, 7:15 p.m. UTC | #1
On Wed, Mar 15, 2017 at 10:43 AM, Thomas Garnier <thgarnie@google.com> wrote:
> Thanks for the feedback. I will look into inlining by default (looking
> at code size on different arch), the updated patch for x86 in the
> meantime:

I did a couple of checks and it doesn't seem worth it. I will send a v4
with the change below for additional feedback.

> ===========
>
> Implement specific usage of verify_pre_usermode_state for user-mode
> returns for x86.
> ---
> Based on next-20170308
> ---
>  arch/x86/Kconfig                        |  1 +
>  arch/x86/entry/common.c                 |  3 +++
>  arch/x86/entry/entry_64.S               |  8 ++++++++
>  arch/x86/include/asm/pgtable_64_types.h | 11 +++++++++++
>  arch/x86/include/asm/processor.h        | 11 -----------
>  5 files changed, 23 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 005df7c825f5..6d48e18e6f09 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -63,6 +63,7 @@ config X86
>   select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
>   select ARCH_MIGHT_HAVE_PC_PARPORT
>   select ARCH_MIGHT_HAVE_PC_SERIO
> + select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
>   select ARCH_SUPPORTS_ATOMIC_RMW
>   select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
>   select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index 370c42c7f046..525edbb77f03 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -22,6 +22,7 @@
>  #include <linux/context_tracking.h>
>  #include <linux/user-return-notifier.h>
>  #include <linux/uprobes.h>
> +#include <linux/syscalls.h>
>
>  #include <asm/desc.h>
>  #include <asm/traps.h>
> @@ -180,6 +181,8 @@ __visible inline void
> prepare_exit_to_usermode(struct pt_regs *regs)
>   struct thread_info *ti = current_thread_info();
>   u32 cached_flags;
>
> + verify_pre_usermode_state();
> +
>   if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
>   local_irq_disable();
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index d2b2a2948ffe..c079b010205c 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -218,6 +218,14 @@ entry_SYSCALL_64_fastpath:
>   testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
>   jnz 1f
>
> + /*
> + * If address limit is not based on user-mode, jump to slow path for
> + * additional security checks.
> + */
> + movq $TASK_SIZE_MAX, %rcx
> + cmp %rcx, TASK_addr_limit(%r11)
> + jnz 1f
> +
>   LOCKDEP_SYS_EXIT
>   TRACE_IRQS_ON /* user mode is traced as IRQs on */
>   movq RIP(%rsp), %rcx
> diff --git a/arch/x86/include/asm/pgtable_64_types.h
> b/arch/x86/include/asm/pgtable_64_types.h
> index 3a264200c62f..0fbbb79d058c 100644
> --- a/arch/x86/include/asm/pgtable_64_types.h
> +++ b/arch/x86/include/asm/pgtable_64_types.h
> @@ -76,4 +76,15 @@ typedef struct { pteval_t pte; } pte_t;
>
>  #define EARLY_DYNAMIC_PAGE_TABLES 64
>
> +/*
> + * User space process size. 47bits minus one guard page.  The guard
> + * page is necessary on Intel CPUs: if a SYSCALL instruction is at
> + * the highest possible canonical userspace address, then that
> + * syscall will enter the kernel with a non-canonical return
> + * address, and SYSRET will explode dangerously.  We avoid this
> + * particular problem by preventing anything from being mapped
> + * at the maximum canonical address.
> + */
> +#define TASK_SIZE_MAX ((_AC(1, UL) << 47) - PAGE_SIZE)
> +
>  #endif /* _ASM_X86_PGTABLE_64_DEFS_H */
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index f385eca5407a..9bc99d37133e 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -829,17 +829,6 @@ static inline void spin_lock_prefetch(const void *x)
>  #define KSTK_ESP(task) (task_pt_regs(task)->sp)
>
>  #else
> -/*
> - * User space process size. 47bits minus one guard page.  The guard
> - * page is necessary on Intel CPUs: if a SYSCALL instruction is at
> - * the highest possible canonical userspace address, then that
> - * syscall will enter the kernel with a non-canonical return
> - * address, and SYSRET will explode dangerously.  We avoid this
> - * particular problem by preventing anything from being mapped
> - * at the maximum canonical address.
> - */
> -#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
> -
>  /* This decides where the kernel will search for a free chunk of vm
>   * space during mmap's.
>   */
> --
> 2.12.0.367.g23dc2f6d3c-goog
>
>
> On Tue, Mar 14, 2017 at 10:53 AM, H. Peter Anvin <hpa@zytor.com> wrote:
>> On 03/14/17 09:51, Thomas Garnier wrote:
>>>>
>>>> I wanted to comment on that thing: why on earth isn't
>>>> verify_pre_usermode_state() an inline?  Making it an out-of-line
>>>> function adds pointless extra overhead to the C code when we are talking
>>>> about a few instructions.
>>>
>>> Because outside of the arch-specific implementation it is called by each
>>> syscall handler. It will increase the code size a lot.
>>>
>>
>> Don't assume that.  On a lot of architectures a function call can be
>> more expensive than a simple compare and branch, because the compiler
>> has to assume a whole bunch of registers are lost at that point.
>>
>> Either way, don't penalize the common architectures for it.  Not okay.
>>
>>         -hpa
>>
>
>
>
> --
> Thomas
H. Peter Anvin March 22, 2017, 8:21 p.m. UTC | #2
On 03/22/17 12:15, Thomas Garnier wrote:
> On Wed, Mar 15, 2017 at 10:43 AM, Thomas Garnier <thgarnie@google.com> wrote:
>> Thanks for the feedback. I will look into inlining by default (looking
>> at code size on different arch), the updated patch for x86 in the
>> meantime:
> 
> I did a couple of checks and it doesn't seem worth it. I will send a v4
> with the change below for additional feedback.

Can you specify what that means?

On x86, where there is only one caller of this, it really seems like it
ought to reduce the overhead to almost zero (since it most likely is
hidden in the pipeline.)

I would like to suggest defining it inline if
CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE is set; I really don't
care about an architecture which doesn't have it.

Note that the upcoming 5-level paging (LA57) support will make
TASK_SIZE_MAX dependent on the CPU.  The best way to handle that is
probably to tag this immediate with an assembly alternative rather than
loading it from a memory variable (most likely taking a cache hit.)

	-hpa
Thomas Garnier March 22, 2017, 8:41 p.m. UTC | #3
On Wed, Mar 22, 2017 at 1:21 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 03/22/17 12:15, Thomas Garnier wrote:
>> On Wed, Mar 15, 2017 at 10:43 AM, Thomas Garnier <thgarnie@google.com> wrote:
>>> Thanks for the feedback. I will look into inlining by default (looking
>>> at code size on different arch), the updated patch for x86 in the
>>> meantime:
>>
>> I did a couple of checks and it doesn't seem worth it. I will send a v4
>> with the change below for additional feedback.
>
> Can you specify what that means?

If I set inline by default, the compiler chose not to inline it on
x86. If I forced inlining, the size impact was actually bigger (without
the architecture-specific code).

>
> On x86, where there is only one caller of this, it really seems like it
> ought to reduce the overhead to almost zero (since it most likely is
> hidden in the pipeline.)
>
> I would like to suggest defining it inline if
> CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE is set; I really don't
> care about an architecture which doesn't have it.

But if there is only one caller, isn't the compiler supposed to
inline the function based on options?

The assembly will call it too, so I would need an inline and a
non-inline based on the caller.

>
> Note that the upcoming 5-level paging (LA57) support will make
> TASK_SIZE_MAX dependent on the CPU.  The best way to handle that is
> probably to tag this immediate with an assembly alternative rather than
> loading it from a memory variable (most likely taking a cache hit.)
>
>         -hpa
>
H. Peter Anvin March 22, 2017, 8:49 p.m. UTC | #4
On 03/22/17 13:41, Thomas Garnier wrote:
>>> with the change below for additional feedback.
>>
>> Can you specify what that means?
> 
> If I set inline by default, the compiler chose not to inline it on
> x86. If I force inline the size impact was actually bigger (without
> the architecture specific code).
> 

That's utterly bizarre.  Something strange is going on there.  I suspect
the right thing to do is to out-of-line the error case only, but even
that seems strange.  It should be something like four instructions inline.

>>
>> On x86, where there is only one caller of this, it really seems like it
>> ought to reduce the overhead to almost zero (since it most likely is
>> hidden in the pipeline.)
>>
>> I would like to suggest defining it inline if
>> CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE is set; I really don't
>> care about an architecture which doesn't have it.
> 
> But if there is only one caller, isn't the compiler supposed to
> inline the function based on options?

If it is marked static in the same file, yes, but you have it in a
different file from what I can tell.

> The assembly will call it too, so I would need an inline and a
> non-inline based on the caller.

Where?  I don't see that anywhere, at least for x86.

	-hpa
Thomas Garnier March 22, 2017, 9:11 p.m. UTC | #5
On Wed, Mar 22, 2017 at 1:49 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 03/22/17 13:41, Thomas Garnier wrote:
>>>> with the change below for additional feedback.
>>>
>>> Can you specify what that means?
>>
>> If I set inline by default, the compiler chose not to inline it on
>> x86. If I force inline the size impact was actually bigger (without
>> the architecture specific code).
>>
>
> That's utterly bizarre.  Something strange is going on there.  I suspect
> the right thing to do is to out-of-line the error case only, but even
> that seems strange.  It should be something like four instructions inline.
>

The compiler seemed to often inline other functions called by the
syscall handlers. I assume the growth was due to changes in code
optimization because the function is much larger at the end.

>>>
>>> On x86, where there is only one caller of this, it really seems like it
>>> ought to reduce the overhead to almost zero (since it most likely is
>>> hidden in the pipeline.)
>>>
>>> I would like to suggest defining it inline if
>>> CONFIG_ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE is set; I really don't
>>> care about an architecture which doesn't have it.
>>
>> But if there is only one caller, isn't the compiler supposed to
>> inline the function based on options?
>
> If it is marked static in the same file, yes, but you have it in a
> different file from what I can tell.

If we do global optimization, it should. Having it as a static inline
makes it easier on all types of builds.

>
>> The assembly will call it too, so I would need an inline and a
>> non-inline based on the caller.
>
> Where?  I don't see that anywhere, at least for x86.

After the latest changes on x86, yes. On arm/arm64, we call it with
the CHECK_DATA_CORRUPTION config.

>
>         -hpa
>
diff mbox

Patch

===========

Implement specific usage of verify_pre_usermode_state for user-mode
returns for x86.
---
Based on next-20170308
---
 arch/x86/Kconfig                        |  1 +
 arch/x86/entry/common.c                 |  3 +++
 arch/x86/entry/entry_64.S               |  8 ++++++++
 arch/x86/include/asm/pgtable_64_types.h | 11 +++++++++++
 arch/x86/include/asm/processor.h        | 11 -----------
 5 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 005df7c825f5..6d48e18e6f09 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -63,6 +63,7 @@  config X86
  select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
  select ARCH_MIGHT_HAVE_PC_PARPORT
  select ARCH_MIGHT_HAVE_PC_SERIO
+ select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
  select ARCH_SUPPORTS_ATOMIC_RMW
  select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
  select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 370c42c7f046..525edbb77f03 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -22,6 +22,7 @@ 
 #include <linux/context_tracking.h>
 #include <linux/user-return-notifier.h>
 #include <linux/uprobes.h>
+#include <linux/syscalls.h>

 #include <asm/desc.h>
 #include <asm/traps.h>
@@ -180,6 +181,8 @@  __visible inline void
prepare_exit_to_usermode(struct pt_regs *regs)
  struct thread_info *ti = current_thread_info();
  u32 cached_flags;

+ verify_pre_usermode_state();
+
  if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
  local_irq_disable();

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d2b2a2948ffe..c079b010205c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -218,6 +218,14 @@  entry_SYSCALL_64_fastpath:
  testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
  jnz 1f

+ /*
+ * If address limit is not based on user-mode, jump to slow path for
+ * additional security checks.
+ */
+ movq $TASK_SIZE_MAX, %rcx
+ cmp %rcx, TASK_addr_limit(%r11)
+ jnz 1f
+
  LOCKDEP_SYS_EXIT
  TRACE_IRQS_ON /* user mode is traced as IRQs on */
  movq RIP(%rsp), %rcx
diff --git a/arch/x86/include/asm/pgtable_64_types.h
b/arch/x86/include/asm/pgtable_64_types.h
index 3a264200c62f..0fbbb79d058c 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -76,4 +76,15 @@  typedef struct { pteval_t pte; } pte_t;

 #define EARLY_DYNAMIC_PAGE_TABLES 64

+/*
+ * User space process size. 47bits minus one guard page.  The guard
+ * page is necessary on Intel CPUs: if a SYSCALL instruction is at
+ * the highest possible canonical userspace address, then that
+ * syscall will enter the kernel with a non-canonical return
+ * address, and SYSRET will explode dangerously.  We avoid this
+ * particular problem by preventing anything from being mapped
+ * at the maximum canonical address.
+ */
+#define TASK_SIZE_MAX ((_AC(1, UL) << 47) - PAGE_SIZE)
+
 #endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f385eca5407a..9bc99d37133e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -829,17 +829,6 @@  static inline void spin_lock_prefetch(const void *x)
 #define KSTK_ESP(task) (task_pt_regs(task)->sp)

 #else
-/*
- * User space process size. 47bits minus one guard page.  The guard
- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
- * the highest possible canonical userspace address, then that
- * syscall will enter the kernel with a non-canonical return
- * address, and SYSRET will explode dangerously.  We avoid this
- * particular problem by preventing anything from being mapped
- * at the maximum canonical address.
- */
-#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
-
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */