
[v6,2/4] x86/syscalls: Specific usage of verify_pre_usermode_state

Message ID 20170404174727.35478-2-thgarnie@google.com (mailing list archive)
State New, archived

Commit Message

Thomas Garnier April 4, 2017, 5:47 p.m. UTC
Implement specific usage of verify_pre_usermode_state for user-mode
returns for x86.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170404
---
 arch/x86/Kconfig                        |  1 +
 arch/x86/entry/common.c                 |  3 +++
 arch/x86/entry/entry_64.S               |  8 ++++++++
 arch/x86/include/asm/pgtable_64_types.h | 11 +++++++++++
 arch/x86/include/asm/processor.h        | 11 -----------
 5 files changed, 23 insertions(+), 11 deletions(-)

Comments

H. Peter Anvin April 4, 2017, 6:11 p.m. UTC | #1
On 04/04/17 10:47, Thomas Garnier wrote:
> Implement specific usage of verify_pre_usermode_state for user-mode
> returns for x86.
> 
> Signed-off-by: Thomas Garnier <thgarnie@google.com>
>  
> +	/*
> +	 * If address limit is not based on user-mode, jump to slow path for
> +	 * additional security checks.
> +	 */
> +	movq	$TASK_SIZE_MAX, %rcx
> +	cmp	%rcx, TASK_addr_limit(%r11)
> +	jnz	1f
> +
>  	LOCKDEP_SYS_EXIT

Nitpick: use jne, not jnz, when comparing for equality.  Same instruction,
but more readable.

	-hpa
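
Both spellings assemble to the same bytes (0x75 rel8, or 0x0f 0x85 rel32),
so the point is purely readability; a minimal sketch with hypothetical
operands:

	cmp	%rcx, %rdx
	jne	1f	/* same encoding as jnz, but reads as the
			 * equality test it actually is */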
H. Peter Anvin April 4, 2017, 6:27 p.m. UTC | #2
On 04/04/17 10:47, Thomas Garnier wrote:
> diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
> index 516593e66bd6..12fa851c7fa8 100644
> --- a/arch/x86/include/asm/pgtable_64_types.h
> +++ b/arch/x86/include/asm/pgtable_64_types.h
> @@ -78,4 +78,15 @@ typedef struct { pteval_t pte; } pte_t;
>  
>  #define EARLY_DYNAMIC_PAGE_TABLES	64
>  
> +/*
> + * User space process size. 47bits minus one guard page.  The guard
> + * page is necessary on Intel CPUs: if a SYSCALL instruction is at
> + * the highest possible canonical userspace address, then that
> + * syscall will enter the kernel with a non-canonical return
> + * address, and SYSRET will explode dangerously.  We avoid this
> + * particular problem by preventing anything from being mapped
> + * at the maximum canonical address.
> + */
> +#define TASK_SIZE_MAX	((_AC(1, UL) << 47) - PAGE_SIZE)
> +
>  #endif /* _ASM_X86_PGTABLE_64_DEFS_H */
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index 3cada998a402..e80822582d3e 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -825,17 +825,6 @@ static inline void spin_lock_prefetch(const void *x)
>  #define KSTK_ESP(task)		(task_pt_regs(task)->sp)
>  
>  #else
> -/*
> - * User space process size. 47bits minus one guard page.  The guard
> - * page is necessary on Intel CPUs: if a SYSCALL instruction is at
> - * the highest possible canonical userspace address, then that
> - * syscall will enter the kernel with a non-canonical return
> - * address, and SYSRET will explode dangerously.  We avoid this
> - * particular problem by preventing anything from being mapped
> - * at the maximum canonical address.
> - */
> -#define TASK_SIZE_MAX	((1UL << 47) - PAGE_SIZE)
> -
>  /* This decides where the kernel will search for a free chunk of vm
>   * space during mmap's.
>   */
> 

This should be an entirely separate patch; if nothing else, you need to
explain it in the comments.

Also, you say this is for "x86", but I still don't see any code for i386
whatsoever.  Have you verified *all* the i386 and i386-compat paths to
make sure they go via prepare_exit_to_usermode()?  [Cc: Andy]

Finally, I can't really believe I'm the only person for whom "Specific
usage of verify_pre_usermode_state" is completely opaque.

	-hpa
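
To make the arithmetic in the quoted guard-page comment concrete, here is a
standalone userspace sketch (PAGE_SIZE is hardcoded to 4 KiB as an
assumption; the kernel derives it from its page-size configuration):

#include <stdio.h>

#define PAGE_SIZE	4096UL			/* assumed 4 KiB pages */
#define TASK_SIZE_MAX	((1UL << 47) - PAGE_SIZE)

int main(void)
{
	/* Top of userspace: one guard page below the canonical boundary. */
	printf("TASK_SIZE_MAX       = %#lx\n", TASK_SIZE_MAX);

	/* 1 << 47 is the first non-canonical address.  A SYSCALL mapped at
	 * the very top of userspace would save this as its return RIP, and
	 * SYSRET with a non-canonical RIP faults while still in the kernel. */
	printf("first non-canonical = %#lx\n", 1UL << 47);
	return 0;
}

This prints TASK_SIZE_MAX = 0x7ffffffff000 and first non-canonical =
0x800000000000, which is why nothing may be mapped on the top page.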
Borislav Petkov April 4, 2017, 6:54 p.m. UTC | #3
On Tue, Apr 04, 2017 at 11:27:07AM -0700, H. Peter Anvin wrote:
> Finally, I can't really believe I'm the only person for whom "Specific
> usage of verify_pre_usermode_state" is completely opaque.

No, you're not. I'm missing the usual commit message layout: "Problem
is A, we need to do B, because of C." And this particular one needs to
be pretty verbose, as it is tricky low-level userspace-return code, and
I'd prefer not to have to piece together myself what's going on and
what we're fixing here.
Thomas Garnier April 4, 2017, 7:21 p.m. UTC | #4
On Tue, Apr 4, 2017 at 11:27 AM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 04/04/17 10:47, Thomas Garnier wrote:
>> diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
>> index 516593e66bd6..12fa851c7fa8 100644
>> --- a/arch/x86/include/asm/pgtable_64_types.h
>> +++ b/arch/x86/include/asm/pgtable_64_types.h
>> @@ -78,4 +78,15 @@ typedef struct { pteval_t pte; } pte_t;
>>
>>  #define EARLY_DYNAMIC_PAGE_TABLES    64
>>
>> +/*
>> + * User space process size. 47bits minus one guard page.  The guard
>> + * page is necessary on Intel CPUs: if a SYSCALL instruction is at
>> + * the highest possible canonical userspace address, then that
>> + * syscall will enter the kernel with a non-canonical return
>> + * address, and SYSRET will explode dangerously.  We avoid this
>> + * particular problem by preventing anything from being mapped
>> + * at the maximum canonical address.
>> + */
>> +#define TASK_SIZE_MAX        ((_AC(1, UL) << 47) - PAGE_SIZE)
>> +
>>  #endif /* _ASM_X86_PGTABLE_64_DEFS_H */
>> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
>> index 3cada998a402..e80822582d3e 100644
>> --- a/arch/x86/include/asm/processor.h
>> +++ b/arch/x86/include/asm/processor.h
>> @@ -825,17 +825,6 @@ static inline void spin_lock_prefetch(const void *x)
>>  #define KSTK_ESP(task)               (task_pt_regs(task)->sp)
>>
>>  #else
>> -/*
>> - * User space process size. 47bits minus one guard page.  The guard
>> - * page is necessary on Intel CPUs: if a SYSCALL instruction is at
>> - * the highest possible canonical userspace address, then that
>> - * syscall will enter the kernel with a non-canonical return
>> - * address, and SYSRET will explode dangerously.  We avoid this
>> - * particular problem by preventing anything from being mapped
>> - * at the maximum canonical address.
>> - */
>> -#define TASK_SIZE_MAX        ((1UL << 47) - PAGE_SIZE)
>> -
>>  /* This decides where the kernel will search for a free chunk of vm
>>   * space during mmap's.
>>   */
>>
>
> This should be an entirely separate patch; if nothing else, you need to
> explain it in the comments.

I will explain it in the commit message; that should be easier than a
separate patch.

>
> Also, you say this is for "x86", but I still don't see any code for i386
> whatsoever.  Have you verified *all* the i386 and i386-compat paths to
> make sure they go via prepare_exit_to_usermode()?  [Cc: Andy]

I did, but I will do it again for the next iteration.

>
> Finally, I can't really believe I'm the only person for whom "Specific
> usage of verify_pre_usermode_state" is completely opaque.

I agree, I will improve it.

>
>         -hpa
>

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9a5af1e1cd61..3d7eb9298f2d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -63,6 +63,7 @@  config X86
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
+	select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
 	select ARCH_SUPPORTS_NUMA_BALANCING	if X86_64
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index cdefcfdd9e63..76ef050255c9 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -23,6 +23,7 @@ 
 #include <linux/user-return-notifier.h>
 #include <linux/uprobes.h>
 #include <linux/livepatch.h>
+#include <linux/syscalls.h>
 
 #include <asm/desc.h>
 #include <asm/traps.h>
@@ -183,6 +184,8 @@  __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 	struct thread_info *ti = current_thread_info();
 	u32 cached_flags;
 
+	verify_pre_usermode_state();
+
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
 		local_irq_disable();
 
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d2b2a2948ffe..c079b010205c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -218,6 +218,14 @@  entry_SYSCALL_64_fastpath:
 	testl	$_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
 	jnz	1f
 
+	/*
+	 * If address limit is not based on user-mode, jump to slow path for
+	 * additional security checks.
+	 */
+	movq	$TASK_SIZE_MAX, %rcx
+	cmp	%rcx, TASK_addr_limit(%r11)
+	jnz	1f
+
 	LOCKDEP_SYS_EXIT
 	TRACE_IRQS_ON		/* user mode is traced as IRQs on */
 	movq	RIP(%rsp), %rcx
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 516593e66bd6..12fa851c7fa8 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -78,4 +78,15 @@  typedef struct { pteval_t pte; } pte_t;
 
 #define EARLY_DYNAMIC_PAGE_TABLES	64
 
+/*
+ * User space process size. 47bits minus one guard page.  The guard
+ * page is necessary on Intel CPUs: if a SYSCALL instruction is at
+ * the highest possible canonical userspace address, then that
+ * syscall will enter the kernel with a non-canonical return
+ * address, and SYSRET will explode dangerously.  We avoid this
+ * particular problem by preventing anything from being mapped
+ * at the maximum canonical address.
+ */
+#define TASK_SIZE_MAX	((_AC(1, UL) << 47) - PAGE_SIZE)
+
 #endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3cada998a402..e80822582d3e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -825,17 +825,6 @@  static inline void spin_lock_prefetch(const void *x)
 #define KSTK_ESP(task)		(task_pt_regs(task)->sp)
 
 #else
-/*
- * User space process size. 47bits minus one guard page.  The guard
- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
- * the highest possible canonical userspace address, then that
- * syscall will enter the kernel with a non-canonical return
- * address, and SYSRET will explode dangerously.  We avoid this
- * particular problem by preventing anything from being mapped
- * at the maximum canonical address.
- */
-#define TASK_SIZE_MAX	((1UL << 47) - PAGE_SIZE)
-
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
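
The fast-path assembly above only filters: if TASK_addr_limit(%r11) is
anything other than TASK_SIZE_MAX, it branches to the existing slow path,
where prepare_exit_to_usermode() now calls verify_pre_usermode_state().
That helper is introduced in patch 1/4 of this series and is not shown on
this page; the body below is only a hedged reconstruction of what the
x86-64 check amounts to, not the series' actual code:

#include <linux/bug.h>
#include <linux/uaccess.h>	/* get_fs(), segment_eq(), USER_DS */

/* Hedged sketch -- the real verify_pre_usermode_state() lives in patch
 * 1/4 of the series.  On x86-64 the limit of USER_DS is TASK_SIZE_MAX,
 * so this is the C counterpart of the cmp in entry_SYSCALL_64_fastpath. */
static inline void verify_pre_usermode_state(void)
{
	if (unlikely(!segment_eq(get_fs(), USER_DS)))
		BUG();	/* a set_fs(KERNEL_DS) leaked to the user-mode return */
}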