diff mbox

[v3,3/4] arm/syscalls: Optimize address limit check

Message ID 20170814213732.104301-3-thgarnie@google.com (mailing list archive)
State New, archived
Headers show

Commit Message

Thomas Garnier Aug. 14, 2017, 9:37 p.m. UTC
Disable the generic address limit check in favor of an architecture
specific optimized implementation. The generic implementation using
pending work flags did not work well with ARM and alignment faults.

The address limit is checked on each syscall return path to user-mode
as well as in the irq user-mode return function. If the address limit
was changed, a function is called to report data corruption (stopping
the kernel or process based on configuration).

The address limit check has to be done before any pending work, because
that work can reset the address limit and the process is killed using a
SIGKILL signal. For example, the lkdtm address limit check does not work
because the signal to kill the process will reset the user-mode address
limit.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
 arch/arm/kernel/entry-common.S | 11 +++++++++++
 arch/arm/kernel/signal.c       |  7 +++++++
 2 files changed, 18 insertions(+)

Comments

Thomas Garnier Aug. 22, 2017, 4:42 p.m. UTC | #1
On Mon, Aug 14, 2017 at 2:37 PM, Thomas Garnier <thgarnie@google.com> wrote:
> Disable the generic address limit check in favor of an architecture
> specific optimized implementation. The generic implementation using
> pending work flags did not work well with ARM and alignment faults.
>
> The address limit is checked on each syscall return path to user-mode
> path as well as the irq user-mode return function. If the address limit
> was changed, a function is called to report data corruption (stopping
> the kernel or process based on configuration).
>
> The address limit check has to be done before any pending work because
> they can reset the address limit and the process is killed using a
> SIGKILL signal. For example the lkdtm address limit check does not work
> because the signal to kill the process will reset the user-mode address
> limit.
>
> Signed-off-by: Thomas Garnier <thgarnie@google.com>

Any feedback?

> ---
>  arch/arm/kernel/entry-common.S | 11 +++++++++++
>  arch/arm/kernel/signal.c       |  7 +++++++
>  2 files changed, 18 insertions(+)
>
> diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
> index 0b60adf4a5d9..99c908226065 100644
> --- a/arch/arm/kernel/entry-common.S
> +++ b/arch/arm/kernel/entry-common.S
> @@ -12,6 +12,7 @@
>  #include <asm/unistd.h>
>  #include <asm/ftrace.h>
>  #include <asm/unwind.h>
> +#include <asm/memory.h>
>  #ifdef CONFIG_AEABI
>  #include <asm/unistd-oabi.h>
>  #endif
> @@ -48,10 +49,14 @@ ret_fast_syscall:
>   UNWIND(.fnstart       )
>   UNWIND(.cantunwind    )
>         disable_irq_notrace                     @ disable interrupts
> +       ldr     r2, [tsk, #TI_ADDR_LIMIT]
> +       cmp     r2, #TASK_SIZE
> +       blne    addr_limit_check_failed
>         ldr     r1, [tsk, #TI_FLAGS]            @ re-check for syscall tracing
>         tst     r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
>         bne     fast_work_pending
>
> +
>         /* perform architecture specific actions before user return */
>         arch_ret_to_user r1, lr
>
> @@ -74,6 +79,9 @@ ret_fast_syscall:
>   UNWIND(.cantunwind    )
>         str     r0, [sp, #S_R0 + S_OFF]!        @ save returned r0
>         disable_irq_notrace                     @ disable interrupts
> +       ldr     r2, [tsk, #TI_ADDR_LIMIT]
> +       cmp     r2, #TASK_SIZE
> +       blne    addr_limit_check_failed
>         ldr     r1, [tsk, #TI_FLAGS]            @ re-check for syscall tracing
>         tst     r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
>         beq     no_work_pending
> @@ -106,6 +114,9 @@ ENTRY(ret_to_user)
>  ret_slow_syscall:
>         disable_irq_notrace                     @ disable interrupts
>  ENTRY(ret_to_user_from_irq)
> +       ldr     r2, [tsk, #TI_ADDR_LIMIT]
> +       cmp     r2, #TASK_SIZE
> +       blne    addr_limit_check_failed
>         ldr     r1, [tsk, #TI_FLAGS]
>         tst     r1, #_TIF_WORK_MASK
>         bne     slow_work_pending
> diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
> index 5814298ef0b7..b67ae12503f3 100644
> --- a/arch/arm/kernel/signal.c
> +++ b/arch/arm/kernel/signal.c
> @@ -14,6 +14,7 @@
>  #include <linux/uaccess.h>
>  #include <linux/tracehook.h>
>  #include <linux/uprobes.h>
> +#include <linux/syscalls.h>
>
>  #include <asm/elf.h>
>  #include <asm/cacheflush.h>
> @@ -673,3 +674,9 @@ struct page *get_signal_page(void)
>
>         return page;
>  }
> +
> +/* Defer to generic check */
> +asmlinkage void addr_limit_check_failed(void)
> +{
> +       addr_limit_user_check();
> +}
> --
> 2.14.1.480.gb18f417b89-goog
>
Thomas Garnier Aug. 29, 2017, 2:32 p.m. UTC | #2
On Tue, Aug 22, 2017 at 9:42 AM, Thomas Garnier <thgarnie@google.com> wrote:
> On Mon, Aug 14, 2017 at 2:37 PM, Thomas Garnier <thgarnie@google.com> wrote:
>> Disable the generic address limit check in favor of an architecture
>> specific optimized implementation. The generic implementation using
>> pending work flags did not work well with ARM and alignment faults.
>>
>> The address limit is checked on each syscall return path to user-mode
>> path as well as the irq user-mode return function. If the address limit
>> was changed, a function is called to report data corruption (stopping
>> the kernel or process based on configuration).
>>
>> The address limit check has to be done before any pending work because
>> they can reset the address limit and the process is killed using a
>> SIGKILL signal. For example the lkdtm address limit check does not work
>> because the signal to kill the process will reset the user-mode address
>> limit.
>>
>> Signed-off-by: Thomas Garnier <thgarnie@google.com>
>
> Any feedback?

CCing LW@karo-electronics.de, who experienced the same issue that this
patch proposal fixes.

Russell: Any feedback?

>
>> ---
>>  arch/arm/kernel/entry-common.S | 11 +++++++++++
>>  arch/arm/kernel/signal.c       |  7 +++++++
>>  2 files changed, 18 insertions(+)
>>
>> diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
>> index 0b60adf4a5d9..99c908226065 100644
>> --- a/arch/arm/kernel/entry-common.S
>> +++ b/arch/arm/kernel/entry-common.S
>> @@ -12,6 +12,7 @@
>>  #include <asm/unistd.h>
>>  #include <asm/ftrace.h>
>>  #include <asm/unwind.h>
>> +#include <asm/memory.h>
>>  #ifdef CONFIG_AEABI
>>  #include <asm/unistd-oabi.h>
>>  #endif
>> @@ -48,10 +49,14 @@ ret_fast_syscall:
>>   UNWIND(.fnstart       )
>>   UNWIND(.cantunwind    )
>>         disable_irq_notrace                     @ disable interrupts
>> +       ldr     r2, [tsk, #TI_ADDR_LIMIT]
>> +       cmp     r2, #TASK_SIZE
>> +       blne    addr_limit_check_failed
>>         ldr     r1, [tsk, #TI_FLAGS]            @ re-check for syscall tracing
>>         tst     r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
>>         bne     fast_work_pending
>>
>> +
>>         /* perform architecture specific actions before user return */
>>         arch_ret_to_user r1, lr
>>
>> @@ -74,6 +79,9 @@ ret_fast_syscall:
>>   UNWIND(.cantunwind    )
>>         str     r0, [sp, #S_R0 + S_OFF]!        @ save returned r0
>>         disable_irq_notrace                     @ disable interrupts
>> +       ldr     r2, [tsk, #TI_ADDR_LIMIT]
>> +       cmp     r2, #TASK_SIZE
>> +       blne    addr_limit_check_failed
>>         ldr     r1, [tsk, #TI_FLAGS]            @ re-check for syscall tracing
>>         tst     r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
>>         beq     no_work_pending
>> @@ -106,6 +114,9 @@ ENTRY(ret_to_user)
>>  ret_slow_syscall:
>>         disable_irq_notrace                     @ disable interrupts
>>  ENTRY(ret_to_user_from_irq)
>> +       ldr     r2, [tsk, #TI_ADDR_LIMIT]
>> +       cmp     r2, #TASK_SIZE
>> +       blne    addr_limit_check_failed
>>         ldr     r1, [tsk, #TI_FLAGS]
>>         tst     r1, #_TIF_WORK_MASK
>>         bne     slow_work_pending
>> diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
>> index 5814298ef0b7..b67ae12503f3 100644
>> --- a/arch/arm/kernel/signal.c
>> +++ b/arch/arm/kernel/signal.c
>> @@ -14,6 +14,7 @@
>>  #include <linux/uaccess.h>
>>  #include <linux/tracehook.h>
>>  #include <linux/uprobes.h>
>> +#include <linux/syscalls.h>
>>
>>  #include <asm/elf.h>
>>  #include <asm/cacheflush.h>
>> @@ -673,3 +674,9 @@ struct page *get_signal_page(void)
>>
>>         return page;
>>  }
>> +
>> +/* Defer to generic check */
>> +asmlinkage void addr_limit_check_failed(void)
>> +{
>> +       addr_limit_user_check();
>> +}
>> --
>> 2.14.1.480.gb18f417b89-goog
>>
>
>
>
> --
> Thomas
Kees Cook Aug. 29, 2017, 7:54 p.m. UTC | #3
On Tue, Aug 29, 2017 at 7:32 AM, Thomas Garnier <thgarnie@google.com> wrote:
> On Tue, Aug 22, 2017 at 9:42 AM, Thomas Garnier <thgarnie@google.com> wrote:
>> On Mon, Aug 14, 2017 at 2:37 PM, Thomas Garnier <thgarnie@google.com> wrote:
>>> Disable the generic address limit check in favor of an architecture
>>> specific optimized implementation. The generic implementation using
>>> pending work flags did not work well with ARM and alignment faults.
>>>
>>> The address limit is checked on each syscall return path to user-mode
>>> path as well as the irq user-mode return function. If the address limit
>>> was changed, a function is called to report data corruption (stopping
>>> the kernel or process based on configuration).
>>>
>>> The address limit check has to be done before any pending work because
>>> they can reset the address limit and the process is killed using a
>>> SIGKILL signal. For example the lkdtm address limit check does not work
>>> because the signal to kill the process will reset the user-mode address
>>> limit.
>>>
>>> Signed-off-by: Thomas Garnier <thgarnie@google.com>
>>
>> Any feedback?
>
> CCing LW@karo-electronics.de who experienced the same issue this patch
> proposal fix.
>
> Russell: Any feedback?

These implement Russell's suggestion. An Ack here would be nice. :) I
can't throw these into the ARM patch tracker because they depend on
stuff in -next (and the commit that needs to be reverted is in tglx's
tree).

Regardless, these all test out correctly for me, so:

Reviewed-by: Kees Cook <keescook@chromium.org>
Tested-by: Kees Cook <keescook@chromium.org>

In a perfect world, these 4 patches should go together with the other
address limit check patches in tglx's tree. Thomas (Gleixner), can you
update your tree for the merge window? At the very least, we need to
revert 73ac5d6a2b6ac ("arm/syscalls: Check address limit on user-mode
return"), which has caused infinite loops in some cases. Better to
take all 4 patches in this series, though.

Thanks!

-Kees
Leonard Crestez Sept. 5, 2017, 10:46 a.m. UTC | #4
On Tue, 2017-08-29 at 12:54 -0700, Kees Cook wrote:
> On Tue, Aug 29, 2017 at 7:32 AM, Thomas Garnier <thgarnie@google.com> wrote:
> > On Tue, Aug 22, 2017 at 9:42 AM, Thomas Garnier <thgarnie@google.com> wrote:
> > > On Mon, Aug 14, 2017 at 2:37 PM, Thomas Garnier <thgarnie@google.com> wrote:
> > > > 
> > > > Disable the generic address limit check in favor of an architecture
> > > > specific optimized implementation. The generic implementation using
> > > > pending work flags did not work well with ARM and alignment faults.
> > > > 
> > > > The address limit is checked on each syscall return path to user-mode
> > > > path as well as the irq user-mode return function. If the address limit
> > > > was changed, a function is called to report data corruption (stopping
> > > > the kernel or process based on configuration).
> > > > 
> > > > The address limit check has to be done before any pending work because
> > > > they can reset the address limit and the process is killed using a
> > > > SIGKILL signal. For example the lkdtm address limit check does not work
> > > > because the signal to kill the process will reset the user-mode address
> > > > limit.
> > > > 
> > > > Signed-off-by: Thomas Garnier <thgarnie@google.com>

> > > Any feedback?

> > CCing LW@karo-electronics.de who experienced the same issue this patch
> > proposal fix.
> > 
> > Russell: Any feedback?

> These implement Russell's suggestion. An Ack here would be nice. :) I
> can't throw these into the ARM patch tracker because they depend on
> stuff in -next (and the commit that needs to be reverted is in tglx's
> tree).
> 
> Regardless, these all test out correctly for me, so:
> 
> Reviewed-by: Kees Cook <keescook@chromium.org>
> Tested-by: Kees Cook <keescook@chromium.org>
> 
> In a perfect world, these 4 patches should go together with the other
> address limit check patches in tglx's tree. Thomas (Gleixner), can you
> update your tree for the merge window? At the very least, we need to
> revert 73ac5d6a2b6ac ("arm/syscalls: Check address limit on user-mode
> return"), which has caused infinite loops in some cases. Better to
> take all 4 patches in this series, though.

I also reported this infinite loop issue, several weeks ago:

https://lkml.org/lkml/2017/7/18/702

It seems that no fix was committed since then and the buggy patch made
its way into Linus's tree after the 4.13 release.

Perhaps when there is long debate about the "proper" fix the original
patch should be reverted first, separately? In this particular case the
series fixing the bug actually includes the revert.

Anyway, I checked that this v3 works on my board which was reproducing
the issue while booting from nfs (imx6sl-evk). The most likely reason
it's easy to reproduce here is a network driver issue where headers are
not correctly aligned to 4. This causes lots of alignment faults.

Tested-by: Leonard Crestez <leonard.crestez@nxp.com>

--
Regards,
Leonard
diff mbox

Patch

diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 0b60adf4a5d9..99c908226065 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -12,6 +12,7 @@ 
 #include <asm/unistd.h>
 #include <asm/ftrace.h>
 #include <asm/unwind.h>
+#include <asm/memory.h>
 #ifdef CONFIG_AEABI
 #include <asm/unistd-oabi.h>
 #endif
@@ -48,10 +49,14 @@  ret_fast_syscall:
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	disable_irq_notrace			@ disable interrupts
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
+	cmp	r2, #TASK_SIZE
+	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	bne	fast_work_pending
 
+
 	/* perform architecture specific actions before user return */
 	arch_ret_to_user r1, lr
 
@@ -74,6 +79,9 @@  ret_fast_syscall:
  UNWIND(.cantunwind	)
 	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
 	disable_irq_notrace			@ disable interrupts
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
+	cmp	r2, #TASK_SIZE
+	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	beq	no_work_pending
@@ -106,6 +114,9 @@  ENTRY(ret_to_user)
 ret_slow_syscall:
 	disable_irq_notrace			@ disable interrupts
 ENTRY(ret_to_user_from_irq)
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
+	cmp	r2, #TASK_SIZE
+	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]
 	tst	r1, #_TIF_WORK_MASK
 	bne	slow_work_pending
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 5814298ef0b7..b67ae12503f3 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -14,6 +14,7 @@ 
 #include <linux/uaccess.h>
 #include <linux/tracehook.h>
 #include <linux/uprobes.h>
+#include <linux/syscalls.h>
 
 #include <asm/elf.h>
 #include <asm/cacheflush.h>
@@ -673,3 +674,9 @@  struct page *get_signal_page(void)
 
 	return page;
 }
+
+/* Defer to generic check */
+asmlinkage void addr_limit_check_failed(void)
+{
+	addr_limit_user_check();
+}