diff mbox

arm64: usercopy: Implement stack frame object validation

Message ID 1485351983-13873-1-git-send-email-kpark3469@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

kpark3469@gmail.com Jan. 25, 2017, 1:46 p.m. UTC
From: Sahara <keun-o.park@darkmatter.ae>

This implements arch_within_stack_frames() for arm64 that should
validate if a given object is contained by a kernel stack frame.

Signed-off-by: Sahara <keun-o.park@darkmatter.ae>
---
 arch/arm64/Kconfig                   |  1 +
 arch/arm64/include/asm/thread_info.h | 55 ++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

Comments

Will Deacon Jan. 25, 2017, 1:54 p.m. UTC | #1
[Adding Akashi, since he'a had fun and games with arm64 stack unwinding
 and I bet he can find a problem with this patch!]

On Wed, Jan 25, 2017 at 05:46:23PM +0400, kpark3469@gmail.com wrote:
> From: Sahara <keun-o.park@darkmatter.ae>
> 
> This implements arch_within_stack_frames() for arm64 that should
> validate if a given object is contained by a kernel stack frame.
> 
> Signed-off-by: Sahara <keun-o.park@darkmatter.ae>
> ---
>  arch/arm64/Kconfig                   |  1 +
>  arch/arm64/include/asm/thread_info.h | 55 ++++++++++++++++++++++++++++++++++++
>  2 files changed, 56 insertions(+)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 1117421..8bf70b4 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -97,6 +97,7 @@ config ARM64
>  	select HAVE_SYSCALL_TRACEPOINTS
>  	select HAVE_KPROBES
>  	select HAVE_KRETPROBES if HAVE_KPROBES
> +	select HAVE_ARCH_WITHIN_STACK_FRAMES
>  	select IOMMU_DMA if IOMMU_SUPPORT
>  	select IRQ_DOMAIN
>  	select IRQ_FORCED_THREADING
> diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
> index 46c3b93..f610c44 100644
> --- a/arch/arm64/include/asm/thread_info.h
> +++ b/arch/arm64/include/asm/thread_info.h
> @@ -68,7 +68,62 @@ struct thread_info {
>  #define thread_saved_fp(tsk)	\
>  	((unsigned long)(tsk->thread.cpu_context.fp))
>  
> +/*
> + * Walks up the stack frames to make sure that the specified object is
> + * entirely contained by a single stack frame.
> + *
> + * Returns:
> + *		 1 if within a frame
> + *		-1 if placed across a frame boundary (or outside stack)
> + *		 0 unable to determine (no frame pointers, etc)
> + */
> +static inline int arch_within_stack_frames(const void * const stack,
> +					   const void * const stackend,
> +					   const void *obj, unsigned long len)
> +{
> +#if defined(CONFIG_FRAME_POINTER)
> +	const void *oldframe;
> +	const void *callee_fp = NULL;
> +	const void *caller_fp = NULL;
> +
> +	oldframe = __builtin_frame_address(1);
> +	if (oldframe) {
> +		callee_fp = __builtin_frame_address(2);
> +		if (callee_fp)
> +			caller_fp = __builtin_frame_address(3);
> +	}
> +	/*
> +	 * low ----------------------------------------------> high
> +	 * [callee_fp][lr][args][local vars][caller_fp'][lr']
> +	 *                ^----------------^
> +	 *               allow copies only within here
> +	 */

Which compilers have you tested this with? The GCC folks don't guarantee a
frame layout, and they have changed it in the past, so I suspect this is
pretty fragile. In particularly, if __builtin_frame_address just points
at the frame record, then I don't think you can make assumptions about the
placement of local variables and arguments with respect to that.

Will
kpark3469@gmail.com Jan. 25, 2017, 2:44 p.m. UTC | #2
On Wed, Jan 25, 2017 at 5:54 PM, Will Deacon <will.deacon@arm.com> wrote:
> [Adding Akashi, since he'a had fun and games with arm64 stack unwinding
>  and I bet he can find a problem with this patch!]
>
> On Wed, Jan 25, 2017 at 05:46:23PM +0400, kpark3469@gmail.com wrote:
>> From: Sahara <keun-o.park@darkmatter.ae>
>>
>> This implements arch_within_stack_frames() for arm64 that should
>> validate if a given object is contained by a kernel stack frame.
>>
>> Signed-off-by: Sahara <keun-o.park@darkmatter.ae>
>> ---
>>  arch/arm64/Kconfig                   |  1 +
>>  arch/arm64/include/asm/thread_info.h | 55 ++++++++++++++++++++++++++++++++++++
>>  2 files changed, 56 insertions(+)
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index 1117421..8bf70b4 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -97,6 +97,7 @@ config ARM64
>>       select HAVE_SYSCALL_TRACEPOINTS
>>       select HAVE_KPROBES
>>       select HAVE_KRETPROBES if HAVE_KPROBES
>> +     select HAVE_ARCH_WITHIN_STACK_FRAMES
>>       select IOMMU_DMA if IOMMU_SUPPORT
>>       select IRQ_DOMAIN
>>       select IRQ_FORCED_THREADING
>> diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
>> index 46c3b93..f610c44 100644
>> --- a/arch/arm64/include/asm/thread_info.h
>> +++ b/arch/arm64/include/asm/thread_info.h
>> @@ -68,7 +68,62 @@ struct thread_info {
>>  #define thread_saved_fp(tsk) \
>>       ((unsigned long)(tsk->thread.cpu_context.fp))
>>
>> +/*
>> + * Walks up the stack frames to make sure that the specified object is
>> + * entirely contained by a single stack frame.
>> + *
>> + * Returns:
>> + *            1 if within a frame
>> + *           -1 if placed across a frame boundary (or outside stack)
>> + *            0 unable to determine (no frame pointers, etc)
>> + */
>> +static inline int arch_within_stack_frames(const void * const stack,
>> +                                        const void * const stackend,
>> +                                        const void *obj, unsigned long len)
>> +{
>> +#if defined(CONFIG_FRAME_POINTER)
>> +     const void *oldframe;
>> +     const void *callee_fp = NULL;
>> +     const void *caller_fp = NULL;
>> +
>> +     oldframe = __builtin_frame_address(1);
>> +     if (oldframe) {
>> +             callee_fp = __builtin_frame_address(2);
>> +             if (callee_fp)
>> +                     caller_fp = __builtin_frame_address(3);
>> +     }
>> +     /*
>> +      * low ----------------------------------------------> high
>> +      * [callee_fp][lr][args][local vars][caller_fp'][lr']
>> +      *                ^----------------^
>> +      *               allow copies only within here
>> +      */
>
> Which compilers have you tested this with? The GCC folks don't guarantee a
> frame layout, and they have changed it in the past, so I suspect this is
> pretty fragile. In particularly, if __builtin_frame_address just points
> at the frame record, then I don't think you can make assumptions about the
> placement of local variables and arguments with respect to that.
>
> Will

$ aarch64-linux-android-gcc --version
aarch64-linux-android-gcc (GCC) 4.9 20150123 (prerelease)

I tested this with aosp 7.1 android toolchain on Pixel.
Maybe I need a suggestion to make this robust.

Thanks.

BR
Sahara
Kees Cook Jan. 26, 2017, 12:58 a.m. UTC | #3
On Wed, Jan 25, 2017 at 6:44 AM, Keun-O Park <kpark3469@gmail.com> wrote:
> On Wed, Jan 25, 2017 at 5:54 PM, Will Deacon <will.deacon@arm.com> wrote:
>> [Adding Akashi, since he'a had fun and games with arm64 stack unwinding
>>  and I bet he can find a problem with this patch!]
>>
>> On Wed, Jan 25, 2017 at 05:46:23PM +0400, kpark3469@gmail.com wrote:
>>> From: Sahara <keun-o.park@darkmatter.ae>
>>>
>>> This implements arch_within_stack_frames() for arm64 that should
>>> validate if a given object is contained by a kernel stack frame.
>>>
>>> Signed-off-by: Sahara <keun-o.park@darkmatter.ae>

Awesome! Thanks for working on this!

>>> ---
>>>  arch/arm64/Kconfig                   |  1 +
>>>  arch/arm64/include/asm/thread_info.h | 55 ++++++++++++++++++++++++++++++++++++
>>>  2 files changed, 56 insertions(+)
>>>
>>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>>> index 1117421..8bf70b4 100644
>>> --- a/arch/arm64/Kconfig
>>> +++ b/arch/arm64/Kconfig
>>> @@ -97,6 +97,7 @@ config ARM64
>>>       select HAVE_SYSCALL_TRACEPOINTS
>>>       select HAVE_KPROBES
>>>       select HAVE_KRETPROBES if HAVE_KPROBES
>>> +     select HAVE_ARCH_WITHIN_STACK_FRAMES
>>>       select IOMMU_DMA if IOMMU_SUPPORT
>>>       select IRQ_DOMAIN
>>>       select IRQ_FORCED_THREADING
>>> diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
>>> index 46c3b93..f610c44 100644
>>> --- a/arch/arm64/include/asm/thread_info.h
>>> +++ b/arch/arm64/include/asm/thread_info.h
>>> @@ -68,7 +68,62 @@ struct thread_info {
>>>  #define thread_saved_fp(tsk) \
>>>       ((unsigned long)(tsk->thread.cpu_context.fp))
>>>
>>> +/*
>>> + * Walks up the stack frames to make sure that the specified object is
>>> + * entirely contained by a single stack frame.
>>> + *
>>> + * Returns:
>>> + *            1 if within a frame
>>> + *           -1 if placed across a frame boundary (or outside stack)
>>> + *            0 unable to determine (no frame pointers, etc)
>>> + */
>>> +static inline int arch_within_stack_frames(const void * const stack,
>>> +                                        const void * const stackend,
>>> +                                        const void *obj, unsigned long len)
>>> +{
>>> +#if defined(CONFIG_FRAME_POINTER)
>>> +     const void *oldframe;
>>> +     const void *callee_fp = NULL;
>>> +     const void *caller_fp = NULL;
>>> +
>>> +     oldframe = __builtin_frame_address(1);
>>> +     if (oldframe) {
>>> +             callee_fp = __builtin_frame_address(2);
>>> +             if (callee_fp)
>>> +                     caller_fp = __builtin_frame_address(3);
>>> +     }
>>> +     /*
>>> +      * low ----------------------------------------------> high
>>> +      * [callee_fp][lr][args][local vars][caller_fp'][lr']
>>> +      *                ^----------------^
>>> +      *               allow copies only within here
>>> +      */
>>
>> Which compilers have you tested this with? The GCC folks don't guarantee a
>> frame layout, and they have changed it in the past, so I suspect this is
>> pretty fragile. In particularly, if __builtin_frame_address just points
>> at the frame record, then I don't think you can make assumptions about the
>> placement of local variables and arguments with respect to that.

How often has it changed in the past? That seems like a strange thing
to change; either it's aligned and efficiently organized, or ... not?

>>
>> Will
>
> $ aarch64-linux-android-gcc --version
> aarch64-linux-android-gcc (GCC) 4.9 20150123 (prerelease)
>
> I tested this with aosp 7.1 android toolchain on Pixel.
> Maybe I need a suggestion to make this robust.

I wonder if some kind of BUILD_BUG_ON() magic could be used to
validate the relative positions of things on the stack? Or in the
worst case, a boot-time BUG() check...

Did you happen to test the lkdtm USERCOPY_STACK_FRAME_TO and
USERCOPY_STACK_FRAME_FROM tests to make sure they tripped correctly?

-Kees
AKASHI Takahiro Jan. 26, 2017, 7:10 a.m. UTC | #4
On Wed, Jan 25, 2017 at 01:54:10PM +0000, Will Deacon wrote:
> [Adding Akashi, since he'a had fun and games with arm64 stack unwinding
>  and I bet he can find a problem with this patch!]

I have had hard time to play with that :)

> On Wed, Jan 25, 2017 at 05:46:23PM +0400, kpark3469@gmail.com wrote:
> > From: Sahara <keun-o.park@darkmatter.ae>
> > 
> > This implements arch_within_stack_frames() for arm64 that should
> > validate if a given object is contained by a kernel stack frame.
> > 
> > Signed-off-by: Sahara <keun-o.park@darkmatter.ae>
> > ---
> >  arch/arm64/Kconfig                   |  1 +
> >  arch/arm64/include/asm/thread_info.h | 55 ++++++++++++++++++++++++++++++++++++
> >  2 files changed, 56 insertions(+)
> > 
> > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > index 1117421..8bf70b4 100644
> > --- a/arch/arm64/Kconfig
> > +++ b/arch/arm64/Kconfig
> > @@ -97,6 +97,7 @@ config ARM64
> >  	select HAVE_SYSCALL_TRACEPOINTS
> >  	select HAVE_KPROBES
> >  	select HAVE_KRETPROBES if HAVE_KPROBES
> > +	select HAVE_ARCH_WITHIN_STACK_FRAMES
> >  	select IOMMU_DMA if IOMMU_SUPPORT
> >  	select IRQ_DOMAIN
> >  	select IRQ_FORCED_THREADING
> > diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
> > index 46c3b93..f610c44 100644
> > --- a/arch/arm64/include/asm/thread_info.h
> > +++ b/arch/arm64/include/asm/thread_info.h
> > @@ -68,7 +68,62 @@ struct thread_info {
> >  #define thread_saved_fp(tsk)	\
> >  	((unsigned long)(tsk->thread.cpu_context.fp))
> >  
> > +/*
> > + * Walks up the stack frames to make sure that the specified object is
> > + * entirely contained by a single stack frame.
> > + *
> > + * Returns:
> > + *		 1 if within a frame
> > + *		-1 if placed across a frame boundary (or outside stack)
> > + *		 0 unable to determine (no frame pointers, etc)
> > + */
> > +static inline int arch_within_stack_frames(const void * const stack,
> > +					   const void * const stackend,
> > +					   const void *obj, unsigned long len)
> > +{
> > +#if defined(CONFIG_FRAME_POINTER)

nitpick: s/#if defined()/#ifdef/, or just remove this guard?

> > +	const void *oldframe;
> > +	const void *callee_fp = NULL;
> > +	const void *caller_fp = NULL;
> > +
> > +	oldframe = __builtin_frame_address(1);
> > +	if (oldframe) {
> > +		callee_fp = __builtin_frame_address(2);
> > +		if (callee_fp)
> > +			caller_fp = __builtin_frame_address(3);
> > +	}
> > +	/*
> > +	 * low ----------------------------------------------> high
> > +	 * [callee_fp][lr][args][local vars][caller_fp'][lr']
> > +	 *                ^----------------^
> > +	 *               allow copies only within here
> > +	 */
> 
> Which compilers have you tested this with? The GCC folks don't guarantee a
> frame layout, and they have changed it in the past,

I don't know whether any changes have been made before or not, but

> so I suspect this is
> pretty fragile. In particularly, if __builtin_frame_address just points
> at the frame record, then I don't think you can make assumptions about the
> placement of local variables and arguments with respect to that.
> 
> Will

Yes and no.

AAPCS64 says,
- The stack implementation is full-descending (5.2.2)
- A process may only access (for reading or writing) the closed interval
  of the entire stack delimited by [SP, stack-base - 1]. (5.2.2.1)
- The location of the frame record within a stack frame is not specified
  (5.2.3)

To my best knowledge, dynamically allocated object (local variable) may be
allocated below the current frame pointer, decrementing the stack pointer.
Take a look at a simple example:

===8<===
#include <stdio.h>
#include <stdlib.h>

int main(int ac, char **av) {
	int array_size;
	register unsigned long sp asm("sp");

	if (ac < 2) {
		printf("No argument\n");
		return 1;
	}
	array_size = atoi(av[1]);
	printf("frame pointer: %lx\n", __builtin_frame_address(0));
	printf("Before dynamic alloc: %lx\n", sp);
	{
		long array[array_size];

		printf("After dynamic alloc: %lx\n", sp);
	}

	return 0;
}
===>8===

and the result (with gcc 5.3) is:
  frame pointer:        ffffe32bacd0
  Before dynamic alloc: ffffe32bacd0
  After dynamic alloc:  ffffe32bac70

Given this fact,

> > +	/*
> > +	 * low ----------------------------------------------> high
> > +	 * [callee_fp][lr][args][local vars][caller_fp'][lr']
> > +	 *                ^----------------^
> > +	 *               allow copies only within here
> > +	 */

this picture may not always be precise in that "local variables" are
local to the callee, OR possibly local to the *caller*.
However, the range check is done here in a while loop that walks through
the whole callstack chain, and so I assume that it would work in most cases
except the case that a callee function hits that usage.

I think there are a few (not many) places of such code in the kernel,
(around net IIRC, but don' know they call usercopy functions or not).

Thanks,
-Takahiro AKASHI
James Morse Jan. 26, 2017, 3:23 p.m. UTC | #5
Hi Sahara,

On 25/01/17 13:46, kpark3469@gmail.com wrote:
> From: Sahara <keun-o.park@darkmatter.ae>
> 
> This implements arch_within_stack_frames() for arm64 that should
> validate if a given object is contained by a kernel stack frame.

Thanks for wade-ing into this, walking the stack is horribly tricky!


> diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
> index 46c3b93..f610c44 100644
> --- a/arch/arm64/include/asm/thread_info.h
> +++ b/arch/arm64/include/asm/thread_info.h
> @@ -68,7 +68,62 @@ struct thread_info {
>  #define thread_saved_fp(tsk)	\
>  	((unsigned long)(tsk->thread.cpu_context.fp))
>  
> +/*
> + * Walks up the stack frames to make sure that the specified object is
> + * entirely contained by a single stack frame.
> + *
> + * Returns:
> + *		 1 if within a frame
> + *		-1 if placed across a frame boundary (or outside stack)
> + *		 0 unable to determine (no frame pointers, etc)

(Shame the enum in mm/usercopy.c that explains these isn't exposed)


> + */
> +static inline int arch_within_stack_frames(const void * const stack,
> +					   const void * const stackend,
> +					   const void *obj, unsigned long len)
> +{
> +#if defined(CONFIG_FRAME_POINTER)
> +	const void *oldframe;
> +	const void *callee_fp = NULL;
> +	const void *caller_fp = NULL;
> +
> +	oldframe = __builtin_frame_address(1);

So we always discard _our_ callers frame. This will vary depending on the
compilers choice on whether or not to inline this function. Its either
check_stack_object() (which is declared noinline) or __check_object_size().
I think this is fine either way as obj should never be in the stack-frames of
these functions.


> +	if (oldframe) {
> +		callee_fp = __builtin_frame_address(2);
> +		if (callee_fp)
> +			caller_fp = __builtin_frame_address(3);
> +	}
> +	/*
> +	 * low ----------------------------------------------> high
> +	 * [callee_fp][lr][args][local vars][caller_fp'][lr']

These are the caller's args and local_vars right?


> +	 *                ^----------------^
> +	 *               allow copies only within here
> +	 */
> +	while (stack <= callee_fp && callee_fp < stackend) {
> +		/*
> +		 * If obj + len extends past the caller frame, this
> +		 * check won't pass and the next frame will be 0,
> +		 * causing us to bail out and correctly report
> +		 * the copy as invalid.
> +		 */
> +		if (!caller_fp) {

> +			if (obj + len <= stackend)

Isn't this always true? check_stack_object() does this:
>	if (obj < stack || stackend < obj + len)
>		return BAD_STACK;


> +				return (obj >= callee_fp + 2 * sizeof(void *)) ?
> +					1 : -1;

You do this twice (and its pretty complicated), can you make it a macro with an
explanatory name? (I think its calculating caller_frame_end from callee_fp,
having something like caller_frame_start and caller_frame_end would make this
easier to follow).


> +			else
> +				return -1;
> +		}
> +		if (obj + len <= caller_fp)
> +			return (obj >= callee_fp + 2 * sizeof(void *)) ? 1 : -1;
> +		callee_fp = caller_fp;
> +		caller_fp = *(const void * const *)caller_fp;

You test caller_fp lies within the stack next time round the loop, what happens
if it is miss-aligned? unwind_frame() tests the new fp to check it was 16-byte
aligned. If not, its probably a corrupted stack frame.

I wonder if you can make use of unwind_frame()? We have existing machinery for
walking up the stack, (it takes a callback and you can stop the walk early).
If you move this function into arch/arm64/kernel/stacktrace.c, you could make
use of walk_stackframe().

I guess you aren't using it because it doesn't give you start/end ranges for
each stack frame, I think you can get away without this, the callback would only
needs to test that the provided frame->fp doesn't lie between obj and (obj+len).
Calculating the frame start and end is extra work for the bounds test - we
already know obj is contained by the stack so its just a question of whether it
overlaps an fp record.


> +	}
> +	return -1;

check_stack_object()'s use of task_stack_page(current) means you will never see
this run on the irq-stack, so no need to worry about stepping between stacks.

This looks correct to me, walking the stack is unavoidably complex, I wonder if
we can avoid having another stack walker by using the existing framework?




Thanks,

James
Yann Droneaud Jan. 26, 2017, 4:40 p.m. UTC | #6
Hi,

Le mercredi 25 janvier 2017 à 13:54 +0000, Will Deacon a écrit :
> diff --git a/arch/arm64/include/asm/thread_info.h
> > b/arch/arm64/include/asm/thread_info.h
> > index 46c3b93..f610c44 100644
> > --- a/arch/arm64/include/asm/thread_info.h
> > +++ b/arch/arm64/include/asm/thread_info.h
> > @@ -68,7 +68,62 @@ struct thread_info {
> > +	const void *oldframe;
> > +	const void *callee_fp = NULL;
> > +	const void *caller_fp = NULL;
> > +
> > +	oldframe = __builtin_frame_address(1);
> > +	if (oldframe) {
> > +		callee_fp = __builtin_frame_address(2);
> > +		if (callee_fp)
> > +			caller_fp = __builtin_frame_address(3);
> > +	}
> > 
> Which compilers have you tested this with? The GCC folks don't
> guarantee a frame layout, and they have changed it in the past, so I
> suspect this is pretty fragile. In particularly, if
> __builtin_frame_address just points at the frame record, then I don't
> think you can make assumptions about the placement of local variables
> and arguments with respect to that.
> 

https://gcc.gnu.org/onlinedocs/gcc-6.3.0/gcc/Return-Address.html#index-
g_t_005f_005fbuiltin_005fframe_005faddress-3701

"Calling this function with a nonzero argument can have unpredictable 
 effects, including crashing the calling program. As a result, calls 
 that are considered unsafe are diagnosed when the -Wframe-address 
 option is in effect. Such calls should only be made in debugging 
 situations."
Kees Cook Jan. 26, 2017, 5:36 p.m. UTC | #7
On Thu, Jan 26, 2017 at 8:40 AM, Yann Droneaud <ydroneaud@opteya.com> wrote:
> Hi,
>
> Le mercredi 25 janvier 2017 à 13:54 +0000, Will Deacon a écrit :
>> diff --git a/arch/arm64/include/asm/thread_info.h
>> > b/arch/arm64/include/asm/thread_info.h
>> > index 46c3b93..f610c44 100644
>> > --- a/arch/arm64/include/asm/thread_info.h
>> > +++ b/arch/arm64/include/asm/thread_info.h
>> > @@ -68,7 +68,62 @@ struct thread_info {
>> > +   const void *oldframe;
>> > +   const void *callee_fp = NULL;
>> > +   const void *caller_fp = NULL;
>> > +
>> > +   oldframe = __builtin_frame_address(1);
>> > +   if (oldframe) {
>> > +           callee_fp = __builtin_frame_address(2);
>> > +           if (callee_fp)
>> > +                   caller_fp = __builtin_frame_address(3);
>> > +   }
>> >
>> Which compilers have you tested this with? The GCC folks don't
>> guarantee a frame layout, and they have changed it in the past, so I
>> suspect this is pretty fragile. In particularly, if
>> __builtin_frame_address just points at the frame record, then I don't
>> think you can make assumptions about the placement of local variables
>> and arguments with respect to that.
>>
>
> https://gcc.gnu.org/onlinedocs/gcc-6.3.0/gcc/Return-Address.html#index-
> g_t_005f_005fbuiltin_005fframe_005faddress-3701
>
> "Calling this function with a nonzero argument can have unpredictable
>  effects, including crashing the calling program. As a result, calls
>  that are considered unsafe are diagnosed when the -Wframe-address
>  option is in effect. Such calls should only be made in debugging
>  situations."

It does work, though, and given the CONFIG_FRAME_POINTER check, I
think this is fine. The kernel explicitly disables -Wframe-address
since it gets used in a number of places.

-Kees
Will Deacon Jan. 26, 2017, 5:47 p.m. UTC | #8
On Thu, Jan 26, 2017 at 09:36:44AM -0800, Kees Cook wrote:
> On Thu, Jan 26, 2017 at 8:40 AM, Yann Droneaud <ydroneaud@opteya.com> wrote:
> > Le mercredi 25 janvier 2017 à 13:54 +0000, Will Deacon a écrit :
> >> diff --git a/arch/arm64/include/asm/thread_info.h
> >> > b/arch/arm64/include/asm/thread_info.h
> >> > index 46c3b93..f610c44 100644
> >> > --- a/arch/arm64/include/asm/thread_info.h
> >> > +++ b/arch/arm64/include/asm/thread_info.h
> >> > @@ -68,7 +68,62 @@ struct thread_info {
> >> > +   const void *oldframe;
> >> > +   const void *callee_fp = NULL;
> >> > +   const void *caller_fp = NULL;
> >> > +
> >> > +   oldframe = __builtin_frame_address(1);
> >> > +   if (oldframe) {
> >> > +           callee_fp = __builtin_frame_address(2);
> >> > +           if (callee_fp)
> >> > +                   caller_fp = __builtin_frame_address(3);
> >> > +   }
> >> >
> >> Which compilers have you tested this with? The GCC folks don't
> >> guarantee a frame layout, and they have changed it in the past, so I
> >> suspect this is pretty fragile. In particularly, if
> >> __builtin_frame_address just points at the frame record, then I don't
> >> think you can make assumptions about the placement of local variables
> >> and arguments with respect to that.
> >>
> >
> > https://gcc.gnu.org/onlinedocs/gcc-6.3.0/gcc/Return-Address.html#index-
> > g_t_005f_005fbuiltin_005fframe_005faddress-3701
> >
> > "Calling this function with a nonzero argument can have unpredictable
> >  effects, including crashing the calling program. As a result, calls
> >  that are considered unsafe are diagnosed when the -Wframe-address
> >  option is in effect. Such calls should only be made in debugging
> >  situations."
> 
> It does work, though, and given the CONFIG_FRAME_POINTER check, I
> think this is fine. The kernel explicitly disables -Wframe-address
> since it gets used in a number of places.

I would prefer to use the existing unwind_frame, as suggested by James,
if possible. I really don't like relying on unpredictable compiler behaviour
if we don't have to!

Will
kpark3469@gmail.com Jan. 30, 2017, 11:26 a.m. UTC | #9
Hello Kees,

Thanks for the suggestion about lkdtm. Yes, it worked correctly.
provoke-crash# echo USERCOPY_STACK_FRAME_TO > DIRECT
[11388.369172] lkdtm: Performing direct entry USERCOPY_STACK_FRAME_TO
[11388.369259] lkdtm: attempting good copy_to_user of local stack
[11388.369366] lkdtm: attempting bad copy_to_user of distant stack
[11388.369453] usercopy: kernel memory exposure attempt detected from
ffffffc87985fd60 (<process stack>) (32 bytes)

provoke-crash# echo USERCOPY_STACK_FRAME_FROM > DIRECT
[12687.156830] lkdtm: Performing direct entry USERCOPY_STACK_FRAME_FROM
[12687.156918] lkdtm: attempting good copy_from_user of local stack
[12687.156995] lkdtm: attempting bad copy_from_user of distant stack
[12687.157082] usercopy: kernel memory overwrite attempt detected to
ffffffc87985fd60 (<process stack>) (32 bytes)

One thing I want to ask is..
Does USERCOPY_HEAP_FLAG_FROM/TO work correctly in latest kernel?
Both on Pixel(v3.18) and on emulator(v4.10-rc5)
In these two cases the bad attempt passed. I guess the code for this
test might not be ready. Am I right?

Thanks.

BR
Sahara


On Thu, Jan 26, 2017 at 4:58 AM, Kees Cook <keescook@chromium.org> wrote:
> On Wed, Jan 25, 2017 at 6:44 AM, Keun-O Park <kpark3469@gmail.com> wrote:
>> On Wed, Jan 25, 2017 at 5:54 PM, Will Deacon <will.deacon@arm.com> wrote:
>>> [Adding Akashi, since he'a had fun and games with arm64 stack unwinding
>>>  and I bet he can find a problem with this patch!]
>>>
>>> On Wed, Jan 25, 2017 at 05:46:23PM +0400, kpark3469@gmail.com wrote:
>>>> From: Sahara <keun-o.park@darkmatter.ae>
>>>>
>>>> This implements arch_within_stack_frames() for arm64 that should
>>>> validate if a given object is contained by a kernel stack frame.
>>>>
>>>> Signed-off-by: Sahara <keun-o.park@darkmatter.ae>
>
> Awesome! Thanks for working on this!
>
>>>> ---
>>>>  arch/arm64/Kconfig                   |  1 +
>>>>  arch/arm64/include/asm/thread_info.h | 55 ++++++++++++++++++++++++++++++++++++
>>>>  2 files changed, 56 insertions(+)
>>>>
>>>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>>>> index 1117421..8bf70b4 100644
>>>> --- a/arch/arm64/Kconfig
>>>> +++ b/arch/arm64/Kconfig
>>>> @@ -97,6 +97,7 @@ config ARM64
>>>>       select HAVE_SYSCALL_TRACEPOINTS
>>>>       select HAVE_KPROBES
>>>>       select HAVE_KRETPROBES if HAVE_KPROBES
>>>> +     select HAVE_ARCH_WITHIN_STACK_FRAMES
>>>>       select IOMMU_DMA if IOMMU_SUPPORT
>>>>       select IRQ_DOMAIN
>>>>       select IRQ_FORCED_THREADING
>>>> diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
>>>> index 46c3b93..f610c44 100644
>>>> --- a/arch/arm64/include/asm/thread_info.h
>>>> +++ b/arch/arm64/include/asm/thread_info.h
>>>> @@ -68,7 +68,62 @@ struct thread_info {
>>>>  #define thread_saved_fp(tsk) \
>>>>       ((unsigned long)(tsk->thread.cpu_context.fp))
>>>>
>>>> +/*
>>>> + * Walks up the stack frames to make sure that the specified object is
>>>> + * entirely contained by a single stack frame.
>>>> + *
>>>> + * Returns:
>>>> + *            1 if within a frame
>>>> + *           -1 if placed across a frame boundary (or outside stack)
>>>> + *            0 unable to determine (no frame pointers, etc)
>>>> + */
>>>> +static inline int arch_within_stack_frames(const void * const stack,
>>>> +                                        const void * const stackend,
>>>> +                                        const void *obj, unsigned long len)
>>>> +{
>>>> +#if defined(CONFIG_FRAME_POINTER)
>>>> +     const void *oldframe;
>>>> +     const void *callee_fp = NULL;
>>>> +     const void *caller_fp = NULL;
>>>> +
>>>> +     oldframe = __builtin_frame_address(1);
>>>> +     if (oldframe) {
>>>> +             callee_fp = __builtin_frame_address(2);
>>>> +             if (callee_fp)
>>>> +                     caller_fp = __builtin_frame_address(3);
>>>> +     }
>>>> +     /*
>>>> +      * low ----------------------------------------------> high
>>>> +      * [callee_fp][lr][args][local vars][caller_fp'][lr']
>>>> +      *                ^----------------^
>>>> +      *               allow copies only within here
>>>> +      */
>>>
>>> Which compilers have you tested this with? The GCC folks don't guarantee a
>>> frame layout, and they have changed it in the past, so I suspect this is
>>> pretty fragile. In particularly, if __builtin_frame_address just points
>>> at the frame record, then I don't think you can make assumptions about the
>>> placement of local variables and arguments with respect to that.
>
> How often has it changed in the past? That seems like a strange thing
> to change; either it's aligned and efficiently organized, or ... not?
>
>>>
>>> Will
>>
>> $ aarch64-linux-android-gcc --version
>> aarch64-linux-android-gcc (GCC) 4.9 20150123 (prerelease)
>>
>> I tested this with aosp 7.1 android toolchain on Pixel.
>> Maybe I need a suggestion to make this robust.
>
> I wonder if some kind of BUILD_BUG_ON() magic could be used to
> validate the relative positions of things on the stack? Or in the
> worst case, a boot-time BUG() check...
>
> Did you happen to test the lkdtm USERCOPY_STACK_FRAME_TO and
> USERCOPY_STACK_FRAME_FROM tests to make sure they tripped correctly?
>
> -Kees
>
> --
> Kees Cook
> Nexus Security
kpark3469@gmail.com Jan. 30, 2017, 12:42 p.m. UTC | #10
On Thu, Jan 26, 2017 at 11:10 AM, AKASHI Takahiro
<takahiro.akashi@linaro.org> wrote:
> On Wed, Jan 25, 2017 at 01:54:10PM +0000, Will Deacon wrote:
>> [Adding Akashi, since he'a had fun and games with arm64 stack unwinding
>>  and I bet he can find a problem with this patch!]
>
> I have had hard time to play with that :)
>
>> On Wed, Jan 25, 2017 at 05:46:23PM +0400, kpark3469@gmail.com wrote:
>> > From: Sahara <keun-o.park@darkmatter.ae>
>> >
>> > This implements arch_within_stack_frames() for arm64 that should
>> > validate if a given object is contained by a kernel stack frame.
>> >
>> > Signed-off-by: Sahara <keun-o.park@darkmatter.ae>
>> > ---
>> >  arch/arm64/Kconfig                   |  1 +
>> >  arch/arm64/include/asm/thread_info.h | 55 ++++++++++++++++++++++++++++++++++++
>> >  2 files changed, 56 insertions(+)
>> >
>> > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> > index 1117421..8bf70b4 100644
>> > --- a/arch/arm64/Kconfig
>> > +++ b/arch/arm64/Kconfig
>> > @@ -97,6 +97,7 @@ config ARM64
>> >     select HAVE_SYSCALL_TRACEPOINTS
>> >     select HAVE_KPROBES
>> >     select HAVE_KRETPROBES if HAVE_KPROBES
>> > +   select HAVE_ARCH_WITHIN_STACK_FRAMES
>> >     select IOMMU_DMA if IOMMU_SUPPORT
>> >     select IRQ_DOMAIN
>> >     select IRQ_FORCED_THREADING
>> > diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
>> > index 46c3b93..f610c44 100644
>> > --- a/arch/arm64/include/asm/thread_info.h
>> > +++ b/arch/arm64/include/asm/thread_info.h
>> > @@ -68,7 +68,62 @@ struct thread_info {
>> >  #define thread_saved_fp(tsk)       \
>> >     ((unsigned long)(tsk->thread.cpu_context.fp))
>> >
>> > +/*
>> > + * Walks up the stack frames to make sure that the specified object is
>> > + * entirely contained by a single stack frame.
>> > + *
>> > + * Returns:
>> > + *          1 if within a frame
>> > + *         -1 if placed across a frame boundary (or outside stack)
>> > + *          0 unable to determine (no frame pointers, etc)
>> > + */
>> > +static inline int arch_within_stack_frames(const void * const stack,
>> > +                                      const void * const stackend,
>> > +                                      const void *obj, unsigned long len)
>> > +{
>> > +#if defined(CONFIG_FRAME_POINTER)
>
> nitpick: s/#if defined()/#ifdef/, or just remove this guard?
>
>> > +   const void *oldframe;
>> > +   const void *callee_fp = NULL;
>> > +   const void *caller_fp = NULL;
>> > +
>> > +   oldframe = __builtin_frame_address(1);
>> > +   if (oldframe) {
>> > +           callee_fp = __builtin_frame_address(2);
>> > +           if (callee_fp)
>> > +                   caller_fp = __builtin_frame_address(3);
>> > +   }
>> > +   /*
>> > +    * low ----------------------------------------------> high
>> > +    * [callee_fp][lr][args][local vars][caller_fp'][lr']
>> > +    *                ^----------------^
>> > +    *               allow copies only within here
>> > +    */
>>
>> Which compilers have you tested this with? The GCC folks don't guarantee a
>> frame layout, and they have changed it in the past,
>
> I don't know whether any changes have been made before or not, but
>
>> so I suspect this is
>> pretty fragile. In particularly, if __builtin_frame_address just points
>> at the frame record, then I don't think you can make assumptions about the
>> placement of local variables and arguments with respect to that.
>>
>> Will
>
> Yes and no.
>
> AAPCS64 says,
> - The stack implementation is full-descending (5.2.2)
> - A process may only access (for reading or writing) the closed interval
>   of the entire stack delimited by [SP, stack-base - 1]. (5.2.2.1)
> - The location of the frame record within a stack frame is not specified
>   (5.2.3)
>
> To my best knowledge, dynamically allocated object (local variable) may be
> allocated below the current frame pointer, decrementing the stack pointer.
> Take a look at a simple example:
>
> ===8<===
> #include <stdio.h>
> #include <stdlib.h>
>
> int main(int ac, char **av) {
>         int array_size;
>         register unsigned long sp asm("sp");
>
>         if (ac < 2) {
>                 printf("No argument\n");
>                 return 1;
>         }
>         array_size = atoi(av[1]);
>         printf("frame pointer: %lx\n", __builtin_frame_address(0));
>         printf("Before dynamic alloc: %lx\n", sp);
>         {
>                 long array[array_size];
>
>                 printf("After dynamic alloc: %lx\n", sp);
>         }
>
>         return 0;
> }
> ===>8===
>
> and the result (with gcc 5.3) is:
>   frame pointer:        ffffe32bacd0
>   Before dynamic alloc: ffffe32bacd0
>   After dynamic alloc:  ffffe32bac70
>
> Given this fact,
>
>> > +   /*
>> > +    * low ----------------------------------------------> high
>> > +    * [callee_fp][lr][args][local vars][caller_fp'][lr']
>> > +    *                ^----------------^
>> > +    *               allow copies only within here
>> > +    */
>
> this picture may not always be precise in that "local variables" are
> local to the callee, OR possibly local to the *caller*.
> However, the range check is done here in a while loop that walks through
> the whole callstack chain, and so I assume that it would work in most cases
> except the case that a callee function hits that usage.
>
> I think there are a few (not many) places of such code in the kernel,
> (around net IIRC, but don' know they call usercopy functions or not).

Hello AKASHI,

Thanks so much for the example code. Basically I totally missed this case.
I modified do_usercopy_stack() slightly following your code snippet.
Like your comment, I could see the similar result.
....
        array_size = get_random_int() & 0x0F;
        if (to_user) {
                unsigned char array[array_size];
....
                pr_info("attempting bad copy_to_user of distant stack 2\n");
                if (copy_to_user((void __user *)user_addr, array,
                                 unconst + sizeof(array))) {
                        pr_warn("copy_to_user failed, but lacked Oops\n");
                        goto free_user;
                }
....
# echo USERCOPY_STACK_FRAME_TO > DIRECT
[ 1999.832209] Before dynamic alloc: ffffffc079013d40
[ 1999.832309] After dynamic alloc: ffffffc079013d40
[ 1999.832370] lkdtm: attempting good copy_to_user of local stack
[ 1999.832476] lkdtm: attempting bad copy_to_user of distant stack
[ 1999.832562] usercopy: kernel memory exposure attempt detected from
ffffffc079013d20 (<process stack>) (32 bytes)
[ 1999.832636] usercopy: BUG()!!!
[ 1999.832693] lkdtm: attempting bad copy_to_user of distant stack 2
[ 1999.832779] usercopy: kernel memory exposure attempt detected from
ffffffc079013d30 (<process stack>) (6 bytes)
[ 1999.832853] usercopy: BUG()!!!

This is output of GCC 4.9, so maybe the sp value is not expected one.
Anyway it looks to me that the object should be scanned from oldframe.

Thank you.

BR
Sahara

>
> Thanks,
> -Takahiro AKASHI
Kees Cook Jan. 30, 2017, 10:15 p.m. UTC | #11
On Mon, Jan 30, 2017 at 3:26 AM, Keun-O Park <kpark3469@gmail.com> wrote:
> Hello Kees,
>
> Thanks for the suggestion about lkdtm. Yes, it worked correctly.
> provoke-crash# echo USERCOPY_STACK_FRAME_TO > DIRECT
> [11388.369172] lkdtm: Performing direct entry USERCOPY_STACK_FRAME_TO
> [11388.369259] lkdtm: attempting good copy_to_user of local stack
> [11388.369366] lkdtm: attempting bad copy_to_user of distant stack
> [11388.369453] usercopy: kernel memory exposure attempt detected from
> ffffffc87985fd60 (<process stack>) (32 bytes)
>
> provoke-crash# echo USERCOPY_STACK_FRAME_FROM > DIRECT
> [12687.156830] lkdtm: Performing direct entry USERCOPY_STACK_FRAME_FROM
> [12687.156918] lkdtm: attempting good copy_from_user of local stack
> [12687.156995] lkdtm: attempting bad copy_from_user of distant stack
> [12687.157082] usercopy: kernel memory overwrite attempt detected to
> ffffffc87985fd60 (<process stack>) (32 bytes)
>
> One thing I want to ask is..
> Does USERCOPY_HEAP_FLAG_FROM/TO work correctly in latest kernel?

No, this protection (the whitelisting flag) isn't implemented yet in
upstream. (You're more than welcome to dig into it, if you want!)

> Both on Pixel(v3.18) and on emulator(v4.10-rc5)
> In these two cases the bad attempt passed. I guess the code for this
> test might not be ready. Am I right?

Correct.

-Kees
Kees Cook Jan. 30, 2017, 10:19 p.m. UTC | #12
On Mon, Jan 30, 2017 at 4:42 AM, Keun-O Park <kpark3469@gmail.com> wrote:
> Thanks so much for the example code. Basically I totally missed this case.
> I modified do_usercopy_stack() slightly following your code snippet.
> Like your comment, I could see the similar result.
> ....
>         array_size = get_random_int() & 0x0F;
>         if (to_user) {
>                 unsigned char array[array_size];
> ....
>                 pr_info("attempting bad copy_to_user of distant stack 2\n");
>                 if (copy_to_user((void __user *)user_addr, array,
>                                  unconst + sizeof(array))) {
>                         pr_warn("copy_to_user failed, but lacked Oops\n");
>                         goto free_user;
>                 }
> ....
> # echo USERCOPY_STACK_FRAME_TO > DIRECT
> [ 1999.832209] Before dynamic alloc: ffffffc079013d40
> [ 1999.832309] After dynamic alloc: ffffffc079013d40
> [ 1999.832370] lkdtm: attempting good copy_to_user of local stack
> [ 1999.832476] lkdtm: attempting bad copy_to_user of distant stack
> [ 1999.832562] usercopy: kernel memory exposure attempt detected from
> ffffffc079013d20 (<process stack>) (32 bytes)
> [ 1999.832636] usercopy: BUG()!!!
> [ 1999.832693] lkdtm: attempting bad copy_to_user of distant stack 2
> [ 1999.832779] usercopy: kernel memory exposure attempt detected from
> ffffffc079013d30 (<process stack>) (6 bytes)
> [ 1999.832853] usercopy: BUG()!!!
>
> This is output of GCC 4.9, so maybe the sp value is not expected one.
> Anyway it looks to me that the object should be scanned from oldframe.

Am I correct in understanding that your code worked correctly? I.e.
Access to "array" worked, but stepping beyond it failed? (Does
sizeof() work with dynamic stack allocations?)

-Kees
kpark3469@gmail.com Jan. 31, 2017, 9:10 a.m. UTC | #13
On Tue, Jan 31, 2017 at 2:19 AM, Kees Cook <keescook@chromium.org> wrote:
> On Mon, Jan 30, 2017 at 4:42 AM, Keun-O Park <kpark3469@gmail.com> wrote:
>> Thanks so much for the example code. Basically I totally missed this case.
>> I modified do_usercopy_stack() slightly following your code snippet.
>> Like your comment, I could see the similar result.
>> ....
>>         array_size = get_random_int() & 0x0F;
>>         if (to_user) {
>>                 unsigned char array[array_size];
>> ....
>>                 pr_info("attempting bad copy_to_user of distant stack 2\n");
>>                 if (copy_to_user((void __user *)user_addr, array,
>>                                  unconst + sizeof(array))) {
>>                         pr_warn("copy_to_user failed, but lacked Oops\n");
>>                         goto free_user;
>>                 }
>> ....
>> # echo USERCOPY_STACK_FRAME_TO > DIRECT
>> [ 1999.832209] Before dynamic alloc: ffffffc079013d40
>> [ 1999.832309] After dynamic alloc: ffffffc079013d40
>> [ 1999.832370] lkdtm: attempting good copy_to_user of local stack
>> [ 1999.832476] lkdtm: attempting bad copy_to_user of distant stack
>> [ 1999.832562] usercopy: kernel memory exposure attempt detected from
>> ffffffc079013d20 (<process stack>) (32 bytes)
>> [ 1999.832636] usercopy: BUG()!!!
>> [ 1999.832693] lkdtm: attempting bad copy_to_user of distant stack 2
>> [ 1999.832779] usercopy: kernel memory exposure attempt detected from
>> ffffffc079013d30 (<process stack>) (6 bytes)
>> [ 1999.832853] usercopy: BUG()!!!
>>
>> This is output of GCC 4.9, so maybe the sp value is not expected one.
>> Anyway it looks to me that the object should be scanned from oldframe.
>
> Am I correct in understanding that your code worked correctly? I.e.
> Access to "array" worked, but stepping beyond it failed? (Does
> sizeof() work with dynamic stack allocations?)

My implementation can not detect this case. LKDTM stack test regards
that this array is out of stackframe.
So BUG() is called.

sizeof() works fine with dynamic stack allocations for me.

Thanks.

BR
Sahara
Kees Cook Jan. 31, 2017, 5:56 p.m. UTC | #14
On Tue, Jan 31, 2017 at 1:10 AM, Keun-O Park <kpark3469@gmail.com> wrote:
> On Tue, Jan 31, 2017 at 2:19 AM, Kees Cook <keescook@chromium.org> wrote:
>> On Mon, Jan 30, 2017 at 4:42 AM, Keun-O Park <kpark3469@gmail.com> wrote:
>>> Thanks so much for the example code. Basically I totally missed this case.
>>> I modified do_usercopy_stack() slightly following your code snippet.
>>> Like your comment, I could see the similar result.
>>> ....
>>>         array_size = get_random_int() & 0x0F;
>>>         if (to_user) {
>>>                 unsigned char array[array_size];
>>> ....
>>>                 pr_info("attempting bad copy_to_user of distant stack 2\n");
>>>                 if (copy_to_user((void __user *)user_addr, array,
>>>                                  unconst + sizeof(array))) {
>>>                         pr_warn("copy_to_user failed, but lacked Oops\n");
>>>                         goto free_user;
>>>                 }
>>> ....
>>> # echo USERCOPY_STACK_FRAME_TO > DIRECT
>>> [ 1999.832209] Before dynamic alloc: ffffffc079013d40
>>> [ 1999.832309] After dynamic alloc: ffffffc079013d40
>>> [ 1999.832370] lkdtm: attempting good copy_to_user of local stack
>>> [ 1999.832476] lkdtm: attempting bad copy_to_user of distant stack
>>> [ 1999.832562] usercopy: kernel memory exposure attempt detected from
>>> ffffffc079013d20 (<process stack>) (32 bytes)
>>> [ 1999.832636] usercopy: BUG()!!!
>>> [ 1999.832693] lkdtm: attempting bad copy_to_user of distant stack 2
>>> [ 1999.832779] usercopy: kernel memory exposure attempt detected from
>>> ffffffc079013d30 (<process stack>) (6 bytes)
>>> [ 1999.832853] usercopy: BUG()!!!
>>>
>>> This is output of GCC 4.9, so maybe the sp value is not expected one.
>>> Anyway it looks to me that the object should be scanned from oldframe.
>>
>> Am I correct in understanding that your code worked correctly? I.e.
>> Access to "array" worked, but stepping beyond it failed? (Does
>> sizeof() work with dynamic stack allocations?)
>
> My implementation can not detect this case. LKDTM stack test regards
> that this array is out of stackframe.
> So BUG() is called.
>
> sizeof() works fine with dynamic stack allocations for me.

Can you send the patch you used to extends the LKDTM test? The stack
layout looks the same as x86; shouldn't x86 fail in the same way?

-Kees
kpark3469@gmail.com Feb. 2, 2017, 1:34 p.m. UTC | #15
Hello James,


On Thu, Jan 26, 2017 at 7:23 PM, James Morse <james.morse@arm.com> wrote:
> Hi Sahara,
>
> On 25/01/17 13:46, kpark3469@gmail.com wrote:
>> From: Sahara <keun-o.park@darkmatter.ae>
>>
>> This implements arch_within_stack_frames() for arm64 that should
>> validate if a given object is contained by a kernel stack frame.
>
> Thanks for wade-ing into this, walking the stack is horribly tricky!
>
>
>> diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
>> index 46c3b93..f610c44 100644
>> --- a/arch/arm64/include/asm/thread_info.h
>> +++ b/arch/arm64/include/asm/thread_info.h
>> @@ -68,7 +68,62 @@ struct thread_info {
>>  #define thread_saved_fp(tsk) \
>>       ((unsigned long)(tsk->thread.cpu_context.fp))
>>
>> +/*
>> + * Walks up the stack frames to make sure that the specified object is
>> + * entirely contained by a single stack frame.
>> + *
>> + * Returns:
>> + *            1 if within a frame
>> + *           -1 if placed across a frame boundary (or outside stack)
>> + *            0 unable to determine (no frame pointers, etc)
>
> (Shame the enum in mm/usercopy.c that explains these isn't exposed)

I exposed the enum in mm/usercopy.c by creating and moving to
usercopy.h in v2 patch


>
>
>> + */
>> +static inline int arch_within_stack_frames(const void * const stack,
>> +                                        const void * const stackend,
>> +                                        const void *obj, unsigned long len)
>> +{
>> +#if defined(CONFIG_FRAME_POINTER)
>> +     const void *oldframe;
>> +     const void *callee_fp = NULL;
>> +     const void *caller_fp = NULL;
>> +
>> +     oldframe = __builtin_frame_address(1);
>
> So we always discard _our_ callers frame. This will vary depending on the
> compilers choice on whether or not to inline this function. Its either
> check_stack_object() (which is declared noinline) or __check_object_size().
> I think this is fine either way as obj should never be in the stack-frames of
> these functions.

Unfortunately Akashi reported that obj can be placed at the
stack-frame of these functions,
when dynamic array is used.

>
>
>> +     if (oldframe) {
>> +             callee_fp = __builtin_frame_address(2);
>> +             if (callee_fp)
>> +                     caller_fp = __builtin_frame_address(3);
>> +     }
>> +     /*
>> +      * low ----------------------------------------------> high
>> +      * [callee_fp][lr][args][local vars][caller_fp'][lr']
>
> These are the caller's args and local_vars right?

No, unless my test is wrong, these are callee's args and local vars.

>
>
>> +      *                ^----------------^
>> +      *               allow copies only within here
>> +      */
>> +     while (stack <= callee_fp && callee_fp < stackend) {
>> +             /*
>> +              * If obj + len extends past the caller frame, this
>> +              * check won't pass and the next frame will be 0,
>> +              * causing us to bail out and correctly report
>> +              * the copy as invalid.
>> +              */
>> +             if (!caller_fp) {
>
>> +                     if (obj + len <= stackend)
>
> Isn't this always true? check_stack_object() does this:
>>       if (obj < stack || stackend < obj + len)
>>               return BAD_STACK;
>

You're right. This is redundant check. Thanks.

>
>> +                             return (obj >= callee_fp + 2 * sizeof(void *)) ?
>> +                                     1 : -1;
>
> You do this twice (and its pretty complicated), can you make it a macro with an
> explanatory name? (I think its calculating caller_frame_end from callee_fp,
> having something like caller_frame_start and caller_frame_end would make this
> easier to follow).
>

As your suggestion, I added a macro like get_stack_start()


>
>> +                     else
>> +                             return -1;
>> +             }
>> +             if (obj + len <= caller_fp)
>> +                     return (obj >= callee_fp + 2 * sizeof(void *)) ? 1 : -1;
>> +             callee_fp = caller_fp;
>> +             caller_fp = *(const void * const *)caller_fp;
>
> You test caller_fp lies within the stack next time round the loop, what happens
> if it is miss-aligned? unwind_frame() tests the new fp to check it was 16-byte
> aligned. If not, its probably a corrupted stack frame.

I added missing check for misaligned frame pointer. Thanks.

>
> I wonder if you can make use of unwind_frame()? We have existing machinery for
> walking up the stack, (it takes a callback and you can stop the walk early).
> If you move this function into arch/arm64/kernel/stacktrace.c, you could make
> use of walk_stackframe().

Without modifying existing x86 code for arch_within_stack_frames and
mm/usercopy.c,
it looks to me that it might not be feasible to use unwind_frame().
But, I didn't meticulously consider this point.

>
> I guess you aren't using it because it doesn't give you start/end ranges for
> each stack frame, I think you can get away without this, the callback would only
> needs to test that the provided frame->fp doesn't lie between obj and (obj+len).
> Calculating the frame start and end is extra work for the bounds test - we
> already know obj is contained by the stack so its just a question of whether it
> overlaps an fp record.

Exactly. :-)

>
>
>> +     }
>> +     return -1;
>
> check_stack_object()'s use of task_stack_page(current) means you will never see
> this run on the irq-stack, so no need to worry about stepping between stacks.
>
> This looks correct to me, walking the stack is unavoidably complex, I wonder if
> we can avoid having another stack walker by using the existing framework?
>
>
>
>
> Thanks,
>
> James
diff mbox

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1117421..8bf70b4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -97,6 +97,7 @@  config ARM64
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES if HAVE_KPROBES
+	select HAVE_ARCH_WITHIN_STACK_FRAMES
 	select IOMMU_DMA if IOMMU_SUPPORT
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 46c3b93..f610c44 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -68,7 +68,62 @@  struct thread_info {
 #define thread_saved_fp(tsk)	\
 	((unsigned long)(tsk->thread.cpu_context.fp))
 
+/*
+ * Walks up the stack frames to make sure that the specified object is
+ * entirely contained by a single stack frame.
+ *
+ * Returns:
+ *		 1 if within a frame
+ *		-1 if placed across a frame boundary (or outside stack)
+ *		 0 unable to determine (no frame pointers, etc)
+ */
+static inline int arch_within_stack_frames(const void * const stack,
+					   const void * const stackend,
+					   const void *obj, unsigned long len)
+{
+#if defined(CONFIG_FRAME_POINTER)
+	const void *oldframe;
+	const void *callee_fp = NULL;
+	const void *caller_fp = NULL;
+
+	oldframe = __builtin_frame_address(1);
+	if (oldframe) {
+		callee_fp = __builtin_frame_address(2);
+		if (callee_fp)
+			caller_fp = __builtin_frame_address(3);
+	}
+	/*
+	 * low ----------------------------------------------> high
+	 * [callee_fp][lr][args][local vars][caller_fp'][lr']
+	 *                ^----------------^
+	 *               allow copies only within here
+	 */
+	while (stack <= callee_fp && callee_fp < stackend) {
+		/*
+		 * If obj + len extends past the caller frame, this
+		 * check won't pass and the next frame will be 0,
+		 * causing us to bail out and correctly report
+		 * the copy as invalid.
+		 */
+		if (!caller_fp) {
+			if (obj + len <= stackend)
+				return (obj >= callee_fp + 2 * sizeof(void *)) ?
+					1 : -1;
+			else
+				return -1;
+		}
+		if (obj + len <= caller_fp)
+			return (obj >= callee_fp + 2 * sizeof(void *)) ? 1 : -1;
+		callee_fp = caller_fp;
+		caller_fp = *(const void * const *)caller_fp;
+	}
+	return -1;
+#else
+	return 0;
 #endif
+}
+
+#endif /* !__ASSEMBLY__ */
 
 /*
  * thread information flags: