diff mbox series

[3/3] arm64: stacktrace: better handle corrupted stacks

Message ID 20190606125402.10229-4-mark.rutland@arm.com (mailing list archive)
State New, archived
Headers show
Series arm64: stacktrace: improve robustness | expand

Commit Message

Mark Rutland June 6, 2019, 12:54 p.m. UTC
The arm64 stacktrace code is careful to only dereference frame records
in valid stack ranges, ensuring that a corrupted frame record won't
result in a faulting access.

However, it's still possible for corrupt frame records to result in
infinite loops in the stacktrace code, which is also undesirable.

This patch ensures that we complete a stacktrace in finite time, by
keeping track of which stacks we have already completed unwinding, and
verifying that if the next frame record is on the same stack, it is at a
higher address.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Martin <dave.martin@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Tengfei Fan <tengfeif@codeaurora.org>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/stacktrace.h | 34 ++++++++++++++++++++++++++--------
 arch/arm64/kernel/process.c         |  2 +-
 arch/arm64/kernel/stacktrace.c      | 16 +++++++++++++++-
 arch/arm64/kernel/time.c            |  2 +-
 arch/arm64/kernel/traps.c           |  4 ++--
 5 files changed, 45 insertions(+), 13 deletions(-)

Comments

Dave Martin June 21, 2019, 4:37 p.m. UTC | #1
On Thu, Jun 06, 2019 at 01:54:02PM +0100, Mark Rutland wrote:
> The arm64 stacktrace code is careful to only dereference frame records
> in valid stack ranges, ensuring that a corrupted frame record won't
> result in a faulting access.
> 
> However, it's still possible for corrupt frame records to result in
> infinite loops in the stacktrace code, which is also undesirable.
> 
> This patch ensures that we complete a stacktrace in finite time, by
> keeping track of which stacks we have already completed unwinding, and
> verifying that if the next frame record is on the same stack, it is at a
> higher address.
> 
> Signed-off-by: Mark Rutland <mark.rutland@arm.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Dave Martin <dave.martin@arm.com>
> Cc: James Morse <james.morse@arm.com>
> Cc: Tengfei Fan <tengfeif@codeaurora.org>
> Cc: Will Deacon <will.deacon@arm.com>
> ---
>  arch/arm64/include/asm/stacktrace.h | 34 ++++++++++++++++++++++++++--------
>  arch/arm64/kernel/process.c         |  2 +-
>  arch/arm64/kernel/stacktrace.c      | 16 +++++++++++++++-
>  arch/arm64/kernel/time.c            |  2 +-
>  arch/arm64/kernel/traps.c           |  4 ++--
>  5 files changed, 45 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
> index 18f90bf1385c..4ebf8a8997b0 100644
> --- a/arch/arm64/include/asm/stacktrace.h
> +++ b/arch/arm64/include/asm/stacktrace.h
> @@ -19,19 +19,12 @@
>  #include <linux/percpu.h>
>  #include <linux/sched.h>
>  #include <linux/sched/task_stack.h>
> +#include <linux/types.h>
>  
>  #include <asm/memory.h>
>  #include <asm/ptrace.h>
>  #include <asm/sdei.h>
>  
> -struct stackframe {
> -	unsigned long fp;
> -	unsigned long pc;
> -#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> -	int graph;
> -#endif
> -};
> -
>  enum stack_type {
>  	STACK_TYPE_UNKNOWN,
>  	STACK_TYPE_TASK,
> @@ -39,6 +32,7 @@ enum stack_type {
>  	STACK_TYPE_OVERFLOW,
>  	STACK_TYPE_SDEI_NORMAL,
>  	STACK_TYPE_SDEI_CRITICAL,
> +	__NR_STACK_TYPES

The number of stack types is actually 1 less than this, and the zeroth
bit in stacks_done doesn't get used if we use this enum as an index.

Would STACK_TYPE_UNKNOWN = 0 fix this, or would that break something
elsewhere?

>  };
>  
>  struct stack_info {
> @@ -47,6 +41,16 @@ struct stack_info {
>  	enum stack_type type;
>  };
>  
> +struct stackframe {
> +	unsigned long fp;
> +	unsigned long pc;
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +	int graph;
> +#endif
> +	DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
> +	enum stack_type stack_current;
> +};
> +
>  extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
>  extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
>  			    int (*fn)(struct stackframe *, void *), void *data);
> @@ -128,6 +132,9 @@ static inline bool on_accessible_stack(const struct task_struct *tsk,
>  				       unsigned long sp,
>  				       struct stack_info *info)
>  {
> +	if (info)
> +		info->type = STACK_TYPE_UNKNOWN;
> +
>  	if (on_task_stack(tsk, sp, info))
>  		return true;
>  	if (tsk != current || preemptible())
> @@ -143,13 +150,24 @@ static inline bool on_accessible_stack(const struct task_struct *tsk,
>  }
>  
>  static inline void start_backtrace(struct stackframe *frame,
> +				   struct task_struct *tsk,
>  				   unsigned long fp, unsigned long pc)
>  {
> +	struct stack_info info;
> +
>  	frame->fp = fp;
>  	frame->pc = pc;
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
>  	frame->graph = 0;
>  #endif
> +	bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
> +
> +	/*
> +	 * We need to prime stack_current for the first unwind, but we can
> +	 * ignore the accessibility until the unwind occurs.
> +	 */
> +	on_accessible_stack(tsk, fp, &info);
> +	frame->stack_current = info.type;
>  }
>  
>  #endif	/* __ASM_STACKTRACE_H */
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 122d88fccd13..ba9441982573 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -509,7 +509,7 @@ unsigned long get_wchan(struct task_struct *p)
>  	if (!stack_page)
>  		return 0;
>  
> -	start_backtrace(&frame, thread_saved_fp(p), thread_saved_pc(p));
> +	start_backtrace(&frame, p, thread_saved_fp(p), thread_saved_pc(p));
>  	do {
>  		if (unwind_frame(p, &frame))
>  			goto out;
> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> index b00ec7d483d1..1c45b33c7474 100644
> --- a/arch/arm64/kernel/stacktrace.c
> +++ b/arch/arm64/kernel/stacktrace.c
> @@ -43,6 +43,8 @@
>  int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
>  {
>  	unsigned long fp = frame->fp;
> +	bool changed_stack = false;
> +	struct stack_info info;
>  
>  	if (fp & 0xf)
>  		return -EINVAL;
> @@ -50,12 +52,24 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
>  	if (!tsk)
>  		tsk = current;
>  
> -	if (!on_accessible_stack(tsk, fp, NULL))
> +	if (!on_accessible_stack(tsk, fp, &info))
>  		return -EINVAL;
>  
> +	if (test_bit(info.type, frame->stacks_done))
> +		return -EINVAL;

Doesn't this fire when we unwind a sequence of frames on the same stack
(i.e., the common case)?

I may be missing something obvious here.

> +
> +	if (frame->stack_current != info.type) {
> +		set_bit(frame->stack_current, frame->stacks_done);

Oh, right, stacks_done is the set of stacks we have been on, excluding
the current one?  If so, a comment somewhere explaining that, or some
more explicit name, like "past_stacks" might make sense.

> +		frame->stack_current = info.type;
> +		changed_stack = true;
> +	}
> +
>  	frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
>  	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
>  
> +	if (!changed_stack && frame->fp <= fp)
> +		return -EINVAL;
> +

[...]

Otherwise, seems to make sense.

Cheers
---Dave
James Morse June 24, 2019, 11:34 a.m. UTC | #2
Hi Mark,

On 06/06/2019 13:54, Mark Rutland wrote:
> The arm64 stacktrace code is careful to only dereference frame records
> in valid stack ranges, ensuring that a corrupted frame record won't
> result in a faulting access.
> 
> However, it's still possible for corrupt frame records to result in
> infinite loops in the stacktrace code, which is also undesirable.
> 
> This patch ensures that we complete a stacktrace in finite time, by
> keeping track of which stacks we have already completed unwinding, and
> verifying that if the next frame record is on the same stack, it is at a
> higher address.

This looks good, I tried to take it for a spin to test SDEI stack tracing ... but it
wouldn't boot, it panic()s before earlycon.

defconfig doesn't do this, defconfig+CONFIG_PROVE_LOCKING does.
Toggling CONFIG_DEBUG_LOCK_ALLOC is the smallest config change to make this show up.

Its taking a translation fault:
| <__ll_sc_arch_atomic64_or>:
|        f9800031        prfm    pstl1strm, [x1]
|        c85f7c31        ldxr    x17, [x1]		(faulting instruction)
|        aa000231        orr     x17, x17, x0
|        c8107c31        stxr    w16, x17, [x1]
|        35ffffb0        cbnz    w16, ffff000010c7d19c <__ll_sc_a
|        d65f03c0        ret

x0: 0x0000000000000100
x1: 0xffff0000137399e8			(far_el2)

If x1 were part of 'frame' in __save_stack_trace it should be on the stack, but at
fault-time sp is 0xffff0000114a3a50. This happens before the linear map has been set up....

The lr points just after the set_bit() call in unwind_frame().


> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> index b00ec7d483d1..1c45b33c7474 100644
> --- a/arch/arm64/kernel/stacktrace.c
> +++ b/arch/arm64/kernel/stacktrace.c
> @@ -43,6 +43,8 @@
>  int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
>  {
>  	unsigned long fp = frame->fp;
> +	bool changed_stack = false;
> +	struct stack_info info;
>  
>  	if (fp & 0xf)
>  		return -EINVAL;
> @@ -50,12 +52,24 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
>  	if (!tsk)
>  		tsk = current;
>  
> -	if (!on_accessible_stack(tsk, fp, NULL))
> +	if (!on_accessible_stack(tsk, fp, &info))
>  		return -EINVAL;
>  
> +	if (test_bit(info.type, frame->stacks_done))
> +		return -EINVAL;
> +
> +	if (frame->stack_current != info.type) {

> +		set_bit(frame->stack_current, frame->stacks_done);


Changing this line:
| -               set_bit(frame->stack_current, frame->stacks_done);
| +               *frame->stacks_done |= (1 << frame->stack_current);
works fine.

But it doesn't cause a stacktrace to be printed, so I can't work out how
CONFIG_DEBUG_LOCK_ALLOC is relevant.


... this makes no sense, can anyone else reproduce it?


Thanks,

James


> +		frame->stack_current = info.type;
> +		changed_stack = true;
> +	}
> +
>  	frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
>  	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
>  
> +	if (!changed_stack && frame->fp <= fp)
> +		return -EINVAL;
> +
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
>  	if (tsk->ret_stack &&
>  			(frame->pc == (unsigned long)return_to_handler)) {
James Morse June 25, 2019, 10:28 a.m. UTC | #3
Hi Mark,

On 24/06/2019 12:34, James Morse wrote:
> On 06/06/2019 13:54, Mark Rutland wrote:
>> The arm64 stacktrace code is careful to only dereference frame records
>> in valid stack ranges, ensuring that a corrupted frame record won't
>> result in a faulting access.
>>
>> However, it's still possible for corrupt frame records to result in
>> infinite loops in the stacktrace code, which is also undesirable.
>>
>> This patch ensures that we complete a stacktrace in finite time, by
>> keeping track of which stacks we have already completed unwinding, and
>> verifying that if the next frame record is on the same stack, it is at a
>> higher address.
> 
> This looks good, I tried to take it for a spin to test SDEI stack tracing ... but it
> wouldn't boot, it panic()s before earlycon.
> 
> defconfig doesn't do this, defconfig+CONFIG_PROVE_LOCKING does.
> Toggling CONFIG_DEBUG_LOCK_ALLOC is the smallest config change to make this show up.
> 
> Its taking a translation fault:
> | <__ll_sc_arch_atomic64_or>:
> |        f9800031        prfm    pstl1strm, [x1]
> |        c85f7c31        ldxr    x17, [x1]		(faulting instruction)
> |        aa000231        orr     x17, x17, x0
> |        c8107c31        stxr    w16, x17, [x1]
> |        35ffffb0        cbnz    w16, ffff000010c7d19c <__ll_sc_a
> |        d65f03c0        ret
> 
> x0: 0x0000000000000100
> x1: 0xffff0000137399e8			(far_el2)
> 
> If x1 were part of 'frame' in __save_stack_trace it should be on the stack, but at
> fault-time sp is 0xffff0000114a3a50. This happens before the linear map has been set up....
> 
> The lr points just after the set_bit() call in unwind_frame().

frame->stack_current is uninitialized/junk, so the calculated bit to set is outside of
mapped memory.

Lockdep is relevant because it uses save_stack_trace() which doesn't use the
start_backtrace() helper that initialises the new metadata.
DEBUG_LOCK_ALLOC was a red-herring, it was perturbing the stack layout so this code ate a
pointer instead of a much more believable 0.

Patch below, this needs to come before patch 3.
I'll give this a spin with the SDEI firmware.


Thanks,

James

----------------------------------%<----------------------------------
From: James Morse <james.morse@arm.com>
Date:   Tue Jun 25 11:05:33 2019 +0100

arm64: stacktrace: use start_backtrace() consistently

unwind_frame() is about to get smart when it comes to validating
stack frames and stepping between stacks without going round in
circles.

All this relies on new parameters in struct stackframe being
initialised. Before we can do this, we need all users of
struct stackframe to use start_backtrace(), instead of packing
the values manually.

Signed-off-by: James Morse <james.morse@arm.com>
---
 arch/arm64/kernel/perf_callchain.c |  7 +------
 arch/arm64/kernel/return_address.c |  8 +++-----
 arch/arm64/kernel/stacktrace.c     | 20 ++++++--------------
 3 files changed, 10 insertions(+), 25 deletions(-)

---
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callcha
in.c
index 61d983f5756f..1510ccbd7cb2 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -165,12 +165,7 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx
*entry,
                return;
        }

-       frame.fp = regs->regs[29];
-       frame.pc = regs->pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       frame.graph = 0;
-#endif
-
+       start_backtrace(&frame, current, regs->regs[29], regs->pc);
        walk_stackframe(current, &frame, callchain_trace, entry);
 }

diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index 53c40196b607..36f8888a5e76 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -41,11 +41,9 @@ void *return_address(unsigned int level)
        data.level = level + 2;
        data.addr = NULL;

-       frame.fp = (unsigned long)__builtin_frame_address(0);
-       frame.pc = (unsigned long)return_address; /* dummy */
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       frame.graph = 0;
-#endif
+       start_backtrace(&frame, current,
+                       (unsigned long)__builtin_frame_address(0),
+                       (unsigned long)return_address);

        walk_stackframe(current, &frame, save_return_addr, &data);

diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 1c45b33c7474..8a1fa90c784d 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -147,12 +147,7 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace
*trace)
        data.skip = trace->skip;
        data.no_sched_functions = 0;

-       frame.fp = regs->regs[29];
-       frame.pc = regs->pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       frame.graph = 0;
-#endif
-
+       start_backtrace(&frame, current, regs->regs[29], regs->pc);
        walk_stackframe(current, &frame, save_trace, &data);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_regs);
@@ -171,18 +166,15 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
        data.no_sched_functions = nosched;

        if (tsk != current) {
-               frame.fp = thread_saved_fp(tsk);
-               frame.pc = thread_saved_pc(tsk);
+               start_backtrace(&frame, tsk, thread_saved_fp(tsk),
+                               thread_saved_pc(tsk));
        } else {
                /* We don't want this function nor the caller */
                data.skip += 2;
-               frame.fp = (unsigned long)__builtin_frame_address(0);
-               frame.pc = (unsigned long)__save_stack_trace;
+               start_backtrace(&frame, tsk,
+                               (unsigned long)__builtin_frame_address(0),
+                               (unsigned long)__save_stack_trace);
        }
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       frame.graph = 0;
-#endif
-
        walk_stackframe(tsk, &frame, save_trace, &data);

        put_task_stack(tsk);

----------------------------------%<----------------------------------
James Morse June 27, 2019, 4:24 p.m. UTC | #4
Hi Mark,

On 06/06/2019 13:54, Mark Rutland wrote:
> The arm64 stacktrace code is careful to only dereference frame records
> in valid stack ranges, ensuring that a corrupted frame record won't
> result in a faulting access.
> 
> However, it's still possible for corrupt frame records to result in
> infinite loops in the stacktrace code, which is also undesirable.
> 
> This patch ensures that we complete a stacktrace in finite time, by
> keeping track of which stacks we have already completed unwinding, and
> verifying that if the next frame record is on the same stack, it is at a
> higher address.

I see this truncate stacks when walking from the SDEI handler...


> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> index b00ec7d483d1..1c45b33c7474 100644
> --- a/arch/arm64/kernel/stacktrace.c
> +++ b/arch/arm64/kernel/stacktrace.c
> @@ -43,6 +43,8 @@
>  int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
>  {
>  	unsigned long fp = frame->fp;
> +	bool changed_stack = false;
> +	struct stack_info info;
>  
>  	if (fp & 0xf)
>  		return -EINVAL;
> @@ -50,12 +52,24 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
>  	if (!tsk)
>  		tsk = current;
>  
> -	if (!on_accessible_stack(tsk, fp, NULL))
> +	if (!on_accessible_stack(tsk, fp, &info))
>  		return -EINVAL;
>  
> +	if (test_bit(info.type, frame->stacks_done))
> +		return -EINVAL;
> +
> +	if (frame->stack_current != info.type) {
> +		set_bit(frame->stack_current, frame->stacks_done);
> +		frame->stack_current = info.type;
> +		changed_stack = true;
> +	}
> +
>  	frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
>  	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));


> +	if (!changed_stack && frame->fp <= fp)
> +		return -EINVAL;

This is where it goes wrong. changed_stack is only true for the first frame on a newly
visited stack. This means for the last frame of the previous stack, (which must point to
the next stack), we still require 'frame->fp <= fp'.

It think like this just happens to be true for the irq stacks as they are allocated early.
(whereas the SDEI stacks are allocated late).


This hunk fixes it for me:
------------------------------------%<------------------------------------
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 8a1fa90c784d..cb5dee233ede 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -43,7 +43,6 @@
 int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 {
        unsigned long fp = frame->fp;
-       bool changed_stack = false;
        struct stack_info info;

        if (fp & 0xf)
@@ -61,14 +60,16 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe
*frame)
        if (frame->stack_current != info.type) {
                set_bit(frame->stack_current, frame->stacks_done);
                frame->stack_current = info.type;
-               changed_stack = true;
        }

        frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
        frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));

-       if (!changed_stack && frame->fp <= fp)
-               return -EINVAL;
+       if (info.low <= frame->fp && frame->fp <= info.high) {
+               /* within stack bounds: the next frame must be older */
+               if (frame->fp <= fp)
+                       return -EINVAL;
+       }

 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        if (tsk->ret_stack &&
------------------------------------%<------------------------------------

I've given this kick with SDEI, perf and ftrace.
Let me know if its easier for me to post a v2 with all these bits assembled.

(If this is is picked onto arm64's for-next/core: The function_graph tracer works with
this series on v5.2-rc6, but it locks up on v5.2-rc4 even without this series.)


Thanks,

James
Dave Martin June 28, 2019, 11:15 a.m. UTC | #5
On Thu, Jun 27, 2019 at 05:24:06PM +0100, James Morse wrote:
> Hi Mark,
> 
> On 06/06/2019 13:54, Mark Rutland wrote:
> > The arm64 stacktrace code is careful to only dereference frame records
> > in valid stack ranges, ensuring that a corrupted frame record won't
> > result in a faulting access.
> > 
> > However, it's still possible for corrupt frame records to result in
> > infinite loops in the stacktrace code, which is also undesirable.
> > 
> > This patch ensures that we complete a stacktrace in finite time, by
> > keeping track of which stacks we have already completed unwinding, and
> > verifying that if the next frame record is on the same stack, it is at a
> > higher address.
> 
> I see this truncate stacks when walking from the SDEI handler...
> 
> 
> > diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> > index b00ec7d483d1..1c45b33c7474 100644
> > --- a/arch/arm64/kernel/stacktrace.c
> > +++ b/arch/arm64/kernel/stacktrace.c
> > @@ -43,6 +43,8 @@
> >  int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
> >  {
> >  	unsigned long fp = frame->fp;
> > +	bool changed_stack = false;
> > +	struct stack_info info;
> >  
> >  	if (fp & 0xf)
> >  		return -EINVAL;
> > @@ -50,12 +52,24 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
> >  	if (!tsk)
> >  		tsk = current;
> >  
> > -	if (!on_accessible_stack(tsk, fp, NULL))
> > +	if (!on_accessible_stack(tsk, fp, &info))
> >  		return -EINVAL;
> >  
> > +	if (test_bit(info.type, frame->stacks_done))
> > +		return -EINVAL;
> > +
> > +	if (frame->stack_current != info.type) {
> > +		set_bit(frame->stack_current, frame->stacks_done);
> > +		frame->stack_current = info.type;
> > +		changed_stack = true;
> > +	}
> > +
> >  	frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
> >  	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
> 
> 
> > +	if (!changed_stack && frame->fp <= fp)
> > +		return -EINVAL;
> 
> This is where it goes wrong. changed_stack is only true for the first
> frame on a newly visited stack. This means for the last frame of the
> previous stack, (which must point to the next stack), we still
> require 'frame->fp <= fp'.
> 
> It think like this just happens to be true for the irq stacks as they
> are allocated early.  (whereas the SDEI stacks are allocated late).

I don't understand this.

Either we are on an edge frame (changed_stack is true) or not (false).

If not, the two FPs are on the same stack and we should compare them.
Otherwise they're on different stacks and such a comparison is nonsense.

I don't see any third situation.

So, what's wrong here?

> This hunk fixes it for me:
> ------------------------------------%<------------------------------------
> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c

[...]

> @@ -61,14 +60,16 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe
> *frame)
>         if (frame->stack_current != info.type) {
>                 set_bit(frame->stack_current, frame->stacks_done);
>                 frame->stack_current = info.type;
> -               changed_stack = true;
>         }
> 
>         frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
>         frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
> 
> -       if (!changed_stack && frame->fp <= fp)
> -               return -EINVAL;
> +       if (info.low <= frame->fp && frame->fp <= info.high) {
> +               /* within stack bounds: the next frame must be older */

Doesn't on_accessible_stack() already do this check?  This is how we
determined what stack we tranitioned to in the first place.

Not saying there isn't a problem here, but I don't yet understand what
goes wrong...

[...]

Cheers
---Dave
Mark Rutland June 28, 2019, 11:32 a.m. UTC | #6
On Fri, Jun 21, 2019 at 05:37:21PM +0100, Dave Martin wrote:
> On Thu, Jun 06, 2019 at 01:54:02PM +0100, Mark Rutland wrote:
> > The arm64 stacktrace code is careful to only dereference frame records
> > in valid stack ranges, ensuring that a corrupted frame record won't
> > result in a faulting access.
> > 
> > However, it's still possible for corrupt frame records to result in
> > infinite loops in the stacktrace code, which is also undesirable.
> > 
> > This patch ensures that we complete a stacktrace in finite time, by
> > keeping track of which stacks we have already completed unwinding, and
> > verifying that if the next frame record is on the same stack, it is at a
> > higher address.
> > 
> > Signed-off-by: Mark Rutland <mark.rutland@arm.com>
> > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > Cc: Dave Martin <dave.martin@arm.com>
> > Cc: James Morse <james.morse@arm.com>
> > Cc: Tengfei Fan <tengfeif@codeaurora.org>
> > Cc: Will Deacon <will.deacon@arm.com>
> > ---
> >  arch/arm64/include/asm/stacktrace.h | 34 ++++++++++++++++++++++++++--------
> >  arch/arm64/kernel/process.c         |  2 +-
> >  arch/arm64/kernel/stacktrace.c      | 16 +++++++++++++++-
> >  arch/arm64/kernel/time.c            |  2 +-
> >  arch/arm64/kernel/traps.c           |  4 ++--
> >  5 files changed, 45 insertions(+), 13 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
> > index 18f90bf1385c..4ebf8a8997b0 100644
> > --- a/arch/arm64/include/asm/stacktrace.h
> > +++ b/arch/arm64/include/asm/stacktrace.h
> > @@ -19,19 +19,12 @@
> >  #include <linux/percpu.h>
> >  #include <linux/sched.h>
> >  #include <linux/sched/task_stack.h>
> > +#include <linux/types.h>
> >  
> >  #include <asm/memory.h>
> >  #include <asm/ptrace.h>
> >  #include <asm/sdei.h>
> >  
> > -struct stackframe {
> > -	unsigned long fp;
> > -	unsigned long pc;
> > -#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> > -	int graph;
> > -#endif
> > -};
> > -
> >  enum stack_type {
> >  	STACK_TYPE_UNKNOWN,
> >  	STACK_TYPE_TASK,
> > @@ -39,6 +32,7 @@ enum stack_type {
> >  	STACK_TYPE_OVERFLOW,
> >  	STACK_TYPE_SDEI_NORMAL,
> >  	STACK_TYPE_SDEI_CRITICAL,
> > +	__NR_STACK_TYPES
> 
> The number of stack types is actually 1 less than this, and the zeroth
> bit in stacks_done doesn't get used if we use this enum as an index.
> 
> Would STACK_TYPE_UNKNOWN = 0 fix this, or would that break something
> elsewhere?

Huh? STACK_TYPE_UNKNOWN is 0, as it's the first entry in the enum.

__NR_STACK types is used for the bitmap, where I rely on being able to
set the STACK_TYPE_UNKNOWN bit. I apprecaite it's one more than the
number of real stack types, but I wasn't able to come up with an
obviously better name.

> > @@ -50,12 +52,24 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
> >  	if (!tsk)
> >  		tsk = current;
> >  
> > -	if (!on_accessible_stack(tsk, fp, NULL))
> > +	if (!on_accessible_stack(tsk, fp, &info))
> >  		return -EINVAL;
> >  
> > +	if (test_bit(info.type, frame->stacks_done))
> > +		return -EINVAL;
> 
> Doesn't this fire when we unwind a sequence of frames on the same stack
> (i.e., the common case)?
> 
> I may be missing something obvious here.
> 
> > +
> > +	if (frame->stack_current != info.type) {
> > +		set_bit(frame->stack_current, frame->stacks_done);
> 
> Oh, right, stacks_done is the set of stacks we have been on, excluding
> the current one?  If so, a comment somewhere explaining that, or some
> more explicit name, like "past_stacks" might make sense.

I thought that stacks_done was sufficient, but I guess I could
rename that to stacks_prev, to match the stack_current naming.

Thanks,
Mark.
Mark Rutland June 28, 2019, 1:02 p.m. UTC | #7
On Fri, Jun 28, 2019 at 12:15:03PM +0100, Dave Martin wrote:
> On Thu, Jun 27, 2019 at 05:24:06PM +0100, James Morse wrote:
> > Hi Mark,
> > 
> > On 06/06/2019 13:54, Mark Rutland wrote:
> > > The arm64 stacktrace code is careful to only dereference frame records
> > > in valid stack ranges, ensuring that a corrupted frame record won't
> > > result in a faulting access.
> > > 
> > > However, it's still possible for corrupt frame records to result in
> > > infinite loops in the stacktrace code, which is also undesirable.
> > > 
> > > This patch ensures that we complete a stacktrace in finite time, by
> > > keeping track of which stacks we have already completed unwinding, and
> > > verifying that if the next frame record is on the same stack, it is at a
> > > higher address.
> > 
> > I see this truncate stacks when walking from the SDEI handler...
> > 
> > 
> > > diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> > > index b00ec7d483d1..1c45b33c7474 100644
> > > --- a/arch/arm64/kernel/stacktrace.c
> > > +++ b/arch/arm64/kernel/stacktrace.c
> > > @@ -43,6 +43,8 @@
> > >  int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
> > >  {
> > >  	unsigned long fp = frame->fp;
> > > +	bool changed_stack = false;
> > > +	struct stack_info info;
> > >  
> > >  	if (fp & 0xf)
> > >  		return -EINVAL;
> > > @@ -50,12 +52,24 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
> > >  	if (!tsk)
> > >  		tsk = current;
> > >  
> > > -	if (!on_accessible_stack(tsk, fp, NULL))
> > > +	if (!on_accessible_stack(tsk, fp, &info))
> > >  		return -EINVAL;
> > >  
> > > +	if (test_bit(info.type, frame->stacks_done))
> > > +		return -EINVAL;
> > > +
> > > +	if (frame->stack_current != info.type) {
> > > +		set_bit(frame->stack_current, frame->stacks_done);
> > > +		frame->stack_current = info.type;
> > > +		changed_stack = true;
> > > +	}
> > > +
> > >  	frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
> > >  	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
> > 
> > 
> > > +	if (!changed_stack && frame->fp <= fp)
> > > +		return -EINVAL;
> > 
> > This is where it goes wrong. changed_stack is only true for the first
> > frame on a newly visited stack. This means for the last frame of the
> > previous stack, (which must point to the next stack), we still
> > require 'frame->fp <= fp'.
> > 
> > It think like this just happens to be true for the irq stacks as they
> > are allocated early.  (whereas the SDEI stacks are allocated late).
> 
> I don't understand this.

I ended up drawing a diagram to figure this out, and realised James is
right.

> Either we are on an edge frame (changed_stack is true) or not (false).
> 
> If not, the two FPs are on the same stack and we should compare them.
> Otherwise they're on different stacks and such a comparison is nonsense.
> 
> I don't see any third situation.
> 
> So, what's wrong here?

The problem is that we unwind one frame, then check the fp of that
frame.

Say we have three stack frames, A->B->C, where A and B are on the IRQ
stack, and C is on the task stack.

At entry to unwind_frame(), frame describes A, and frame->fp points at
B. Thus frame->stack_current == info.type, and changed_stack == false.

Then we sample B:

	frame->fp = READ_ONCE(fp); // points at C on the task tasck

Then we do:

	if (!changed_stack && frame->fp <= fp)

... where changed_stack describes the A->B transition (false), but
frame->fp <= fp is the B->C transition, and B and C are on different
stacks!

Thanks,
Mark.
Mark Rutland June 28, 2019, 3:35 p.m. UTC | #8
On Thu, Jun 27, 2019 at 05:24:06PM +0100, James Morse wrote:
> Hi Mark,
> 
> On 06/06/2019 13:54, Mark Rutland wrote:
> > The arm64 stacktrace code is careful to only dereference frame records
> > in valid stack ranges, ensuring that a corrupted frame record won't
> > result in a faulting access.
> > 
> > However, it's still possible for corrupt frame records to result in
> > infinite loops in the stacktrace code, which is also undesirable.
> > 
> > This patch ensures that we complete a stacktrace in finite time, by
> > keeping track of which stacks we have already completed unwinding, and
> > verifying that if the next frame record is on the same stack, it is at a
> > higher address.
> 
> I see this truncate stacks when walking from the SDEI handler...
> 
> 
> > diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> > index b00ec7d483d1..1c45b33c7474 100644
> > --- a/arch/arm64/kernel/stacktrace.c
> > +++ b/arch/arm64/kernel/stacktrace.c
> > @@ -43,6 +43,8 @@
> >  int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
> >  {
> >  	unsigned long fp = frame->fp;
> > +	bool changed_stack = false;
> > +	struct stack_info info;
> >  
> >  	if (fp & 0xf)
> >  		return -EINVAL;
> > @@ -50,12 +52,24 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
> >  	if (!tsk)
> >  		tsk = current;
> >  
> > -	if (!on_accessible_stack(tsk, fp, NULL))
> > +	if (!on_accessible_stack(tsk, fp, &info))
> >  		return -EINVAL;
> >  
> > +	if (test_bit(info.type, frame->stacks_done))
> > +		return -EINVAL;
> > +
> > +	if (frame->stack_current != info.type) {
> > +		set_bit(frame->stack_current, frame->stacks_done);
> > +		frame->stack_current = info.type;
> > +		changed_stack = true;
> > +	}
> > +
> >  	frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
> >  	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
> 
> 
> > +	if (!changed_stack && frame->fp <= fp)
> > +		return -EINVAL;
> 
> This is where it goes wrong. changed_stack is only true for the first frame on a newly
> visited stack. This means for the last frame of the previous stack, (which must point to
> the next stack), we still require 'frame->fp <= fp'.
> 
> It think like this just happens to be true for the irq stacks as they are allocated early.
> (whereas the SDEI stacks are allocated late).
> 
> 
> This hunk fixes it for me:
> ------------------------------------%<------------------------------------
> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> index 8a1fa90c784d..cb5dee233ede 100644
> --- a/arch/arm64/kernel/stacktrace.c
> +++ b/arch/arm64/kernel/stacktrace.c
> @@ -43,7 +43,6 @@
>  int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
>  {
>         unsigned long fp = frame->fp;
> -       bool changed_stack = false;
>         struct stack_info info;
> 
>         if (fp & 0xf)
> @@ -61,14 +60,16 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe
> *frame)
>         if (frame->stack_current != info.type) {
>                 set_bit(frame->stack_current, frame->stacks_done);
>                 frame->stack_current = info.type;
> -               changed_stack = true;
>         }
> 
>         frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
>         frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
> 
> -       if (!changed_stack && frame->fp <= fp)
> -               return -EINVAL;
> +       if (info.low <= frame->fp && frame->fp <= info.high) {
> +               /* within stack bounds: the next frame must be older */
> +               if (frame->fp <= fp)
> +                       return -EINVAL;
> +       }

This fixes the cross-stack case, but it retains the check on the unwound
frame's fp, which may or may not be problematic, but it highlights that
we do something weird.

For frames A->B->C, we unwind A->B, then if C is on the same stack is B
we check whether B->C is sane.

I think that falls apart for cases which are already bad, e.g for:

	+---+
	| B |
	+---+
	| C |
	+---+
	| A |
	+---+

... we'd refuse to unwind A->B, whereas I think we should unwind A->B but
refuse to unwind B->C.

I think we need to keep track of the previous fp, and check that as part
of unwinding A->B. For the first unwind we can prime the frame with
STACK_TYPE_UNKNOWN, since any valid FP will have be treated as a
transition from that stack.

That seems to work in local testing, so I'll have a v2 shortly...

Thanks,
Mark.
Dave Martin July 1, 2019, 10:48 a.m. UTC | #9
On Fri, Jun 28, 2019 at 02:02:55PM +0100, Mark Rutland wrote:
> On Fri, Jun 28, 2019 at 12:15:03PM +0100, Dave Martin wrote:
> > On Thu, Jun 27, 2019 at 05:24:06PM +0100, James Morse wrote:
> > > Hi Mark,
> > > 
> > > On 06/06/2019 13:54, Mark Rutland wrote:
> > > > The arm64 stacktrace code is careful to only dereference frame records
> > > > in valid stack ranges, ensuring that a corrupted frame record won't
> > > > result in a faulting access.
> > > > 
> > > > However, it's still possible for corrupt frame records to result in
> > > > infinite loops in the stacktrace code, which is also undesirable.
> > > > 
> > > > This patch ensures that we complete a stacktrace in finite time, by
> > > > keeping track of which stacks we have already completed unwinding, and
> > > > verifying that if the next frame record is on the same stack, it is at a
> > > > higher address.
> > > 
> > > I see this truncate stacks when walking from the SDEI handler...
> > > 
> > > 
> > > > diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> > > > index b00ec7d483d1..1c45b33c7474 100644
> > > > --- a/arch/arm64/kernel/stacktrace.c
> > > > +++ b/arch/arm64/kernel/stacktrace.c
> > > > @@ -43,6 +43,8 @@
> > > >  int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
> > > >  {
> > > >  	unsigned long fp = frame->fp;
> > > > +	bool changed_stack = false;
> > > > +	struct stack_info info;
> > > >  
> > > >  	if (fp & 0xf)
> > > >  		return -EINVAL;
> > > > @@ -50,12 +52,24 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
> > > >  	if (!tsk)
> > > >  		tsk = current;
> > > >  
> > > > -	if (!on_accessible_stack(tsk, fp, NULL))
> > > > +	if (!on_accessible_stack(tsk, fp, &info))
> > > >  		return -EINVAL;
> > > >  
> > > > +	if (test_bit(info.type, frame->stacks_done))
> > > > +		return -EINVAL;
> > > > +
> > > > +	if (frame->stack_current != info.type) {
> > > > +		set_bit(frame->stack_current, frame->stacks_done);
> > > > +		frame->stack_current = info.type;
> > > > +		changed_stack = true;
> > > > +	}
> > > > +
> > > >  	frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
> > > >  	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
> > > 
> > > 
> > > > +	if (!changed_stack && frame->fp <= fp)
> > > > +		return -EINVAL;
> > > 
> > > This is where it goes wrong. changed_stack is only true for the first
> > > frame on a newly visited stack. This means for the last frame of the
> > > previous stack, (which must point to the next stack), we still
> > > require 'frame->fp <= fp'.
> > > 
> > > It think like this just happens to be true for the irq stacks as they
> > > are allocated early.  (whereas the SDEI stacks are allocated late).
> > 
> > I don't understand this.
> 
> I ended up drawing a diagram to figure this out, and realised James is
> right.
> 
> > Either we are on an edge frame (changed_stack is true) or not (false).
> > 
> > If not, the two FPs are on the same stack and we should compare them.
> > Otherwise they're on different stacks and such a comparison is nonsense.
> > 
> > I don't see any third situation.
> > 
> > So, what's wrong here?
> 
> The problem is that we unwind one frame, then check the fp of that
> frame.
> 
> Say we have three stack frames, A->B->C, where A and B are on the IRQ
> stack, and C is on the task stack.
> 
> At entry to unwind_frame(), frame describes A, and frame->fp points at
> B. Thus frame->stack_current == info.type, and changed_stack == false.
> 
> Then we sample B:
> 
> 	frame->fp = READ_ONCE(fp); // points at C on the task tasck
> 
> Then we do:
> 
> 	if (!changed_stack && frame->fp <= fp)
> 
> ... where changed_stack describes the A->B transition (false), but
> frame->fp <= fp is the B->C transition, and B and C are on different
> stacks!

OK, if I've understood that right, it looks like frame->stack_current
describes where the contents of frame were fetched from, not the frame
at frame->fp.

This is actually pretty confusing: the frame stack_current refers to is
already history: we have no pointer to it any more anyway.

I wonder whether this can be refactored so that stack_current doesn't
lag behind: say, call it fp_stack (the stack frame->fp points at).

That's just one option though.  I'll take a look at the repost.

Cheers
---Dave
Mark Rutland July 1, 2019, 11:22 a.m. UTC | #10
On Mon, Jul 01, 2019 at 11:48:21AM +0100, Dave Martin wrote:
> On Fri, Jun 28, 2019 at 02:02:55PM +0100, Mark Rutland wrote:
> > The problem is that we unwind one frame, then check the fp of that
> > frame.
> > 
> > Say we have three stack frames, A->B->C, where A and B are on the IRQ
> > stack, and C is on the task stack.
> > 
> > At entry to unwind_frame(), frame describes A, and frame->fp points at
> > B. Thus frame->stack_current == info.type, and changed_stack == false.
> > 
> > Then we sample B:
> > 
> > 	frame->fp = READ_ONCE(fp); // points at C on the task tasck
> > 
> > Then we do:
> > 
> > 	if (!changed_stack && frame->fp <= fp)
> > 
> > ... where changed_stack describes the A->B transition (false), but
> > frame->fp <= fp is the B->C transition, and B and C are on different
> > stacks!
> 
> OK, if I've understood that right, it looks like frame->stack_current
> describes where the contents of frame were fetched from, not the frame
> at frame->fp.
> 
> This is actually pretty confusing: the frame stack_current refers to is
> already history: we have no pointer to it any more anyway.
> 
> I wonder whether this can be refactored so that stack_current doesn't
> lag behind: say, call it fp_stack (the stack frame->fp points at).
> 
> That's just one option though.  I'll take a look at the repost.

For v2 I added prev_fp, and renamed stack_current to prev_type.

We need the prev_fp so that we can do the intra-stack monotonicity
check. We can derive prev_type from prev_fp, but it was simpler to store
prev_type than to recalculate it.

Thanks,
Mark.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 18f90bf1385c..4ebf8a8997b0 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -19,19 +19,12 @@ 
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
+#include <linux/types.h>
 
 #include <asm/memory.h>
 #include <asm/ptrace.h>
 #include <asm/sdei.h>
 
-struct stackframe {
-	unsigned long fp;
-	unsigned long pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	int graph;
-#endif
-};
-
 enum stack_type {
 	STACK_TYPE_UNKNOWN,
 	STACK_TYPE_TASK,
@@ -39,6 +32,7 @@  enum stack_type {
 	STACK_TYPE_OVERFLOW,
 	STACK_TYPE_SDEI_NORMAL,
 	STACK_TYPE_SDEI_CRITICAL,
+	__NR_STACK_TYPES
 };
 
 struct stack_info {
@@ -47,6 +41,16 @@  struct stack_info {
 	enum stack_type type;
 };
 
+struct stackframe {
+	unsigned long fp;
+	unsigned long pc;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	int graph;
+#endif
+	DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
+	enum stack_type stack_current;
+};
+
 extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
 extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
 			    int (*fn)(struct stackframe *, void *), void *data);
@@ -128,6 +132,9 @@  static inline bool on_accessible_stack(const struct task_struct *tsk,
 				       unsigned long sp,
 				       struct stack_info *info)
 {
+	if (info)
+		info->type = STACK_TYPE_UNKNOWN;
+
 	if (on_task_stack(tsk, sp, info))
 		return true;
 	if (tsk != current || preemptible())
@@ -143,13 +150,24 @@  static inline bool on_accessible_stack(const struct task_struct *tsk,
 }
 
 static inline void start_backtrace(struct stackframe *frame,
+				   struct task_struct *tsk,
 				   unsigned long fp, unsigned long pc)
 {
+	struct stack_info info;
+
 	frame->fp = fp;
 	frame->pc = pc;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	frame->graph = 0;
 #endif
+	bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
+
+	/*
+	 * We need to prime stack_current for the first unwind, but we can
+	 * ignore the accessibility until the unwind occurs.
+	 */
+	on_accessible_stack(tsk, fp, &info);
+	frame->stack_current = info.type;
 }
 
 #endif	/* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 122d88fccd13..ba9441982573 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -509,7 +509,7 @@  unsigned long get_wchan(struct task_struct *p)
 	if (!stack_page)
 		return 0;
 
-	start_backtrace(&frame, thread_saved_fp(p), thread_saved_pc(p));
+	start_backtrace(&frame, p, thread_saved_fp(p), thread_saved_pc(p));
 	do {
 		if (unwind_frame(p, &frame))
 			goto out;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index b00ec7d483d1..1c45b33c7474 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -43,6 +43,8 @@ 
 int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 {
 	unsigned long fp = frame->fp;
+	bool changed_stack = false;
+	struct stack_info info;
 
 	if (fp & 0xf)
 		return -EINVAL;
@@ -50,12 +52,24 @@  int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 	if (!tsk)
 		tsk = current;
 
-	if (!on_accessible_stack(tsk, fp, NULL))
+	if (!on_accessible_stack(tsk, fp, &info))
 		return -EINVAL;
 
+	if (test_bit(info.type, frame->stacks_done))
+		return -EINVAL;
+
+	if (frame->stack_current != info.type) {
+		set_bit(frame->stack_current, frame->stacks_done);
+		frame->stack_current = info.type;
+		changed_stack = true;
+	}
+
 	frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
 	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
 
+	if (!changed_stack && frame->fp <= fp)
+		return -EINVAL;
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	if (tsk->ret_stack &&
 			(frame->pc == (unsigned long)return_to_handler)) {
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index aa3489f3a452..83f08c7e9464 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -49,7 +49,7 @@  unsigned long profile_pc(struct pt_regs *regs)
 	if (!in_lock_functions(regs->pc))
 		return regs->pc;
 
-	start_backtrace(&frame, regs->regs[29], regs->pc);
+	start_backtrace(&frame, current, regs->regs[29], regs->pc);
 	do {
 		int ret = unwind_frame(NULL, &frame);
 		if (ret < 0)
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 8053bbed8776..3bbe1992259e 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -119,14 +119,14 @@  void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 		return;
 
 	if (tsk == current) {
-		start_backtrace(&frame,
+		start_backtrace(&frame, tsk,
 				(unsigned long)__builtin_frame_address(0),
 				(unsigned long)dump_backtrace);
 	} else {
 		/*
 		 * task blocked in __switch_to
 		 */
-		start_backtrace(&frame,
+		start_backtrace(&frame, tsk,
 				thread_saved_fp(tsk),
 				thread_saved_pc(tsk));
 	}