
arm64: efi: Account for the EFI runtime stack in stack unwinder

Message ID 20221209133414.3330761-1-ardb@kernel.org (mailing list archive)
State New, archived
Series: arm64: efi: Account for the EFI runtime stack in stack unwinder

Commit Message

Ard Biesheuvel Dec. 9, 2022, 1:34 p.m. UTC
The EFI runtime services run from a dedicated stack now, and so the
stack unwinder needs to be informed about this.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---

I realised while looking into this that comparing current_work() against
efi_rts_work.work is not sufficient to decide whether current is running
EFI code, given that the ACPI subsystem will call efi_call_virt_pointer()
directly.

So instead, we can check whether the stashed thread stack pointer value
matches current's thread stack if the EFI runtime stack is currently in
use:

#define current_in_efi()                                               \
       (!preemptible() && spin_is_locked(&efi_rt_lock) &&              \
        on_task_stack(current, efi_rt_stack_top[-1], 1))

but this will be folded into the preceding patch, which I am not
reproducing here.
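
To spell out what each condition buys us, here is the same check written
out as an inline function (a rough sketch only, not the version that
actually gets folded into the preceding patch):

static inline bool current_in_efi(void)
{
	/*
	 * Only meaningful if we cannot be migrated: 'current' and the CPU
	 * that may be running EFI code must stay the same from here on.
	 */
	if (preemptible())
		return false;

	/* The EFI runtime stack is only in use while efi_rt_lock is held. */
	if (!spin_is_locked(&efi_rt_lock))
		return false;

	/*
	 * efi_rt_stack_top[-1] holds the stack pointer value stashed just
	 * before the switch to the EFI runtime stack. If that value lands
	 * on current's thread stack, current is the task that is inside
	 * the EFI call.
	 */
	return on_task_stack(current, efi_rt_stack_top[-1], 1);
}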

 arch/arm64/include/asm/stacktrace.h | 15 +++++++++++++++
 arch/arm64/kernel/stacktrace.c      | 12 ++++++++++++
 2 files changed, 27 insertions(+)

Comments

Mark Rutland Dec. 9, 2022, 2:37 p.m. UTC | #1
On Fri, Dec 09, 2022 at 02:34:14PM +0100, Ard Biesheuvel wrote:
> The EFI runtime services run from a dedicated stack now, and so the
> stack unwinder needs to be informed about this.
> 
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
> 
> I realised while looking into this that comparing current_work() against
> efi_rts_work.work is not sufficient to decide whether current is running
> EFI code, given that the ACPI subsystem will call efi_call_virt_pointer()
> directly.
> 
> So instead, we can check whether the stashed thread stack pointer value
> matches current's thread stack if the EFI runtime stack is currently in
> use:
> 
> #define current_in_efi()                                               \
>        (!preemptible() && spin_is_locked(&efi_rt_lock) &&              \
>         on_task_stack(current, efi_rt_stack_top[-1], 1))

Unless you're overwriting task_struct::stack (which seems scary to me), that
doesn't look right; on_task_stack() checks whether a given base + size is on
the stack allocated for the task (i.e. task_struct::stack + THREAD_SIZE), not
the stack the task is currently using.
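
For reference, what on_task_stack() boils down to is roughly the following
(a paraphrase, not the literal <asm/stacktrace.h> definition; the _sketch
name is invented here):

/*
 * Is the range [sp, sp + size) inside the stack that was *allocated*
 * for tsk, i.e. task_stack_page(tsk) .. task_stack_page(tsk) + THREAD_SIZE?
 */
static inline bool on_task_stack_sketch(const struct task_struct *tsk,
					unsigned long sp, unsigned long size)
{
	unsigned long low  = (unsigned long)task_stack_page(tsk);
	unsigned long high = low + THREAD_SIZE;

	if (sp < low || sp >= high)
		return false;

	return size <= high - sp;
}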

I would expect this to be something like:

#define current_in_efi()						\
	(!preemptible() && spin_is_locked(&efi_rt_lock) &&		\
	 stackinfo_on_stack(stackinfo_get_efi(), current_stack_pointer, 1))

... or an inline function given this is sufficiently painful as a macro.

... unless I've confused myself?

FWIW, the patch below looks good to me!

Mark.

> but this will be folded into the preceding patch, which I am not
> reproducing here.
> 
>  arch/arm64/include/asm/stacktrace.h | 15 +++++++++++++++
>  arch/arm64/kernel/stacktrace.c      | 12 ++++++++++++
>  2 files changed, 27 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
> index 5a0edb064ea478bb..327cdcfcb1db0ad5 100644
> --- a/arch/arm64/include/asm/stacktrace.h
> +++ b/arch/arm64/include/asm/stacktrace.h
> @@ -104,4 +104,19 @@ static inline struct stack_info stackinfo_get_sdei_critical(void)
>  #define stackinfo_get_sdei_critical()	stackinfo_get_unknown()
>  #endif
>  
> +#ifdef CONFIG_EFI
> +extern u64 *efi_rt_stack_top;
> +
> +static inline struct stack_info stackinfo_get_efi(void)
> +{
> +	unsigned long high = (u64)efi_rt_stack_top;
> +	unsigned long low = high - THREAD_SIZE;
> +
> +	return (struct stack_info) {
> +		.low = low,
> +		.high = high,
> +	};
> +}
> +#endif
> +
>  #endif	/* __ASM_STACKTRACE_H */
> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> index 634279b3b03d1b07..ee9fd2018cd75ed2 100644
> --- a/arch/arm64/kernel/stacktrace.c
> +++ b/arch/arm64/kernel/stacktrace.c
> @@ -5,6 +5,7 @@
>   * Copyright (C) 2012 ARM Ltd.
>   */
>  #include <linux/kernel.h>
> +#include <linux/efi.h>
>  #include <linux/export.h>
>  #include <linux/ftrace.h>
>  #include <linux/sched.h>
> @@ -12,6 +13,7 @@
>  #include <linux/sched/task_stack.h>
>  #include <linux/stacktrace.h>
>  
> +#include <asm/efi.h>
>  #include <asm/irq.h>
>  #include <asm/stack_pointer.h>
>  #include <asm/stacktrace.h>
> @@ -186,6 +188,13 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
>  			: stackinfo_get_unknown();		\
>  	})
>  
> +#define STACKINFO_EFI						\
> +	({							\
> +		((task == current) && current_in_efi())		\
> +			? stackinfo_get_efi()			\
> +			: stackinfo_get_unknown();		\
> +	})
> +
>  noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
>  			      void *cookie, struct task_struct *task,
>  			      struct pt_regs *regs)
> @@ -199,6 +208,9 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
>  #if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE)
>  		STACKINFO_SDEI(normal),
>  		STACKINFO_SDEI(critical),
> +#endif
> +#ifdef CONFIG_EFI
> +		STACKINFO_EFI,
>  #endif
>  	};
>  	struct unwind_state state = {
> -- 
> 2.35.1
>
Ard Biesheuvel Dec. 9, 2022, 2:46 p.m. UTC | #2
On Fri, 9 Dec 2022 at 15:37, Mark Rutland <mark.rutland@arm.com> wrote:
>
> On Fri, Dec 09, 2022 at 02:34:14PM +0100, Ard Biesheuvel wrote:
> > The EFI runtime services run from a dedicated stack now, and so the
> > stack unwinder needs to be informed about this.
> >
> > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > ---
> >
> > I realised while looking into this that comparing current_work() against
> > efi_rts_work.work is not sufficient to decide whether current is running
> > EFI code, given that the ACPI subsystem will call efi_call_virt_pointer()
> > directly.
> >
> > So instead, we can check whether the stashed thread stack pointer value
> > matches current's thread stack if the EFI runtime stack is currently in
> > use:
> >
> > #define current_in_efi()                                               \
> >        (!preemptible() && spin_is_locked(&efi_rt_lock) &&              \
> >         on_task_stack(current, efi_rt_stack_top[-1], 1))
>
> Unless you're overwriting task_struct::stack (which seems scary to me), that
> doesn't look right; on_task_stack() checks whether a given base + size is on
> the stack allocated for the task (i.e. task_struct::stack + THREAD_SIZE), not
> the stack the task is currently using.
>

Note the [-1].

efi_rt_stack_top[-1] contains the value the stack pointer had before
switching to the EFI runtime stack. If that value is an address
covered by current's thread stack, current must be the task that has a
live call frame inside the EFI code at the time the call stack is
captured.
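
Put differently, the layout is (conceptual sketch only; the stash itself
is done by the assembly trampoline, and the helper name below is made up
purely for illustration):

/*
 *   efi_rt_stack_top      : one u64 past the highest EFI stack address
 *   efi_rt_stack_top[-1]  : SP value at the moment of the switch, i.e.
 *                           an address inside the caller's thread stack
 *   below that            : EFI runtime call frames, growing downwards
 */
static inline bool task_made_the_efi_call(struct task_struct *tsk)
{
	return on_task_stack(tsk, efi_rt_stack_top[-1], 1);
}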

> I would expect this to be something like:
>
> #define current_in_efi()                                                \
>         (!preemptible() && spin_is_locked(&efi_rt_lock) &&              \
>          stackinfo_on_stack(stackinfo_get_efi(), current_stack_pointer, 1))
>
> ... or an inline function given this is sufficiently painful as a macro.
>

current_stack_pointer is the actual value of SP at the time this code
is called. So if we are unwinding from a sync exception taken while
handling an IRQ that arrived while running the EFI code, that SP value
has nothing to do with the EFI stack.


> ... unless I've confused myself?
>

I think you might have ... :-)

> FWIW, the patch below looks good to me!
>
> Mark.
>

Cheers



> > but this will be folded into the preceding patch, which I am not
> > reproducing here.
> >
> >  arch/arm64/include/asm/stacktrace.h | 15 +++++++++++++++
> >  arch/arm64/kernel/stacktrace.c      | 12 ++++++++++++
> >  2 files changed, 27 insertions(+)
> >
> > diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
> > index 5a0edb064ea478bb..327cdcfcb1db0ad5 100644
> > --- a/arch/arm64/include/asm/stacktrace.h
> > +++ b/arch/arm64/include/asm/stacktrace.h
> > @@ -104,4 +104,19 @@ static inline struct stack_info stackinfo_get_sdei_critical(void)
> >  #define stackinfo_get_sdei_critical()        stackinfo_get_unknown()
> >  #endif
> >
> > +#ifdef CONFIG_EFI
> > +extern u64 *efi_rt_stack_top;
> > +
> > +static inline struct stack_info stackinfo_get_efi(void)
> > +{
> > +     unsigned long high = (u64)efi_rt_stack_top;
> > +     unsigned long low = high - THREAD_SIZE;
> > +
> > +     return (struct stack_info) {
> > +             .low = low,
> > +             .high = high,
> > +     };
> > +}
> > +#endif
> > +
> >  #endif       /* __ASM_STACKTRACE_H */
> > diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> > index 634279b3b03d1b07..ee9fd2018cd75ed2 100644
> > --- a/arch/arm64/kernel/stacktrace.c
> > +++ b/arch/arm64/kernel/stacktrace.c
> > @@ -5,6 +5,7 @@
> >   * Copyright (C) 2012 ARM Ltd.
> >   */
> >  #include <linux/kernel.h>
> > +#include <linux/efi.h>
> >  #include <linux/export.h>
> >  #include <linux/ftrace.h>
> >  #include <linux/sched.h>
> > @@ -12,6 +13,7 @@
> >  #include <linux/sched/task_stack.h>
> >  #include <linux/stacktrace.h>
> >
> > +#include <asm/efi.h>
> >  #include <asm/irq.h>
> >  #include <asm/stack_pointer.h>
> >  #include <asm/stacktrace.h>
> > @@ -186,6 +188,13 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
> >                       : stackinfo_get_unknown();              \
> >       })
> >
> > +#define STACKINFO_EFI                                                \
> > +     ({                                                      \
> > +             ((task == current) && current_in_efi())         \
> > +                     ? stackinfo_get_efi()                   \
> > +                     : stackinfo_get_unknown();              \
> > +     })
> > +
> >  noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
> >                             void *cookie, struct task_struct *task,
> >                             struct pt_regs *regs)
> > @@ -199,6 +208,9 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
> >  #if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE)
> >               STACKINFO_SDEI(normal),
> >               STACKINFO_SDEI(critical),
> > +#endif
> > +#ifdef CONFIG_EFI
> > +             STACKINFO_EFI,
> >  #endif
> >       };
> >       struct unwind_state state = {
> > --
> > 2.35.1
> >
Mark Rutland Dec. 9, 2022, 3 p.m. UTC | #3
On Fri, Dec 09, 2022 at 03:46:48PM +0100, Ard Biesheuvel wrote:
> On Fri, 9 Dec 2022 at 15:37, Mark Rutland <mark.rutland@arm.com> wrote:
> >
> > On Fri, Dec 09, 2022 at 02:34:14PM +0100, Ard Biesheuvel wrote:
> > > The EFI runtime services run from a dedicated stack now, and so the
> > > stack unwinder needs to be informed about this.
> > >
> > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > > ---
> > >
> > > I realised while looking into this that comparing current_work() against
> > > efi_rts_work.work is not sufficient to decide whether current is running
> > > EFI code, given that the ACPI subsystem will call efi_call_virt_pointer()
> > > directly.
> > >
> > > So instead, we can check whether the stashed thread stack pointer value
> > > matches current's thread stack if the EFI runtime stack is currently in
> > > use:
> > >
> > > #define current_in_efi()                                               \
> > >        (!preemptible() && spin_is_locked(&efi_rt_lock) &&              \
> > >         on_task_stack(current, efi_rt_stack_top[-1], 1))
> >
> > Unless you're overwriting task_struct::stack (which seems scary to me), that
> > doesn't look right; on_task_stack() checks whether a given base + size is on
> > the stack allocated for the task (i.e. task_struct::stack + THREAD_SIZE), not
> > the stack the task is currently using.
> >
> 
> Note the [-1].
> 
> efi_rt_stack_top[-1] contains the value the stack pointer had before
> switching to the EFI runtime stack. If that value is an address
> covered by current's thread stack, current must be the task that has a
> live call frame inside the EFI code at the time the call stack is
> captured.

Ah, I had missed that subtlety.

Would you mind if we add that first sentence as a comment for that code, i.e.

| /*
|  * efi_rt_stack_top[-1] contains the value the stack pointer had before
|  * switching to the EFI runtime stack.
|  */
|  #define current_in_efi()                                               \
|         (!preemptible() && spin_is_locked(&efi_rt_lock) &&              \
|          on_task_stack(current, efi_rt_stack_top[-1], 1))

... that way when I look at this in 3 to 6 months' time I won't fall into the
same trap. :)

I assume that the EFI trampoline code clobbers the value on the way out so it
doesn't spuriously match later.

> > I would expect this to be something like:
> >
> > #define current_in_efi()                                                \
> >         (!preemptible() && spin_is_locked(&efi_rt_lock) &&              \
> >          stackinfo_on_stack(stackinfo_get_efi(), current_stack_pointer, 1))
> >
> > ... or an inline function given this is sufficiently painful as a macro.
> 
> current_stack_pointer is the actual value of SP at the time this code
> is called. So if we are unwinding from a sync exception taken while
> handling an IRQ that arrived while running the EFI code, that SP value
> has nothing to do with the EFI stack.

Yes, good point.

> > ... unless I've confused myself?
> >
> 
> I think you might have ... :-)

:)

Mark.
Ard Biesheuvel Dec. 9, 2022, 3:10 p.m. UTC | #4
On Fri, 9 Dec 2022 at 16:00, Mark Rutland <mark.rutland@arm.com> wrote:
>
> On Fri, Dec 09, 2022 at 03:46:48PM +0100, Ard Biesheuvel wrote:
> > On Fri, 9 Dec 2022 at 15:37, Mark Rutland <mark.rutland@arm.com> wrote:
> > >
> > > On Fri, Dec 09, 2022 at 02:34:14PM +0100, Ard Biesheuvel wrote:
> > > > The EFI runtime services run from a dedicated stack now, and so the
> > > > stack unwinder needs to be informed about this.
> > > >
> > > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > > > ---
> > > >
> > > > I realised while looking into this that comparing current_work() against
> > > > efi_rts_work.work is not sufficient to decide whether current is running
> > > > EFI code, given that the ACPI subsystem will call efi_call_virt_pointer()
> > > > directly.
> > > >
> > > > So instead, we can check whether the stashed thread stack pointer value
> > > > matches current's thread stack if the EFI runtime stack is currently in
> > > > use:
> > > >
> > > > #define current_in_efi()                                               \
> > > >        (!preemptible() && spin_is_locked(&efi_rt_lock) &&              \
> > > >         on_task_stack(current, efi_rt_stack_top[-1], 1))
> > >
> > > Unless you're overwriting task_struct::stack (which seems scary to me), that
> > > doesn't look right; on_task_stack() checks whether a given base + size is on
> > > the stack allocated for the task (i.e. task_struct::stack + THREAD_SIZE), not
> > > the stack the task is currently using.
> > >
> >
> > Note the [-1].
> >
> > efi_rt_stack_top[-1] contains the value the stack pointer had before
> > switching to the EFI runtime stack. If that value is an address
> > covered by current's thread stack, current must be the task that has a
> > live call frame inside the EFI code at the time the call stack is
> > captured.
>
> Ah, I had missed that subtlety.
>
> Would you mind if we add that first sentence as a comment for that code, i.e.
>
> | /*
> |  * efi_rt_stack_top[-1] contains the value the stack pointer had before
> |  * switching to the EFI runtime stack.
> |  */
> |  #define current_in_efi()                                               \
> |         (!preemptible() && spin_is_locked(&efi_rt_lock) &&              \
> |          on_task_stack(current, efi_rt_stack_top[-1], 1))
>
> ... that way when I look at this in 3 to 6 months time I won't fall into the
> same trap. :)
>

Will do.

> I assume that the EFI trampoline code clobbers the value on the way out so it
> doesn't spuriously match later.
>

Not currently, no. But that's easily added.
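
Conceptually it would just amount to wiping the stashed slot once the call
has returned, i.e. something like the line below, although the actual
change would be a store in the assembly trampoline after SP has been
switched back to the thread stack:

	efi_rt_stack_top[-1] = 0;	/* clear the stashed SP on the way out */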


> > > I would expect this to be something like:
> > >
> > > #define current_in_efi()                                                \
> > >         (!preemptible() && spin_is_locked(&efi_rt_lock) &&              \
> > >          stackinfo_on_stack(stackinfo_get_efi(), current_stack_pointer, 1))
> > >
> > > ... or an inline function given this is sufficiently painful as a macro.
> >
> > current_stack_pointer is the actual value of SP at the time this code
> > is called. So if we are unwinding from a sync exception taken while
> > handling an IRQ that arrived while running the EFI code, that SP value
> > has nothing to do with the EFI stack.
>
> Yes, good point.
>
> > > ... unless I've confused myself?
> > >
> >
> > I think you might have ... :-)
>
> :)
>
> Mark.

Patch

diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 5a0edb064ea478bb..327cdcfcb1db0ad5 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -104,4 +104,19 @@  static inline struct stack_info stackinfo_get_sdei_critical(void)
 #define stackinfo_get_sdei_critical()	stackinfo_get_unknown()
 #endif
 
+#ifdef CONFIG_EFI
+extern u64 *efi_rt_stack_top;
+
+static inline struct stack_info stackinfo_get_efi(void)
+{
+	unsigned long high = (u64)efi_rt_stack_top;
+	unsigned long low = high - THREAD_SIZE;
+
+	return (struct stack_info) {
+		.low = low,
+		.high = high,
+	};
+}
+#endif
+
 #endif	/* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 634279b3b03d1b07..ee9fd2018cd75ed2 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -5,6 +5,7 @@ 
  * Copyright (C) 2012 ARM Ltd.
  */
 #include <linux/kernel.h>
+#include <linux/efi.h>
 #include <linux/export.h>
 #include <linux/ftrace.h>
 #include <linux/sched.h>
@@ -12,6 +13,7 @@ 
 #include <linux/sched/task_stack.h>
 #include <linux/stacktrace.h>
 
+#include <asm/efi.h>
 #include <asm/irq.h>
 #include <asm/stack_pointer.h>
 #include <asm/stacktrace.h>
@@ -186,6 +188,13 @@  void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
 			: stackinfo_get_unknown();		\
 	})
 
+#define STACKINFO_EFI						\
+	({							\
+		((task == current) && current_in_efi())		\
+			? stackinfo_get_efi()			\
+			: stackinfo_get_unknown();		\
+	})
+
 noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
 			      void *cookie, struct task_struct *task,
 			      struct pt_regs *regs)
@@ -199,6 +208,9 @@  noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
 #if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE)
 		STACKINFO_SDEI(normal),
 		STACKINFO_SDEI(critical),
+#endif
+#ifdef CONFIG_EFI
+		STACKINFO_EFI,
 #endif
 	};
 	struct unwind_state state = {