diff mbox series

[v9,5/7] arm64: ipi_debug: Add support for backtrace using the debug IPI

Message ID 20230601143109.v9.5.I65981105e1f62550b0316625dd1e599deaf9e1aa@changeid (mailing list archive)
State New, archived
Headers show
Series arm64: Add debug IPI for backtraces / kgdb; try to use NMI for it | expand

Commit Message

Doug Anderson June 1, 2023, 9:31 p.m. UTC
From: Sumit Garg <sumit.garg@linaro.org>

Enable arch_trigger_cpumask_backtrace() support on arm64 using the new
debug IPI. With this arm64 can now get backtraces in cases where
callers of the trigger_xyz_backtrace() class of functions don't check
the return code and implement a fallback. One example is
`kernel.softlockup_all_cpu_backtrace`. This also allows us to
backtrace hard locked up CPUs in cases where the debug IPI is backed
by an NMI (or pseudo NMI).

Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
---

Changes in v9:
- Added comments that we might not be using NMI always.
- Renamed "NMI IPI" to "debug IPI" since it might not be backed by NMI.
- arch_trigger_cpumask_backtrace() no longer returns bool

Changes in v8:
- Removed "#ifdef CONFIG_SMP" since arm64 is always SMP

 arch/arm64/include/asm/irq.h  |  3 +++
 arch/arm64/kernel/ipi_debug.c | 25 +++++++++++++++++++++++--
 2 files changed, 26 insertions(+), 2 deletions(-)

Comments

Mark Rutland Aug. 7, 2023, 10:23 a.m. UTC | #1
On Thu, Jun 01, 2023 at 02:31:49PM -0700, Douglas Anderson wrote:
> From: Sumit Garg <sumit.garg@linaro.org>
> 
> Enable arch_trigger_cpumask_backtrace() support on arm64 using the new
> debug IPI. With this arm64 can now get backtraces in cases where
> callers of the trigger_xyz_backtrace() class of functions don't check
> the return code and implement a fallback. One example is
> `kernel.softlockup_all_cpu_backtrace`. This also allows us to
> backtrace hard locked up CPUs in cases where the debug IPI is backed
> by an NMI (or pseudo NMI).
> 
> Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
> Signed-off-by: Douglas Anderson <dianders@chromium.org>
> ---
> 
> Changes in v9:
> - Added comments that we might not be using NMI always.
> - Renamed "NMI IPI" to "debug IPI" since it might not be backed by NMI.
> - arch_trigger_cpumask_backtrace() no longer returns bool
> 
> Changes in v8:
> - Removed "#ifdef CONFIG_SMP" since arm64 is always SMP
> 
>  arch/arm64/include/asm/irq.h  |  3 +++
>  arch/arm64/kernel/ipi_debug.c | 25 +++++++++++++++++++++++--
>  2 files changed, 26 insertions(+), 2 deletions(-)

As with prior patches, I'd prefer if this lived in smp.c with the other IPI
logic.

> 
> diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
> index fac08e18bcd5..be2d103f316e 100644
> --- a/arch/arm64/include/asm/irq.h
> +++ b/arch/arm64/include/asm/irq.h
> @@ -6,6 +6,9 @@
>  
>  #include <asm-generic/irq.h>
>  
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self);
> +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
> +
>  struct pt_regs;
>  
>  int set_handle_irq(void (*handle_irq)(struct pt_regs *));
> diff --git a/arch/arm64/kernel/ipi_debug.c b/arch/arm64/kernel/ipi_debug.c
> index b57833e31eaf..6984ed507e1f 100644
> --- a/arch/arm64/kernel/ipi_debug.c
> +++ b/arch/arm64/kernel/ipi_debug.c
> @@ -8,6 +8,7 @@
>  
>  #include <linux/interrupt.h>
>  #include <linux/irq.h>
> +#include <linux/nmi.h>
>  #include <linux/smp.h>
>  
>  #include "ipi_debug.h"
> @@ -24,11 +25,31 @@ void arm64_debug_ipi(cpumask_t *mask)
>  	__ipi_send_mask(ipi_debug_desc, mask);
>  }
>  
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +{
> +	/*
> +	 * NOTE: though nmi_trigger_cpumask_backtrace has "nmi_" in the name,
> +	 * nothing about it truly needs to be backed by an NMI, it's just that
> +	 * it's _allowed_ to be called from an NMI. If set_smp_debug_ipi()
> +	 * failed to get an NMI (or pseudo-NMI) this will just be backed by a
> +	 * regular IPI.
> +	 */
> +	nmi_trigger_cpumask_backtrace(mask, exclude_self, arm64_debug_ipi);
> +}
> +
>  static irqreturn_t ipi_debug_handler(int irq, void *data)
>  {
> -	/* nop, NMI handlers for special features can be added here. */
> +	irqreturn_t ret = IRQ_NONE;
> +
> +	/*
> +	 * NOTE: Just like in arch_trigger_cpumask_backtrace(), we're calling
> +	 * a function with "nmi_" in the name but it works fine even if we
> +	 * are using a regulaor IPI.
> +	 */
> +	if (nmi_cpu_backtrace(get_irq_regs()))
> +		ret = IRQ_HANDLED;
>  

Does this need the printk_deferred_{enter,exit}() that 32-bit arm has?

Thanks,
Mark.

> -	return IRQ_NONE;
> +	return ret;
>  }
>  
>  void debug_ipi_setup(void)
> -- 
> 2.41.0.rc2.161.g9c6817b8e7-goog
>
Doug Anderson Aug. 22, 2023, 12:06 a.m. UTC | #2
Hi,

On Mon, Aug 7, 2023 at 3:23 AM Mark Rutland <mark.rutland@arm.com> wrote:
>
> On Thu, Jun 01, 2023 at 02:31:49PM -0700, Douglas Anderson wrote:
> > From: Sumit Garg <sumit.garg@linaro.org>
> >
> > Enable arch_trigger_cpumask_backtrace() support on arm64 using the new
> > debug IPI. With this arm64 can now get backtraces in cases where
> > callers of the trigger_xyz_backtrace() class of functions don't check
> > the return code and implement a fallback. One example is
> > `kernel.softlockup_all_cpu_backtrace`. This also allows us to
> > backtrace hard locked up CPUs in cases where the debug IPI is backed
> > by an NMI (or pseudo NMI).
> >
> > Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
> > Signed-off-by: Douglas Anderson <dianders@chromium.org>
> > ---
> >
> > Changes in v9:
> > - Added comments that we might not be using NMI always.
> > - Renamed "NMI IPI" to "debug IPI" since it might not be backed by NMI.
> > - arch_trigger_cpumask_backtrace() no longer returns bool
> >
> > Changes in v8:
> > - Removed "#ifdef CONFIG_SMP" since arm64 is always SMP
> >
> >  arch/arm64/include/asm/irq.h  |  3 +++
> >  arch/arm64/kernel/ipi_debug.c | 25 +++++++++++++++++++++++--
> >  2 files changed, 26 insertions(+), 2 deletions(-)
>
> As with prior patches, I'd prefer if this lived in smp.c with the other IPI
> logic.
>
> >
> > diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
> > index fac08e18bcd5..be2d103f316e 100644
> > --- a/arch/arm64/include/asm/irq.h
> > +++ b/arch/arm64/include/asm/irq.h
> > @@ -6,6 +6,9 @@
> >
> >  #include <asm-generic/irq.h>
> >
> > +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self);
> > +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
> > +
> >  struct pt_regs;
> >
> >  int set_handle_irq(void (*handle_irq)(struct pt_regs *));
> > diff --git a/arch/arm64/kernel/ipi_debug.c b/arch/arm64/kernel/ipi_debug.c
> > index b57833e31eaf..6984ed507e1f 100644
> > --- a/arch/arm64/kernel/ipi_debug.c
> > +++ b/arch/arm64/kernel/ipi_debug.c
> > @@ -8,6 +8,7 @@
> >
> >  #include <linux/interrupt.h>
> >  #include <linux/irq.h>
> > +#include <linux/nmi.h>
> >  #include <linux/smp.h>
> >
> >  #include "ipi_debug.h"
> > @@ -24,11 +25,31 @@ void arm64_debug_ipi(cpumask_t *mask)
> >       __ipi_send_mask(ipi_debug_desc, mask);
> >  }
> >
> > +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> > +{
> > +     /*
> > +      * NOTE: though nmi_trigger_cpumask_backtrace has "nmi_" in the name,
> > +      * nothing about it truly needs to be backed by an NMI, it's just that
> > +      * it's _allowed_ to be called from an NMI. If set_smp_debug_ipi()
> > +      * failed to get an NMI (or pseudo-NMI) this will just be backed by a
> > +      * regular IPI.
> > +      */
> > +     nmi_trigger_cpumask_backtrace(mask, exclude_self, arm64_debug_ipi);
> > +}
> > +
> >  static irqreturn_t ipi_debug_handler(int irq, void *data)
> >  {
> > -     /* nop, NMI handlers for special features can be added here. */
> > +     irqreturn_t ret = IRQ_NONE;
> > +
> > +     /*
> > +      * NOTE: Just like in arch_trigger_cpumask_backtrace(), we're calling
> > +      * a function with "nmi_" in the name but it works fine even if we
> > +      * are using a regulaor IPI.
> > +      */
> > +     if (nmi_cpu_backtrace(get_irq_regs()))
> > +             ret = IRQ_HANDLED;
> >
>
> Does this need the printk_deferred_{enter,exit}() that 32-bit arm has?

I don't _think_ so, but I also don't _think_ it's needed for arm32.
;-) Let me explain my logic and you can tell me if it sounds right to
you.

If we're doing the backtrace in pseudo-NMI context then we definitely
don't need it. Specifically, the printk_deferred_{enter,exit}() just
manages the per-cpu "printk_context" value. That value doesn't matter
if "in_nmi()" returns true.

If we're _not_ doing the backtrace in pseudo-NMI context then I think
we also don't need it. After all, if we're not in pseudo-NMI context
then it's perfectly fine to print, right?

While it's hard to know 100% for sure, my best guess is that in arm
this might have helped prevent stack traces from getting interspersed
among one another. I guess this is no longer needed as of commit
55d6af1d6688 ("lib/nmi_backtrace: explicitly serialize banner and
regs")? In any case, when I tested this earlier things seemed to
printout fine without it...

That being said, it wouldn't hurt to include it here and I'll do it if you want.

-Doug
Mark Rutland Aug. 22, 2023, 6:35 a.m. UTC | #3
On Mon, Aug 21, 2023 at 05:06:50PM -0700, Doug Anderson wrote:
> On Mon, Aug 7, 2023 at 3:23 AM Mark Rutland <mark.rutland@arm.com> wrote:
> > On Thu, Jun 01, 2023 at 02:31:49PM -0700, Douglas Anderson wrote:
> > > From: Sumit Garg <sumit.garg@linaro.org>

> > >  static irqreturn_t ipi_debug_handler(int irq, void *data)
> > >  {
> > > -     /* nop, NMI handlers for special features can be added here. */
> > > +     irqreturn_t ret = IRQ_NONE;
> > > +
> > > +     /*
> > > +      * NOTE: Just like in arch_trigger_cpumask_backtrace(), we're calling
> > > +      * a function with "nmi_" in the name but it works fine even if we
> > > +      * are using a regulaor IPI.
> > > +      */
> > > +     if (nmi_cpu_backtrace(get_irq_regs()))
> > > +             ret = IRQ_HANDLED;
> > >
> >
> > Does this need the printk_deferred_{enter,exit}() that 32-bit arm has?
> 
> I don't _think_ so, but I also don't _think_ it's needed for arm32.
> ;-) Let me explain my logic and you can tell me if it sounds right to
> you.
> 
> If we're doing the backtrace in pseudo-NMI context then we definitely
> don't need it. Specifically, the printk_deferred_{enter,exit}() just
> manages the per-cpu "printk_context" value. That value doesn't matter
> if "in_nmi()" returns true.
> 
> If we're _not_ doing the backtrace in pseudo-NMI context then I think
> we also don't need it. After all, if we're not in pseudo-NMI context
> then it's perfectly fine to print, right?
> 
> While it's hard to know 100% for sure, my best guess is that in arm
> this might have helped prevent stack traces from getting interspersed
> among one another. I guess this is no longer needed as of commit
> 55d6af1d6688 ("lib/nmi_backtrace: explicitly serialize banner and
> regs")? In any case, when I tested this earlier things seemed to
> printout fine without it...

Thanks for that explanation; that makes sense to me! Looking around a bit I see
that x86 doesn't bother either.

> That being said, it wouldn't hurt to include it here and I'll do it if you
> want.

No need -- I'm happy without it.

Thanks,
Mark.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index fac08e18bcd5..be2d103f316e 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -6,6 +6,9 @@ 
 
 #include <asm-generic/irq.h>
 
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+
 struct pt_regs;
 
 int set_handle_irq(void (*handle_irq)(struct pt_regs *));
diff --git a/arch/arm64/kernel/ipi_debug.c b/arch/arm64/kernel/ipi_debug.c
index b57833e31eaf..6984ed507e1f 100644
--- a/arch/arm64/kernel/ipi_debug.c
+++ b/arch/arm64/kernel/ipi_debug.c
@@ -8,6 +8,7 @@ 
 
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/nmi.h>
 #include <linux/smp.h>
 
 #include "ipi_debug.h"
@@ -24,11 +25,31 @@  void arm64_debug_ipi(cpumask_t *mask)
 	__ipi_send_mask(ipi_debug_desc, mask);
 }
 
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+{
+	/*
+	 * NOTE: though nmi_trigger_cpumask_backtrace has "nmi_" in the name,
+	 * nothing about it truly needs to be backed by an NMI, it's just that
+	 * it's _allowed_ to be called from an NMI. If set_smp_debug_ipi()
+	 * failed to get an NMI (or pseudo-NMI) this will just be backed by a
+	 * regular IPI.
+	 */
+	nmi_trigger_cpumask_backtrace(mask, exclude_self, arm64_debug_ipi);
+}
+
 static irqreturn_t ipi_debug_handler(int irq, void *data)
 {
-	/* nop, NMI handlers for special features can be added here. */
+	irqreturn_t ret = IRQ_NONE;
+
+	/*
+	 * NOTE: Just like in arch_trigger_cpumask_backtrace(), we're calling
+	 * a function with "nmi_" in the name but it works fine even if we
+	 * are using a regulaor IPI.
+	 */
+	if (nmi_cpu_backtrace(get_irq_regs()))
+		ret = IRQ_HANDLED;
 
-	return IRQ_NONE;
+	return ret;
 }
 
 void debug_ipi_setup(void)