
[v5,5/5] arm64: ipi_nmi: Add support for NMI backtrace

Message ID 1602673931-28782-6-git-send-email-sumit.garg@linaro.org (mailing list archive)
State New, archived
Series arm64: Add framework to turn an IPI as NMI

Commit Message

Sumit Garg Oct. 14, 2020, 11:12 a.m. UTC
Enable NMI backtrace support on arm64 using an IPI turned into an NMI,
leveraging pseudo-NMI support. It is now possible for users to get a
backtrace of a CPU stuck in a hard lockup using magic SysRq.

Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
---
 arch/arm64/include/asm/irq.h |  6 ++++++
 arch/arm64/kernel/ipi_nmi.c  | 12 +++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)
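
For reference, the backtrace this series enables is the one behind the magic
SysRq 'l' key ("show backtrace of all active CPUs"), which ends up in
arch_trigger_cpumask_backtrace() on architectures that provide it. A minimal
user-space sketch of exercising it (assuming SysRq is enabled and
/proc/sysrq-trigger is writable; equivalent to `echo l > /proc/sysrq-trigger`):

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* 'l' asks the kernel for a backtrace of all active CPUs */
	int fd = open("/proc/sysrq-trigger", O_WRONLY);

	if (fd < 0)
		return 1;
	write(fd, "l", 1);
	close(fd);
	return 0;
}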

Comments

Masayoshi Mizuma Oct. 15, 2020, 1:17 a.m. UTC | #1
On Wed, Oct 14, 2020 at 04:42:11PM +0530, Sumit Garg wrote:
> Enable NMI backtrace support on arm64 using IPI turned as an NMI
> leveraging pseudo NMIs support. It is now possible for users to get a
> backtrace of a CPU stuck in hard-lockup using magic SYSRQ.
> 
> Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
> ---
>  arch/arm64/include/asm/irq.h |  6 ++++++
>  arch/arm64/kernel/ipi_nmi.c  | 12 +++++++++++-
>  2 files changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
> index b2b0c64..e840bf1 100644
> --- a/arch/arm64/include/asm/irq.h
> +++ b/arch/arm64/include/asm/irq.h
> @@ -6,6 +6,12 @@
>  
>  #include <asm-generic/irq.h>
>  
> +#ifdef CONFIG_SMP
> +extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
> +					   bool exclude_self);
> +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
> +#endif
> +
>  struct pt_regs;
>  
>  static inline int nr_legacy_irqs(void)
> diff --git a/arch/arm64/kernel/ipi_nmi.c b/arch/arm64/kernel/ipi_nmi.c
> index e0a9e03..e1dc19d 100644
> --- a/arch/arm64/kernel/ipi_nmi.c
> +++ b/arch/arm64/kernel/ipi_nmi.c
> @@ -9,6 +9,7 @@
>  #include <linux/interrupt.h>
>  #include <linux/irq.h>
>  #include <linux/kgdb.h>
> +#include <linux/nmi.h>
>  #include <linux/smp.h>
>  
>  #include <asm/nmi.h>
> @@ -25,12 +26,21 @@ void arch_send_call_nmi_func_ipi_mask(cpumask_t *mask)
>  	__ipi_send_mask(ipi_desc, mask);
>  }
>  
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
> +{
> +	nmi_trigger_cpumask_backtrace(mask, exclude_self,
> +				      arch_send_call_nmi_func_ipi_mask);
> +}
> +
>  static irqreturn_t ipi_nmi_handler(int irq, void *data)
>  {
>  	unsigned int cpu = smp_processor_id();
>  
> -	ipi_kgdb_nmicallback(cpu, get_irq_regs());
> +	if (nmi_cpu_backtrace(get_irq_regs()))
> +		goto out;
>  
> +	ipi_kgdb_nmicallback(cpu, get_irq_regs());
> +out:
>  	return IRQ_HANDLED;
>  }
>  
> -- 

It works well. Please feel free to add:

        Tested-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>

Thanks!
Masa
Marc Zyngier Oct. 19, 2020, 12:20 p.m. UTC | #2
On 2020-10-14 12:12, Sumit Garg wrote:
> Enable NMI backtrace support on arm64 using IPI turned as an NMI
> leveraging pseudo NMIs support. It is now possible for users to get a
> backtrace of a CPU stuck in hard-lockup using magic SYSRQ.
> 
> Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
> ---
>  arch/arm64/include/asm/irq.h |  6 ++++++
>  arch/arm64/kernel/ipi_nmi.c  | 12 +++++++++++-
>  2 files changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/irq.h 
> b/arch/arm64/include/asm/irq.h
> index b2b0c64..e840bf1 100644
> --- a/arch/arm64/include/asm/irq.h
> +++ b/arch/arm64/include/asm/irq.h
> @@ -6,6 +6,12 @@
> 
>  #include <asm-generic/irq.h>
> 
> +#ifdef CONFIG_SMP
> +extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
> +					   bool exclude_self);
> +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
> +#endif
> +
>  struct pt_regs;
> 
>  static inline int nr_legacy_irqs(void)
> diff --git a/arch/arm64/kernel/ipi_nmi.c b/arch/arm64/kernel/ipi_nmi.c
> index e0a9e03..e1dc19d 100644
> --- a/arch/arm64/kernel/ipi_nmi.c
> +++ b/arch/arm64/kernel/ipi_nmi.c
> @@ -9,6 +9,7 @@
>  #include <linux/interrupt.h>
>  #include <linux/irq.h>
>  #include <linux/kgdb.h>
> +#include <linux/nmi.h>
>  #include <linux/smp.h>
> 
>  #include <asm/nmi.h>
> @@ -25,12 +26,21 @@ void arch_send_call_nmi_func_ipi_mask(cpumask_t 
> *mask)
>  	__ipi_send_mask(ipi_desc, mask);
>  }
> 
> +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool 
> exclude_self)
> +{
> +	nmi_trigger_cpumask_backtrace(mask, exclude_self,
> +				      arch_send_call_nmi_func_ipi_mask);
> +}
> +
>  static irqreturn_t ipi_nmi_handler(int irq, void *data)
>  {
>  	unsigned int cpu = smp_processor_id();
> 
> -	ipi_kgdb_nmicallback(cpu, get_irq_regs());
> +	if (nmi_cpu_backtrace(get_irq_regs()))
> +		goto out;
> 
> +	ipi_kgdb_nmicallback(cpu, get_irq_regs());
> +out:
>  	return IRQ_HANDLED;
>  }

Can't you have *both* a request for a backtrace and a KGDB call?
It really shouldn't be either/or. It also outlines how well shared
interrupts work with edge triggered signalling...

         M.
Sumit Garg Oct. 20, 2020, 9:13 a.m. UTC | #3
On Mon, 19 Oct 2020 at 17:50, Marc Zyngier <maz@kernel.org> wrote:
>
> On 2020-10-14 12:12, Sumit Garg wrote:
> > Enable NMI backtrace support on arm64 using IPI turned as an NMI
> > leveraging pseudo NMIs support. It is now possible for users to get a
> > backtrace of a CPU stuck in hard-lockup using magic SYSRQ.
> >
> > Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
> > ---
> >  arch/arm64/include/asm/irq.h |  6 ++++++
> >  arch/arm64/kernel/ipi_nmi.c  | 12 +++++++++++-
> >  2 files changed, 17 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/arm64/include/asm/irq.h
> > b/arch/arm64/include/asm/irq.h
> > index b2b0c64..e840bf1 100644
> > --- a/arch/arm64/include/asm/irq.h
> > +++ b/arch/arm64/include/asm/irq.h
> > @@ -6,6 +6,12 @@
> >
> >  #include <asm-generic/irq.h>
> >
> > +#ifdef CONFIG_SMP
> > +extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
> > +                                        bool exclude_self);
> > +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
> > +#endif
> > +
> >  struct pt_regs;
> >
> >  static inline int nr_legacy_irqs(void)
> > diff --git a/arch/arm64/kernel/ipi_nmi.c b/arch/arm64/kernel/ipi_nmi.c
> > index e0a9e03..e1dc19d 100644
> > --- a/arch/arm64/kernel/ipi_nmi.c
> > +++ b/arch/arm64/kernel/ipi_nmi.c
> > @@ -9,6 +9,7 @@
> >  #include <linux/interrupt.h>
> >  #include <linux/irq.h>
> >  #include <linux/kgdb.h>
> > +#include <linux/nmi.h>
> >  #include <linux/smp.h>
> >
> >  #include <asm/nmi.h>
> > @@ -25,12 +26,21 @@ void arch_send_call_nmi_func_ipi_mask(cpumask_t
> > *mask)
> >       __ipi_send_mask(ipi_desc, mask);
> >  }
> >
> > +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool
> > exclude_self)
> > +{
> > +     nmi_trigger_cpumask_backtrace(mask, exclude_self,
> > +                                   arch_send_call_nmi_func_ipi_mask);
> > +}
> > +
> >  static irqreturn_t ipi_nmi_handler(int irq, void *data)
> >  {
> >       unsigned int cpu = smp_processor_id();
> >
> > -     ipi_kgdb_nmicallback(cpu, get_irq_regs());
> > +     if (nmi_cpu_backtrace(get_irq_regs()))
> > +             goto out;
> >
> > +     ipi_kgdb_nmicallback(cpu, get_irq_regs());
> > +out:
> >       return IRQ_HANDLED;
> >  }
>
> Can't you have *both* a request for a backtrace and a KGDB call?
> It really shouldn't be either/or. It also outlines how well shared
> interrupts work with edge triggered signalling...

Unfortunately, NMIs don't seem to support shared mode [1].

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/irq/manage.c#n1480

-Sumit

>
>          M.
> --
> Jazz is not dead. It just smells funny...
Marc Zyngier Oct. 21, 2020, 10:32 a.m. UTC | #4
On 2020-10-20 10:13, Sumit Garg wrote:
> On Mon, 19 Oct 2020 at 17:50, Marc Zyngier <maz@kernel.org> wrote:
>> 
>> On 2020-10-14 12:12, Sumit Garg wrote:
>> > Enable NMI backtrace support on arm64 using IPI turned as an NMI
>> > leveraging pseudo NMIs support. It is now possible for users to get a
>> > backtrace of a CPU stuck in hard-lockup using magic SYSRQ.
>> >
>> > Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
>> > ---
>> >  arch/arm64/include/asm/irq.h |  6 ++++++
>> >  arch/arm64/kernel/ipi_nmi.c  | 12 +++++++++++-
>> >  2 files changed, 17 insertions(+), 1 deletion(-)
>> >
>> > diff --git a/arch/arm64/include/asm/irq.h
>> > b/arch/arm64/include/asm/irq.h
>> > index b2b0c64..e840bf1 100644
>> > --- a/arch/arm64/include/asm/irq.h
>> > +++ b/arch/arm64/include/asm/irq.h
>> > @@ -6,6 +6,12 @@
>> >
>> >  #include <asm-generic/irq.h>
>> >
>> > +#ifdef CONFIG_SMP
>> > +extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
>> > +                                        bool exclude_self);
>> > +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
>> > +#endif
>> > +
>> >  struct pt_regs;
>> >
>> >  static inline int nr_legacy_irqs(void)
>> > diff --git a/arch/arm64/kernel/ipi_nmi.c b/arch/arm64/kernel/ipi_nmi.c
>> > index e0a9e03..e1dc19d 100644
>> > --- a/arch/arm64/kernel/ipi_nmi.c
>> > +++ b/arch/arm64/kernel/ipi_nmi.c
>> > @@ -9,6 +9,7 @@
>> >  #include <linux/interrupt.h>
>> >  #include <linux/irq.h>
>> >  #include <linux/kgdb.h>
>> > +#include <linux/nmi.h>
>> >  #include <linux/smp.h>
>> >
>> >  #include <asm/nmi.h>
>> > @@ -25,12 +26,21 @@ void arch_send_call_nmi_func_ipi_mask(cpumask_t
>> > *mask)
>> >       __ipi_send_mask(ipi_desc, mask);
>> >  }
>> >
>> > +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool
>> > exclude_self)
>> > +{
>> > +     nmi_trigger_cpumask_backtrace(mask, exclude_self,
>> > +                                   arch_send_call_nmi_func_ipi_mask);
>> > +}
>> > +
>> >  static irqreturn_t ipi_nmi_handler(int irq, void *data)
>> >  {
>> >       unsigned int cpu = smp_processor_id();
>> >
>> > -     ipi_kgdb_nmicallback(cpu, get_irq_regs());
>> > +     if (nmi_cpu_backtrace(get_irq_regs()))
>> > +             goto out;
>> >
>> > +     ipi_kgdb_nmicallback(cpu, get_irq_regs());
>> > +out:
>> >       return IRQ_HANDLED;
>> >  }
>> 
>> Can't you have *both* a request for a backtrace and a KGDB call?
>> It really shouldn't be either/or. It also outlines how well shared
>> interrupts work with edge triggered signalling...
> 
> Unfortunately, NMIs don't seem to support shared mode [1].

You are totally missing the point: shared interrupts *cannot* work
reliably with edge signalling. What I am saying here is that you need to
implement the sharing yourself in this function.

         M.
Sumit Garg Oct. 21, 2020, 11:28 a.m. UTC | #5
On Wed, 21 Oct 2020 at 16:02, Marc Zyngier <maz@kernel.org> wrote:
>
> On 2020-10-20 10:13, Sumit Garg wrote:
> > On Mon, 19 Oct 2020 at 17:50, Marc Zyngier <maz@kernel.org> wrote:
> >>
> >> On 2020-10-14 12:12, Sumit Garg wrote:
> >> > Enable NMI backtrace support on arm64 using IPI turned as an NMI
> >> > leveraging pseudo NMIs support. It is now possible for users to get a
> >> > backtrace of a CPU stuck in hard-lockup using magic SYSRQ.
> >> >
> >> > Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
> >> > ---
> >> >  arch/arm64/include/asm/irq.h |  6 ++++++
> >> >  arch/arm64/kernel/ipi_nmi.c  | 12 +++++++++++-
> >> >  2 files changed, 17 insertions(+), 1 deletion(-)
> >> >
> >> > diff --git a/arch/arm64/include/asm/irq.h
> >> > b/arch/arm64/include/asm/irq.h
> >> > index b2b0c64..e840bf1 100644
> >> > --- a/arch/arm64/include/asm/irq.h
> >> > +++ b/arch/arm64/include/asm/irq.h
> >> > @@ -6,6 +6,12 @@
> >> >
> >> >  #include <asm-generic/irq.h>
> >> >
> >> > +#ifdef CONFIG_SMP
> >> > +extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
> >> > +                                        bool exclude_self);
> >> > +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
> >> > +#endif
> >> > +
> >> >  struct pt_regs;
> >> >
> >> >  static inline int nr_legacy_irqs(void)
> >> > diff --git a/arch/arm64/kernel/ipi_nmi.c b/arch/arm64/kernel/ipi_nmi.c
> >> > index e0a9e03..e1dc19d 100644
> >> > --- a/arch/arm64/kernel/ipi_nmi.c
> >> > +++ b/arch/arm64/kernel/ipi_nmi.c
> >> > @@ -9,6 +9,7 @@
> >> >  #include <linux/interrupt.h>
> >> >  #include <linux/irq.h>
> >> >  #include <linux/kgdb.h>
> >> > +#include <linux/nmi.h>
> >> >  #include <linux/smp.h>
> >> >
> >> >  #include <asm/nmi.h>
> >> > @@ -25,12 +26,21 @@ void arch_send_call_nmi_func_ipi_mask(cpumask_t
> >> > *mask)
> >> >       __ipi_send_mask(ipi_desc, mask);
> >> >  }
> >> >
> >> > +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool
> >> > exclude_self)
> >> > +{
> >> > +     nmi_trigger_cpumask_backtrace(mask, exclude_self,
> >> > +                                   arch_send_call_nmi_func_ipi_mask);
> >> > +}
> >> > +
> >> >  static irqreturn_t ipi_nmi_handler(int irq, void *data)
> >> >  {
> >> >       unsigned int cpu = smp_processor_id();
> >> >
> >> > -     ipi_kgdb_nmicallback(cpu, get_irq_regs());
> >> > +     if (nmi_cpu_backtrace(get_irq_regs()))
> >> > +             goto out;
> >> >
> >> > +     ipi_kgdb_nmicallback(cpu, get_irq_regs());
> >> > +out:
> >> >       return IRQ_HANDLED;
> >> >  }
> >>
> >> Can't you have *both* a request for a backtrace and a KGDB call?
> >> It really shouldn't be either/or. It also outlines how well shared
> >> interrupts work with edge triggered signalling...
> >
> > Unfortunately, NMIs don't seem to support shared mode [1].
>
> You are totally missing the point: shared interrupts *cannot* work
> reliably with edge signalling. What I am saying here is that you need to
> implement the sharing yourself in this function.

Ah, I see your point now. Will instead share this IPI among both handlers.

-Sumit

>
>          M.
> --
> Jazz is not dead. It just smells funny...
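
For context, the sharing Marc asks for boils down to letting the single NMI
handler feed both consumers rather than returning after the first one. A
minimal sketch of what that could look like is below; it is not the actual
follow-up revision, and it assumes ipi_kgdb_nmicallback() from earlier in
this series can safely be called even when no debugger roundup is pending:

static irqreturn_t ipi_nmi_handler(int irq, void *data)
{
	unsigned int cpu = smp_processor_id();

	/* Consume a pending backtrace request on this CPU, if any... */
	nmi_cpu_backtrace(get_irq_regs());

	/* ...and still give the KGDB roundup a chance on the same NMI. */
	ipi_kgdb_nmicallback(cpu, get_irq_regs());

	return IRQ_HANDLED;
}

Whether the handler should return IRQ_NONE when neither consumer had anything
to do is left open here.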

Patch

diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index b2b0c64..e840bf1 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -6,6 +6,12 @@ 
 
 #include <asm-generic/irq.h>
 
+#ifdef CONFIG_SMP
+extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
+					   bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+#endif
+
 struct pt_regs;
 
 static inline int nr_legacy_irqs(void)
diff --git a/arch/arm64/kernel/ipi_nmi.c b/arch/arm64/kernel/ipi_nmi.c
index e0a9e03..e1dc19d 100644
--- a/arch/arm64/kernel/ipi_nmi.c
+++ b/arch/arm64/kernel/ipi_nmi.c
@@ -9,6 +9,7 @@ 
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/kgdb.h>
+#include <linux/nmi.h>
 #include <linux/smp.h>
 
 #include <asm/nmi.h>
@@ -25,12 +26,21 @@  void arch_send_call_nmi_func_ipi_mask(cpumask_t *mask)
 	__ipi_send_mask(ipi_desc, mask);
 }
 
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+{
+	nmi_trigger_cpumask_backtrace(mask, exclude_self,
+				      arch_send_call_nmi_func_ipi_mask);
+}
+
 static irqreturn_t ipi_nmi_handler(int irq, void *data)
 {
 	unsigned int cpu = smp_processor_id();
 
-	ipi_kgdb_nmicallback(cpu, get_irq_regs());
+	if (nmi_cpu_backtrace(get_irq_regs()))
+		goto out;
 
+	ipi_kgdb_nmicallback(cpu, get_irq_regs());
+out:
 	return IRQ_HANDLED;
 }