diff mbox series

[v2,3/4] arm64: Make debug exception handlers visible from RCU

Message ID 156378173770.12011.3832608237079432765.stgit@devnote2 (mailing list archive)
State New, archived
Headers show
Series arm64: kprobes: Fix some bugs in arm64 kprobes | expand

Commit Message

Masami Hiramatsu (Google) July 22, 2019, 7:48 a.m. UTC
Make debug exceptions visible to RCU so that synchronize_rcu()
correctly tracks the debug exception handler.

This also introduces sanity checks for user-mode exceptions, in the same
way as x86's ist_enter()/ist_exit().

A debug exception can interrupt the idle task. For example, a warning is
triggered if we put a kprobe on a function called from the idle task, as
shown below. The warning message suggests that rcu_read_lock() caused this
problem, but it actually means that RCU has lost track of the context,
which is already in NMI/IRQ.

  /sys/kernel/debug/tracing # echo p default_idle_call >> kprobe_events
  /sys/kernel/debug/tracing # echo 1 > events/kprobes/enable
  /sys/kernel/debug/tracing # [  135.122237]
  [  135.125035] =============================
  [  135.125310] WARNING: suspicious RCU usage
  [  135.125581] 5.2.0-08445-g9187c508bdc7 #20 Not tainted
  [  135.125904] -----------------------------
  [  135.126205] include/linux/rcupdate.h:594 rcu_read_lock() used illegally while idle!
  [  135.126839]
  [  135.126839] other info that might help us debug this:
  [  135.126839]
  [  135.127410]
  [  135.127410] RCU used illegally from idle CPU!
  [  135.127410] rcu_scheduler_active = 2, debug_locks = 1
  [  135.128114] RCU used illegally from extended quiescent state!
  [  135.128555] 1 lock held by swapper/0/0:
  [  135.128944]  #0: (____ptrval____) (rcu_read_lock){....}, at: call_break_hook+0x0/0x178
  [  135.130499]
  [  135.130499] stack backtrace:
  [  135.131192] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.2.0-08445-g9187c508bdc7 #20
  [  135.131841] Hardware name: linux,dummy-virt (DT)
  [  135.132224] Call trace:
  [  135.132491]  dump_backtrace+0x0/0x140
  [  135.132806]  show_stack+0x24/0x30
  [  135.133133]  dump_stack+0xc4/0x10c
  [  135.133726]  lockdep_rcu_suspicious+0xf8/0x108
  [  135.134171]  call_break_hook+0x170/0x178
  [  135.134486]  brk_handler+0x28/0x68
  [  135.134792]  do_debug_exception+0x90/0x150
  [  135.135051]  el1_dbg+0x18/0x8c
  [  135.135260]  default_idle_call+0x0/0x44
  [  135.135516]  cpu_startup_entry+0x2c/0x30
  [  135.135815]  rest_init+0x1b0/0x280
  [  135.136044]  arch_call_rest_init+0x14/0x1c
  [  135.136305]  start_kernel+0x4d4/0x500
  [  135.136597]

So making debug exceptions visible to RCU fixes this warning.

Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Cc: Paul E. McKenney <paulmck@linux.ibm.com>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
---
 arch/arm64/mm/fault.c |   40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

Comments

Paul E. McKenney July 22, 2019, 12:07 p.m. UTC | #1
On Mon, Jul 22, 2019 at 04:48:58PM +0900, Masami Hiramatsu wrote:
> Make debug exceptions visible from RCU so that synchronize_rcu()
> correctly track the debug exception handler.
> 
> This also introduces sanity checks for user-mode exceptions as same
> as x86's ist_enter()/ist_exit().
> 
> The debug exception can interrupt in idle task. For example, it warns
> if we put a kprobe on a function called from idle task as below.
> The warning message showed that the rcu_read_lock() caused this
> problem. But actually, this means the RCU is lost the context which
> is already in NMI/IRQ.
> 
>   /sys/kernel/debug/tracing # echo p default_idle_call >> kprobe_events
>   /sys/kernel/debug/tracing # echo 1 > events/kprobes/enable
>   /sys/kernel/debug/tracing # [  135.122237]
>   [  135.125035] =============================
>   [  135.125310] WARNING: suspicious RCU usage
>   [  135.125581] 5.2.0-08445-g9187c508bdc7 #20 Not tainted
>   [  135.125904] -----------------------------
>   [  135.126205] include/linux/rcupdate.h:594 rcu_read_lock() used illegally while idle!
>   [  135.126839]
>   [  135.126839] other info that might help us debug this:
>   [  135.126839]
>   [  135.127410]
>   [  135.127410] RCU used illegally from idle CPU!
>   [  135.127410] rcu_scheduler_active = 2, debug_locks = 1
>   [  135.128114] RCU used illegally from extended quiescent state!
>   [  135.128555] 1 lock held by swapper/0/0:
>   [  135.128944]  #0: (____ptrval____) (rcu_read_lock){....}, at: call_break_hook+0x0/0x178
>   [  135.130499]
>   [  135.130499] stack backtrace:
>   [  135.131192] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.2.0-08445-g9187c508bdc7 #20
>   [  135.131841] Hardware name: linux,dummy-virt (DT)
>   [  135.132224] Call trace:
>   [  135.132491]  dump_backtrace+0x0/0x140
>   [  135.132806]  show_stack+0x24/0x30
>   [  135.133133]  dump_stack+0xc4/0x10c
>   [  135.133726]  lockdep_rcu_suspicious+0xf8/0x108
>   [  135.134171]  call_break_hook+0x170/0x178
>   [  135.134486]  brk_handler+0x28/0x68
>   [  135.134792]  do_debug_exception+0x90/0x150
>   [  135.135051]  el1_dbg+0x18/0x8c
>   [  135.135260]  default_idle_call+0x0/0x44
>   [  135.135516]  cpu_startup_entry+0x2c/0x30
>   [  135.135815]  rest_init+0x1b0/0x280
>   [  135.136044]  arch_call_rest_init+0x14/0x1c
>   [  135.136305]  start_kernel+0x4d4/0x500
>   [  135.136597]
> 
> So make debug exception visible to RCU can fix this warning.
> 
> Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
> Cc: Paul E. McKenney <paulmck@linux.ibm.com>

From an RCU viewpoint:

Acked-by: Paul E. McKenney <paulmck@linux.ibm.com>

> Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
> ---
>  arch/arm64/mm/fault.c |   40 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 40 insertions(+)
> 
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 9568c116ac7f..a6b244240db6 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -777,6 +777,42 @@ void __init hook_debug_fault_code(int nr,
>  	debug_fault_info[nr].name	= name;
>  }
>  
> +/*
> + * In debug exception context, we explicitly disable preemption.
> + * This serves two purposes: it makes it much less likely that we would
> + * accidentally schedule in exception context and it will force a warning
> + * if we somehow manage to schedule by accident.
> + */
> +static void debug_exception_enter(struct pt_regs *regs)
> +{
> +	if (user_mode(regs)) {
> +		RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
> +	} else {
> +		/*
> +		 * We might have interrupted pretty much anything.  In
> +		 * fact, if we're a debug exception, we can even interrupt
> +		 * NMI processing.  We don't want in_nmi() to return true,
> +		 * but we need to notify RCU.
> +		 */
> +		rcu_nmi_enter();
> +	}
> +
> +	preempt_disable();
> +
> +	/* This code is a bit fragile.  Test it. */
> +	RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work");
> +}
> +NOKPROBE_SYMBOL(debug_exception_enter);
> +
> +static void debug_exception_exit(struct pt_regs *regs)
> +{
> +	preempt_enable_no_resched();
> +
> +	if (!user_mode(regs))
> +		rcu_nmi_exit();
> +}
> +NOKPROBE_SYMBOL(debug_exception_exit);
> +
>  #ifdef CONFIG_ARM64_ERRATUM_1463225
>  DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
>  
> @@ -824,6 +860,8 @@ asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint,
>  	if (interrupts_enabled(regs))
>  		trace_hardirqs_off();
>  
> +	debug_exception_enter(regs);
> +
>  	if (user_mode(regs) && !is_ttbr0_addr(pc))
>  		arm64_apply_bp_hardening();
>  
> @@ -832,6 +870,8 @@ asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint,
>  				 inf->sig, inf->code, (void __user *)pc, esr);
>  	}
>  
> +	debug_exception_exit(regs);
> +
>  	if (interrupts_enabled(regs))
>  		trace_hardirqs_on();
>  }
>
James Morse July 23, 2019, 5:07 p.m. UTC | #2
Hi,

On 22/07/2019 08:48, Masami Hiramatsu wrote:
> Make debug exceptions visible from RCU so that synchronize_rcu()
> correctly track the debug exception handler.
> 
> This also introduces sanity checks for user-mode exceptions as same
> as x86's ist_enter()/ist_exit().
> 
> The debug exception can interrupt in idle task. For example, it warns
> if we put a kprobe on a function called from idle task as below.
> The warning message showed that the rcu_read_lock() caused this
> problem. But actually, this means the RCU is lost the context which
> is already in NMI/IRQ.

> So make debug exception visible to RCU can fix this warning.

> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 9568c116ac7f..a6b244240db6 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -777,6 +777,42 @@ void __init hook_debug_fault_code(int nr,
>  	debug_fault_info[nr].name	= name;
>  }
>  
> +/*
> + * In debug exception context, we explicitly disable preemption.
> + * This serves two purposes: it makes it much less likely that we would
> + * accidentally schedule in exception context and it will force a warning
> + * if we somehow manage to schedule by accident.
> + */
> +static void debug_exception_enter(struct pt_regs *regs)
> +{
> +	if (user_mode(regs)) {
> +		RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");

Would moving entry.S's context_tracking_user_exit() call to be before do_debug_exception()
also fix this?

I don't know the reason it's done 'after' debug exception handling. It's always been like
this: commit 6c81fe7925cc4c42 ("arm64: enable context tracking").


> +	} else {
> +		/*
> +		 * We might have interrupted pretty much anything.  In
> +		 * fact, if we're a debug exception, we can even interrupt
> +		 * NMI processing.

> +		 * We don't want in_nmi() to return true,
> +		 * but we need to notify RCU.

How come? If you interrupted an SError or pseudo-nmi, it already is. Those paths should
all be painted no-kprobe, but I'm sure there are gaps. The hw-breakpoints can almost
certainly hook them.


> +		 */
> +		rcu_nmi_enter();

Can we interrupt printk()? Do we need printk_nmi_enter()? ... What about ftrace?

Because SError and pseudo-nmi can interrupt interrupt-masked code, we describe them as
NMI. The only difference here is these exceptions are synchronous.


I suspect we should make these debug exceptions nmi for EL1. We can then use this for the
kprobe-re-entrance stuff so the pre/post hooks don't get run if they interrupted something
also described as NMI.


> +	}
> +
> +	preempt_disable();
> +
> +	/* This code is a bit fragile.  Test it. */
> +	RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work");
> +}
> +NOKPROBE_SYMBOL(debug_exception_enter);


Thanks,

James
Masami Hiramatsu (Google) July 24, 2019, 11:47 a.m. UTC | #3
On Tue, 23 Jul 2019 18:07:56 +0100
James Morse <james.morse@arm.com> wrote:

> Hi,
> 
> On 22/07/2019 08:48, Masami Hiramatsu wrote:
> > Make debug exceptions visible from RCU so that synchronize_rcu()
> > correctly track the debug exception handler.
> > 
> > This also introduces sanity checks for user-mode exceptions as same
> > as x86's ist_enter()/ist_exit().
> > 
> > The debug exception can interrupt in idle task. For example, it warns
> > if we put a kprobe on a function called from idle task as below.
> > The warning message showed that the rcu_read_lock() caused this
> > problem. But actually, this means the RCU is lost the context which
> > is already in NMI/IRQ.
> 
> > So make debug exception visible to RCU can fix this warning.
> 
> > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> > index 9568c116ac7f..a6b244240db6 100644
> > --- a/arch/arm64/mm/fault.c
> > +++ b/arch/arm64/mm/fault.c
> > @@ -777,6 +777,42 @@ void __init hook_debug_fault_code(int nr,
> >  	debug_fault_info[nr].name	= name;
> >  }
> >  
> > +/*
> > + * In debug exception context, we explicitly disable preemption.
> > + * This serves two purposes: it makes it much less likely that we would
> > + * accidentally schedule in exception context and it will force a warning
> > + * if we somehow manage to schedule by accident.
> > + */
> > +static void debug_exception_enter(struct pt_regs *regs)
> > +{
> > +	if (user_mode(regs)) {
> > +		RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
> 
> Would moving entry.S's context_tracking_user_exit() call to be before do_debug_exception()
> also fix this?

It sounds like treating only user context, correct?
This part is just adding assertion, not fixing the problem which Naresh reported.

> 
> I don't know the reason its done 'after' debug exception handling. Its always been like
> this: commit 6c81fe7925cc4c42 ("arm64: enable context tracking").
> 
> 
> > +	} else {
> > +		/*
> > +		 * We might have interrupted pretty much anything.  In
> > +		 * fact, if we're a debug exception, we can even interrupt
> > +		 * NMI processing.
> 
> > +		 * We don't want in_nmi() to return true,
> > +		 * but we need to notify RCU.
> 
> How come? If you interrupted an SError or pseudo-nmi, it already is. Those paths should
> all be painted no-kprobe, but I'm sure there are gaps. The hw-breakpoints can almost
> certainly hook them.

I think that sentence means "we don't want this code to make in_nmi() return true".
So, if the breakpoint interrupts pNMI/SError context, it is OK that in_nmi() returns true.

> 
> 
> > +		 */
> > +		rcu_nmi_enter();
> 
> Can we interrupt printk()? Do we need printk_nmi_enter()? ... What about ftrace?

Good point! As far as I know, we don't use it because ftrace doesn't use printk.
But indeed, kprobes users can use printk, and in that case they have to call
printk_nmi_enter()/exit(); that must be noted in the documentation. Anyway, basically it is the user's choice.

> 
> Because SError and pseudo-nmi can interrupt interrupt-masked code, we describe them as
> NMI. The only difference here is these exceptions are synchronous.
> 
> 
> I suspect we should make these debug exceptions nmi for EL1. We can then use this for the
> kprobe-re-entrance stuff so the pre/post hooks don't get run if they interrupted something
> also described as NMI.

I'm not sure how it can prevent... anyway because we have to run a single-stepping for
recovery, and kprobe already check the reentered kprobes and skip user-handlers in
such case.

Thank you,

> 
> 
> > +	}
> > +
> > +	preempt_disable();
> > +
> > +	/* This code is a bit fragile.  Test it. */
> > +	RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work");
> > +}
> > +NOKPROBE_SYMBOL(debug_exception_enter);
> 
> 
> Thanks,
> 
> James
diff mbox series

Patch

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 9568c116ac7f..a6b244240db6 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -777,6 +777,42 @@  void __init hook_debug_fault_code(int nr,
 	debug_fault_info[nr].name	= name;
 }
 
+/*
+ * In debug exception context, we explicitly disable preemption.
+ * This serves two purposes: it makes it much less likely that we would
+ * accidentally schedule in exception context and it will force a warning
+ * if we somehow manage to schedule by accident.
+ */
+static void debug_exception_enter(struct pt_regs *regs)
+{
+	if (user_mode(regs)) {
+		RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
+	} else {
+		/*
+		 * We might have interrupted pretty much anything.  In
+		 * fact, if we're a debug exception, we can even interrupt
+		 * NMI processing.  We don't want in_nmi() to return true,
+		 * but we need to notify RCU.
+		 */
+		rcu_nmi_enter();
+	}
+
+	preempt_disable();
+
+	/* This code is a bit fragile.  Test it. */
+	RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work");
+}
+NOKPROBE_SYMBOL(debug_exception_enter);
+
+static void debug_exception_exit(struct pt_regs *regs)
+{
+	preempt_enable_no_resched();
+
+	if (!user_mode(regs))
+		rcu_nmi_exit();
+}
+NOKPROBE_SYMBOL(debug_exception_exit);
+
 #ifdef CONFIG_ARM64_ERRATUM_1463225
 DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
 
@@ -824,6 +860,8 @@  asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint,
 	if (interrupts_enabled(regs))
 		trace_hardirqs_off();
 
+	debug_exception_enter(regs);
+
 	if (user_mode(regs) && !is_ttbr0_addr(pc))
 		arm64_apply_bp_hardening();
 
@@ -832,6 +870,8 @@  asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint,
 				 inf->sig, inf->code, (void __user *)pc, esr);
 	}
 
+	debug_exception_exit(regs);
+
 	if (interrupts_enabled(regs))
 		trace_hardirqs_on();
 }