
[3/6] ftrace/x86: Warn and ignore graph tracing when RCU is disabled

Message ID 20230123205515.059999893@infradead.org (mailing list archive)
State New, archived
Series: A few cpuidle vs rcu fixes

Commit Message

Peter Zijlstra Jan. 23, 2023, 8:50 p.m. UTC
All RCU disabled code should be noinstr and hence we should never get
here -- when we do, WARN about it and make sure to not actually do
tracing.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/ftrace.c |    3 +++
 1 file changed, 3 insertions(+)
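
For context, the "RCU disabled" regions referred to here (deep idle, early entry/exit paths, etc.) are expected to be built entirely from noinstr code, which is placed in a special section and kept free of tracing hooks, so the graph-tracer entry point should never see them. A minimal illustration of the annotation (the helper function below is made up, not from this series):

	#include <linux/compiler.h>	/* pulls in the noinstr attribute */

	/* Goes into .noinstr.text; ftrace/kprobes must not instrument it. */
	static noinstr void my_rcu_idle_helper(void)
	{
		/* no tracepoints or otherwise instrumentable calls in here */
	}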

Comments

Steven Rostedt Jan. 23, 2023, 9:53 p.m. UTC | #1
On Mon, 23 Jan 2023 21:50:12 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> All RCU disabled code should be noinstr and hence we should never get
> here -- when we do, WARN about it and make sure to not actually do
> tracing.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  arch/x86/kernel/ftrace.c |    3 +++
>  1 file changed, 3 insertions(+)
> 
> --- a/arch/x86/kernel/ftrace.c
> +++ b/arch/x86/kernel/ftrace.c
> @@ -646,6 +646,9 @@ void prepare_ftrace_return(unsigned long
>  	if (unlikely(atomic_read(&current->tracing_graph_pause)))
>  		return;
>  
> +	if (WARN_ONCE(!rcu_is_watching(), "RCU not on for: %pS\n", (void *)ip))
> +		return;
> +

Please add this after the recursion trylock below. Although WARN_ONCE()
should not have recursion issues, as function tracing can do weird
things, I'd rather be safe than sorry, and not have the system triple
fault due to some path that might get added in the future.

If rcu_is_watching() is false, it will still get past the recursion
check below and warn. That is, the check below should be done before this
function calls any other function.

>  	bit = ftrace_test_recursion_trylock(ip, *parent);
>  	if (bit < 0)
>  		return;
> 

-- Steve
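
For illustration, a minimal sketch of the ordering Steven is asking for. This is not the patch that was ultimately applied; the surrounding lines are paraphrased from the x86 prepare_ftrace_return() of that time, and the unlock on the early-return path is an assumption about how it would be wired up:

	if (unlikely(atomic_read(&current->tracing_graph_pause)))
		return;

	bit = ftrace_test_recursion_trylock(ip, *parent);
	if (bit < 0)
		return;

	/* Only call out (WARN_ONCE(), printk(), ...) once the recursion lock is held. */
	if (WARN_ONCE(!rcu_is_watching(), "RCU not on for: %pS\n", (void *)ip))
		goto out;

	/* ... existing graph-entry work (function_graph_enter() etc.) ... */

out:
	ftrace_test_recursion_unlock(bit);
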
Steven Rostedt Jan. 23, 2023, 10:07 p.m. UTC | #2
On Mon, 23 Jan 2023 16:53:04 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Mon, 23 Jan 2023 21:50:12 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > All RCU disabled code should be noinstr and hence we should never get
> > here -- when we do, WARN about it and make sure to not actually do
> > tracing.
> > 
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> >  arch/x86/kernel/ftrace.c |    3 +++
> >  1 file changed, 3 insertions(+)
> > 
> > --- a/arch/x86/kernel/ftrace.c
> > +++ b/arch/x86/kernel/ftrace.c
> > @@ -646,6 +646,9 @@ void prepare_ftrace_return(unsigned long
> >  	if (unlikely(atomic_read(&current->tracing_graph_pause)))
> >  		return;
> >  
> > +	if (WARN_ONCE(!rcu_is_watching(), "RCU not on for: %pS\n", (void *)ip))
> > +		return;
> > +  
> 
> Please add this after the recursion trylock below. Although WARN_ONCE()
> should not have recursion issues, as function tracing can do weird
> things, I'd rather be safe than sorry, and not have the system triple
> fault due to some path that might get added in the future.
> 
> If rcu_is_watching() is false, it will still get past the recursion
> check below and warn. That is, the check below should be done before this
> function calls any other function.
> 
> >  	bit = ftrace_test_recursion_trylock(ip, *parent);
> >  	if (bit < 0)
> >  		return;
> >   
> 

Actually, perhaps we can just add this, and all you need to do is create
and set CONFIG_NO_RCU_TRACING (or some other name).

This should cover all ftrace locations. (Uncompiled).

-- Steve

diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index c303f7a114e9..10ee3fbb9113 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -135,6 +135,22 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip);
 # define do_ftrace_record_recursion(ip, pip)	do { } while (0)
 #endif
 
+#ifdef CONFIG_NO_RCU_TRACING
+# define trace_warn_on_no_rcu(ip)					\
+	({								\
+		bool __ret = false;					\
+		if (!trace_recursion_test(TRACE_RECORD_RECURSION_BIT)) { \
+			trace_recursion_set(TRACE_RECORD_RECURSION_BIT); \
+			__ret = WARN_ONCE(!rcu_is_watching(),		\
+					  "RCU not on for: %pS\n", (void *)ip); \
+			trace_recursion_clear(TRACE_RECORD_RECURSION_BIT); \
+		}							\
+		__ret;							\
+	})
+#else
+# define trace_warn_on_no_rcu(ip)	false
+#endif
+
 /*
  * Preemption is promised to be disabled when return bit >= 0.
  */
@@ -144,6 +160,9 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign
 	unsigned int val = READ_ONCE(current->trace_recursion);
 	int bit;
 
+	if (trace_warn_on_no_rcu(ip))
+		return -1;
+
 	bit = trace_get_context_bit() + start;
 	if (unlikely(val & (1 << bit))) {
 		/*
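
The reason this would cover all ftrace locations is that every ftrace callback goes through the same recursion lock; ftrace_test_recursion_trylock() is only a thin wrapper around trace_test_and_set_recursion(), roughly as below (reproduced from memory of include/linux/trace_recursion.h around that time, so treat it as a sketch rather than the exact header):

	static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
								  unsigned long parent_ip)
	{
		return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START);
	}
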
Peter Zijlstra Jan. 24, 2023, 2:44 p.m. UTC | #3
On Mon, Jan 23, 2023 at 05:07:53PM -0500, Steven Rostedt wrote:

> Actually, perhaps we can just add this, and all you need to do is create
> and set CONFIG_NO_RCU_TRACING (or some other name).

Elsewhere I've used CONFIG_ARCH_WANTS_NO_INSTR for this.

Anyway, I took it for a spin and it ... doesn't seem to do the job.

With my patch the first splat is

  "RCU not on for: cpuidle_poll_time+0x0/0x70"

While with yours I seem to get the endless:

  "WARNING: suspicious RCU usage"

thing. Let me see if I can figure out where it goes side-ways.
Mark Rutland Jan. 24, 2023, 5:12 p.m. UTC | #4
On Tue, Jan 24, 2023 at 03:44:35PM +0100, Peter Zijlstra wrote:
> On Mon, Jan 23, 2023 at 05:07:53PM -0500, Steven Rostedt wrote:
> 
> > Actually, perhaps we can just add this, and all you need to do is create
> > and set CONFIG_NO_RCU_TRACING (or some other name).
> 
> Elsewhere I've used CONFIG_ARCH_WANTS_NO_INSTR for this.

Yes please; if we use CONFIG_ARCH_WANTS_NO_INSTR then arm64 will get this "for
free" once we add the missing checks (which I assume we need) in our ftrace_prepare_return().

> Anyway, I took it for a spin and it ... doesn't seem to do the job.
> 
> With my patch the first splat is
> 
>   "RCU not on for: cpuidle_poll_time+0x0/0x70"
> 
> While with yours I seem to get the endless:
> 
>   "WARNING: suspicious RCU usage"
> 
> thing. Let me see if I can figure out where it goes side-ways.

Hmmm... for WARN_ONCE() don't we need to wake RCU first also? I thought we
needed that at least for the printk machinery?

Thanks,
Mark.
Peter Zijlstra Jan. 25, 2023, 9:37 a.m. UTC | #5
On Tue, Jan 24, 2023 at 05:12:14PM +0000, Mark Rutland wrote:
> On Tue, Jan 24, 2023 at 03:44:35PM +0100, Peter Zijlstra wrote:
> > On Mon, Jan 23, 2023 at 05:07:53PM -0500, Steven Rostedt wrote:
> > 
> > > Actually, perhaps we can just add this, and all you need to do is create
> > > and set CONFIG_NO_RCU_TRACING (or some other name).
> > 
> > Elsewhere I've used CONFIG_ARCH_WANTS_NO_INSTR for this.
> 
> Yes please; if we use CONFIG_ARCH_WANTS_NO_INSTR then arm64 will get this "for
> free" once we add the missing checks (which I assume we need) in our ftrace_prepare_return().

Aye.

> > Anyway, I took it for a spin and it ... doesn't seem to do the job.
> > 
> > With my patch the first splat is
> > 
> >   "RCU not on for: cpuidle_poll_time+0x0/0x70"
> > 
> > While with yours I seem to get the endless:
> > 
> >   "WARNING: suspicious RCU usage"
> > 
> > thing. Let me see if I can figure out where it goes side-ways.
> 
> Hmmm... for WARN_ONCE() don't we need to wake RCU first also? I thought we
> needed that at least for the printk machinery?

Yeah, I'm currently running with a hacked up printk that redirects
everything into early_printk() but it still trips up lots.

I was just about to go stick some RCU magic into WARN itself; this isn't
going to be the only site triggering this fail-cascade.
Peter Zijlstra Jan. 25, 2023, 10:47 a.m. UTC | #6
On Tue, Jan 24, 2023 at 05:12:14PM +0000, Mark Rutland wrote:
> On Tue, Jan 24, 2023 at 03:44:35PM +0100, Peter Zijlstra wrote:
> > On Mon, Jan 23, 2023 at 05:07:53PM -0500, Steven Rostedt wrote:
> > 
> > > Actually, perhaps we can just add this, and all you need to do is create
> > > and set CONFIG_NO_RCU_TRACING (or some other name).
> > 
> > Elsewhere I've used CONFIG_ARCH_WANTS_NO_INSTR for this.
> 
> Yes please; if we use CONFIG_ARCH_WANTS_NO_INSTR then arm64 will get this "for
> free" once we add the missing checks (which I assume we need) in our ftrace_prepare_return().
> 
> > Anyway, I took it for a spin and it ... doesn't seem to do the job.
> > 
> > With my patch the first splat is
> > 
> >   "RCU not on for: cpuidle_poll_time+0x0/0x70"
> > 
> > While with yours I seem to get the endless:
> > 
> >   "WARNING: suspicious RCU usage"
> > 
> > thing. Let me see if I can figure out where it goes side-ways.
> 
> Hmmm... for WARN_ONCE() don't we need to wake RCU first also? I thought we
> needed that at least for the printk machinery?

OK, the below seems to work nice for me -- although I'm still on a
hacked up printk, but the recursive RCU not watching fail seems to be
tamed.

Ofc. Paul might have an opinion on this glorious bodge ;-)

---

diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index c303f7a114e9..d48cd92d2364 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -135,6 +135,21 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip);
 # define do_ftrace_record_recursion(ip, pip)	do { } while (0)
 #endif
 
+#ifdef CONFIG_ARCH_WANTS_NO_INSTR
+# define trace_warn_on_no_rcu(ip)					\
+	({								\
+		bool __ret = !rcu_is_watching();			\
+		if (__ret && !trace_recursion_test(TRACE_RECORD_RECURSION_BIT)) { \
+			trace_recursion_set(TRACE_RECORD_RECURSION_BIT); \
+			WARN_ONCE(true, "RCU not on for: %pS\n", (void *)ip); \
+			trace_recursion_clear(TRACE_RECORD_RECURSION_BIT); \
+		}							\
+		__ret;							\
+	})
+#else
+# define trace_warn_on_no_rcu(ip)	false
+#endif
+
 /*
  * Preemption is promised to be disabled when return bit >= 0.
  */
@@ -144,6 +159,9 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign
 	unsigned int val = READ_ONCE(current->trace_recursion);
 	int bit;
 
+	if (trace_warn_on_no_rcu(ip))
+		return -1;
+
 	bit = trace_get_context_bit() + start;
 	if (unlikely(val & (1 << bit))) {
 		/*
diff --git a/lib/bug.c b/lib/bug.c
index c223a2575b72..0a10643ea168 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -47,6 +47,7 @@
 #include <linux/sched.h>
 #include <linux/rculist.h>
 #include <linux/ftrace.h>
+#include <linux/context_tracking.h>
 
 extern struct bug_entry __start___bug_table[], __stop___bug_table[];
 
@@ -153,7 +154,7 @@ struct bug_entry *find_bug(unsigned long bugaddr)
 	return module_find_bug(bugaddr);
 }
 
-enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
+static enum bug_trap_type __report_bug(unsigned long bugaddr, struct pt_regs *regs)
 {
 	struct bug_entry *bug;
 	const char *file;
@@ -209,6 +210,30 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 	return BUG_TRAP_TYPE_BUG;
 }
 
+enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
+{
+	enum bug_trap_type ret;
+	bool rcu = false;
+
+#ifdef CONFIG_CONTEXT_TRACKING_IDLE
+	/*
+	 * Horrible hack to shut up recursive RCU isn't watching fail since
+	 * lots of the actual reporting also relies on RCU.
+	 */
+	if (!rcu_is_watching()) {
+		rcu = true;
+		ct_state_inc(RCU_DYNTICKS_IDX);
+	}
+#endif
+
+	ret = __report_bug(bugaddr, regs);
+
+	if (rcu)
+		ct_state_inc(RCU_DYNTICKS_IDX);
+
+	return ret;
+}
+
 static void clear_once_table(struct bug_entry *start, struct bug_entry *end)
 {
 	struct bug_entry *bug;
Mark Rutland Jan. 25, 2023, 11:32 a.m. UTC | #7
On Wed, Jan 25, 2023 at 11:47:44AM +0100, Peter Zijlstra wrote:
> On Tue, Jan 24, 2023 at 05:12:14PM +0000, Mark Rutland wrote:
> > On Tue, Jan 24, 2023 at 03:44:35PM +0100, Peter Zijlstra wrote:
> > > On Mon, Jan 23, 2023 at 05:07:53PM -0500, Steven Rostedt wrote:
> > > 
> > > > Actually, perhaps we can just add this, and all you need to do is create
> > > > and set CONFIG_NO_RCU_TRACING (or some other name).
> > > 
> > > Elsewhere I've used CONFIG_ARCH_WANTS_NO_INSTR for this.
> > 
> > Yes please; if we use CONFIG_ARCH_WANTS_NO_INSTR then arm64 will get this "for
> > free" once we add the missing checks (which I assume we need) in our ftrace_prepare_return().
> > 
> > > Anyway, I took it for a spin and it ... doesn't seem to do the job.
> > > 
> > > With my patch the first splat is
> > > 
> > >   "RCU not on for: cpuidle_poll_time+0x0/0x70"
> > > 
> > > While with yours I seem to get the endless:
> > > 
> > >   "WARNING: suspicious RCU usage"
> > > 
> > > thing. Let me see if I can figure out where it goes side-ways.
> > 
> > Hmmm... for WARN_ONCE() don't we need to wake RCU first also? I thought we
> > needed that at least for the printk machinery?
> 
> OK, the below seems to work nice for me -- although I'm still on a
> hacked up printk, but the recursive RCU not watching fail seems to be
> tamed.

FWIW, I gave that a spin on arm64 with the ftrace selftests, and I see no
splats, so it looks good on that front.

Currently arm64's BUG/WARN exception handling does the usual
lockdep/rcu/whatever stuff before getting to report_bug(), so that bit should
be redundant (and any WARN() or BUG() early in the entry code is likely to lead
to a stack overflow and kill the kernel), but it shouldn't be harmful.

> Ofc. Paul might have an opinion on this glorious bodge ;-)

I'll leave that to Paul. ;)

Thanks,
Mark.

> 
> ---
> 
> diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
> index c303f7a114e9..d48cd92d2364 100644
> --- a/include/linux/trace_recursion.h
> +++ b/include/linux/trace_recursion.h
> @@ -135,6 +135,21 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip);
>  # define do_ftrace_record_recursion(ip, pip)	do { } while (0)
>  #endif
>  
> +#ifdef CONFIG_ARCH_WANTS_NO_INSTR
> +# define trace_warn_on_no_rcu(ip)					\
> +	({								\
> +		bool __ret = !rcu_is_watching();			\
> +		if (__ret && !trace_recursion_test(TRACE_RECORD_RECURSION_BIT)) { \
> +			trace_recursion_set(TRACE_RECORD_RECURSION_BIT); \
> +			WARN_ONCE(true, "RCU not on for: %pS\n", (void *)ip); \
> +			trace_recursion_clear(TRACE_RECORD_RECURSION_BIT); \
> +		}							\
> +		__ret;							\
> +	})
> +#else
> +# define trace_warn_on_no_rcu(ip)	false
> +#endif
> +
>  /*
>   * Preemption is promised to be disabled when return bit >= 0.
>   */
> @@ -144,6 +159,9 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign
>  	unsigned int val = READ_ONCE(current->trace_recursion);
>  	int bit;
>  
> +	if (trace_warn_on_no_rcu(ip))
> +		return -1;
> +
>  	bit = trace_get_context_bit() + start;
>  	if (unlikely(val & (1 << bit))) {
>  		/*
> diff --git a/lib/bug.c b/lib/bug.c
> index c223a2575b72..0a10643ea168 100644
> --- a/lib/bug.c
> +++ b/lib/bug.c
> @@ -47,6 +47,7 @@
>  #include <linux/sched.h>
>  #include <linux/rculist.h>
>  #include <linux/ftrace.h>
> +#include <linux/context_tracking.h>
>  
>  extern struct bug_entry __start___bug_table[], __stop___bug_table[];
>  
> @@ -153,7 +154,7 @@ struct bug_entry *find_bug(unsigned long bugaddr)
>  	return module_find_bug(bugaddr);
>  }
>  
> -enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
> +static enum bug_trap_type __report_bug(unsigned long bugaddr, struct pt_regs *regs)
>  {
>  	struct bug_entry *bug;
>  	const char *file;
> @@ -209,6 +210,30 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
>  	return BUG_TRAP_TYPE_BUG;
>  }
>  
> +enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
> +{
> +	enum bug_trap_type ret;
> +	bool rcu = false;
> +
> +#ifdef CONFIG_CONTEXT_TRACKING_IDLE
> +	/*
> +	 * Horrible hack to shut up recursive RCU isn't watching fail since
> +	 * lots of the actual reporting also relies on RCU.
> +	 */
> +	if (!rcu_is_watching()) {
> +		rcu = true;
> +		ct_state_inc(RCU_DYNTICKS_IDX);
> +	}
> +#endif
> +
> +	ret = __report_bug(bugaddr, regs);
> +
> +	if (rcu)
> +		ct_state_inc(RCU_DYNTICKS_IDX);
> +
> +	return ret;
> +}
> +
>  static void clear_once_table(struct bug_entry *start, struct bug_entry *end)
>  {
>  	struct bug_entry *bug;
Paul E. McKenney Jan. 25, 2023, 6:46 p.m. UTC | #8
On Wed, Jan 25, 2023 at 11:47:44AM +0100, Peter Zijlstra wrote:
> On Tue, Jan 24, 2023 at 05:12:14PM +0000, Mark Rutland wrote:
> > On Tue, Jan 24, 2023 at 03:44:35PM +0100, Peter Zijlstra wrote:
> > > On Mon, Jan 23, 2023 at 05:07:53PM -0500, Steven Rostedt wrote:
> > > 
> > > > Actually, perhaps we can just add this, and all you need to do is create
> > > > and set CONFIG_NO_RCU_TRACING (or some other name).
> > > 
> > > Elsewhere I've used CONFIG_ARCH_WANTS_NO_INSTR for this.
> > 
> > Yes please; if we use CONFIG_ARCH_WANTS_NO_INSTR then arm64 will get this "for
> > free" once we add the missing checks (which I assume we need) in our ftrace_prepare_return().
> > 
> > > Anyway, I took it for a spin and it ... doesn't seem to do the job.
> > > 
> > > With my patch the first splat is
> > > 
> > >   "RCU not on for: cpuidle_poll_time+0x0/0x70"
> > > 
> > > While with yours I seem to get the endless:
> > > 
> > >   "WARNING: suspicious RCU usage"
> > > 
> > > thing. Let me see if I can figure out where it goes side-ways.
> > 
> > Hmmm... for WARN_ONCE() don't we need to wake RCU first also? I thought we
> > needed that at least for the printk machinery?
> 
> OK, the below seems to work nice for me -- although I'm still on a
> hacked up printk, but the recursive RCU not watching fail seems to be
> tamed.
> 
> Ofc. Paul might have an opinion on this glorious bodge ;-)

For some definition of the word "glorious", to be sure.  ;-)

Am I correct that you have two things happening here?  (1) Preventing
trace recursion and (2) forcing RCU to pay attention when needed.

I cannot resist pointing out that you have re-invented RCU_NONIDLE(),
though avoiding much of the overhead when not needed.  ;-)
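
For reference, RCU_NONIDLE() wraps a single statement so that RCU is watching across it, paying the full context-tracking entry/exit cost each time. A minimal, hypothetical usage sketch -- the tracepoint and helper names here are made up:

	#include <linux/rcupdate.h>

	static void my_idle_path(int state)
	{
		/* Hypothetical: fire a tracepoint from an RCU-idle path with RCU watching. */
		RCU_NONIDLE(trace_my_idle_event(state));
	}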

I would have objections if this ever leaks out onto a non-error code path.
There are things that need doing when RCU starts and stops watching,
and this approach omits those things.  Which again is OK in this case,
where this code is only ever executed when something is already broken,
but definitely *not* OK when things are not already broken.

							Thanx, Paul

> ---
> 
> diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
> index c303f7a114e9..d48cd92d2364 100644
> --- a/include/linux/trace_recursion.h
> +++ b/include/linux/trace_recursion.h
> @@ -135,6 +135,21 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip);
>  # define do_ftrace_record_recursion(ip, pip)	do { } while (0)
>  #endif
>  
> +#ifdef CONFIG_ARCH_WANTS_NO_INSTR
> +# define trace_warn_on_no_rcu(ip)					\
> +	({								\
> +		bool __ret = !rcu_is_watching();			\
> +		if (__ret && !trace_recursion_test(TRACE_RECORD_RECURSION_BIT)) { \
> +			trace_recursion_set(TRACE_RECORD_RECURSION_BIT); \
> +			WARN_ONCE(true, "RCU not on for: %pS\n", (void *)ip); \
> +			trace_recursion_clear(TRACE_RECORD_RECURSION_BIT); \
> +		}							\
> +		__ret;							\
> +	})
> +#else
> +# define trace_warn_on_no_rcu(ip)	false
> +#endif
> +
>  /*
>   * Preemption is promised to be disabled when return bit >= 0.
>   */
> @@ -144,6 +159,9 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign
>  	unsigned int val = READ_ONCE(current->trace_recursion);
>  	int bit;
>  
> +	if (trace_warn_on_no_rcu(ip))
> +		return -1;
> +
>  	bit = trace_get_context_bit() + start;
>  	if (unlikely(val & (1 << bit))) {
>  		/*
> diff --git a/lib/bug.c b/lib/bug.c
> index c223a2575b72..0a10643ea168 100644
> --- a/lib/bug.c
> +++ b/lib/bug.c
> @@ -47,6 +47,7 @@
>  #include <linux/sched.h>
>  #include <linux/rculist.h>
>  #include <linux/ftrace.h>
> +#include <linux/context_tracking.h>
>  
>  extern struct bug_entry __start___bug_table[], __stop___bug_table[];
>  
> @@ -153,7 +154,7 @@ struct bug_entry *find_bug(unsigned long bugaddr)
>  	return module_find_bug(bugaddr);
>  }
>  
> -enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
> +static enum bug_trap_type __report_bug(unsigned long bugaddr, struct pt_regs *regs)
>  {
>  	struct bug_entry *bug;
>  	const char *file;
> @@ -209,6 +210,30 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
>  	return BUG_TRAP_TYPE_BUG;
>  }
>  
> +enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
> +{
> +	enum bug_trap_type ret;
> +	bool rcu = false;
> +
> +#ifdef CONFIG_CONTEXT_TRACKING_IDLE
> +	/*
> +	 * Horrible hack to shut up recursive RCU isn't watching fail since
> +	 * lots of the actual reporting also relies on RCU.
> +	 */
> +	if (!rcu_is_watching()) {
> +		rcu = true;
> +		ct_state_inc(RCU_DYNTICKS_IDX);
> +	}
> +#endif
> +
> +	ret = __report_bug(bugaddr, regs);
> +
> +	if (rcu)
> +		ct_state_inc(RCU_DYNTICKS_IDX);
> +
> +	return ret;
> +}
> +
>  static void clear_once_table(struct bug_entry *start, struct bug_entry *end)
>  {
>  	struct bug_entry *bug;
Peter Zijlstra Jan. 26, 2023, 9:28 a.m. UTC | #9
On Wed, Jan 25, 2023 at 10:46:58AM -0800, Paul E. McKenney wrote:

> > Ofc. Paul might have an opinion on this glorious bodge ;-)
> 
> For some definition of the word "glorious", to be sure.  ;-)
> 
> Am I correct that you have two things happening here?  (1) Preventing
> trace recursion and (2) forcing RCU to pay attention when needed.

Mostly just (1), we're in an error situation, I'm not too worried about
(2).

> I cannot resist pointing out that you have re-invented RCU_NONIDLE(),
> though avoiding much of the overhead when not needed.  ;-)

Yeah, this was the absolute minimal bodge I could come up with that
shuts up the rcu_dereference warning thing.

> I would have objections if this ever leaks out onto a non-error code path.

Agreed.

> There are things that need doing when RCU starts and stops watching,
> and this approach omits those things.  Which again is OK in this case,
> where this code is only ever executed when something is already broken,
> but definitely *not* OK when things are not already broken.

And agreed.

Current version of the bodge looks like so (will repost the whole series
a little later today).

I managed to tickle the recursion so that it was a test-case for the
stack guard...

With this on, it prints just the one WARN and lives.

---
Subject: bug: Disable rcu_is_watching() during WARN/BUG
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed Jan 25 13:57:49 CET 2023

In order to avoid WARN/BUG from generating nested or even recursive
warnings, force rcu_is_watching() true during
WARN/lockdep_rcu_suspicious().

Notably things like unwinding the stack can trigger rcu_dereference()
warnings, which then triggers more unwinding which then triggers more
warnings etc..

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/context_tracking.h |   27 +++++++++++++++++++++++++++
 kernel/locking/lockdep.c         |    3 +++
 kernel/panic.c                   |    5 +++++
 lib/bug.c                        |   15 ++++++++++++++-
 4 files changed, 49 insertions(+), 1 deletion(-)

--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -130,9 +130,36 @@ static __always_inline unsigned long ct_
 	return arch_atomic_add_return(incby, this_cpu_ptr(&context_tracking.state));
 }
 
+static __always_inline bool warn_rcu_enter(void)
+{
+	bool ret = false;
+
+	/*
+	 * Horrible hack to shut up recursive RCU isn't watching fail since
+	 * lots of the actual reporting also relies on RCU.
+	 */
+	preempt_disable_notrace();
+	if (rcu_dynticks_curr_cpu_in_eqs()) {
+		ret = true;
+		ct_state_inc(RCU_DYNTICKS_IDX);
+	}
+
+	return ret;
+}
+
+static __always_inline void warn_rcu_exit(bool rcu)
+{
+	if (rcu)
+		ct_state_inc(RCU_DYNTICKS_IDX);
+	preempt_enable_notrace();
+}
+
 #else
 static inline void ct_idle_enter(void) { }
 static inline void ct_idle_exit(void) { }
+
+static __always_inline bool warn_rcu_enter(void) { return false; }
+static __always_inline void warn_rcu_exit(bool rcu) { }
 #endif /* !CONFIG_CONTEXT_TRACKING_IDLE */
 
 #endif
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -55,6 +55,7 @@
 #include <linux/rcupdate.h>
 #include <linux/kprobes.h>
 #include <linux/lockdep.h>
+#include <linux/context_tracking.h>
 
 #include <asm/sections.h>
 
@@ -6555,6 +6556,7 @@ void lockdep_rcu_suspicious(const char *
 {
 	struct task_struct *curr = current;
 	int dl = READ_ONCE(debug_locks);
+	bool rcu = warn_rcu_enter();
 
 	/* Note: the following can be executed concurrently, so be careful. */
 	pr_warn("\n");
@@ -6595,5 +6597,6 @@ void lockdep_rcu_suspicious(const char *
 	lockdep_print_held_locks(curr);
 	pr_warn("\nstack backtrace:\n");
 	dump_stack();
+	warn_rcu_exit(rcu);
 }
 EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -34,6 +34,7 @@
 #include <linux/ratelimit.h>
 #include <linux/debugfs.h>
 #include <linux/sysfs.h>
+#include <linux/context_tracking.h>
 #include <trace/events/error_report.h>
 #include <asm/sections.h>
 
@@ -679,6 +680,7 @@ void __warn(const char *file, int line,
 void warn_slowpath_fmt(const char *file, int line, unsigned taint,
 		       const char *fmt, ...)
 {
+	bool rcu = warn_rcu_enter();
 	struct warn_args args;
 
 	pr_warn(CUT_HERE);
@@ -693,11 +695,13 @@ void warn_slowpath_fmt(const char *file,
 	va_start(args.args, fmt);
 	__warn(file, line, __builtin_return_address(0), taint, NULL, &args);
 	va_end(args.args);
+	warn_rcu_exit(rcu);
 }
 EXPORT_SYMBOL(warn_slowpath_fmt);
 #else
 void __warn_printk(const char *fmt, ...)
 {
+	bool rcu = warn_rcu_enter();
 	va_list args;
 
 	pr_warn(CUT_HERE);
@@ -705,6 +709,7 @@ void __warn_printk(const char *fmt, ...)
 	va_start(args, fmt);
 	vprintk(fmt, args);
 	va_end(args);
+	warn_rcu_exit(rcu);
 }
 EXPORT_SYMBOL(__warn_printk);
 #endif
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -47,6 +47,7 @@
 #include <linux/sched.h>
 #include <linux/rculist.h>
 #include <linux/ftrace.h>
+#include <linux/context_tracking.h>
 
 extern struct bug_entry __start___bug_table[], __stop___bug_table[];
 
@@ -153,7 +154,7 @@ struct bug_entry *find_bug(unsigned long
 	return module_find_bug(bugaddr);
 }
 
-enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
+static enum bug_trap_type __report_bug(unsigned long bugaddr, struct pt_regs *regs)
 {
 	struct bug_entry *bug;
 	const char *file;
@@ -209,6 +210,18 @@ enum bug_trap_type report_bug(unsigned l
 	return BUG_TRAP_TYPE_BUG;
 }
 
+enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
+{
+	enum bug_trap_type ret;
+	bool rcu = false;
+
+	rcu = warn_rcu_enter();
+	ret = __report_bug(bugaddr, regs);
+	warn_rcu_exit(rcu);
+
+	return ret;
+}
+
 static void clear_once_table(struct bug_entry *start, struct bug_entry *end)
 {
 	struct bug_entry *bug;
Paul E. McKenney Jan. 28, 2023, 7:12 p.m. UTC | #10
On Thu, Jan 26, 2023 at 10:28:51AM +0100, Peter Zijlstra wrote:
> On Wed, Jan 25, 2023 at 10:46:58AM -0800, Paul E. McKenney wrote:
> 
> > > Ofc. Paul might have an opinion on this glorious bodge ;-)
> > 
> > For some definition of the word "glorious", to be sure.  ;-)
> > 
> > Am I correct that you have two things happening here?  (1) Preventing
> > trace recursion and (2) forcing RCU to pay attention when needed.
> 
> Mostly just (1), we're in an error situation, I'm not too worried about
> (2).
> 
> > I cannot resist pointing out that you have re-invented RCU_NONIDLE(),
> > though avoiding much of the overhead when not needed.  ;-)
> 
> Yeah, this was the absolute minimal bodge I could come up with that
> shuts up the rcu_dereference warning thing.
> 
> > I would have objections if this ever leaks out onto a non-error code path.
> 
> Agreed.
> 
> > There are things that need doing when RCU starts and stops watching,
> > and this approach omits those things.  Which again is OK in this case,
> > where this code is only ever executed when something is already broken,
> > but definitely *not* OK when things are not already broken.
> 
> And agreed.
> 
> Current version of the bodge looks like so (will repost the whole series
> a little later today).
> 
> I managed to tickle the recursion so that it was a test-case for the
> stack guard...
> 
> With this on, it prints just the one WARN and lives.
> 
> ---
> Subject: bug: Disable rcu_is_watching() during WARN/BUG
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Wed Jan 25 13:57:49 CET 2023
> 
> In order to avoid WARN/BUG from generating nested or even recursive
> warnings, force rcu_is_watching() true during
> WARN/lockdep_rcu_suspicious().
> 
> Notably things like unwinding the stack can trigger rcu_dereference()
> warnings, which then triggers more unwinding which then triggers more
> warnings etc..
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

From an RCU perspective:

Acked-by: Paul E. McKenney <paulmck@kernel.org>

> ---
>  include/linux/context_tracking.h |   27 +++++++++++++++++++++++++++
>  kernel/locking/lockdep.c         |    3 +++
>  kernel/panic.c                   |    5 +++++
>  lib/bug.c                        |   15 ++++++++++++++-
>  4 files changed, 49 insertions(+), 1 deletion(-)
> 
> --- a/include/linux/context_tracking.h
> +++ b/include/linux/context_tracking.h
> @@ -130,9 +130,36 @@ static __always_inline unsigned long ct_
>  	return arch_atomic_add_return(incby, this_cpu_ptr(&context_tracking.state));
>  }
>  
> +static __always_inline bool warn_rcu_enter(void)
> +{
> +	bool ret = false;
> +
> +	/*
> +	 * Horrible hack to shut up recursive RCU isn't watching fail since
> +	 * lots of the actual reporting also relies on RCU.
> +	 */
> +	preempt_disable_notrace();
> +	if (rcu_dynticks_curr_cpu_in_eqs()) {
> +		ret = true;
> +		ct_state_inc(RCU_DYNTICKS_IDX);
> +	}
> +
> +	return ret;
> +}
> +
> +static __always_inline void warn_rcu_exit(bool rcu)
> +{
> +	if (rcu)
> +		ct_state_inc(RCU_DYNTICKS_IDX);
> +	preempt_enable_notrace();
> +}
> +
>  #else
>  static inline void ct_idle_enter(void) { }
>  static inline void ct_idle_exit(void) { }
> +
> +static __always_inline bool warn_rcu_enter(void) { return false; }
> +static __always_inline void warn_rcu_exit(bool rcu) { }
>  #endif /* !CONFIG_CONTEXT_TRACKING_IDLE */
>  
>  #endif
> --- a/kernel/locking/lockdep.c
> +++ b/kernel/locking/lockdep.c
> @@ -55,6 +55,7 @@
>  #include <linux/rcupdate.h>
>  #include <linux/kprobes.h>
>  #include <linux/lockdep.h>
> +#include <linux/context_tracking.h>
>  
>  #include <asm/sections.h>
>  
> @@ -6555,6 +6556,7 @@ void lockdep_rcu_suspicious(const char *
>  {
>  	struct task_struct *curr = current;
>  	int dl = READ_ONCE(debug_locks);
> +	bool rcu = warn_rcu_enter();
>  
>  	/* Note: the following can be executed concurrently, so be careful. */
>  	pr_warn("\n");
> @@ -6595,5 +6597,6 @@ void lockdep_rcu_suspicious(const char *
>  	lockdep_print_held_locks(curr);
>  	pr_warn("\nstack backtrace:\n");
>  	dump_stack();
> +	warn_rcu_exit(rcu);
>  }
>  EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
> --- a/kernel/panic.c
> +++ b/kernel/panic.c
> @@ -34,6 +34,7 @@
>  #include <linux/ratelimit.h>
>  #include <linux/debugfs.h>
>  #include <linux/sysfs.h>
> +#include <linux/context_tracking.h>
>  #include <trace/events/error_report.h>
>  #include <asm/sections.h>
>  
> @@ -679,6 +680,7 @@ void __warn(const char *file, int line,
>  void warn_slowpath_fmt(const char *file, int line, unsigned taint,
>  		       const char *fmt, ...)
>  {
> +	bool rcu = warn_rcu_enter();
>  	struct warn_args args;
>  
>  	pr_warn(CUT_HERE);
> @@ -693,11 +695,13 @@ void warn_slowpath_fmt(const char *file,
>  	va_start(args.args, fmt);
>  	__warn(file, line, __builtin_return_address(0), taint, NULL, &args);
>  	va_end(args.args);
> +	warn_rcu_exit(rcu);
>  }
>  EXPORT_SYMBOL(warn_slowpath_fmt);
>  #else
>  void __warn_printk(const char *fmt, ...)
>  {
> +	bool rcu = warn_rcu_enter();
>  	va_list args;
>  
>  	pr_warn(CUT_HERE);
> @@ -705,6 +709,7 @@ void __warn_printk(const char *fmt, ...)
>  	va_start(args, fmt);
>  	vprintk(fmt, args);
>  	va_end(args);
> +	warn_rcu_exit(rcu);
>  }
>  EXPORT_SYMBOL(__warn_printk);
>  #endif
> --- a/lib/bug.c
> +++ b/lib/bug.c
> @@ -47,6 +47,7 @@
>  #include <linux/sched.h>
>  #include <linux/rculist.h>
>  #include <linux/ftrace.h>
> +#include <linux/context_tracking.h>
>  
>  extern struct bug_entry __start___bug_table[], __stop___bug_table[];
>  
> @@ -153,7 +154,7 @@ struct bug_entry *find_bug(unsigned long
>  	return module_find_bug(bugaddr);
>  }
>  
> -enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
> +static enum bug_trap_type __report_bug(unsigned long bugaddr, struct pt_regs *regs)
>  {
>  	struct bug_entry *bug;
>  	const char *file;
> @@ -209,6 +210,18 @@ enum bug_trap_type report_bug(unsigned l
>  	return BUG_TRAP_TYPE_BUG;
>  }
>  
> +enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
> +{
> +	enum bug_trap_type ret;
> +	bool rcu = false;
> +
> +	rcu = warn_rcu_enter();
> +	ret = __report_bug(bugaddr, regs);
> +	warn_rcu_exit(rcu);
> +
> +	return ret;
> +}
> +
>  static void clear_once_table(struct bug_entry *start, struct bug_entry *end)
>  {
>  	struct bug_entry *bug;

Patch

--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -646,6 +646,9 @@  void prepare_ftrace_return(unsigned long
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		return;
 
+	if (WARN_ONCE(!rcu_is_watching(), "RCU not on for: %pS\n", (void *)ip))
+		return;
+
 	bit = ftrace_test_recursion_trylock(ip, *parent);
 	if (bit < 0)
 		return;