
[V2,21/29] tracing: Use percpu stack trace buffer more intelligently

Message ID 20190418084254.999521114@linutronix.de (mailing list archive)
State New, archived
Series stacktrace: Consolidate stack trace usage

Commit Message

Thomas Gleixner April 18, 2019, 8:41 a.m. UTC
The per cpu stack trace buffer usage pattern is odd at best. The buffer has
room for 512 stack trace entries on 64-bit and 1024 on 32-bit. When
interrupts or exceptions nest after the per cpu buffer was acquired, the
stack trace length is hardcoded to 8 entries. 512/1024 stack trace entries
for kernel stacks are unrealistic, so the buffer is a complete waste.

Split the buffer into chunks of 64 stack entries, which is plenty. This
allows nesting contexts (interrupts, exceptions) to utilize the per cpu
buffer for stack retrieval and avoids the fixed length allocation along
with the conditional execution paths.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c |   77 +++++++++++++++++++++++++--------------------------
 1 file changed, 39 insertions(+), 38 deletions(-)
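
For reference, a rough userspace sketch of the chunking arithmetic described
above. The PAGE_SIZE and long size below are hard-coded 64-bit assumptions
for illustration, not values taken from a running kernel:

#include <stdio.h>

/* Illustration only: 4096-byte pages and 8-byte longs assumed */
#define PAGE_SIZE		4096UL
#define FTRACE_KSTACK_ENTRIES	64

struct ftrace_stack {
	unsigned long	calls[FTRACE_KSTACK_ENTRIES];
};

#define FTRACE_KSTACK_NESTING	(PAGE_SIZE / sizeof(struct ftrace_stack))

int main(void)
{
	/* 64 entries * 8 bytes = 512 bytes per chunk */
	printf("chunk size:    %zu bytes\n", sizeof(struct ftrace_stack));
	/* 4096 / 512 = 8 independent nesting slots per CPU */
	printf("nesting slots: %lu per CPU\n",
	       (unsigned long)FTRACE_KSTACK_NESTING);
	return 0;
}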

Comments

Steven Rostedt April 18, 2019, 2:53 p.m. UTC | #1
On Thu, 18 Apr 2019 10:41:40 +0200
Thomas Gleixner <tglx@linutronix.de> wrote:

> The per cpu stack trace buffer usage pattern is odd at best. The buffer has
> room for 512 stack trace entries on 64-bit and 1024 on 32-bit. When
> interrupts or exceptions nest after the per cpu buffer was acquired, the
> stack trace length is hardcoded to 8 entries. 512/1024 stack trace entries
> for kernel stacks are unrealistic, so the buffer is a complete waste.
> 
> Split the buffer into chunks of 64 stack entries, which is plenty. This
> allows nesting contexts (interrupts, exceptions) to utilize the per cpu
> buffer for stack retrieval and avoids the fixed length allocation along
> with the conditional execution paths.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> ---
>  kernel/trace/trace.c |   77 +++++++++++++++++++++++++--------------------------
>  1 file changed, 39 insertions(+), 38 deletions(-)
> 
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -2749,12 +2749,21 @@ trace_function(struct trace_array *tr,
>  
>  #ifdef CONFIG_STACKTRACE
>  
> -#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
> +/* 64 entries for kernel stacks are plenty */
> +#define FTRACE_KSTACK_ENTRIES	64
> +
>  struct ftrace_stack {
> -	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
> +	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
>  };
>  
> -static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
> +/* This allows 8 level nesting which is plenty */

Can we make this 4 level nesting and increase the size? (I can see us
going more than 64 deep, kernel developers never cease to amaze me ;-)
That's all we need:

 Context: Normal, softirq, irq, NMI

Is there any other way to nest?

-- Steve

> +#define FTRACE_KSTACK_NESTING	(PAGE_SIZE / sizeof(struct ftrace_stack))
> +
> +struct ftrace_stacks {
> +	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
> +};
> +
> +static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
>  static DEFINE_PER_CPU(int, ftrace_stack_reserve);
>  
>
Thomas Gleixner April 18, 2019, 3:43 p.m. UTC | #2
On Thu, 18 Apr 2019, Steven Rostedt wrote:
> On Thu, 18 Apr 2019 10:41:40 +0200
> Thomas Gleixner <tglx@linutronix.de> wrote:
> > -static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
> > +/* This allows 8 level nesting which is plenty */
> 
> Can we make this 4 level nesting and increase the size? (I can see us
> going more than 64 deep, kernel developers never cease to amaze me ;-)
> That's all we need:
> 
>  Context: Normal, softirq, irq, NMI
> 
> Is there any other way to nest?

Not that I know, but you are the tracer dude :)

Thanks,

	tglx
Steven Rostedt April 18, 2019, 3:46 p.m. UTC | #3
On Thu, 18 Apr 2019 17:43:59 +0200 (CEST)
Thomas Gleixner <tglx@linutronix.de> wrote:

> On Thu, 18 Apr 2019, Steven Rostedt wrote:
> > On Thu, 18 Apr 2019 10:41:40 +0200
> > Thomas Gleixner <tglx@linutronix.de> wrote:  
> > > -static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
> > > +/* This allows 8 level nesting which is plenty */  
> > 
> > Can we make this 4 level nesting and increase the size? (I can see us
> > going more than 64 deep, kernel developers never cease to amaze me ;-)
> > That's all we need:
> > 
> >  Context: Normal, softirq, irq, NMI
> > 
> > Is there any other way to nest?  
> 
> Not that I know, but you are the tracer dude :)
>

There are other places where I only test 4 deep, so it should be fine to
limit it to 4 then.

Thanks!

-- Steve
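
A minimal sketch of the four-level variant discussed above, assuming the
constants keep the names from the patch below and that PAGE_SIZE comes from
the usual arch headers; each chunk then grows to fill a quarter of the page
(128 entries on 64-bit with 4096-byte pages). This illustrates the direction
agreed on here, not the posted patch:

/* Four contexts can stack: task, softirq, hardirq, NMI */
#define FTRACE_KSTACK_NESTING	4

#define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING / \
				 sizeof(unsigned long))

struct ftrace_stack {
	unsigned long	calls[FTRACE_KSTACK_ENTRIES];
};

struct ftrace_stacks {
	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
};

static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);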

Patch

--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2749,12 +2749,21 @@  trace_function(struct trace_array *tr,
 
 #ifdef CONFIG_STACKTRACE
 
-#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
+/* 64 entries for kernel stacks are plenty */
+#define FTRACE_KSTACK_ENTRIES	64
+
 struct ftrace_stack {
-	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
+	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
 };
 
-static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
+/* This allows 8 level nesting which is plenty */
+#define FTRACE_KSTACK_NESTING	(PAGE_SIZE / sizeof(struct ftrace_stack))
+
+struct ftrace_stacks {
+	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
+};
+
+static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
 
 static void __ftrace_trace_stack(struct ring_buffer *buffer,
@@ -2763,10 +2772,11 @@  static void __ftrace_trace_stack(struct
 {
 	struct trace_event_call *call = &event_kernel_stack;
 	struct ring_buffer_event *event;
+	struct ftrace_stack *fstack;
 	struct stack_entry *entry;
 	struct stack_trace trace;
-	int use_stack;
-	int size = FTRACE_STACK_ENTRIES;
+	int size = FTRACE_KSTACK_ENTRIES;
+	int stackidx;
 
 	trace.nr_entries	= 0;
 	trace.skip		= skip;
@@ -2788,29 +2798,32 @@  static void __ftrace_trace_stack(struct
 	 */
 	preempt_disable_notrace();
 
-	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
+	stackidx = __this_cpu_inc_return(ftrace_stack_reserve);
+
+	/* This should never happen. If it does, yell once and skip */
+	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
+		goto out;
+
 	/*
-	 * We don't need any atomic variables, just a barrier.
-	 * If an interrupt comes in, we don't care, because it would
-	 * have exited and put the counter back to what we want.
-	 * We just need a barrier to keep gcc from moving things
-	 * around.
+	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
+	 * interrupt will either see the value pre increment or post
+	 * increment. If the interrupt happens pre increment it will have
+	 * restored the counter when it returns.  We just need a barrier to
+	 * keep gcc from moving things around.
 	 */
 	barrier();
-	if (use_stack == 1) {
-		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
-		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
-
-		if (regs)
-			save_stack_trace_regs(regs, &trace);
-		else
-			save_stack_trace(&trace);
-
-		if (trace.nr_entries > size)
-			size = trace.nr_entries;
-	} else
-		/* From now on, use_stack is a boolean */
-		use_stack = 0;
+
+	fstack = this_cpu_ptr(ftrace_stacks.stacks) + (stackidx - 1);
+	trace.entries		= fstack->calls;
+	trace.max_entries	= FTRACE_KSTACK_ENTRIES;
+
+	if (regs)
+		save_stack_trace_regs(regs, &trace);
+	else
+		save_stack_trace(&trace);
+
+	if (trace.nr_entries > size)
+		size = trace.nr_entries;
 
 	size *= sizeof(unsigned long);
 
@@ -2820,19 +2833,7 @@  static void __ftrace_trace_stack(struct
 		goto out;
 	entry = ring_buffer_event_data(event);
 
-	memset(&entry->caller, 0, size);
-
-	if (use_stack)
-		memcpy(&entry->caller, trace.entries,
-		       trace.nr_entries * sizeof(unsigned long));
-	else {
-		trace.max_entries	= FTRACE_STACK_ENTRIES;
-		trace.entries		= entry->caller;
-		if (regs)
-			save_stack_trace_regs(regs, &trace);
-		else
-			save_stack_trace(&trace);
-	}
+	memcpy(&entry->caller, trace.entries, size);
 
 	entry->size = trace.nr_entries;
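
Taken together, the reservation pattern the patch builds on looks roughly
like the sketch below. It reuses the names from the hunks above; the release
path is not visible in the truncated hunk, so the decrement of
ftrace_stack_reserve at the out label is an assumption about the surrounding
code, and the ring buffer handling is elided:

static void ftrace_stack_nesting_sketch(struct pt_regs *regs, int skip)
{
	struct ftrace_stack *fstack;
	struct stack_trace trace;
	int stackidx;

	trace.nr_entries	= 0;
	trace.skip		= skip;

	/* No sleeping between reserve and release */
	preempt_disable_notrace();

	/* Claim the next free 64-entry chunk for this context */
	stackidx = __this_cpu_inc_return(ftrace_stack_reserve);
	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
		goto out;

	/* Keep gcc from reordering around the reservation */
	barrier();

	fstack = this_cpu_ptr(ftrace_stacks.stacks) + (stackidx - 1);
	trace.entries		= fstack->calls;
	trace.max_entries	= FTRACE_KSTACK_ENTRIES;

	if (regs)
		save_stack_trace_regs(regs, &trace);
	else
		save_stack_trace(&trace);

	/* ... allocate the ring buffer event and copy trace.entries ... */

out:
	/*
	 * Release the chunk; a nested context that ran meanwhile already
	 * restored the counter on its own exit.
	 */
	__this_cpu_dec(ftrace_stack_reserve);
	preempt_enable_notrace();
}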