[V2,28/29] stacktrace: Provide common infrastructure
diff mbox series

Message ID 20190418084255.652003111@linutronix.de
State New
Headers show
Series
  • stacktrace: Consolidate stack trace usage
Related show

Commit Message

Thomas Gleixner April 18, 2019, 8:41 a.m. UTC
All architectures which support stacktrace carry duplicated code and
do the stack storage and filtering at the architecture side.

Provide a consolidated interface with a callback function for consuming the
stack entries provided by the architecture specific stack walker. This
removes lots of duplicated code and allows to implement better filtering
than 'skip number of entries' in the future without touching any
architecture specific code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
---
 include/linux/stacktrace.h |   38 +++++++++
 kernel/stacktrace.c        |  173 +++++++++++++++++++++++++++++++++++++++++++++
 lib/Kconfig                |    4 +
 3 files changed, 215 insertions(+)

Comments

Mike Rapoport April 18, 2019, 11:52 a.m. UTC | #1
On Thu, Apr 18, 2019 at 10:41:47AM +0200, Thomas Gleixner wrote:
> All architectures which support stacktrace carry duplicated code and
> do the stack storage and filtering at the architecture side.
> 
> Provide a consolidated interface with a callback function for consuming the
> stack entries provided by the architecture specific stack walker. This
> removes lots of duplicated code and allows to implement better filtering
> than 'skip number of entries' in the future without touching any
> architecture specific code.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: linux-arch@vger.kernel.org
> ---
>  include/linux/stacktrace.h |   38 +++++++++
>  kernel/stacktrace.c        |  173 +++++++++++++++++++++++++++++++++++++++++++++
>  lib/Kconfig                |    4 +
>  3 files changed, 215 insertions(+)
> 
> --- a/include/linux/stacktrace.h
> +++ b/include/linux/stacktrace.h
> @@ -23,6 +23,43 @@ unsigned int stack_trace_save_regs(struc
>  unsigned int stack_trace_save_user(unsigned long *store, unsigned int size);
> 
>  /* Internal interfaces. Do not use in generic code */
> +#ifdef CONFIG_ARCH_STACKWALK
> +
> +/**
> + * stack_trace_consume_fn - Callback for arch_stack_walk()
> + * @cookie:	Caller supplied pointer handed back by arch_stack_walk()
> + * @addr:	The stack entry address to consume
> + * @reliable:	True when the stack entry is reliable. Required by
> + *		some printk based consumers.
> + *
> + * Returns:	True, if the entry was consumed or skipped
> + *		False, if there is no space left to store
> + */
> +typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr,
> +				       bool reliable);
> +/**
> + * arch_stack_walk - Architecture specific function to walk the stack
> +

Nit: no '*' at line beginning makes kernel-doc unhappy

> + * @consume_entry:	Callback which is invoked by the architecture code for
> + *			each entry.
> + * @cookie:		Caller supplied pointer which is handed back to
> + *			@consume_entry
> + * @task:		Pointer to a task struct, can be NULL
> + * @regs:		Pointer to registers, can be NULL
> + *
> + * @task	@regs:
> + * NULL		NULL	Stack trace from current
> + * task		NULL	Stack trace from task (can be current)
> + * NULL		regs	Stack trace starting on regs->stackpointer

This will render as a single line with 'make *docs'.
Adding line separators makes this actually a table in the generated docs:

 * ============ ======= ============================================
 * task		regs
 * ============ ======= ============================================
 * NULL		NULL	Stack trace from current
 * task		NULL	Stack trace from task (can be current)
 * NULL		regs	Stack trace starting on regs->stackpointer
 * ============ ======= ============================================


> + */
> +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
> +		     struct task_struct *task, struct pt_regs *regs);
> +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, void *cookie,
> +			     struct task_struct *task);
> +void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
> +			  const struct pt_regs *regs);
> +
> +#else /* CONFIG_ARCH_STACKWALK */
>  struct stack_trace {
>  	unsigned int nr_entries, max_entries;
>  	unsigned long *entries;
> @@ -37,6 +74,7 @@ extern void save_stack_trace_tsk(struct
>  extern int save_stack_trace_tsk_reliable(struct task_struct *tsk,
>  					 struct stack_trace *trace);
>  extern void save_stack_trace_user(struct stack_trace *trace);
> +#endif /* !CONFIG_ARCH_STACKWALK */
>  #endif /* CONFIG_STACKTRACE */
> 
>  #if defined(CONFIG_STACKTRACE) && defined(CONFIG_HAVE_RELIABLE_STACKTRACE)
> --- a/kernel/stacktrace.c
> +++ b/kernel/stacktrace.c
> @@ -5,6 +5,8 @@
>   *
>   *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
>   */
> +#include <linux/sched/task_stack.h>
> +#include <linux/sched/debug.h>
>  #include <linux/sched.h>
>  #include <linux/kernel.h>
>  #include <linux/export.h>
> @@ -64,6 +66,175 @@ int stack_trace_snprint(char *buf, size_
>  }
>  EXPORT_SYMBOL_GPL(stack_trace_snprint);
> 
> +#ifdef CONFIG_ARCH_STACKWALK
> +
> +struct stacktrace_cookie {
> +	unsigned long	*store;
> +	unsigned int	size;
> +	unsigned int	skip;
> +	unsigned int	len;
> +};
> +
> +static bool stack_trace_consume_entry(void *cookie, unsigned long addr,
> +				      bool reliable)
> +{
> +	struct stacktrace_cookie *c = cookie;
> +
> +	if (c->len >= c->size)
> +		return false;
> +
> +	if (c->skip > 0) {
> +		c->skip--;
> +		return true;
> +	}
> +	c->store[c->len++] = addr;
> +	return c->len < c->size;
> +}
> +
> +static bool stack_trace_consume_entry_nosched(void *cookie, unsigned long addr,
> +					      bool reliable)
> +{
> +	if (in_sched_functions(addr))
> +		return true;
> +	return stack_trace_consume_entry(cookie, addr, reliable);
> +}
> +
> +/**
> + * stack_trace_save - Save a stack trace into a storage array
> + * @store:	Pointer to storage array
> + * @size:	Size of the storage array
> + * @skipnr:	Number of entries to skip at the start of the stack trace
> + *
> + * Returns number of entries stored.

Can you please s/Returns/Return:/ so that kernel-doc will recognize this as
return section.

This is relevant for other comments below as well.

> + */
> +unsigned int stack_trace_save(unsigned long *store, unsigned int size,
> +			      unsigned int skipnr)
> +{
> +	stack_trace_consume_fn consume_entry = stack_trace_consume_entry;
> +	struct stacktrace_cookie c = {
> +		.store	= store,
> +		.size	= size,
> +		.skip	= skipnr + 1,
> +	};
> +
> +	arch_stack_walk(consume_entry, &c, current, NULL);
> +	return c.len;
> +}
> +EXPORT_SYMBOL_GPL(stack_trace_save);
> +
> +/**
> + * stack_trace_save_tsk - Save a task stack trace into a storage array
> + * @task:	The task to examine
> + * @store:	Pointer to storage array
> + * @size:	Size of the storage array
> + * @skipnr:	Number of entries to skip at the start of the stack trace
> + *
> + * Returns number of entries stored.
> + */
> +unsigned int stack_trace_save_tsk(struct task_struct *tsk, unsigned long *store,
> +				  unsigned int size, unsigned int skipnr)
> +{
> +	stack_trace_consume_fn consume_entry = stack_trace_consume_entry_nosched;
> +	struct stacktrace_cookie c = {
> +		.store	= store,
> +		.size	= size,
> +		.skip	= skipnr + 1,
> +	};
> +
> +	if (!try_get_task_stack(tsk))
> +		return 0;
> +
> +	arch_stack_walk(consume_entry, &c, tsk, NULL);
> +	put_task_stack(tsk);
> +	return c.len;
> +}
> +
> +/**
> + * stack_trace_save_regs - Save a stack trace based on pt_regs into a storage array
> + * @regs:	Pointer to pt_regs to examine
> + * @store:	Pointer to storage array
> + * @size:	Size of the storage array
> + * @skipnr:	Number of entries to skip at the start of the stack trace
> + *
> + * Returns number of entries stored.
> + */
> +unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store,
> +				   unsigned int size, unsigned int skipnr)
> +{
> +	stack_trace_consume_fn consume_entry = stack_trace_consume_entry;
> +	struct stacktrace_cookie c = {
> +		.store	= store,
> +		.size	= size,
> +		.skip	= skipnr,
> +	};
> +
> +	arch_stack_walk(consume_entry, &c, current, regs);
> +	return c.len;
> +}
> +
> +#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
> +/**
> + * stack_trace_save_tsk_reliable - Save task stack with verification
> + * @tsk:	Pointer to the task to examine
> + * @store:	Pointer to storage array
> + * @size:	Size of the storage array
> + *
> + * Returns:	An error if it detects any unreliable features of the
> + *		stack. Otherwise it guarantees that the stack trace is
> + *		reliable and returns the number of entries stored.
> + *
> + * If the task is not 'current', the caller *must* ensure the task is inactive.
> + */
> +int stack_trace_save_tsk_reliable(struct task_struct *tsk, unsigned long *store,
> +				  unsigned int size)
> +{
> +	stack_trace_consume_fn consume_entry = stack_trace_consume_entry;
> +	struct stacktrace_cookie c = {
> +		.store	= store,
> +		.size	= size,
> +	};
> +	int ret;
> +
> +	/*
> +	 * If the task doesn't have a stack (e.g., a zombie), the stack is
> +	 * "reliably" empty.
> +	 */
> +	if (!try_get_task_stack(tsk))
> +		return 0;
> +
> +	ret = arch_stack_walk_reliable(consume_entry, &c, tsk);
> +	put_task_stack(tsk);
> +	return ret;
> +}
> +#endif
> +
> +#ifdef CONFIG_USER_STACKTRACE_SUPPORT
> +/**
> + * stack_trace_save_user - Save a user space stack trace into a storage array
> + * @store:	Pointer to storage array
> + * @size:	Size of the storage array
> + *
> + * Returns number of entries stored.
> + */
> +unsigned int stack_trace_save_user(unsigned long *store, unsigned int size)
> +{
> +	stack_trace_consume_fn consume_entry = stack_trace_consume_entry;
> +	struct stacktrace_cookie c = {
> +		.store	= store,
> +		.size	= size,
> +	};
> +
> +	/* Trace user stack if not a kernel thread */
> +	if (!current->mm)
> +		return 0;
> +
> +	arch_stack_walk_user(consume_entry, &c, task_pt_regs(current));
> +	return c.len;
> +}
> +#endif
> +
> +#else /* CONFIG_ARCH_STACKWALK */
> +
>  /*
>   * Architectures that do not implement save_stack_trace_*()
>   * get these weak aliases and once-per-bootup warnings
> @@ -193,3 +364,5 @@ unsigned int stack_trace_save_user(unsig
>  	return trace.nr_entries;
>  }
>  #endif /* CONFIG_USER_STACKTRACE_SUPPORT */
> +
> +#endif /* !CONFIG_ARCH_STACKWALK */
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -597,6 +597,10 @@ config ARCH_HAS_UACCESS_FLUSHCACHE
>  config ARCH_HAS_UACCESS_MCSAFE
>  	bool
> 
> +# Temporary. Goes away when all archs are cleaned up
> +config ARCH_STACKWALK
> +       bool
> +
>  config STACKDEPOT
>  	bool
>  	select STACKTRACE
> 
>
Thomas Gleixner April 18, 2019, 11:57 a.m. UTC | #2
On Thu, 18 Apr 2019, Mike Rapoport wrote:
> On Thu, Apr 18, 2019 at 10:41:47AM +0200, Thomas Gleixner wrote:
> > +/**
> > + * arch_stack_walk - Architecture specific function to walk the stack
> > +
> 
> Nit: no '*' at line beginning makes kernel-doc unhappy

Oops.

> > + * @consume_entry:	Callback which is invoked by the architecture code for
> > + *			each entry.
> > + * @cookie:		Caller supplied pointer which is handed back to
> > + *			@consume_entry
> > + * @task:		Pointer to a task struct, can be NULL
> > + * @regs:		Pointer to registers, can be NULL
> > + *
> > + * @task	@regs:
> > + * NULL		NULL	Stack trace from current
> > + * task		NULL	Stack trace from task (can be current)
> > + * NULL		regs	Stack trace starting on regs->stackpointer
> 
> This will render as a single line with 'make *docs'.
> Adding line separators makes this actually a table in the generated docs:
> 
>  * ============ ======= ============================================
>  * task		regs
>  * ============ ======= ============================================
>  * NULL		NULL	Stack trace from current
>  * task		NULL	Stack trace from task (can be current)
>  * NULL		regs	Stack trace starting on regs->stackpointer
>  * ============ ======= ============================================

Cute.

> > + * Returns number of entries stored.
> 
> Can you please s/Returns/Return:/ so that kernel-doc will recognize this as
> return section.
> 
> This is relevant for other comments below as well.

Sure.

Thanks for the free kernel doc course! :)

	tglx
Josh Poimboeuf April 18, 2019, 2:52 p.m. UTC | #3
On Thu, Apr 18, 2019 at 10:41:47AM +0200, Thomas Gleixner wrote:
> All architectures which support stacktrace carry duplicated code and
> do the stack storage and filtering at the architecture side.
> 
> Provide a consolidated interface with a callback function for consuming the
> stack entries provided by the architecture specific stack walker. This
> removes lots of duplicated code and allows to implement better filtering
> than 'skip number of entries' in the future without touching any
> architecture specific code.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: linux-arch@vger.kernel.org

This is a step in the right direction, especially if it allows us to get
rid of the 'skip' stuff.  But I'm not crazy about the callbacks.

Another idea I had (but never got a chance to work on) was to extend the
x86 unwind interface to all arches.  So instead of the callbacks, each
arch would implement something like this API:


struct unwind_state state;

void unwind_start(struct unwind_state *state, struct task_struct *task,
		  struct pt_regs *regs, unsigned long *first_frame);

bool unwind_next_frame(struct unwind_state *state);

inline bool unwind_done(struct unwind_state *state);


Not only would it avoid the callbacks (which is a nice benefit already),
it would also allow the interfaces to be used outside of the
stack_trace_*() interfaces.  That would come in handy in cases like the
ftrace stack tracer code, which needs more than the stack_trace_*() API
can give.

Of course, this may be more work than what you thought you signed up for
;-)
Thomas Gleixner April 18, 2019, 3:42 p.m. UTC | #4
On Thu, 18 Apr 2019, Josh Poimboeuf wrote:

> On Thu, Apr 18, 2019 at 10:41:47AM +0200, Thomas Gleixner wrote:
> > All architectures which support stacktrace carry duplicated code and
> > do the stack storage and filtering at the architecture side.
> > 
> > Provide a consolidated interface with a callback function for consuming the
> > stack entries provided by the architecture specific stack walker. This
> > removes lots of duplicated code and allows to implement better filtering
> > than 'skip number of entries' in the future without touching any
> > architecture specific code.
> > 
> > Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> > Cc: linux-arch@vger.kernel.org
> 
> This is a step in the right direction, especially if it allows us to get
> rid of the 'skip' stuff.  But I'm not crazy about the callbacks.
> 
> Another idea I had (but never got a chance to work on) was to extend the
> x86 unwind interface to all arches.  So instead of the callbacks, each
> arch would implement something like this API:
> 
> 
> struct unwind_state state;
> 
> void unwind_start(struct unwind_state *state, struct task_struct *task,
> 		  struct pt_regs *regs, unsigned long *first_frame);
> 
> bool unwind_next_frame(struct unwind_state *state);
> 
> inline bool unwind_done(struct unwind_state *state);
> 
> 
> Not only would it avoid the callbacks (which is a nice benefit already),
> it would also allow the interfaces to be used outside of the
> stack_trace_*() interfaces.  That would come in handy in cases like the
> ftrace stack tracer code, which needs more than the stack_trace_*() API
> can give.

I surely thought about that, but after staring at all incarnations of
arch/*/stacktrace.c I just gave up.

Aside of that quite some archs already have callback based unwinders
because they use them for more than stacktracing and just have a single
implementation of that loop.

I'm fine either way. We can start with x86 and then let archs convert over
their stuff, but I wouldn't hold my breath that this will be completed in
the foreseeable future.

> Of course, this may be more work than what you thought you signed up for
> ;-)

I did not sign up for anything. I tripped over that mess by accident and me
being me hated it strong enough to give it at least an initial steam blast.

Thanks,

	tglx
Peter Zijlstra April 19, 2019, 7:02 a.m. UTC | #5
On Thu, Apr 18, 2019 at 05:42:55PM +0200, Thomas Gleixner wrote:
> On Thu, 18 Apr 2019, Josh Poimboeuf wrote:

> > Another idea I had (but never got a chance to work on) was to extend the
> > x86 unwind interface to all arches.  So instead of the callbacks, each
> > arch would implement something like this API:

> I surely thought about that, but after staring at all incarnations of
> arch/*/stacktrace.c I just gave up.
> 
> Aside of that quite some archs already have callback based unwinders
> because they use them for more than stacktracing and just have a single
> implementation of that loop.
> 
> I'm fine either way. We can start with x86 and then let archs convert over
> their stuff, but I wouldn't hold my breath that this will be completed in
> the foreseeable future.

I suggested the same to Thomas early on, and I even spent the time to
convert some $random arch to the iterator interface, and while it is
indeed entirely feasible, it is _far_ more work.

The callback thing OTOH is flexible enough to do what we want to do now,
and allows converting most archs to it without too much pain (as Thomas
said, many archs are already in this form and only need minor API
adjustments), which gets us in a far better place than we are now.

And we can always go to iterators later on. But I think getting the
generic unwinder improved across all archs is a really important first
step here.
Peter Zijlstra April 19, 2019, 7:18 a.m. UTC | #6
On Thu, Apr 18, 2019 at 10:41:47AM +0200, Thomas Gleixner wrote:

> +typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr,
> +                                      bool reliable);

> +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
> +		     struct task_struct *task, struct pt_regs *regs);
> +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, void *cookie,
> +			     struct task_struct *task);

This bugs me a little; ideally the _reliable() thing would not exists.

Thomas said that the existing __save_stack_trace_reliable() is different
enough for the unification to be non-trivial, but maybe Josh can help
out?

From what I can see the biggest significant differences are:

 - it looks at the regs sets on the stack and for FP bails early
 - bails for kthreads and idle (after it does all the hard work!?!)

The first (FP checking for exceptions) should probably be reflected in
consume_fn(.reliable) anyway -- although that would mean a lot of extra
'?' entries where there are none today.

And the second (KTHREAD/IDLE) is something that the generic code can
easily do before calling into the arch unwinder.

Hmm?
Thomas Gleixner April 19, 2019, 8:32 a.m. UTC | #7
On Fri, 19 Apr 2019, Peter Zijlstra wrote:
> On Thu, Apr 18, 2019 at 10:41:47AM +0200, Thomas Gleixner wrote:
> 
> > +typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr,
> > +                                      bool reliable);
> 
> > +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
> > +		     struct task_struct *task, struct pt_regs *regs);
> > +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, void *cookie,
> > +			     struct task_struct *task);
> 
> This bugs me a little; ideally the _reliable() thing would not exists.
> 
> Thomas said that the existing __save_stack_trace_reliable() is different
> enough for the unification to be non-trivial, but maybe Josh can help
> out?
> 
> From what I can see the biggest significant differences are:
> 
>  - it looks at the regs sets on the stack and for FP bails early
>  - bails for kthreads and idle (after it does all the hard work!?!)
> 
> The first (FP checking for exceptions) should probably be reflected in
> consume_fn(.reliable) anyway -- although that would mean a lot of extra
> '?' entries where there are none today.
> 
> And the second (KTHREAD/IDLE) is something that the generic code can
> easily do before calling into the arch unwinder.

And looking at the powerpc version of it, that has even more interesting
extra checks in that function.

Thanks,

	tglx
Peter Zijlstra April 19, 2019, 9:07 a.m. UTC | #8
On Fri, Apr 19, 2019 at 10:32:30AM +0200, Thomas Gleixner wrote:
> On Fri, 19 Apr 2019, Peter Zijlstra wrote:
> > On Thu, Apr 18, 2019 at 10:41:47AM +0200, Thomas Gleixner wrote:
> > 
> > > +typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr,
> > > +                                      bool reliable);
> > 
> > > +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
> > > +		     struct task_struct *task, struct pt_regs *regs);
> > > +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, void *cookie,
> > > +			     struct task_struct *task);
> > 
> > This bugs me a little; ideally the _reliable() thing would not exists.
> > 
> > Thomas said that the existing __save_stack_trace_reliable() is different
> > enough for the unification to be non-trivial, but maybe Josh can help
> > out?
> > 
> > From what I can see the biggest significant differences are:
> > 
> >  - it looks at the regs sets on the stack and for FP bails early
> >  - bails for kthreads and idle (after it does all the hard work!?!)
> > 
> > The first (FP checking for exceptions) should probably be reflected in
> > consume_fn(.reliable) anyway -- although that would mean a lot of extra
> > '?' entries where there are none today.
> > 
> > And the second (KTHREAD/IDLE) is something that the generic code can
> > easily do before calling into the arch unwinder.
> 
> And looking at the powerpc version of it, that has even more interesting
> extra checks in that function.

Right, but not fundamentally different from determining @reliable I
think.

Anyway, it would be good if someone knowledgable could have a look at
this.
Josh Poimboeuf April 19, 2019, 3:50 p.m. UTC | #9
On Fri, Apr 19, 2019 at 09:02:11AM +0200, Peter Zijlstra wrote:
> On Thu, Apr 18, 2019 at 05:42:55PM +0200, Thomas Gleixner wrote:
> > On Thu, 18 Apr 2019, Josh Poimboeuf wrote:
> 
> > > Another idea I had (but never got a chance to work on) was to extend the
> > > x86 unwind interface to all arches.  So instead of the callbacks, each
> > > arch would implement something like this API:
> 
> > I surely thought about that, but after staring at all incarnations of
> > arch/*/stacktrace.c I just gave up.
> > 
> > Aside of that quite some archs already have callback based unwinders
> > because they use them for more than stacktracing and just have a single
> > implementation of that loop.
> > 
> > I'm fine either way. We can start with x86 and then let archs convert over
> > their stuff, but I wouldn't hold my breath that this will be completed in
> > the forseeable future.
> 
> I suggested the same to Thomas early on, and I even spent the time to
> convert some $random arch to the iterator interface, and while it is
> indeed entirely feasible, it is _far_ more work.
> 
> The callback thing OTOH is flexible enough to do what we want to do now,
> and allows converting most archs to it without too much pain (as Thomas
> said, many archs are already in this form and only need minor API
> adjustments), which gets us in a far better place than we are now.
> 
> And we can always go to iterators later on. But I think getting the
> generic unwinder improved across all archs is a really important first
> step here.

Fair enough.
Josh Poimboeuf April 19, 2019, 4:17 p.m. UTC | #10
On Fri, Apr 19, 2019 at 11:07:17AM +0200, Peter Zijlstra wrote:
> On Fri, Apr 19, 2019 at 10:32:30AM +0200, Thomas Gleixner wrote:
> > On Fri, 19 Apr 2019, Peter Zijlstra wrote:
> > > On Thu, Apr 18, 2019 at 10:41:47AM +0200, Thomas Gleixner wrote:
> > > 
> > > > +typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr,
> > > > +                                      bool reliable);
> > > 
> > > > +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
> > > > +		     struct task_struct *task, struct pt_regs *regs);
> > > > +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, void *cookie,
> > > > +			     struct task_struct *task);
> > > 
> > > This bugs me a little; ideally the _reliable() thing would not exists.
> > > 
> > > Thomas said that the existing __save_stack_trace_reliable() is different
> > > enough for the unification to be non-trivial, but maybe Josh can help
> > > out?
> > > 
> > > From what I can see the biggest significant differences are:
> > > 
> > >  - it looks at the regs sets on the stack and for FP bails early
> > >  - bails for kthreads and idle (after it does all the hard work!?!)

That's done for a reason, see the "Success path" comments.

> > > The first (FP checking for exceptions) should probably be reflected in
> > > consume_fn(.reliable) anyway -- although that would mean a lot of extra
> > > '?' entries where there are none today.
> > > 
> > > And the second (KTHREAD/IDLE) is something that the generic code can
> > > easily do before calling into the arch unwinder.
> > 
> > And looking at the powerpc version of it, that has even more interesting
> > extra checks in that function.
> 
> Right, but not fundamentally different from determining @reliable I
> think.
> 
> Anyway, it would be good if someone knowledgable could have a look at
> this.

Yeah, we could probably do that.

The flow would need to be changed a bit -- some of the errors are soft
errors which most users don't care about because they just want a best
effort.  The soft errors can be remembered without breaking out of the
loop, and just returned at the end.  Most users could just ignore the
return code.

The only thing I'd be worried about is performance for the non-livepatch
users, but I guess those checks don't look very expensive.  And the x86
unwinders are already pretty slow anyway...

Patch
diff mbox series

--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -23,6 +23,43 @@  unsigned int stack_trace_save_regs(struc
 unsigned int stack_trace_save_user(unsigned long *store, unsigned int size);
 
 /* Internal interfaces. Do not use in generic code */
+#ifdef CONFIG_ARCH_STACKWALK
+
+/**
+ * stack_trace_consume_fn - Callback for arch_stack_walk()
+ * @cookie:	Caller supplied pointer handed back by arch_stack_walk()
+ * @addr:	The stack entry address to consume
+ * @reliable:	True when the stack entry is reliable. Required by
+ *		some printk based consumers.
+ *
+ * Returns:	True, if the entry was consumed or skipped
+ *		False, if there is no space left to store
+ */
+typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr,
+				       bool reliable);
+/**
+ * arch_stack_walk - Architecture specific function to walk the stack
+
+ * @consume_entry:	Callback which is invoked by the architecture code for
+ *			each entry.
+ * @cookie:		Caller supplied pointer which is handed back to
+ *			@consume_entry
+ * @task:		Pointer to a task struct, can be NULL
+ * @regs:		Pointer to registers, can be NULL
+ *
+ * @task	@regs:
+ * NULL		NULL	Stack trace from current
+ * task		NULL	Stack trace from task (can be current)
+ * NULL		regs	Stack trace starting on regs->stackpointer
+ */
+void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+		     struct task_struct *task, struct pt_regs *regs);
+int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, void *cookie,
+			     struct task_struct *task);
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+			  const struct pt_regs *regs);
+
+#else /* CONFIG_ARCH_STACKWALK */
 struct stack_trace {
 	unsigned int nr_entries, max_entries;
 	unsigned long *entries;
@@ -37,6 +74,7 @@  extern void save_stack_trace_tsk(struct
 extern int save_stack_trace_tsk_reliable(struct task_struct *tsk,
 					 struct stack_trace *trace);
 extern void save_stack_trace_user(struct stack_trace *trace);
+#endif /* !CONFIG_ARCH_STACKWALK */
 #endif /* CONFIG_STACKTRACE */
 
 #if defined(CONFIG_STACKTRACE) && defined(CONFIG_HAVE_RELIABLE_STACKTRACE)
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -5,6 +5,8 @@ 
  *
  *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  */
+#include <linux/sched/task_stack.h>
+#include <linux/sched/debug.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
@@ -64,6 +66,175 @@  int stack_trace_snprint(char *buf, size_
 }
 EXPORT_SYMBOL_GPL(stack_trace_snprint);
 
+#ifdef CONFIG_ARCH_STACKWALK
+
+struct stacktrace_cookie {
+	unsigned long	*store;
+	unsigned int	size;
+	unsigned int	skip;
+	unsigned int	len;
+};
+
+static bool stack_trace_consume_entry(void *cookie, unsigned long addr,
+				      bool reliable)
+{
+	struct stacktrace_cookie *c = cookie;
+
+	if (c->len >= c->size)
+		return false;
+
+	if (c->skip > 0) {
+		c->skip--;
+		return true;
+	}
+	c->store[c->len++] = addr;
+	return c->len < c->size;
+}
+
+static bool stack_trace_consume_entry_nosched(void *cookie, unsigned long addr,
+					      bool reliable)
+{
+	if (in_sched_functions(addr))
+		return true;
+	return stack_trace_consume_entry(cookie, addr, reliable);
+}
+
+/**
+ * stack_trace_save - Save a stack trace into a storage array
+ * @store:	Pointer to storage array
+ * @size:	Size of the storage array
+ * @skipnr:	Number of entries to skip at the start of the stack trace
+ *
+ * Returns number of entries stored.
+ */
+unsigned int stack_trace_save(unsigned long *store, unsigned int size,
+			      unsigned int skipnr)
+{
+	stack_trace_consume_fn consume_entry = stack_trace_consume_entry;
+	struct stacktrace_cookie c = {
+		.store	= store,
+		.size	= size,
+		.skip	= skipnr + 1,
+	};
+
+	arch_stack_walk(consume_entry, &c, current, NULL);
+	return c.len;
+}
+EXPORT_SYMBOL_GPL(stack_trace_save);
+
+/**
+ * stack_trace_save_tsk - Save a task stack trace into a storage array
+ * @task:	The task to examine
+ * @store:	Pointer to storage array
+ * @size:	Size of the storage array
+ * @skipnr:	Number of entries to skip at the start of the stack trace
+ *
+ * Returns number of entries stored.
+ */
+unsigned int stack_trace_save_tsk(struct task_struct *tsk, unsigned long *store,
+				  unsigned int size, unsigned int skipnr)
+{
+	stack_trace_consume_fn consume_entry = stack_trace_consume_entry_nosched;
+	struct stacktrace_cookie c = {
+		.store	= store,
+		.size	= size,
+		.skip	= skipnr + 1,
+	};
+
+	if (!try_get_task_stack(tsk))
+		return 0;
+
+	arch_stack_walk(consume_entry, &c, tsk, NULL);
+	put_task_stack(tsk);
+	return c.len;
+}
+
+/**
+ * stack_trace_save_regs - Save a stack trace based on pt_regs into a storage array
+ * @regs:	Pointer to pt_regs to examine
+ * @store:	Pointer to storage array
+ * @size:	Size of the storage array
+ * @skipnr:	Number of entries to skip at the start of the stack trace
+ *
+ * Returns number of entries stored.
+ */
+unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store,
+				   unsigned int size, unsigned int skipnr)
+{
+	stack_trace_consume_fn consume_entry = stack_trace_consume_entry;
+	struct stacktrace_cookie c = {
+		.store	= store,
+		.size	= size,
+		.skip	= skipnr,
+	};
+
+	arch_stack_walk(consume_entry, &c, current, regs);
+	return c.len;
+}
+
+#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
+/**
+ * stack_trace_save_tsk_reliable - Save task stack with verification
+ * @tsk:	Pointer to the task to examine
+ * @store:	Pointer to storage array
+ * @size:	Size of the storage array
+ *
+ * Returns:	An error if it detects any unreliable features of the
+ *		stack. Otherwise it guarantees that the stack trace is
+ *		reliable and returns the number of entries stored.
+ *
+ * If the task is not 'current', the caller *must* ensure the task is inactive.
+ */
+int stack_trace_save_tsk_reliable(struct task_struct *tsk, unsigned long *store,
+				  unsigned int size)
+{
+	stack_trace_consume_fn consume_entry = stack_trace_consume_entry;
+	struct stacktrace_cookie c = {
+		.store	= store,
+		.size	= size,
+	};
+	int ret;
+
+	/*
+	 * If the task doesn't have a stack (e.g., a zombie), the stack is
+	 * "reliably" empty.
+	 */
+	if (!try_get_task_stack(tsk))
+		return 0;
+
+	ret = arch_stack_walk_reliable(consume_entry, &c, tsk);
+	put_task_stack(tsk);
+	return ret;
+}
+#endif
+
+#ifdef CONFIG_USER_STACKTRACE_SUPPORT
+/**
+ * stack_trace_save_user - Save a user space stack trace into a storage array
+ * @store:	Pointer to storage array
+ * @size:	Size of the storage array
+ *
+ * Returns number of entries stored.
+ */
+unsigned int stack_trace_save_user(unsigned long *store, unsigned int size)
+{
+	stack_trace_consume_fn consume_entry = stack_trace_consume_entry;
+	struct stacktrace_cookie c = {
+		.store	= store,
+		.size	= size,
+	};
+
+	/* Trace user stack if not a kernel thread */
+	if (!current->mm)
+		return 0;
+
+	arch_stack_walk_user(consume_entry, &c, task_pt_regs(current));
+	return c.len;
+}
+#endif
+
+#else /* CONFIG_ARCH_STACKWALK */
+
 /*
  * Architectures that do not implement save_stack_trace_*()
  * get these weak aliases and once-per-bootup warnings
@@ -193,3 +364,5 @@  unsigned int stack_trace_save_user(unsig
 	return trace.nr_entries;
 }
 #endif /* CONFIG_USER_STACKTRACE_SUPPORT */
+
+#endif /* !CONFIG_ARCH_STACKWALK */
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -597,6 +597,10 @@  config ARCH_HAS_UACCESS_FLUSHCACHE
 config ARCH_HAS_UACCESS_MCSAFE
 	bool
 
+# Temporary. Goes away when all archs are cleaned up
+config ARCH_STACKWALK
+       bool
+
 config STACKDEPOT
 	bool
 	select STACKTRACE