
[1/3] lib/stackdepot: Add a refcount field in stack_record

Message ID 20220901044249.4624-2-osalvador@suse.de (mailing list archive)
State New
Series page_owner: print stacks and their counter

Commit Message

Oscar Salvador Sept. 1, 2022, 4:42 a.m. UTC
We want to filter page_owner output and print only those
stacks that have been repeated beyond a certain threshold.
This gives us the chance to get rid of a lot of noise.
In order to do that, we need to keep track of how many times each
(allocation) stack has been repeated, so add a new refcount_t field
to the stack_record struct.

Note that on __set_page_owner_handle(), page_owner->handle is set,
and on __reset_page_owner(), page_owner->free_handle is set.

We are interested in page_owner->handle, so when __set_page_owner()
gets called, we derive the stack_record struct from page_owner->handle
and increment its refcount_t field; and when __reset_page_owner()
gets called, we derive its stack_record from page_owner->handle
and decrement its refcount_t field.

This is a preparation for patch#2.
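
For a given page, the pairing over its lifetime looks like this
(illustrative call flow; the details are in the diff below):

	__set_page_owner()
		save_stack(gfp_mask, STACK_ACTION_INC)		/* count++ */
	...
	__reset_page_owner()
		save_stack(GFP_NOWAIT | __GFP_NOWARN, STACK_ACTION_NONE)
		stack_depot_dec_count(page_owner->handle)	/* count-- */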

Signed-off-by: Oscar Salvador <osalvador@suse.de>
---
 include/linux/stackdepot.h | 13 ++++++-
 lib/stackdepot.c           | 79 +++++++++++++++++++++++++++++++-------
 mm/kasan/common.c          |  3 +-
 mm/page_owner.c            | 13 +++++--
 4 files changed, 88 insertions(+), 20 deletions(-)

Comments

Marco Elver Sept. 1, 2022, 8:24 a.m. UTC | #1
On Thu, Sep 01, 2022 at 06:42AM +0200, Oscar Salvador wrote:
> We want to filter page_owner output and print only those
> stacks that have been repeated beyond a certain threshold.
> This gives us the chance to get rid of a lot of noise.
> In order to do that, we need to keep track of how many times each
> (allocation) stack has been repeated, so add a new refcount_t field
> to the stack_record struct.
> 
> Note that on __set_page_owner_handle(), page_owner->handle is set,
> and on __reset_page_owner(), page_owner->free_handle is set.
> 
> We are interested in page_owner->handle, so when __set_page_owner()
> gets called, we derive the stack_record struct from page_owner->handle
> and increment its refcount_t field; and when __reset_page_owner()
> gets called, we derive its stack_record from page_owner->handle
> and decrement its refcount_t field.
> 
> This is a preparation for patch#2.
> 
> Signed-off-by: Oscar Salvador <osalvador@suse.de>
> ---
>  include/linux/stackdepot.h | 13 ++++++-
>  lib/stackdepot.c           | 79 +++++++++++++++++++++++++++++++-------
>  mm/kasan/common.c          |  3 +-

+Cc other kasan maintainers

>  mm/page_owner.c            | 13 +++++--
>  4 files changed, 88 insertions(+), 20 deletions(-)
> 
> diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
> index bc2797955de9..5ee0cf5be88f 100644
> --- a/include/linux/stackdepot.h
> +++ b/include/linux/stackdepot.h
> @@ -15,9 +15,16 @@
>  
>  typedef u32 depot_stack_handle_t;
>  
> +typedef enum stack_action {
> +	STACK_ACTION_NONE,
> +	STACK_ACTION_INC,
> +}stack_action_t;
> +

missing space after '}'. But please no unnecessary typedef; plain 'enum
stack_action' (spelled out as 'enum stack_action' elsewhere too) is
fine.

This is in the global namespace, so I'd call this
stack_depot_action+STACK_DEPOT_ACTION_*.

However, .._ACTION_INC doesn't really say what's incremented. As an
analog to stack_depot_dec_count(), perhaps .._ACTION_COUNT?

In general it'd be nicer if there was stack_depot_inc_count() instead of
this additional argument, but I see that for performance reasons you
might not like that?
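
Something like the below, reusing the stack_depot_getstack() helper this
patch adds (just a sketch of the alternative, not code from the patch):

	void stack_depot_inc_count(depot_stack_handle_t handle)
	{
		struct stack_record *stack = stack_depot_getstack(handle);

		if (stack)
			refcount_inc(&stack->count);
	}

The cost is deriving the stack_record from the handle a second time on
every allocation.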

>  depot_stack_handle_t __stack_depot_save(unsigned long *entries,
>  					unsigned int nr_entries,
> -					gfp_t gfp_flags, bool can_alloc);
> +					gfp_t gfp_flags, bool can_alloc,
> +					stack_action_t action);
> +void stack_depot_dec_count(depot_stack_handle_t handle);
>  
>  /*
>   * Every user of stack depot has to call stack_depot_init() during its own init
> @@ -55,6 +62,10 @@ static inline int stack_depot_early_init(void)	{ return 0; }
>  
>  depot_stack_handle_t stack_depot_save(unsigned long *entries,
>  				      unsigned int nr_entries, gfp_t gfp_flags);
> +depot_stack_handle_t stack_depot_save_action(unsigned long *entries,
> +					     unsigned int nr_entries,
> +					     gfp_t gfp_flags,
> +					     stack_action_t action);
>  
>  unsigned int stack_depot_fetch(depot_stack_handle_t handle,
>  			       unsigned long **entries);
> diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> index 5ca0d086ef4a..aeb59d3557e2 100644
> --- a/lib/stackdepot.c
> +++ b/lib/stackdepot.c
> @@ -63,6 +63,7 @@ struct stack_record {
>  	u32 hash;			/* Hash in the hastable */
>  	u32 size;			/* Number of frames in the stack */
>  	union handle_parts handle;
> +	refcount_t count;		/* Number of the same repeated stacks */

This will increase stack_record size for every user, even if they don't
care about the count.

Is there a way to store this out-of-line somewhere?
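
E.g. a separate handle -> count map that only counting users populate.
A rough sketch (made-up names; locking and entry lifetime ignored):

	static DEFINE_XARRAY(stack_counts);

	static void stack_count_inc(depot_stack_handle_t handle)
	{
		unsigned long count = xa_to_value(xa_load(&stack_counts, handle));

		xa_store(&stack_counts, handle, xa_mk_value(count + 1), GFP_NOWAIT);
	}

Though that trades 4 bytes per stack_record for extra allocations and
tracking on the side, so maybe it is not worth it.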

>  	unsigned long entries[];	/* Variable-sized array of entries. */
>  };
>  
> @@ -139,6 +140,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
>  	stack->handle.slabindex = depot_index;
>  	stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN;
>  	stack->handle.valid = 1;
> +	refcount_set(&stack->count, 1);
>  	memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
>  	depot_offset += required_size;
>  
> @@ -302,6 +304,29 @@ void stack_depot_print(depot_stack_handle_t stack)
>  }
>  EXPORT_SYMBOL_GPL(stack_depot_print);
>  
> +static struct stack_record *stack_depot_getstack(depot_stack_handle_t handle)
> +{
> +	union handle_parts parts = { .handle = handle };
> +	void *slab;
> +	size_t offset = parts.offset << STACK_ALLOC_ALIGN;
> +	struct stack_record *stack;
> +
> +	if(!handle)
> +		return NULL;
> +
> +	if (parts.slabindex > depot_index) {
> +		WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n",
> +		     parts.slabindex, depot_index, handle);
> +		return NULL;
> +	}
> +	slab = stack_slabs[parts.slabindex];
> +	if (!slab)
> +		return NULL;
> +
> +	stack = slab + offset;
> +	return stack;
> +}
> +
>  /**
>   * stack_depot_fetch - Fetch stack entries from a depot
>   *
> @@ -314,30 +339,42 @@ EXPORT_SYMBOL_GPL(stack_depot_print);
>  unsigned int stack_depot_fetch(depot_stack_handle_t handle,
>  			       unsigned long **entries)
>  {
> -	union handle_parts parts = { .handle = handle };
> -	void *slab;
> -	size_t offset = parts.offset << STACK_ALLOC_ALIGN;
>  	struct stack_record *stack;
>  
>  	*entries = NULL;
>  	if (!handle)
>  		return 0;
>  
> -	if (parts.slabindex > depot_index) {
> -		WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n",
> -			parts.slabindex, depot_index, handle);
> -		return 0;
> -	}
> -	slab = stack_slabs[parts.slabindex];
> -	if (!slab)
> +	stack = stack_depot_getstack(handle);
> +	if (!stack)
>  		return 0;
> -	stack = slab + offset;
>  
>  	*entries = stack->entries;
>  	return stack->size;
>  }
>  EXPORT_SYMBOL_GPL(stack_depot_fetch);
>  
> +static void stack_depot_inc_count(struct stack_record *stack)
> +{
> +	refcount_inc(&stack->count);
> +}
> +
> +void stack_depot_dec_count(depot_stack_handle_t handle)
> +{
> +	struct stack_record *stack = NULL;
> +
> +	stack = stack_depot_getstack(handle);
> +	if (stack) {
> +	/*
> +	 * page_owner creates some stacks via create_dummy_stack().
> +	 * We are not interested in those, so make sure we only decrement
> +	 * "valid" stacks.
> +	 */

Comment indent is wrong.

> +		if (refcount_read(&stack->count) > 1)
> +			refcount_dec(&stack->count);
> +	}
> +}
> +
>  /**
>   * __stack_depot_save - Save a stack trace from an array
>   *
> @@ -363,7 +400,8 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch);
>   */
>  depot_stack_handle_t __stack_depot_save(unsigned long *entries,
>  					unsigned int nr_entries,
> -					gfp_t alloc_flags, bool can_alloc)
> +					gfp_t alloc_flags, bool can_alloc,
> +					stack_action_t action)
>  {
>  	struct stack_record *found = NULL, **bucket;
>  	depot_stack_handle_t retval = 0;
> @@ -449,8 +487,11 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
>  		/* Nobody used this memory, ok to free it. */
>  		free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER);
>  	}
> -	if (found)
> +	if (found) {
>  		retval = found->handle.handle;
> +		if (action == STACK_ACTION_INC)
> +			stack_depot_inc_count(found);
> +	}
>  fast_exit:
>  	return retval;
>  }
> @@ -472,6 +513,16 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
>  				      unsigned int nr_entries,
>  				      gfp_t alloc_flags)
>  {
> -	return __stack_depot_save(entries, nr_entries, alloc_flags, true);
> +	return __stack_depot_save(entries, nr_entries, alloc_flags, true,
> +				  STACK_ACTION_NONE);
>  }
>  EXPORT_SYMBOL_GPL(stack_depot_save);
> +
> +depot_stack_handle_t stack_depot_save_action(unsigned long *entries,
> +					     unsigned int nr_entries,
> +					     gfp_t alloc_flags,
> +					     stack_action_t action)
> +{
> +	return __stack_depot_save(entries, nr_entries, alloc_flags, true, action);
> +}
> +EXPORT_SYMBOL_GPL(stack_depot_save_action);
> diff --git a/mm/kasan/common.c b/mm/kasan/common.c
> index c40c0e7b3b5f..f434994f3b0d 100644
> --- a/mm/kasan/common.c
> +++ b/mm/kasan/common.c
> @@ -36,7 +36,8 @@ depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc)
>  	unsigned int nr_entries;
>  
>  	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
> -	return __stack_depot_save(entries, nr_entries, flags, can_alloc);
> +	return __stack_depot_save(entries, nr_entries, flags, can_alloc,
> +				  STACK_ACTION_NONE);
>  }
>  
>  void kasan_set_track(struct kasan_track *track, gfp_t flags)
> diff --git a/mm/page_owner.c b/mm/page_owner.c
> index e4c6f3f1695b..794f346d7520 100644
> --- a/mm/page_owner.c
> +++ b/mm/page_owner.c
> @@ -106,7 +106,7 @@ static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
>  	return (void *)page_ext + page_owner_ops.offset;
>  }
>  
> -static noinline depot_stack_handle_t save_stack(gfp_t flags)
> +static noinline depot_stack_handle_t save_stack(gfp_t flags, stack_action_t action)
>  {
>  	unsigned long entries[PAGE_OWNER_STACK_DEPTH];
>  	depot_stack_handle_t handle;
> @@ -125,7 +125,7 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags)
>  	current->in_page_owner = 1;
>  
>  	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
> -	handle = stack_depot_save(entries, nr_entries, flags);
> +	handle = stack_depot_save_action(entries, nr_entries, flags, action);
>  	if (!handle)
>  		handle = failure_handle;
>  
> @@ -138,6 +138,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
>  	int i;
>  	struct page_ext *page_ext;
>  	depot_stack_handle_t handle;
> +	depot_stack_handle_t alloc_handle;
>  	struct page_owner *page_owner;
>  	u64 free_ts_nsec = local_clock();
>  
> @@ -145,7 +146,10 @@ void __reset_page_owner(struct page *page, unsigned short order)
>  	if (unlikely(!page_ext))
>  		return;
>  
> -	handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
> +	page_owner = get_page_owner(page_ext);
> +	alloc_handle = page_owner->handle;
> +
> +	handle = save_stack(GFP_NOWAIT | __GFP_NOWARN, STACK_ACTION_NONE);
>  	for (i = 0; i < (1 << order); i++) {
>  		__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
>  		page_owner = get_page_owner(page_ext);
> @@ -153,6 +157,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
>  		page_owner->free_ts_nsec = free_ts_nsec;
>  		page_ext = page_ext_next(page_ext);
>  	}
> +	stack_depot_dec_count(alloc_handle);
>  }
>  
>  static inline void __set_page_owner_handle(struct page_ext *page_ext,
> @@ -189,7 +194,7 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
>  	if (unlikely(!page_ext))
>  		return;
>  
> -	handle = save_stack(gfp_mask);
> +	handle = save_stack(gfp_mask, STACK_ACTION_INC);
>  	__set_page_owner_handle(page_ext, handle, order, gfp_mask);
>  }
>  
> -- 
> 2.35.3
Michal Hocko Sept. 1, 2022, 8:38 a.m. UTC | #2
On Thu 01-09-22 10:24:58, Marco Elver wrote:
> On Thu, Sep 01, 2022 at 06:42AM +0200, Oscar Salvador wrote:
[...]
> > diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> > index 5ca0d086ef4a..aeb59d3557e2 100644
> > --- a/lib/stackdepot.c
> > +++ b/lib/stackdepot.c
> > @@ -63,6 +63,7 @@ struct stack_record {
> >  	u32 hash;			/* Hash in the hastable */
> >  	u32 size;			/* Number of frames in the stack */
> >  	union handle_parts handle;
> > +	refcount_t count;		/* Number of the same repeated stacks */
> 
> This will increase stack_record size for every user, even if they don't
> care about the count.

Couldn't this be used for garbage collection?
Marco Elver Sept. 1, 2022, 9:18 a.m. UTC | #3
On Thu, 1 Sept 2022 at 10:38, Michal Hocko <mhocko@suse.com> wrote:
>
> On Thu 01-09-22 10:24:58, Marco Elver wrote:
> > On Thu, Sep 01, 2022 at 06:42AM +0200, Oscar Salvador wrote:
> [...]
> > > diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> > > index 5ca0d086ef4a..aeb59d3557e2 100644
> > > --- a/lib/stackdepot.c
> > > +++ b/lib/stackdepot.c
> > > @@ -63,6 +63,7 @@ struct stack_record {
> > >     u32 hash;                       /* Hash in the hastable */
> > >     u32 size;                       /* Number of frames in the stack */
> > >     union handle_parts handle;
> > > +   refcount_t count;               /* Number of the same repeated stacks */
> >
> > This will increase stack_record size for every user, even if they don't
> > care about the count.
>
> Couldn't this be used for garbage collection?

Only if we can precisely figure out at which point a stack is no
longer going to be needed.

But more realistically, stack depot was designed to be simple. Right
now it can allocate new stacks (from an internal pool), but giving the
memory back to that pool isn't supported. Doing garbage collection
would effectively be a redesign of stack depot. And for the purpose
for which stack depot was designed (debugging tools), memory has never
been an issue (note that stack depot also has a fixed upper bound on
memory usage).

We had talked (in the context of KASAN) about bounded stack storage,
but the preferred solution is usually a cache-based design which
allows evictions (in the simplest case a ring buffer), because
figuring out (and relying on) where precisely a stack will
definitively no longer be required in bug reports is complex and does
not guarantee the required bound on memory usage. Andrey has done the
work on this for tag-based KASAN modes:
https://lore.kernel.org/all/cover.1658189199.git.andreyknvl@google.com/
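
Very roughly, the ring idea is something like the below (illustrative
only, not what Andrey's series actually implements):

	#define STACK_RING_SIZE	1024

	static depot_stack_handle_t stack_ring[STACK_RING_SIZE];
	static atomic_t stack_ring_pos;

	static void stack_ring_save(depot_stack_handle_t handle)
	{
		unsigned int pos = atomic_inc_return(&stack_ring_pos);

		/* Storing a new handle implicitly evicts the oldest one. */
		stack_ring[pos % STACK_RING_SIZE] = handle;
	}

Memory stays bounded without ever having to decide that a particular
stack trace is dead.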
Michal Hocko Sept. 1, 2022, 10:01 a.m. UTC | #4
On Thu 01-09-22 11:18:19, Marco Elver wrote:
> On Thu, 1 Sept 2022 at 10:38, Michal Hocko <mhocko@suse.com> wrote:
> >
> > On Thu 01-09-22 10:24:58, Marco Elver wrote:
> > > On Thu, Sep 01, 2022 at 06:42AM +0200, Oscar Salvador wrote:
> > [...]
> > > > diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> > > > index 5ca0d086ef4a..aeb59d3557e2 100644
> > > > --- a/lib/stackdepot.c
> > > > +++ b/lib/stackdepot.c
> > > > @@ -63,6 +63,7 @@ struct stack_record {
> > > >     u32 hash;                       /* Hash in the hastable */
> > > >     u32 size;                       /* Number of frames in the stack */
> > > >     union handle_parts handle;
> > > > +   refcount_t count;               /* Number of the same repeated stacks */
> > >
> > > This will increase stack_record size for every user, even if they don't
> > > care about the count.
> >
> > Couldn't this be used for garbage collection?
> 
> Only if we can precisely figure out at which point a stack is no
> longer going to be needed.
> 
> But more realistically, stack depot was designed to be simple. Right
> now it can allocate new stacks (from an internal pool), but giving the
> memory back to that pool isn't supported. Doing garbage collection
> would effectively be a redesign of stack depot.

Fair argument. 

> And for the purpose
> for which stack depot was designed (debugging tools), memory has never
> been an issue (note that stack depot also has a fixed upper bound on
> memory usage).

Is the increased size really a blocker then? I see how it sucks to
maintain a counter when it is not used by anything but page_owner, but
storing that counter externally would just add more complexity AFAICS
(more allocations, more tracking etc.).

Maybe the counter can be conditional on page_owner, which would add
some complexity as well (variable size structure), but at least the
external allocation stuff could be avoided.
Marco Elver Sept. 1, 2022, 10:20 a.m. UTC | #5
On Thu, 1 Sept 2022 at 12:01, Michal Hocko <mhocko@suse.com> wrote:
>
> On Thu 01-09-22 11:18:19, Marco Elver wrote:
> > On Thu, 1 Sept 2022 at 10:38, Michal Hocko <mhocko@suse.com> wrote:
> > >
> > > On Thu 01-09-22 10:24:58, Marco Elver wrote:
> > > > On Thu, Sep 01, 2022 at 06:42AM +0200, Oscar Salvador wrote:
> > > [...]
> > > > > diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> > > > > index 5ca0d086ef4a..aeb59d3557e2 100644
> > > > > --- a/lib/stackdepot.c
> > > > > +++ b/lib/stackdepot.c
> > > > > @@ -63,6 +63,7 @@ struct stack_record {
> > > > >     u32 hash;                       /* Hash in the hastable */
> > > > >     u32 size;                       /* Number of frames in the stack */
> > > > >     union handle_parts handle;
> > > > > +   refcount_t count;               /* Number of the same repeated stacks */
> > > >
> > > > This will increase stack_record size for every user, even if they don't
> > > > care about the count.
> > >
> > > Couldn't this be used for garbage collection?
> >
> > Only if we can precisely figure out at which point a stack is no
> > longer going to be needed.
> >
> > But more realistically, stack depot was designed to be simple. Right
> > now it can allocate new stacks (from an internal pool), but giving the
> > memory back to that pool isn't supported. Doing garbage collection
> > would effectively be a redesign of stack depot.
>
> Fair argument.
>
> > And for the purpose
> > for which stack depot was designed (debugging tools), memory has never
> > been an issue (note that stack depot also has a fixed upper bound on
> > memory usage).
>
> Is the increased size really a blocker then? I see how it sucks to
> maintain a counter when it is not used by anything but page_owner but
> storing that counte externally would just add more complexity AFAICS
> (more allocations, more tracking etc.).

Right, I think keeping it simple is better.

> Maybe the counter can be conditional on the page_owner which would add
> some complexity as well (variable size structure) but at least the
> external allocation stuff could be avoided.

Not sure it's needed - I just checked the size of stack_record on an
x86-64 build, and it's 24 bytes. Because 'handle_parts' is 4 bytes,
refcount_t is 4 bytes, and 'entries' is 8-byte aligned, stack_record
is still 24 bytes even with the refcount_t. :-)

And for me that's good enough. Maybe mentioning this in the commit
message is worthwhile. Of course 32-bit builds still suffer a little,
but I think we can live with that.
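
FWIW, the x86-64 layout (the leading 'next' pointer comes from the full
struct definition in lib/stackdepot.c; it is not visible in the quoted
hunk):

	struct stack_record *next;	/* offset  0, size 8 */
	u32 hash;			/* offset  8, size 4 */
	u32 size;			/* offset 12, size 4 */
	union handle_parts handle;	/* offset 16, size 4 */
	refcount_t count;		/* offset 20, size 4 */
	unsigned long entries[];	/* offset 24, size 0 */

Without 'count' there are 4 bytes of padding at offset 20 anyway,
because entries[] is 8-byte aligned, so sizeof() is 24 either way.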
Oscar Salvador Sept. 2, 2022, 3:27 a.m. UTC | #6
On Thu, Sep 01, 2022 at 10:24:58AM +0200, Marco Elver wrote:
> On Thu, Sep 01, 2022 at 06:42AM +0200, Oscar Salvador wrote:
> >  include/linux/stackdepot.h | 13 ++++++-
> >  lib/stackdepot.c           | 79 +++++++++++++++++++++++++++++++-------
> >  mm/kasan/common.c          |  3 +-
> 
> +Cc other kasan maintainers

Yeah, sorry about that, I should have CCed you guys.

> > +typedef enum stack_action {
> > +	STACK_ACTION_NONE,
> > +	STACK_ACTION_INC,
> > +}stack_action_t;
> > +
> 
> missing space after '}'. But please no unnecessary typedef, just 'enum
> stack_action' (and spelling out 'enum stack_action' elsewhere) is just
> fine.

Sure, will rename it.

> 
> This is in the global namespace, so I'd call this
> stack_depot_action+STACK_DEPOT_ACTION_*.
> 
> However, .._ACTION_INC doesn't really say what's incremented. As an
> analog to stack_depot_dec_count(), perhaps .._ACTION_COUNT?

I guess we can go with "STACK_DEPOT_ACTION_COUNT" or "STACK_DEPOT_ACTION_REF_INC",
but the latter seems rather baroque for my taste.

> In general it'd be nicer if there was stack_depot_inc_count() instead of
> this additional argument, but I see that for performance reasons you
> might not like that?

Yes, the first prototypes didn't have this stack_action_t thing,
but that implied that we had to look up the stack twice
in the __set_page_owner() case.

This way we only do that in the __reset_page_owner() case.

So yes, it's a trade-off of performance vs. LOC.
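
That is, the alternative would have been something like this in
save_stack()'s callers (sketch, assuming a handle-based
stack_depot_inc_count() existed):

	handle = save_stack(gfp_mask);		/* finds/creates the stack */
	stack_depot_inc_count(handle);		/* derives the stack again */

whereas with the action argument, __stack_depot_save() bumps the count
while it already has the stack_record at hand.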

> > --- a/lib/stackdepot.c
> > +++ b/lib/stackdepot.c
> > @@ -63,6 +63,7 @@ struct stack_record {
> >  	u32 hash;			/* Hash in the hastable */
> >  	u32 size;			/* Number of frames in the stack */
> >  	union handle_parts handle;
> > +	refcount_t count;		/* Number of the same repeated stacks */
> 
> This will increase stack_record size for every user, even if they don't
> care about the count.
> 
> Is there a way to store this out-of-line somewhere?

That would require some kind of dynamic structure, e.g. allocating new
links to stacks as they are created and incrementing the refcount there.

But that would add too much complexity, I think.

As I read in your other thread, we can probably live with that, but
it is worth spelling out in the changelog.

> > +void stack_depot_dec_count(depot_stack_handle_t handle)
> > +{
> > +	struct stack_record *stack = NULL;
> > +
> > +	stack = stack_depot_getstack(handle);
> > +	if (stack) {
> > +	/*
> > +	 * page_owner creates some stacks via create_dummy_stack().
> > +	 * We are not interested in those, so make sure we only decrement
> > +	 * "valid" stacks.
> > +	 */
> 
> Comment indent is wrong.

Will fix it.

Thanks for taking the time to review the code, Marco!
Andrey Konovalov Sept. 5, 2022, 8:53 p.m. UTC | #7
On Thu, Sep 1, 2022 at 11:18 AM Marco Elver <elver@google.com> wrote:
>
> On Thu, 1 Sept 2022 at 10:38, Michal Hocko <mhocko@suse.com> wrote:
> >
> > On Thu 01-09-22 10:24:58, Marco Elver wrote:
> > > On Thu, Sep 01, 2022 at 06:42AM +0200, Oscar Salvador wrote:
> > [...]
> > > > diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> > > > index 5ca0d086ef4a..aeb59d3557e2 100644
> > > > --- a/lib/stackdepot.c
> > > > +++ b/lib/stackdepot.c
> > > > @@ -63,6 +63,7 @@ struct stack_record {
> > > >     u32 hash;                       /* Hash in the hastable */
> > > >     u32 size;                       /* Number of frames in the stack */
> > > >     union handle_parts handle;
> > > > +   refcount_t count;               /* Number of the same repeated stacks */
> > >
> > > This will increase stack_record size for every user, even if they don't
> > > care about the count.
> >
> > Couldn't this be used for garbage collection?
>
> Only if we can precisely figure out at which point a stack is no
> longer going to be needed.
>
> But more realistically, stack depot was designed to be simple. Right
> now it can allocate new stacks (from an internal pool), but giving the
> memory back to that pool isn't supported. Doing garbage collection
> would effectively be a redesign of stack depot. And for the purpose
> for which stack depot was designed (debugging tools), memory has never
> been an issue (note that stack depot also has a fixed upper bound on
> memory usage).
>
> We had talked (in the context of KASAN) about bounded stack storage,
> but the preferred solution is usually a cache-based design which
> allows evictions (in the simplest case a ring buffer), because
> figuring out (and relying on) where precisely a stack will
> definitively no longer be required in bug reports is complex and does
> not guarantee the required bound on memory usage. Andrey has done the
> work on this for tag-based KASAN modes:
> https://lore.kernel.org/all/cover.1658189199.git.andreyknvl@google.com/

To be clear, the stack ring buffer implementation for the KASAN
tag-based modes still uses the stack depot as a back end to store
stack traces.

I plan to explore redesigning the stack depot implementation to allow
evicting unneeded stack traces as the next step. (The goal is to have
a memory-bounded stack depot that doesn't just stop collecting stack
traces once the memory limit is reached.) Having a refcount for each
saved stack trace will likely be a part of this redesign.

Patch

diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index bc2797955de9..5ee0cf5be88f 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -15,9 +15,16 @@ 
 
 typedef u32 depot_stack_handle_t;
 
+typedef enum stack_action {
+	STACK_ACTION_NONE,
+	STACK_ACTION_INC,
+}stack_action_t;
+
 depot_stack_handle_t __stack_depot_save(unsigned long *entries,
 					unsigned int nr_entries,
-					gfp_t gfp_flags, bool can_alloc);
+					gfp_t gfp_flags, bool can_alloc,
+					stack_action_t action);
+void stack_depot_dec_count(depot_stack_handle_t handle);
 
 /*
  * Every user of stack depot has to call stack_depot_init() during its own init
@@ -55,6 +62,10 @@ static inline int stack_depot_early_init(void)	{ return 0; }
 
 depot_stack_handle_t stack_depot_save(unsigned long *entries,
 				      unsigned int nr_entries, gfp_t gfp_flags);
+depot_stack_handle_t stack_depot_save_action(unsigned long *entries,
+					     unsigned int nr_entries,
+					     gfp_t gfp_flags,
+					     stack_action_t action);
 
 unsigned int stack_depot_fetch(depot_stack_handle_t handle,
 			       unsigned long **entries);
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 5ca0d086ef4a..aeb59d3557e2 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -63,6 +63,7 @@ struct stack_record {
 	u32 hash;			/* Hash in the hastable */
 	u32 size;			/* Number of frames in the stack */
 	union handle_parts handle;
+	refcount_t count;		/* Number of the same repeated stacks */
 	unsigned long entries[];	/* Variable-sized array of entries. */
 };
 
@@ -139,6 +140,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
 	stack->handle.slabindex = depot_index;
 	stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN;
 	stack->handle.valid = 1;
+	refcount_set(&stack->count, 1);
 	memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
 	depot_offset += required_size;
 
@@ -302,6 +304,29 @@ void stack_depot_print(depot_stack_handle_t stack)
 }
 EXPORT_SYMBOL_GPL(stack_depot_print);
 
+static struct stack_record *stack_depot_getstack(depot_stack_handle_t handle)
+{
+	union handle_parts parts = { .handle = handle };
+	void *slab;
+	size_t offset = parts.offset << STACK_ALLOC_ALIGN;
+	struct stack_record *stack;
+
+	if(!handle)
+		return NULL;
+
+	if (parts.slabindex > depot_index) {
+		WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n",
+		     parts.slabindex, depot_index, handle);
+		return NULL;
+	}
+	slab = stack_slabs[parts.slabindex];
+	if (!slab)
+		return NULL;
+
+	stack = slab + offset;
+	return stack;
+}
+
 /**
  * stack_depot_fetch - Fetch stack entries from a depot
  *
@@ -314,30 +339,42 @@ EXPORT_SYMBOL_GPL(stack_depot_print);
 unsigned int stack_depot_fetch(depot_stack_handle_t handle,
 			       unsigned long **entries)
 {
-	union handle_parts parts = { .handle = handle };
-	void *slab;
-	size_t offset = parts.offset << STACK_ALLOC_ALIGN;
 	struct stack_record *stack;
 
 	*entries = NULL;
 	if (!handle)
 		return 0;
 
-	if (parts.slabindex > depot_index) {
-		WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n",
-			parts.slabindex, depot_index, handle);
-		return 0;
-	}
-	slab = stack_slabs[parts.slabindex];
-	if (!slab)
+	stack = stack_depot_getstack(handle);
+	if (!stack)
 		return 0;
-	stack = slab + offset;
 
 	*entries = stack->entries;
 	return stack->size;
 }
 EXPORT_SYMBOL_GPL(stack_depot_fetch);
 
+static void stack_depot_inc_count(struct stack_record *stack)
+{
+	refcount_inc(&stack->count);
+}
+
+void stack_depot_dec_count(depot_stack_handle_t handle)
+{
+	struct stack_record *stack = NULL;
+
+	stack = stack_depot_getstack(handle);
+	if (stack) {
+	/*
+	 * page_owner creates some stacks via create_dummy_stack().
+	 * We are not interested in those, so make sure we only decrement
+	 * "valid" stacks.
+	 */
+		if (refcount_read(&stack->count) > 1)
+			refcount_dec(&stack->count);
+	}
+}
+
 /**
  * __stack_depot_save - Save a stack trace from an array
  *
@@ -363,7 +400,8 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch);
  */
 depot_stack_handle_t __stack_depot_save(unsigned long *entries,
 					unsigned int nr_entries,
-					gfp_t alloc_flags, bool can_alloc)
+					gfp_t alloc_flags, bool can_alloc,
+					stack_action_t action)
 {
 	struct stack_record *found = NULL, **bucket;
 	depot_stack_handle_t retval = 0;
@@ -449,8 +487,11 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
 		/* Nobody used this memory, ok to free it. */
 		free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER);
 	}
-	if (found)
+	if (found) {
 		retval = found->handle.handle;
+		if (action == STACK_ACTION_INC)
+			stack_depot_inc_count(found);
+	}
 fast_exit:
 	return retval;
 }
@@ -472,6 +513,16 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
 				      unsigned int nr_entries,
 				      gfp_t alloc_flags)
 {
-	return __stack_depot_save(entries, nr_entries, alloc_flags, true);
+	return __stack_depot_save(entries, nr_entries, alloc_flags, true,
+				  STACK_ACTION_NONE);
 }
 EXPORT_SYMBOL_GPL(stack_depot_save);
+
+depot_stack_handle_t stack_depot_save_action(unsigned long *entries,
+					     unsigned int nr_entries,
+					     gfp_t alloc_flags,
+					     stack_action_t action)
+{
+	return __stack_depot_save(entries, nr_entries, alloc_flags, true, action);
+}
+EXPORT_SYMBOL_GPL(stack_depot_save_action);
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index c40c0e7b3b5f..f434994f3b0d 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -36,7 +36,8 @@ depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc)
 	unsigned int nr_entries;
 
 	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
-	return __stack_depot_save(entries, nr_entries, flags, can_alloc);
+	return __stack_depot_save(entries, nr_entries, flags, can_alloc,
+				  STACK_ACTION_NONE);
 }
 
 void kasan_set_track(struct kasan_track *track, gfp_t flags)
diff --git a/mm/page_owner.c b/mm/page_owner.c
index e4c6f3f1695b..794f346d7520 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -106,7 +106,7 @@ static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
 	return (void *)page_ext + page_owner_ops.offset;
 }
 
-static noinline depot_stack_handle_t save_stack(gfp_t flags)
+static noinline depot_stack_handle_t save_stack(gfp_t flags, stack_action_t action)
 {
 	unsigned long entries[PAGE_OWNER_STACK_DEPTH];
 	depot_stack_handle_t handle;
@@ -125,7 +125,7 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags)
 	current->in_page_owner = 1;
 
 	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
-	handle = stack_depot_save(entries, nr_entries, flags);
+	handle = stack_depot_save_action(entries, nr_entries, flags, action);
 	if (!handle)
 		handle = failure_handle;
 
@@ -138,6 +138,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
 	int i;
 	struct page_ext *page_ext;
 	depot_stack_handle_t handle;
+	depot_stack_handle_t alloc_handle;
 	struct page_owner *page_owner;
 	u64 free_ts_nsec = local_clock();
 
@@ -145,7 +146,10 @@ void __reset_page_owner(struct page *page, unsigned short order)
 	if (unlikely(!page_ext))
 		return;
 
-	handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
+	page_owner = get_page_owner(page_ext);
+	alloc_handle = page_owner->handle;
+
+	handle = save_stack(GFP_NOWAIT | __GFP_NOWARN, STACK_ACTION_NONE);
 	for (i = 0; i < (1 << order); i++) {
 		__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
 		page_owner = get_page_owner(page_ext);
@@ -153,6 +157,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
 		page_owner->free_ts_nsec = free_ts_nsec;
 		page_ext = page_ext_next(page_ext);
 	}
+	stack_depot_dec_count(alloc_handle);
 }
 
 static inline void __set_page_owner_handle(struct page_ext *page_ext,
@@ -189,7 +194,7 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
 	if (unlikely(!page_ext))
 		return;
 
-	handle = save_stack(gfp_mask);
+	handle = save_stack(gfp_mask, STACK_ACTION_INC);
 	__set_page_owner_handle(page_ext, handle, order, gfp_mask);
 }