
[v2,2/3] mm, page_owner: Add page_owner_stacks file to print out only stacks and their counter

Message ID 20220905031012.4450-3-osalvador@suse.de (mailing list archive)
State New
Series page_owner: print stacks and their counter

Commit Message

Oscar Salvador Sept. 5, 2022, 3:10 a.m. UTC
We might only be interested in the stack <-> count relationship, so
instead of having to fiddle with the page_owner output and sift
through pfns, let us add a new file called 'page_owner_stacks' that
does just that.
By cat-ing that file, we will get all the stacktraces followed by
their counters, so we can have a more global view.
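E.g., the output will look something like this (function names and
counts here are just for illustration):

 post_alloc_hook+0x9c/0xc0
 get_page_from_freelist+0x801/0x2170
 __alloc_pages+0x18b/0x350
 ...
stack count: 74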

Signed-off-by: Oscar Salvador <osalvador@suse.de>
---
 include/linux/stackdepot.h |  1 +
 lib/stackdepot.c           | 40 ++++++++++++++++++++++++++++++++++++++
 mm/page_owner.c            | 25 ++++++++++++++++++++++++
 3 files changed, 66 insertions(+)

Comments

Marco Elver Sept. 5, 2022, 12:57 p.m. UTC | #1
On Mon, Sep 05, 2022 at 05:10AM +0200, Oscar Salvador wrote:
[...]
> +int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos)

Can you add kernel-doc comment what this does (and also update
accordingly in 3/3 when you add 'threshold').

From what I see it prints *all* stacks that have a non-zero count.
Correct?

If so, should this be called stack_depot_print_all_count() (having
stack(s) in the name twice doesn't make it more obvious what it does)?
Then in the follow-up patch you add the 'threshold' arg.

> +{
> +	int i = *pos, ret = 0;
> +	struct stack_record **stacks, *stack;
> +	static struct stack_record *last = NULL;
> +	unsigned long stack_table_entries = stack_hash_mask + 1;
> +
> +	/* Continue from the last stack if we have one */
> +	if (last) {
> +		stack = last->next;

This is dead code?

> +	} else {
> +new_table:
> +		stacks = &stack_table[i];
> +		stack = (struct stack_record *)stacks;
> +	}
> +
> +	for (; stack; stack = stack->next) {
> +		if (!stack->size || stack->size < 0 ||
> +		    stack->size > size || stack->handle.valid != 1 ||
> +		    refcount_read(&stack->count) < 1)
> +			continue;
> +
> +		ret += stack_trace_snprint(buf, size, stack->entries, stack->size, 0);
> +		ret += scnprintf(buf + ret, size - ret, "stack count: %d\n\n",
> +				 refcount_read(&stack->count));
> +		last = stack;
> +		return ret;
> +	}
> +
> +	i++;
> +	*pos = i;
> +	last = NULL;
> +
> +	/* Keep looking all tables for valid stacks */
> +	if (i < stack_table_entries)
> +		goto new_table;
> +
> +	return 0;
> +}

Either I'm missing something really obvious, or the above function can
be simplified to just this (untested!):

	int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos)
	{
		const unsigned long stack_table_entries = stack_hash_mask + 1;

		/* Iterate over all tables for valid stacks. */
		for (; *pos < stack_table_entries; (*pos)++) {
			for (struct stack_record *stack = stack_table[*pos]; stack; stack = stack->next) {
				if (!stack->size || stack->size < 0 || stack->size > size ||
				    stack->handle.valid != 1 || refcount_read(&stack->count) < 1)
					continue;

				int ret = stack_trace_snprint(buf, size, stack->entries, stack->size, 0);

				return ret + scnprintf(buf + ret, size - ret, "stack count: %d\n\n",
						       refcount_read(&stack->count));
			}
		}

		return 0;
	}

> diff --git a/mm/page_owner.c b/mm/page_owner.c
> index 8730f377fa91..d88e6b4aefa0 100644
> --- a/mm/page_owner.c
> +++ b/mm/page_owner.c
> @@ -664,6 +664,29 @@ static void init_early_allocated_pages(void)
>  		init_zones_in_node(pgdat);
>  }
>  
> +static ssize_t read_page_owner_stacks(struct file *file, char __user *buf,
> +				      size_t count, loff_t *pos)
> +{
> +	char *kbuf;
> +	int ret = 0;
> +
> +	count = min_t(size_t, count, PAGE_SIZE);
> +	kbuf = kmalloc(count, GFP_KERNEL);
> +	if (!kbuf)
> +		return -ENOMEM;
> +
> +	ret += stack_depot_print_stacks_threshold(kbuf, count, pos);

If I understood right, this will print *all* stacks that have a non-zero
count, and this isn't related to page_owner per se. Correct?

This might not be a problem right now, but once there are more
users that want to count stack usage, you'll end up with page_owner
stacks mixed with other users' stacks here.
Marco Elver Sept. 5, 2022, 1 p.m. UTC | #2
On Mon, Sep 05, 2022 at 02:57PM +0200, Marco Elver wrote:
[...]
> > +{
> > +	int i = *pos, ret = 0;
> > +	struct stack_record **stacks, *stack;
> > +	static struct stack_record *last = NULL;
> > +	unsigned long stack_table_entries = stack_hash_mask + 1;
> > +
> > +	/* Continue from the last stack if we have one */
> > +	if (last) {
> > +		stack = last->next;
> 
> This is dead code?

Oof, I just noticed that 'last' is static. Please avoid that, because
it'll make this interface really tricky to use safely (e.g. two readers
cat-ing the file concurrently would clobber each other's cursor). I
still don't quite understand why it needs to do this, and a kernel-doc
comment would make this clearer.
kernel test robot Sept. 5, 2022, 10:20 p.m. UTC | #3
Hi Oscar,

I love your patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v6.0-rc4]
[cannot apply to akpm-mm/mm-everything next-20220901]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Oscar-Salvador/page_owner-print-stacks-and-their-counter/20220905-111139
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 7e18e42e4b280c85b76967a9106a13ca61c16179
config: microblaze-randconfig-m041-20220905 (https://download.01.org/0day-ci/archive/20220906/202209060655.H9CK1KgC-lkp@intel.com/config)
compiler: microblaze-linux-gcc (GCC) 12.1.0

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

smatch warnings:
lib/stackdepot.c:586 stack_depot_print_stacks_threshold() warn: unsigned 'stack->size' is never less than zero.

vim +586 lib/stackdepot.c

   568	
   569	int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos)
   570	{
   571		int i = *pos, ret = 0;
   572		struct stack_record **stacks, *stack;
   573		static struct stack_record *last = NULL;
   574		unsigned long stack_table_entries = stack_hash_mask + 1;
   575	
   576		/* Continue from the last stack if we have one */
   577		if (last) {
   578			stack = last->next;
   579		} else {
   580	new_table:
   581			stacks = &stack_table[i];
   582			stack = (struct stack_record *)stacks;
   583		}
   584	
   585		for (; stack; stack = stack->next) {
 > 586			if (!stack->size || stack->size < 0 ||
Oscar Salvador Sept. 6, 2022, 7:43 a.m. UTC | #4
On Mon, Sep 05, 2022 at 02:57:50PM +0200, Marco Elver wrote:
> On Mon, Sep 05, 2022 at 05:10AM +0200, Oscar Salvador wrote:
> [...]
> > +int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos)
> 
> Can you add kernel-doc comment what this does (and also update
> accordingly in 3/3 when you add 'threshold').

Yes, I guess a kernel-doc comment is due.

> From what I see it prints *all* stacks that have a non-zero count.
> Correct?

That's right.

> If so, should this be called stack_depot_print_all_count() (having
> stack(s) in the name twice doesn't make it more obvious what it does)?
> Then in the follow-up patch you add the 'threshold' arg.

I guess so. The only reason I went with the actual name is that for me
"stack_depot" was kinda the name of the module/library, and
so I wanted to make crystal clear what we were printing.

But I'm ok with renaming it if it's already self-explanatory

> > +{
> > +	int i = *pos, ret = 0;
> > +	struct stack_record **stacks, *stack;
> > +	static struct stack_record *last = NULL;
> > +	unsigned long stack_table_entries = stack_hash_mask + 1;
> > +
> > +	/* Continue from the last stack if we have one */
> > +	if (last) {
> > +		stack = last->next;
> 
> This is dead code?

No, more below.

> Either I'm missing something really obvious, or the above function can
> be simplified to just this (untested!):
> 
> 	int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos)
> 	{
> 		const unsigned long stack_table_entries = stack_hash_mask + 1;
> 
> 		/* Iterate over all tables for valid stacks. */
> 		for (; *pos < stack_table_entries; (*pos)++) {
> 			for (struct stack_record *stack = stack_table[*pos]; stack; stack = stack->next) {
> 				if (!stack->size || stack->size < 0 || stack->size > size ||
> 				    stack->handle.valid != 1 || refcount_read(&stack->count) < 1)
> 					continue;
> 
> 				int ret = stack_trace_snprint(buf, size, stack->entries, stack->size, 0);
>
> 				return ret + scnprintf(buf + ret, size - ret, "stack count: %d\n\n",
> 						       refcount_read(&stack->count));
> 			}
> 		}
> 
> 		return 0;

Yes, this will not work.

You have stack_table[], which is an array of buckets, and each struct
stack_record has a pointer to the next stack, so you can walk from the
beginning of a specific bucket to the end, e.g.:

stack_table[0] = {stack1, stack2, stack3, ...} (each linked by ->next)
stack_table[1] = {stack1, stack2, stack3, ...} (each linked by ->next)
..
stack_table[stack_table_entries - 1] = {stack1, stack2, stack3, ...} (each linked by ->next)

*pos holds the index into stack_table[], while "last" holds the last stack
within the bucket we were processing.

So, when we find a valid stack to print, we set "last" to that stack, and *pos
to the index into stack_table.
Then, when stack_depot_print_stacks_threshold() is called again, we set
"stack" to last->next, and we are ready to keep looking with:

 for (; stack; stack = stack->next) {
    ...
    check if stack is valid
 }

Should we not find any more valid stacks in that bucket, we need to check
the next one, so we do:

    i++; (note that i was set to *pos at the beginning of the function)
    *pos = i;
    last = NULL;
    goto new_table

and we are now ready to do:

new_table:
		stacks = &stack_table[i];
		stack = (struct stack_record *)stacks;

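To make it concrete: say bucket 0 holds {stackA, stackB} and the next
valid stack lives in bucket 5. The first call returns stackA and
remembers it in "last" (*pos still 0). The second call resumes at
last->next and returns stackB. The third call finds nothing left in
bucket 0, so it resets "last", bumps *pos, and keeps scanning until it
reaches bucket 5.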

Does this clarify it a little bit?

About using static vs non-static.
In the v1, I was using a parameter which contained last_stack:

https://patchwork.kernel.org/project/linux-mm/patch/20220901044249.4624-3-osalvador@suse.de/

Not sure if that's better? Thoughts?
Marco Elver Sept. 6, 2022, 8:35 a.m. UTC | #5
On Tue, 6 Sept 2022 at 09:44, Oscar Salvador <osalvador@suse.de> wrote:
>
> On Mon, Sep 05, 2022 at 02:57:50PM +0200, Marco Elver wrote:
> > On Mon, Sep 05, 2022 at 05:10AM +0200, Oscar Salvador wrote:
> > [...]
> > > +int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos)
> >
> > Can you add kernel-doc comment what this does (and also update
> > accordingly in 3/3 when you add 'threshold').
>
> Yes, I guess a kernel-doc comment is due.
>
> > From what I see it prints *all* stacks that have a non-zero count.
> > Correct?
>
> That's right.
>
> > If so, should this be called stack_depot_print_all_count() (having
> > stack(s) in the name twice doesn't make it more obvious what it does)?
> > Then in the follow-up patch you add the 'threshold' arg.
>
> I guess so. The only reason I went with the actual name is that for me
> "stack_depot" was kinda the name of the module/library, and
> so I wanted to make crystal clear what we were printing.
>
> But I'm ok with renaming it if it's already self-explanatory

I think it's clear from the fact we're using the stack depot that any
printing will print stacks. To mirror the existing
'stack_depot_print()', I'd go with 'stack_depot_print_all_count()'.


> > > +{
> > > +   int i = *pos, ret = 0;
> > > +   struct stack_record **stacks, *stack;
> > > +   static struct stack_record *last = NULL;
> > > +   unsigned long stack_table_entries = stack_hash_mask + 1;
> > > +
> > > +   /* Continue from the last stack if we have one */
> > > +   if (last) {
> > > +           stack = last->next;
> >
> > This is dead code?
>
> No, more below.
>
> > Either I'm missing something really obvious, or the above function can
> > be simplified to just this (untested!):
> >
> >       int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos)
> >       {
> >               const unsigned long stack_table_entries = stack_hash_mask + 1;
> >
> >               /* Iterate over all tables for valid stacks. */
> >               for (; *pos < stack_table_entries; (*pos)++) {
> >                       for (struct stack_record *stack = stack_table[*pos]; stack; stack = stack->next) {
> >                               if (!stack->size || stack->size < 0 || stack->size > size ||
> >                                   stack->handle.valid != 1 || refcount_read(&stack->count) < 1)
> >                                       continue;
> >
> >                               int ret = stack_trace_snprint(buf, size, stack->entries, stack->size, 0);
> >
> >                               return ret + scnprintf(buf + ret, size - ret, "stack count: %d\n\n",
> >                                                      refcount_read(&stack->count));
> >                       }
> >               }
> >
> >               return 0;
>
> Yes, this will not work.
>
> You have stack_table[], which is an array of buckets, and each struct
> stack_record has a pointer to the next stack, so you can walk from the
> beginning of a specific bucket to the end, e.g.:
>
> stack_table[0] = {stack1, stack2, stack3, ...} (each linked by ->next)
> stack_table[1] = {stack1, stack2, stack3, ...} (each linked by ->next)
> ..
> stack_table[stack_table_entries - 1] = {stack1, stack2, stack3, ...} (each linked by ->next)
>
> *pos holds the index into stack_table[], while "last" holds the last stack
> within the bucket we were processing.
>
> So, when we find a valid stack to print, we set "last" to that stack, and *pos
> to the index into stack_table.
> Then, when stack_depot_print_stacks_threshold() is called again, we set
> "stack" to last->next, and we are ready to keep looking with:
>
>  for (; stack; stack = stack->next) {
>     ...
>     check if stack is valid
>  }
>
> Should we not find any more valid stacks in that bucket, we need to check
> the next one, so we do:
>
>     i++; (note that i was set to *pos at the beginning of the function)
>         *pos = i;
>         last = NULL;
>     goto new_table
>
> and we are now ready to do:
>
> new_table:
>                 stacks = &stack_table[i];
>                 stack = (struct stack_record *)stacks;
>
>
> Does this clarify it a little bit?
>
> About using static vs non-static.
> In the v1, I was using a parameter which contained last_stack:
>
> https://patchwork.kernel.org/project/linux-mm/patch/20220901044249.4624-3-osalvador@suse.de/
>
> Not sure if that's better? Thoughts?

Moderately better, but still not great. Essentially you need 2
cursors, but with loff_t you only get 1.

I think the loff_t parameter can be used to encode both cursors. In
the kernel, loff_t is always 'long long', so it'll always be 64-bit.

Let's assume that collisions in the hash table are rare, so the number
of stacks per bucket is typically small. Then you can encode the
index into the bucket in bits 0-31 and the bucket index in bits 32-63.
STACK_HASH_ORDER_MAX is 20, so 32 bits is plenty to encode the index.
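
Something along these lines for the encoding/decoding (untested, and
depot_cursor_{encode,decode} are made-up names, of course):

	/* Bits 32-63: bucket index; bits 0-31: index into the bucket. */
	static inline loff_t depot_cursor_encode(unsigned long table_i, unsigned long stack_i)
	{
		return ((loff_t)table_i << 32) | stack_i;
	}

	static inline void depot_cursor_decode(loff_t pos, unsigned long *table_i, unsigned long *stack_i)
	{
		*table_i = pos >> 32;
		*stack_i = pos & 0xffffffff;
	}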
Oscar Salvador Sept. 7, 2022, 4 a.m. UTC | #6
On Tue, Sep 06, 2022 at 10:35:00AM +0200, Marco Elver wrote:
> I think it's clear from the fact we're using the stack depot that any
> printing will print stacks. To mirror the existing
> 'stack_depot_print()', I'd go with 'stack_depot_print_all_count()'.

Fair enough, I will rename it then.
 
> Moderately better, but still not great. Essentially you need 2
> cursors, but with loff_t you only get 1.
> 
> I think the loff_t parameter can be used to encode both cursors. In
> the kernel, loff_t is always 'long long', so it'll always be 64-bit.
> 
> Let's assume that collisions in the hash table are rare, so the number
> of stacks per bucket is typically small. Then you can encode the
> index into the bucket in bits 0-31 and the bucket index in bits 32-63.
> STACK_HASH_ORDER_MAX is 20, so 32 bits is plenty to encode the index.

I see, I didn't think of it to be honest.

Then, the below (completely untested) should do the trick:

<----
 int stack_depot_print_all_count(char *buf, size_t size, loff_t *pos)
 {
         int ret = 0, stack_i, table_i;
         struct stack_record **stacks, *stack;
         unsigned long stack_table_entries = stack_hash_mask + 1;
 
         stack_i = (*pos & 31);
         table_i = (*pos >> 32);
 new_table:
         stacks = &stack_table[table_i];
         stack = ((struct stack_record *)stacks) + stack_i;
 
         for (; stack; stack = stack->next, stack_i++) {
                 if (!stack->size || stack->size < 0 ||
                     stack->size > size || stack->handle.valid != 1 ||
                     refcount_read(&stack->count) < 1)
                         continue;
 
                 ret += stack_trace_snprint(buf, size, stack->entries, stack->size, 0);
                 ret += scnprintf(buf + ret, size - ret, "stack count: %d\n\n",
                                  refcount_read(&stack->count));
                 *pos |= stack_i;
                 *pos |= ((long long)table_i << 32);
                 return ret;
         }
 
         table_i++;
         /* Keep looking all tables for valid stacks */
         if (table_i < stack_table_entries)
                 goto new_table;
 
         return 0;
 }
---->

I will give it a go.

Thanks Marco!
Marco Elver Sept. 7, 2022, 7:14 a.m. UTC | #7
On Wed, 7 Sept 2022 at 06:00, Oscar Salvador <osalvador@suse.de> wrote:
>
> On Tue, Sep 06, 2022 at 10:35:00AM +0200, Marco Elver wrote:
> > I think it's clear from the fact we're using the stack depot that any
> > printing will print stacks. To mirror the existing
> > 'stack_depot_print()', I'd go with 'stack_depot_print_all_count()'.
>
> Fair enough, I will rename it then.
>
> > Moderately better, but still not great. Essentially you need 2
> > cursors, but with loff_t you only get 1.
> >
> > I think the loff_t parameter can be used to encode both cursors. In
> > the kernel, loff_t is always 'long long', so it'll always be 64-bit.
> >
> > Let's assume that collisions in the hash table are rare, so the number
> > of stacks per bucket is typically small. Then you can encode the
> > index into the bucket in bits 0-31 and the bucket index in bits 32-63.
> > STACK_HASH_ORDER_MAX is 20, so 32 bits is plenty to encode the index.
>
> I see, I didn't think of it to be honest.
>
> Then, the below (completely untested) should do the trick:
>
> <----
>  int stack_depot_print_all_count(char *buf, size_t size, loff_t *pos)
>  {
>          int ret = 0, stack_i, table_i;
>          struct stack_record **stacks, *stack;
>          unsigned long stack_table_entries = stack_hash_mask + 1;
>
>          stack_i = (*pos & 31);
>          table_i = (*pos >> 32);
>  new_table:
>          stacks = &stack_table[table_i];
>          stack = ((struct stack_record *)stacks) + stack_i;

Why are you casting a stack_record** to a stack_record*? stack_table
is already appropriately typed, and there should be no need to cast
things around.

'stacks' is supposed to be the bucket? In which case you need to
dereference it to get the first entry in the bucket: bucket =
stack_table[table_i];

stack_i cannot be used to index into the bucket, because the elements
in it are a linked list and not necessarily adjacent in memory. You
have to traverse the linked list stack_i elements to get to the start:

  for (int i = 0; stack && i < stack_i; stack = stack->next, ++i);

then you can proceed with the below code.
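
Spelled out, the top of the function would then look something like
this (also untested):

	unsigned long table_i = *pos >> 32;
	unsigned long stack_i = *pos & 0xffffffff;
	struct stack_record *stack = stack_table[table_i];

	/* Skip the stack_i entries of this bucket we already returned. */
	for (unsigned long i = 0; stack && i < stack_i; stack = stack->next, ++i)
		;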

>          for (; stack; stack = stack->next, stack_i++) {
>                  if (!stack->size || stack->size < 0 ||
>                      stack->size > size || stack->handle.valid != 1 ||
>                      refcount_read(&stack->count) < 1)
>                          continue;
>
>                  ret += stack_trace_snprint(buf, size, stack->entries, stack->size, 0);
>                  ret += scnprintf(buf + ret, size - ret, "stack count: %d\n\n",
>                                   refcount_read(&stack->count));
>                  *pos |= stack_i;
>                  *pos |= ((long long)table_i << 32);
>                  return ret;
>          }
>
>          table_i++;
>          /* Keep looking all tables for valid stacks */
>          if (table_i < stack_table_entries)
>                  goto new_table;

While you're at it, could you try to come up with a version that
avoids the goto?

>          return 0;
>  }
> ---->
>
> I will give it a go.
>
> Thanks Marco!
>
>
> --
> Oscar Salvador
> SUSE Labs
Oscar Salvador Sept. 8, 2022, 3:32 a.m. UTC | #8
On Wed, Sep 07, 2022 at 09:14:35AM +0200, Marco Elver wrote:
> Why are you casting a stack_record** to a stack_record*? stack_table
> is already appropriately typed, and there should be no need to cast
> things around.
> 
> 'stacks' is supposed to be the bucket? In which case you need to
> dereference it to get the first entry in the bucket: bucket =
> stack_table[table_i];
> 
> stack_i cannot be used to index into the bucket, because the elements
> in it are a linked list and not necessarily adjacent in memory. You
> have to traverse the linked list stack_i elements to get to the start:

Yeah, I figured that much after thinking about it more, but I was
overly eager.

>   for (int i = 0; stack && i < stack_i; stack = stack->next, ++i);

But this seems suboptimal.
With this code, we have to walk the list until we find the right index
every time we enter the function, while with the actual code of v2,
or even the patch from v1 [1], we do not really need to do that
because we already have the pointer to the stack.

So I would much rather prefer that to having to traverse the stacks
until we find the right one.
Marco Elver Sept. 8, 2022, 5:31 a.m. UTC | #9
On Thu, 8 Sept 2022 at 05:32, Oscar Salvador <osalvador@suse.de> wrote:
>
> On Wed, Sep 07, 2022 at 09:14:35AM +0200, Marco Elver wrote:
> > Why are you casting a stack_record** to a stack_record*? stack_table
> > is already appropriately typed, and there should be no need to cast
> > things around.
> >
> > 'stacks' is supposed to be the bucket? In which case you need to
> > dereference it to get the first entry in the bucket: bucket =
> > stack_table[table_i];
> >
> > stack_i cannot be used to index into the bucket, because the elements
> > in it are a linked list and not necessarily adjacent in memory. You
> > have to traverse the linked list stack_i elements to get to the start:
>
> Yeah, I figured that much after thinking about it more, but I was
> overly eager.
>
> >   for (int i = 0; stack && i < stack_i; stack = stack->next, ++i);
>
> But this seems suboptimal.
> With this code, we have to walk the list until we find the right index
> every time we enter the function, while with the actual code of v2,
> or even the patch from v1 [1], we do not really need to do that
> because we already have the pointer to the stack.
>
> So I would much rather prefer that to having to traverse the stacks
> until we find the right one.

I would not prematurely optimize this. It's a hash map, and the only
problem is if there are tons of collisions. Also, this function isn't
performance critical, it's only used for printing, which itself is
slow.

I suggest you collect some stats on how many entries each bucket has on
average. If the average is <10, I'd go with the cleaner interface.
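
Something like this one-off loop would give you the numbers (untested):

	unsigned long buckets = stack_hash_mask + 1, stacks = 0, used = 0;

	for (unsigned long i = 0; i < buckets; i++) {
		unsigned long n = 0;

		for (struct stack_record *s = stack_table[i]; s; s = s->next)
			n++;
		stacks += n;
		used += !!n;
	}
	pr_info("stackdepot: %lu stacks across %lu/%lu buckets\n", stacks, used, buckets);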

Patch

diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 4e3a88f135ee..19d3f8295df8 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -25,6 +25,7 @@  depot_stack_handle_t __stack_depot_save(unsigned long *entries,
 					gfp_t gfp_flags, bool can_alloc,
 					enum stack_depot_action action);
 void stack_depot_dec_count(depot_stack_handle_t handle);
+int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos);
 
 /*
  * Every user of stack depot has to call stack_depot_init() during its own init
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index a806ef58a385..a198b2dbe3fb 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -565,3 +565,43 @@  depot_stack_handle_t stack_depot_save_action(unsigned long *entries,
 	return __stack_depot_save(entries, nr_entries, alloc_flags, true, action);
 }
 EXPORT_SYMBOL_GPL(stack_depot_save_action);
+
+int stack_depot_print_stacks_threshold(char *buf, size_t size, loff_t *pos)
+{
+	int i = *pos, ret = 0;
+	struct stack_record **stacks, *stack;
+	static struct stack_record *last = NULL;
+	unsigned long stack_table_entries = stack_hash_mask + 1;
+
+	/* Continue from the last stack if we have one */
+	if (last) {
+		stack = last->next;
+	} else {
+new_table:
+		stacks = &stack_table[i];
+		stack = (struct stack_record *)stacks;
+	}
+
+	for (; stack; stack = stack->next) {
+		if (!stack->size || stack->size < 0 ||
+		    stack->size > size || stack->handle.valid != 1 ||
+		    refcount_read(&stack->count) < 1)
+			continue;
+
+		ret += stack_trace_snprint(buf, size, stack->entries, stack->size, 0);
+		ret += scnprintf(buf + ret, size - ret, "stack count: %d\n\n",
+				 refcount_read(&stack->count));
+		last = stack;
+		return ret;
+	}
+
+	i++;
+	*pos = i;
+	last = NULL;
+
+	/* Keep looking all tables for valid stacks */
+	if (i < stack_table_entries)
+		goto new_table;
+
+	return 0;
+}
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 8730f377fa91..d88e6b4aefa0 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -664,6 +664,29 @@  static void init_early_allocated_pages(void)
 		init_zones_in_node(pgdat);
 }
 
+static ssize_t read_page_owner_stacks(struct file *file, char __user *buf,
+				      size_t count, loff_t *pos)
+{
+	char *kbuf;
+	int ret = 0;
+
+	count = min_t(size_t, count, PAGE_SIZE);
+	kbuf = kmalloc(count, GFP_KERNEL);
+	if (!kbuf)
+		return -ENOMEM;
+
+	ret += stack_depot_print_stacks_threshold(kbuf, count, pos);
+	if (copy_to_user(buf, kbuf, ret))
+		ret = -EFAULT;
+
+	kfree(kbuf);
+	return ret;
+}
+
+static const struct file_operations proc_page_owner_stacks = {
+	.read = read_page_owner_stacks,
+};
+
 static const struct file_operations proc_page_owner_operations = {
 	.read		= read_page_owner,
 };
@@ -677,6 +700,8 @@  static int __init pageowner_init(void)
 
 	debugfs_create_file("page_owner", 0400, NULL, NULL,
 			    &proc_page_owner_operations);
+	debugfs_create_file("page_owner_stacks", 0400, NULL, NULL,
+			    &proc_page_owner_stacks);
 
 	return 0;
 }