diff mbox series

vmstat: Keep count of the maximum page reached by the kernel stack

Message ID 20240313033417.447216-1-pasha.tatashin@soleen.com (mailing list archive)
State New
Headers show
Series vmstat: Keep count of the maximum page reached by the kernel stack | expand

Commit Message

Pasha Tatashin March 13, 2024, 3:34 a.m. UTC
CONFIG_DEBUG_STACK_USAGE provides a mechanism to know the minimum amount
of memory that was left in stack. Every time the new anti-record is
reached a message is printed to the console.

However, this does not allow us to know how much each page within the
stack was actually used. Provide a mechanism to count the number of times
each stack page was reached throughout the life of the stack:

	$ grep kstack /proc/vmstat
	kstack_page_1 19974
	kstack_page_2 94
	kstack_page_3 0
	kstack_page_4 0

In the above example, out of ~20K threads that ever exited on that
machine, only 94 touched the second page of the stack, and none touched
pages three and four.

Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
 include/linux/sched/task_stack.h | 39 ++++++++++++++++++++++++++++++--
 include/linux/vm_event_item.h    | 29 ++++++++++++++++++++++++
 include/linux/vmstat.h           | 16 -------------
 mm/vmstat.c                      | 11 +++++++++
 4 files changed, 77 insertions(+), 18 deletions(-)

Comments

Pasha Tatashin March 13, 2024, 3:39 a.m. UTC | #1
On Tue, Mar 12, 2024 at 11:34 PM Pasha Tatashin
<pasha.tatashin@soleen.com> wrote:
>
> CONFIG_DEBUG_STACK_USAGE provides a mechanism to know the minimum amount
> of memory that was left in stack. Every time the new anti-record is
> reached a message is printed to the console.
>
> However, this is not useful to know how much each page within stack was
> actually used. Provide a mechanism to count the number of time each
> stack page was reached throughout the live of the stack:
>
>         $ grep kstack /proc/vmstat
>         kstack_page_1 19974
>         kstack_page_2 94
>         kstack_page_3 0
>         kstack_page_4 0
>
> In the above example only out of ~20K threads that ever exited on that
> machine only 94 touched second page of the stack, and none touched
> pages three and four.
>
> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> ---
>  include/linux/sched/task_stack.h | 39 ++++++++++++++++++++++++++++++--
>  include/linux/vm_event_item.h    | 29 ++++++++++++++++++++++++
>  include/linux/vmstat.h           | 16 -------------
>  mm/vmstat.c                      | 11 +++++++++
>  4 files changed, 77 insertions(+), 18 deletions(-)
>
> diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
> index ccd72b978e1f..7ff7f9997266 100644
> --- a/include/linux/sched/task_stack.h
> +++ b/include/linux/sched/task_stack.h
> @@ -95,9 +95,41 @@ static inline int object_is_on_stack(const void *obj)
>  extern void thread_stack_cache_init(void);
>
>  #ifdef CONFIG_DEBUG_STACK_USAGE
> +#ifdef CONFIG_VM_EVENT_COUNTERS
> +#include <linux/vm_event_item.h>
> +
> +/* Count the maximum pages reached in kernel stacks */
> +static inline void count_kstack_page(int stack_max_page)
> +{
> +       switch (stack_max_page) {
> +       case 1:
> +               this_cpu_inc(vm_event_states.event[KSTACK_PAGE_1]);
> +               break;
> +       case 2:
> +               this_cpu_inc(vm_event_states.event[KSTACK_PAGE_2]);
> +               break;
> +#if THREAD_SIZE >= (4 * PAGE_SIZE)
> +       case 3:
> +               this_cpu_inc(vm_event_states.event[KSTACK_PAGE_3]);
> +               break;
> +       case 4:
> +               this_cpu_inc(vm_event_states.event[KSTACK_PAGE_4]);
> +               break;
> +#endif
> +#if THREAD_SIZE > (4 * PAGE_SIZE)
> +       default:
> +               this_cpu_inc(vm_event_states.event[KSTACK_PAGE_5]);

It should be:
this_cpu_inc(vm_event_states.event[KSTACK_PAGE_REST]);

Will fix it in the next version.

> +#endif
> +       }
> +}
> +#else /* !CONFIG_VM_EVENT_COUNTERS */
> +static inline void count_kstack_page(int stack_max_page) {}
> +#endif /* CONFIG_VM_EVENT_COUNTERS */
> +
>  static inline unsigned long stack_not_used(struct task_struct *p)
>  {
>         unsigned long *n = end_of_stack(p);
> +       unsigned long unused_stack;
>
>         do {    /* Skip over canary */
>  # ifdef CONFIG_STACK_GROWSUP
> @@ -108,10 +140,13 @@ static inline unsigned long stack_not_used(struct task_struct *p)
>         } while (!*n);
>
>  # ifdef CONFIG_STACK_GROWSUP
> -       return (unsigned long)end_of_stack(p) - (unsigned long)n;
> +       unused_stack = (unsigned long)end_of_stack(p) - (unsigned long)n;
>  # else
> -       return (unsigned long)n - (unsigned long)end_of_stack(p);
> +       unused_stack = (unsigned long)n - (unsigned long)end_of_stack(p);
>  # endif
> +       count_kstack_page(((THREAD_SIZE - unused_stack) >> PAGE_SHIFT) + 1);
> +
> +       return unused_stack;
>  }
>  #endif
>  extern void set_task_stack_end_magic(struct task_struct *tsk);
> diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
> index 747943bc8cc2..1dbfe47ff048 100644
> --- a/include/linux/vm_event_item.h
> +++ b/include/linux/vm_event_item.h
> @@ -153,10 +153,39 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
>                 VMA_LOCK_ABORT,
>                 VMA_LOCK_RETRY,
>                 VMA_LOCK_MISS,
> +#endif
> +#ifdef CONFIG_DEBUG_STACK_USAGE
> +               KSTACK_PAGE_1,
> +               KSTACK_PAGE_2,
> +#if THREAD_SIZE >= (4 * PAGE_SIZE)
> +               KSTACK_PAGE_3,
> +               KSTACK_PAGE_4,
> +#endif
> +#if THREAD_SIZE > (4 * PAGE_SIZE)
> +               KSTACK_PAGE_REST,
> +#endif
>  #endif
>                 NR_VM_EVENT_ITEMS
>  };
>
> +#ifdef CONFIG_VM_EVENT_COUNTERS
> +/*
> + * Light weight per cpu counter implementation.
> + *
> + * Counters should only be incremented and no critical kernel component
> + * should rely on the counter values.
> + *
> + * Counters are handled completely inline. On many platforms the code
> + * generated will simply be the increment of a global address.
> + */
> +
> +struct vm_event_state {
> +       unsigned long event[NR_VM_EVENT_ITEMS];
> +};
> +
> +DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
> +#endif
> +
>  #ifndef CONFIG_TRANSPARENT_HUGEPAGE
>  #define THP_FILE_ALLOC ({ BUILD_BUG(); 0; })
>  #define THP_FILE_FALLBACK ({ BUILD_BUG(); 0; })
> diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
> index 343906a98d6e..18d4a97d3afd 100644
> --- a/include/linux/vmstat.h
> +++ b/include/linux/vmstat.h
> @@ -41,22 +41,6 @@ enum writeback_stat_item {
>  };
>
>  #ifdef CONFIG_VM_EVENT_COUNTERS
> -/*
> - * Light weight per cpu counter implementation.
> - *
> - * Counters should only be incremented and no critical kernel component
> - * should rely on the counter values.
> - *
> - * Counters are handled completely inline. On many platforms the code
> - * generated will simply be the increment of a global address.
> - */
> -
> -struct vm_event_state {
> -       unsigned long event[NR_VM_EVENT_ITEMS];
> -};
> -
> -DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
> -
>  /*
>   * vm counters are allowed to be racy. Use raw_cpu_ops to avoid the
>   * local_irq_disable overhead.
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index db79935e4a54..737c85689251 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1413,6 +1413,17 @@ const char * const vmstat_text[] = {
>         "vma_lock_retry",
>         "vma_lock_miss",
>  #endif
> +#ifdef CONFIG_DEBUG_STACK_USAGE
> +       "kstack_page_1",
> +       "kstack_page_2",
> +#if THREAD_SIZE >= (4 * PAGE_SIZE)
> +       "kstack_page_3",
> +       "kstack_page_4",
> +#endif
> +#if THREAD_SIZE > (4 * PAGE_SIZE)
> +       "kstack_page_rest",
> +#endif
> +#endif
>  #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
>  };
>  #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
> --
> 2.44.0.278.ge034bb2e1d-goog
>
Christophe Leroy March 14, 2024, 8:19 a.m. UTC | #2
Le 13/03/2024 à 04:34, Pasha Tatashin a écrit :
> CONFIG_DEBUG_STACK_USAGE provides a mechanism to know the minimum amount
> of memory that was left in stack. Every time the new anti-record is
> reached a message is printed to the console.
> 
> However, this is not useful to know how much each page within stack was
> actually used. Provide a mechanism to count the number of time each
> stack page was reached throughout the live of the stack:

by "this is not useful to know ", you mean "this does not allow us to 
know" ?

> 
> 	$ grep kstack /proc/vmstat
> 	kstack_page_1 19974
> 	kstack_page_2 94
> 	kstack_page_3 0
> 	kstack_page_4 0

That's probably only useful when THREAD_SIZE is larger than PAGE_SIZE.

On powerpc 8xx, THREAD_SIZE is 8k by default and PAGE_SIZE can be either 
4k or 16k.

Christophe
Pasha Tatashin March 14, 2024, 1:42 p.m. UTC | #3
On Thu, Mar 14, 2024 at 4:19 AM Christophe Leroy
<christophe.leroy@csgroup.eu> wrote:
>
>
>
> Le 13/03/2024 à 04:34, Pasha Tatashin a écrit :
> > CONFIG_DEBUG_STACK_USAGE provides a mechanism to know the minimum amount
> > of memory that was left in stack. Every time the new anti-record is
> > reached a message is printed to the console.
> >
> > However, this is not useful to know how much each page within stack was
> > actually used. Provide a mechanism to count the number of time each
> > stack page was reached throughout the live of the stack:
>
> by "this is not useful to know ", you mean "this does not allow us to
> know" ?

Yes, bad wording on my side; I will change it to your suggestion in
the next version.

>
> >
> >       $ grep kstack /proc/vmstat
> >       kstack_page_1 19974
> >       kstack_page_2 94
> >       kstack_page_3 0
> >       kstack_page_4 0
>
> That's probably only usefull when THREAD_SIZE is larger than PAGE_SIZE.

That is right, if THREAD_SIZE <= PAGE_SIZE, only "kstack_page_1" would
be filled.

>
> On powerpc 8xx, THREAD_SIZE is 8k by default and PAGE_SIZE can be either
> 4k or 16k.

With THREAD_SIZE == 8K and PAGE_SIZE == 4K,
there will be two counters in /proc/vmstat, something like this:

kstack_page_1 XXX
kstack_page_2 YYY

With THREAD_SIZE == 16K and PAGE_SIZE == 16K,
there will be two counters, but one will always be zero:

kstack_page_1 XXX
kstack_page_2 0

Thanks,
Pasha
diff mbox series

Patch

diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index ccd72b978e1f..7ff7f9997266 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -95,9 +95,41 @@  static inline int object_is_on_stack(const void *obj)
 extern void thread_stack_cache_init(void);
 
 #ifdef CONFIG_DEBUG_STACK_USAGE
+#ifdef CONFIG_VM_EVENT_COUNTERS
+#include <linux/vm_event_item.h>
+
+/* Count the maximum pages reached in kernel stacks */
+static inline void count_kstack_page(int stack_max_page)
+{
+	switch (stack_max_page) {
+	case 1:
+		this_cpu_inc(vm_event_states.event[KSTACK_PAGE_1]);
+		break;
+	case 2:
+		this_cpu_inc(vm_event_states.event[KSTACK_PAGE_2]);
+		break;
+#if THREAD_SIZE >= (4 * PAGE_SIZE)
+	case 3:
+		this_cpu_inc(vm_event_states.event[KSTACK_PAGE_3]);
+		break;
+	case 4:
+		this_cpu_inc(vm_event_states.event[KSTACK_PAGE_4]);
+		break;
+#endif
+#if THREAD_SIZE > (4 * PAGE_SIZE)
+	default:
+		this_cpu_inc(vm_event_states.event[KSTACK_PAGE_5]);
+#endif
+	}
+}
+#else /* !CONFIG_VM_EVENT_COUNTERS */
+static inline void count_kstack_page(int stack_max_page) {}
+#endif /* CONFIG_VM_EVENT_COUNTERS */
+
 static inline unsigned long stack_not_used(struct task_struct *p)
 {
 	unsigned long *n = end_of_stack(p);
+	unsigned long unused_stack;
 
 	do { 	/* Skip over canary */
 # ifdef CONFIG_STACK_GROWSUP
@@ -108,10 +140,13 @@  static inline unsigned long stack_not_used(struct task_struct *p)
 	} while (!*n);
 
 # ifdef CONFIG_STACK_GROWSUP
-	return (unsigned long)end_of_stack(p) - (unsigned long)n;
+	unused_stack = (unsigned long)end_of_stack(p) - (unsigned long)n;
 # else
-	return (unsigned long)n - (unsigned long)end_of_stack(p);
+	unused_stack = (unsigned long)n - (unsigned long)end_of_stack(p);
 # endif
+	count_kstack_page(((THREAD_SIZE - unused_stack) >> PAGE_SHIFT) + 1);
+
+	return unused_stack;
 }
 #endif
 extern void set_task_stack_end_magic(struct task_struct *tsk);
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 747943bc8cc2..1dbfe47ff048 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -153,10 +153,39 @@  enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		VMA_LOCK_ABORT,
 		VMA_LOCK_RETRY,
 		VMA_LOCK_MISS,
+#endif
+#ifdef CONFIG_DEBUG_STACK_USAGE
+		KSTACK_PAGE_1,
+		KSTACK_PAGE_2,
+#if THREAD_SIZE >= (4 * PAGE_SIZE)
+		KSTACK_PAGE_3,
+		KSTACK_PAGE_4,
+#endif
+#if THREAD_SIZE > (4 * PAGE_SIZE)
+		KSTACK_PAGE_REST,
+#endif
 #endif
 		NR_VM_EVENT_ITEMS
 };
 
+#ifdef CONFIG_VM_EVENT_COUNTERS
+/*
+ * Light weight per cpu counter implementation.
+ *
+ * Counters should only be incremented and no critical kernel component
+ * should rely on the counter values.
+ *
+ * Counters are handled completely inline. On many platforms the code
+ * generated will simply be the increment of a global address.
+ */
+
+struct vm_event_state {
+	unsigned long event[NR_VM_EVENT_ITEMS];
+};
+
+DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
+#endif
+
 #ifndef CONFIG_TRANSPARENT_HUGEPAGE
 #define THP_FILE_ALLOC ({ BUILD_BUG(); 0; })
 #define THP_FILE_FALLBACK ({ BUILD_BUG(); 0; })
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 343906a98d6e..18d4a97d3afd 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -41,22 +41,6 @@  enum writeback_stat_item {
 };
 
 #ifdef CONFIG_VM_EVENT_COUNTERS
-/*
- * Light weight per cpu counter implementation.
- *
- * Counters should only be incremented and no critical kernel component
- * should rely on the counter values.
- *
- * Counters are handled completely inline. On many platforms the code
- * generated will simply be the increment of a global address.
- */
-
-struct vm_event_state {
-	unsigned long event[NR_VM_EVENT_ITEMS];
-};
-
-DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
-
 /*
  * vm counters are allowed to be racy. Use raw_cpu_ops to avoid the
  * local_irq_disable overhead.
diff --git a/mm/vmstat.c b/mm/vmstat.c
index db79935e4a54..737c85689251 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1413,6 +1413,17 @@  const char * const vmstat_text[] = {
 	"vma_lock_retry",
 	"vma_lock_miss",
 #endif
+#ifdef CONFIG_DEBUG_STACK_USAGE
+	"kstack_page_1",
+	"kstack_page_2",
+#if THREAD_SIZE >= (4 * PAGE_SIZE)
+	"kstack_page_3",
+	"kstack_page_4",
+#endif
+#if THREAD_SIZE > (4 * PAGE_SIZE)
+	"kstack_page_rest",
+#endif
+#endif
 #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
 };
 #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */