Message ID | 20240313033417.447216-1-pasha.tatashin@soleen.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | vmstat: Keep count of the maximum page reached by the kernel stack | expand |
On Tue, Mar 12, 2024 at 11:34 PM Pasha Tatashin <pasha.tatashin@soleen.com> wrote: > > CONFIG_DEBUG_STACK_USAGE provides a mechanism to know the minimum amount > of memory that was left in stack. Every time the new anti-record is > reached a message is printed to the console. > > However, this is not useful to know how much each page within stack was > actually used. Provide a mechanism to count the number of time each > stack page was reached throughout the live of the stack: > > $ grep kstack /proc/vmstat > kstack_page_1 19974 > kstack_page_2 94 > kstack_page_3 0 > kstack_page_4 0 > > In the above example only out of ~20K threads that ever exited on that > machine only 94 touched second page of the stack, and none touched > pages three and four. > > Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com> > --- > include/linux/sched/task_stack.h | 39 ++++++++++++++++++++++++++++++-- > include/linux/vm_event_item.h | 29 ++++++++++++++++++++++++ > include/linux/vmstat.h | 16 ------------- > mm/vmstat.c | 11 +++++++++ > 4 files changed, 77 insertions(+), 18 deletions(-) > > diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h > index ccd72b978e1f..7ff7f9997266 100644 > --- a/include/linux/sched/task_stack.h > +++ b/include/linux/sched/task_stack.h > @@ -95,9 +95,41 @@ static inline int object_is_on_stack(const void *obj) > extern void thread_stack_cache_init(void); > > #ifdef CONFIG_DEBUG_STACK_USAGE > +#ifdef CONFIG_VM_EVENT_COUNTERS > +#include <linux/vm_event_item.h> > + > +/* Count the maximum pages reached in kernel stacks */ > +static inline void count_kstack_page(int stack_max_page) > +{ > + switch (stack_max_page) { > + case 1: > + this_cpu_inc(vm_event_states.event[KSTACK_PAGE_1]); > + break; > + case 2: > + this_cpu_inc(vm_event_states.event[KSTACK_PAGE_2]); > + break; > +#if THREAD_SIZE >= (4 * PAGE_SIZE) > + case 3: > + this_cpu_inc(vm_event_states.event[KSTACK_PAGE_3]); > + break; > + case 4: > + 
this_cpu_inc(vm_event_states.event[KSTACK_PAGE_4]); > + break; > +#endif > +#if THREAD_SIZE > (4 * PAGE_SIZE) > + default: > + this_cpu_inc(vm_event_states.event[KSTACK_PAGE_5]); It should: this_cpu_inc(vm_event_states.event[KSTACK_PAGE_REST]); Will fix it in the next version. > +#endif > + } > +} > +#else /* !CONFIG_VM_EVENT_COUNTERS */ > +static inline void count_kstack_page(int stack_max_page) {} > +#endif /* CONFIG_VM_EVENT_COUNTERS */ > + > static inline unsigned long stack_not_used(struct task_struct *p) > { > unsigned long *n = end_of_stack(p); > + unsigned long unused_stack; > > do { /* Skip over canary */ > # ifdef CONFIG_STACK_GROWSUP > @@ -108,10 +140,13 @@ static inline unsigned long stack_not_used(struct task_struct *p) > } while (!*n); > > # ifdef CONFIG_STACK_GROWSUP > - return (unsigned long)end_of_stack(p) - (unsigned long)n; > + unused_stack = (unsigned long)end_of_stack(p) - (unsigned long)n; > # else > - return (unsigned long)n - (unsigned long)end_of_stack(p); > + unused_stack = (unsigned long)n - (unsigned long)end_of_stack(p); > # endif > + count_kstack_page(((THREAD_SIZE - unused_stack) >> PAGE_SHIFT) + 1); > + > + return unused_stack; > } > #endif > extern void set_task_stack_end_magic(struct task_struct *tsk); > diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h > index 747943bc8cc2..1dbfe47ff048 100644 > --- a/include/linux/vm_event_item.h > +++ b/include/linux/vm_event_item.h > @@ -153,10 +153,39 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, > VMA_LOCK_ABORT, > VMA_LOCK_RETRY, > VMA_LOCK_MISS, > +#endif > +#ifdef CONFIG_DEBUG_STACK_USAGE > + KSTACK_PAGE_1, > + KSTACK_PAGE_2, > +#if THREAD_SIZE >= (4 * PAGE_SIZE) > + KSTACK_PAGE_3, > + KSTACK_PAGE_4, > +#endif > +#if THREAD_SIZE > (4 * PAGE_SIZE) > + KSTACK_PAGE_REST, > +#endif > #endif > NR_VM_EVENT_ITEMS > }; > > +#ifdef CONFIG_VM_EVENT_COUNTERS > +/* > + * Light weight per cpu counter implementation. 
> + * > + * Counters should only be incremented and no critical kernel component > + * should rely on the counter values. > + * > + * Counters are handled completely inline. On many platforms the code > + * generated will simply be the increment of a global address. > + */ > + > +struct vm_event_state { > + unsigned long event[NR_VM_EVENT_ITEMS]; > +}; > + > +DECLARE_PER_CPU(struct vm_event_state, vm_event_states); > +#endif > + > #ifndef CONFIG_TRANSPARENT_HUGEPAGE > #define THP_FILE_ALLOC ({ BUILD_BUG(); 0; }) > #define THP_FILE_FALLBACK ({ BUILD_BUG(); 0; }) > diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h > index 343906a98d6e..18d4a97d3afd 100644 > --- a/include/linux/vmstat.h > +++ b/include/linux/vmstat.h > @@ -41,22 +41,6 @@ enum writeback_stat_item { > }; > > #ifdef CONFIG_VM_EVENT_COUNTERS > -/* > - * Light weight per cpu counter implementation. > - * > - * Counters should only be incremented and no critical kernel component > - * should rely on the counter values. > - * > - * Counters are handled completely inline. On many platforms the code > - * generated will simply be the increment of a global address. > - */ > - > -struct vm_event_state { > - unsigned long event[NR_VM_EVENT_ITEMS]; > -}; > - > -DECLARE_PER_CPU(struct vm_event_state, vm_event_states); > - > /* > * vm counters are allowed to be racy. Use raw_cpu_ops to avoid the > * local_irq_disable overhead. 
> diff --git a/mm/vmstat.c b/mm/vmstat.c > index db79935e4a54..737c85689251 100644 > --- a/mm/vmstat.c > +++ b/mm/vmstat.c > @@ -1413,6 +1413,17 @@ const char * const vmstat_text[] = { > "vma_lock_retry", > "vma_lock_miss", > #endif > +#ifdef CONFIG_DEBUG_STACK_USAGE > + "kstack_page_1", > + "kstack_page_2", > +#if THREAD_SIZE >= (4 * PAGE_SIZE) > + "kstack_page_3", > + "kstack_page_4", > +#endif > +#if THREAD_SIZE > (4 * PAGE_SIZE) > + "kstack_page_rest", > +#endif > +#endif > #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ > }; > #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */ > -- > 2.44.0.278.ge034bb2e1d-goog >
Le 13/03/2024 à 04:34, Pasha Tatashin a écrit : > CONFIG_DEBUG_STACK_USAGE provides a mechanism to know the minimum amount > of memory that was left in stack. Every time the new anti-record is > reached a message is printed to the console. > > However, this is not useful to know how much each page within stack was > actually used. Provide a mechanism to count the number of time each > stack page was reached throughout the live of the stack: By "this is not useful to know", do you mean "this does not allow us to know"? > > $ grep kstack /proc/vmstat > kstack_page_1 19974 > kstack_page_2 94 > kstack_page_3 0 > kstack_page_4 0 That's probably only useful when THREAD_SIZE is larger than PAGE_SIZE. On powerpc 8xx, THREAD_SIZE is 8k by default and PAGE_SIZE can be either 4k or 16k. Christophe
On Thu, Mar 14, 2024 at 4:19 AM Christophe Leroy <christophe.leroy@csgroup.eu> wrote: > > > > Le 13/03/2024 à 04:34, Pasha Tatashin a écrit : > > CONFIG_DEBUG_STACK_USAGE provides a mechanism to know the minimum amount > > of memory that was left in stack. Every time the new anti-record is > > reached a message is printed to the console. > > > > However, this is not useful to know how much each page within stack was > > actually used. Provide a mechanism to count the number of time each > > stack page was reached throughout the live of the stack: > > by "this is not useful to know ", you mean "this does not allow us to > know" ? Yes, bad wording on my side; I will change it to your suggestion in the next version. > > > > > $ grep kstack /proc/vmstat > > kstack_page_1 19974 > > kstack_page_2 94 > > kstack_page_3 0 > > kstack_page_4 0 > > That's probably only usefull when THREAD_SIZE is larger than PAGE_SIZE. That is right: if THREAD_SIZE <= PAGE_SIZE, only "kstack_page_1" would be filled. > > On powerpc 8xx, THREAD_SIZE is 8k by default and PAGE_SIZE can be either > 4k or 16k. With THREAD_SIZE == 8K and PAGE_SIZE == 4K, there will be two counters in /proc/vmstat, something like this: kstack_page_1 XXX kstack_page_2 YYY With THREAD_SIZE == 16K and PAGE_SIZE == 16K, there will be two counters, but one will always be zero: kstack_page_1 XXX kstack_page_2 0 Thanks, Pasha
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index ccd72b978e1f..7ff7f9997266 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -95,9 +95,41 @@ static inline int object_is_on_stack(const void *obj) extern void thread_stack_cache_init(void); #ifdef CONFIG_DEBUG_STACK_USAGE +#ifdef CONFIG_VM_EVENT_COUNTERS +#include <linux/vm_event_item.h> + +/* Count the maximum pages reached in kernel stacks */ +static inline void count_kstack_page(int stack_max_page) +{ + switch (stack_max_page) { + case 1: + this_cpu_inc(vm_event_states.event[KSTACK_PAGE_1]); + break; + case 2: + this_cpu_inc(vm_event_states.event[KSTACK_PAGE_2]); + break; +#if THREAD_SIZE >= (4 * PAGE_SIZE) + case 3: + this_cpu_inc(vm_event_states.event[KSTACK_PAGE_3]); + break; + case 4: + this_cpu_inc(vm_event_states.event[KSTACK_PAGE_4]); + break; +#endif +#if THREAD_SIZE > (4 * PAGE_SIZE) + default: + this_cpu_inc(vm_event_states.event[KSTACK_PAGE_5]); +#endif + } +} +#else /* !CONFIG_VM_EVENT_COUNTERS */ +static inline void count_kstack_page(int stack_max_page) {} +#endif /* CONFIG_VM_EVENT_COUNTERS */ + static inline unsigned long stack_not_used(struct task_struct *p) { unsigned long *n = end_of_stack(p); + unsigned long unused_stack; do { /* Skip over canary */ # ifdef CONFIG_STACK_GROWSUP @@ -108,10 +140,13 @@ static inline unsigned long stack_not_used(struct task_struct *p) } while (!*n); # ifdef CONFIG_STACK_GROWSUP - return (unsigned long)end_of_stack(p) - (unsigned long)n; + unused_stack = (unsigned long)end_of_stack(p) - (unsigned long)n; # else - return (unsigned long)n - (unsigned long)end_of_stack(p); + unused_stack = (unsigned long)n - (unsigned long)end_of_stack(p); # endif + count_kstack_page(((THREAD_SIZE - unused_stack) >> PAGE_SHIFT) + 1); + + return unused_stack; } #endif extern void set_task_stack_end_magic(struct task_struct *tsk); diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 
747943bc8cc2..1dbfe47ff048 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -153,10 +153,39 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, VMA_LOCK_ABORT, VMA_LOCK_RETRY, VMA_LOCK_MISS, +#endif +#ifdef CONFIG_DEBUG_STACK_USAGE + KSTACK_PAGE_1, + KSTACK_PAGE_2, +#if THREAD_SIZE >= (4 * PAGE_SIZE) + KSTACK_PAGE_3, + KSTACK_PAGE_4, +#endif +#if THREAD_SIZE > (4 * PAGE_SIZE) + KSTACK_PAGE_REST, +#endif #endif NR_VM_EVENT_ITEMS }; +#ifdef CONFIG_VM_EVENT_COUNTERS +/* + * Light weight per cpu counter implementation. + * + * Counters should only be incremented and no critical kernel component + * should rely on the counter values. + * + * Counters are handled completely inline. On many platforms the code + * generated will simply be the increment of a global address. + */ + +struct vm_event_state { + unsigned long event[NR_VM_EVENT_ITEMS]; +}; + +DECLARE_PER_CPU(struct vm_event_state, vm_event_states); +#endif + #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define THP_FILE_ALLOC ({ BUILD_BUG(); 0; }) #define THP_FILE_FALLBACK ({ BUILD_BUG(); 0; }) diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 343906a98d6e..18d4a97d3afd 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -41,22 +41,6 @@ enum writeback_stat_item { }; #ifdef CONFIG_VM_EVENT_COUNTERS -/* - * Light weight per cpu counter implementation. - * - * Counters should only be incremented and no critical kernel component - * should rely on the counter values. - * - * Counters are handled completely inline. On many platforms the code - * generated will simply be the increment of a global address. - */ - -struct vm_event_state { - unsigned long event[NR_VM_EVENT_ITEMS]; -}; - -DECLARE_PER_CPU(struct vm_event_state, vm_event_states); - /* * vm counters are allowed to be racy. Use raw_cpu_ops to avoid the * local_irq_disable overhead. 
diff --git a/mm/vmstat.c b/mm/vmstat.c index db79935e4a54..737c85689251 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1413,6 +1413,17 @@ const char * const vmstat_text[] = { "vma_lock_retry", "vma_lock_miss", #endif +#ifdef CONFIG_DEBUG_STACK_USAGE + "kstack_page_1", + "kstack_page_2", +#if THREAD_SIZE >= (4 * PAGE_SIZE) + "kstack_page_3", + "kstack_page_4", +#endif +#if THREAD_SIZE > (4 * PAGE_SIZE) + "kstack_page_rest", +#endif +#endif #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ }; #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
CONFIG_DEBUG_STACK_USAGE provides a mechanism to know the minimum amount of memory that was left in the stack. Every time a new anti-record is reached, a message is printed to the console. However, this does not allow us to know how much each page within the stack was actually used. Provide a mechanism to count the number of times each stack page was reached throughout the life of the stack: $ grep kstack /proc/vmstat kstack_page_1 19974 kstack_page_2 94 kstack_page_3 0 kstack_page_4 0 In the above example, out of ~20K threads that ever exited on that machine, only 94 touched the second page of the stack, and none touched pages three and four. Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com> --- include/linux/sched/task_stack.h | 39 ++++++++++++++++++++++++++++++-- include/linux/vm_event_item.h | 29 ++++++++++++++++++++++++ include/linux/vmstat.h | 16 ------------- mm/vmstat.c | 11 +++++++++ 4 files changed, 77 insertions(+), 18 deletions(-)