mm: memcontrol: account kernel stack per node
diff mbox series

Message ID 20200630004413.1423733-1-shakeelb@google.com
State New
Headers show
Series
  • mm: memcontrol: account kernel stack per node
Related show

Commit Message

Shakeel Butt June 30, 2020, 12:44 a.m. UTC
Currently the kernel stack is being accounted per-zone. There is no need
to do that. In addition due to being per-zone, memcg has to keep a
separate MEMCG_KERNEL_STACK_KB. Make the stat per-node and deprecate
MEMCG_KERNEL_STACK_KB as memcg_stat_item is an extension of
node_stat_item.

Signed-off-by: Shakeel Butt <shakeelb@google.com>
---
 drivers/base/node.c        |  4 ++--
 fs/proc/meminfo.c          |  4 ++--
 include/linux/memcontrol.h |  2 --
 include/linux/mmzone.h     |  8 ++++----
 kernel/fork.c              | 29 ++++++++++-------------------
 kernel/scs.c               |  2 +-
 mm/memcontrol.c            |  2 +-
 mm/page_alloc.c            | 16 ++++++++--------
 mm/vmstat.c                |  8 ++++----
 9 files changed, 32 insertions(+), 43 deletions(-)

Comments

Roman Gushchin June 30, 2020, 3:23 a.m. UTC | #1
On Mon, Jun 29, 2020 at 05:44:13PM -0700, Shakeel Butt wrote:
> Currently the kernel stack is being accounted per-zone. There is no need
> to do that. In addition due to being per-zone, memcg has to keep a
> separate MEMCG_KERNEL_STACK_KB. Make the stat per-node and deprecate
> MEMCG_KERNEL_STACK_KB as memcg_stat_item is an extension of
> node_stat_item.
> 
> Signed-off-by: Shakeel Butt <shakeelb@google.com>
> ---
>  drivers/base/node.c        |  4 ++--
>  fs/proc/meminfo.c          |  4 ++--
>  include/linux/memcontrol.h |  2 --
>  include/linux/mmzone.h     |  8 ++++----
>  kernel/fork.c              | 29 ++++++++++-------------------
>  kernel/scs.c               |  2 +-
>  mm/memcontrol.c            |  2 +-
>  mm/page_alloc.c            | 16 ++++++++--------
>  mm/vmstat.c                |  8 ++++----
>  9 files changed, 32 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index 0cf13e31603c..508b80f6329b 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -440,9 +440,9 @@ static ssize_t node_read_meminfo(struct device *dev,
>  		       nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
>  		       nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
>  		       nid, K(i.sharedram),
> -		       nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
> +		       nid, node_page_state(pgdat, NR_KERNEL_STACK_KB),
>  #ifdef CONFIG_SHADOW_CALL_STACK
> -		       nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB),
> +		       nid, node_page_state(pgdat, NR_KERNEL_SCS_KB),
>  #endif
>  		       nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
>  		       nid, 0UL,
> diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
> index f262bff3ca31..887a5532e449 100644
> --- a/fs/proc/meminfo.c
> +++ b/fs/proc/meminfo.c
> @@ -101,10 +101,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
>  	show_val_kb(m, "SReclaimable:   ", sreclaimable);
>  	show_val_kb(m, "SUnreclaim:     ", sunreclaim);
>  	seq_printf(m, "KernelStack:    %8lu kB\n",
> -		   global_zone_page_state(NR_KERNEL_STACK_KB));
> +		   global_node_page_state(NR_KERNEL_STACK_KB));
>  #ifdef CONFIG_SHADOW_CALL_STACK
>  	seq_printf(m, "ShadowCallStack:%8lu kB\n",
> -		   global_zone_page_state(NR_KERNEL_SCS_KB));
> +		   global_node_page_state(NR_KERNEL_SCS_KB));
>  #endif
>  	show_val_kb(m, "PageTables:     ",
>  		    global_zone_page_state(NR_PAGETABLE));
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index ba1e42715ecf..a3ddb236898e 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -33,8 +33,6 @@ enum memcg_stat_item {
>  	MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
>  	MEMCG_SOCK,
>  	MEMCG_PERCPU_B,
> -	/* XXX: why are these zone and not node counters? */
> -	MEMCG_KERNEL_STACK_KB,
>  	MEMCG_NR_STAT,
>  };
>  
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 8e859444927a..b79f73ce8b57 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -153,10 +153,6 @@ enum zone_stat_item {
>  	NR_ZONE_WRITE_PENDING,	/* Count of dirty, writeback and unstable pages */
>  	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
>  	NR_PAGETABLE,		/* used for pagetables */
> -	NR_KERNEL_STACK_KB,	/* measured in KiB */
> -#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
> -	NR_KERNEL_SCS_KB,	/* measured in KiB */
> -#endif
>  	/* Second 128 byte cacheline */
>  	NR_BOUNCE,
>  #if IS_ENABLED(CONFIG_ZSMALLOC)
> @@ -201,6 +197,10 @@ enum node_stat_item {
>  	NR_KERNEL_MISC_RECLAIMABLE,	/* reclaimable non-slab kernel pages */
>  	NR_FOLL_PIN_ACQUIRED,	/* via: pin_user_page(), gup flag: FOLL_PIN */
>  	NR_FOLL_PIN_RELEASED,	/* pages returned via unpin_user_page() */
> +	NR_KERNEL_STACK_KB,	/* measured in KiB */
> +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
> +	NR_KERNEL_SCS_KB,	/* measured in KiB */
> +#endif
>  	NR_VM_NODE_STAT_ITEMS
>  };
>  
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 73fdfa9674b5..ee5393350ef7 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -278,7 +278,7 @@ static inline void free_thread_stack(struct task_struct *tsk)
>  
>  		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
>  			mod_memcg_page_state(vm->pages[i],
> -					     MEMCG_KERNEL_STACK_KB,
> +					     NR_KERNEL_STACK_KB,
>  					     -(int)(PAGE_SIZE / 1024));

Hello, Shakeel!

Thank you for the cleanup, it makes total sense to me.

However I have some concerns: mod_memcg_page_state() does change only memcg's counters,
but not lruvec counters. So to make it per-node per-memcg (aka  lruvec)
we need to use mod_lruvec_state(), otherwise we won't have global per-node values.

>  
>  			memcg_kmem_uncharge_page(vm->pages[i], 0);
> @@ -381,32 +381,23 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
>  {
>  	void *stack = task_stack_page(tsk);
>  	struct vm_struct *vm = task_stack_vm_area(tsk);
> +	struct page *page;
>  
>  	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
>  
>  	if (vm) {
> -		int i;
> -
>  		BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
> +		page = vm->pages[0];
>  
> -		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
> -			mod_zone_page_state(page_zone(vm->pages[i]),
> -					    NR_KERNEL_STACK_KB,
> -					    PAGE_SIZE / 1024 * account);
> -		}
>  	} else {
> -		/*
> -		 * All stack pages are in the same zone and belong to the
> -		 * same memcg.
> -		 */
> -		struct page *first_page = virt_to_page(stack);
> -
> -		mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
> -				    THREAD_SIZE / 1024 * account);
> -
> -		mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB,
> +		page = virt_to_page(stack);
> +		mod_memcg_obj_state(stack, NR_KERNEL_STACK_KB,
>  				    account * (THREAD_SIZE / 1024));
>  	}
> +
> +	/* All stack pages are in the same node. */
> +	mod_node_page_state(page_pgdat(page), NR_KERNEL_STACK_KB,
> +			    THREAD_SIZE / 1024 * account);
>  }

And then we probably don't need a separate change for memcg- and per-node counters.

Thanks!

>  
>  static int memcg_charge_kernel_stack(struct task_struct *tsk)
> @@ -431,7 +422,7 @@ static int memcg_charge_kernel_stack(struct task_struct *tsk)
>  				return ret;
>  
>  			mod_memcg_page_state(vm->pages[i],
> -					     MEMCG_KERNEL_STACK_KB,
> +					     NR_KERNEL_STACK_KB,
>  					     PAGE_SIZE / 1024);
>  		}
>  	}
> diff --git a/kernel/scs.c b/kernel/scs.c
> index 5d4d9bbdec36..4ff4a7ba0094 100644
> --- a/kernel/scs.c
> +++ b/kernel/scs.c
> @@ -17,7 +17,7 @@ static void __scs_account(void *s, int account)
>  {
>  	struct page *scs_page = virt_to_page(s);
>  
> -	mod_zone_page_state(page_zone(scs_page), NR_KERNEL_SCS_KB,
> +	mod_node_page_state(page_pgdat(scs_page), NR_KERNEL_SCS_KB,
>  			    account * (SCS_SIZE / SZ_1K));
>  }
>  
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index b1a644224383..06de63901f81 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1485,7 +1485,7 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
>  		       (u64)memcg_page_state(memcg, NR_FILE_PAGES) *
>  		       PAGE_SIZE);
>  	seq_buf_printf(&s, "kernel_stack %llu\n",
> -		       (u64)memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) *
> +		       (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) *
>  		       1024);
>  	seq_buf_printf(&s, "slab %llu\n",
>  		       (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) +
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 174c849ba9f2..0568b126f719 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -5402,6 +5402,10 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
>  			" anon_thp: %lukB"
>  #endif
>  			" writeback_tmp:%lukB"
> +			" kernel_stack:%lukB"
> +#ifdef CONFIG_SHADOW_CALL_STACK
> +			" shadow_call_stack:%lukB"
> +#endif
>  			" all_unreclaimable? %s"
>  			"\n",
>  			pgdat->node_id,
> @@ -5423,6 +5427,10 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
>  			K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
>  #endif
>  			K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
> +			node_page_state(pgdat, NR_KERNEL_STACK_KB),
> +#ifdef CONFIG_SHADOW_CALL_STACK
> +			node_page_state(pgdat, NR_KERNEL_SCS_KB),
> +#endif
>  			pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
>  				"yes" : "no");
>  	}
> @@ -5454,10 +5462,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
>  			" present:%lukB"
>  			" managed:%lukB"
>  			" mlocked:%lukB"
> -			" kernel_stack:%lukB"
> -#ifdef CONFIG_SHADOW_CALL_STACK
> -			" shadow_call_stack:%lukB"
> -#endif
>  			" pagetables:%lukB"
>  			" bounce:%lukB"
>  			" free_pcp:%lukB"
> @@ -5479,10 +5483,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
>  			K(zone->present_pages),
>  			K(zone_managed_pages(zone)),
>  			K(zone_page_state(zone, NR_MLOCK)),
> -			zone_page_state(zone, NR_KERNEL_STACK_KB),
> -#ifdef CONFIG_SHADOW_CALL_STACK
> -			zone_page_state(zone, NR_KERNEL_SCS_KB),
> -#endif
>  			K(zone_page_state(zone, NR_PAGETABLE)),
>  			K(zone_page_state(zone, NR_BOUNCE)),
>  			K(free_pcp),
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 2c5a96694490..96bf8bfffd1d 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1158,10 +1158,6 @@ const char * const vmstat_text[] = {
>  	"nr_zone_write_pending",
>  	"nr_mlock",
>  	"nr_page_table_pages",
> -	"nr_kernel_stack",
> -#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
> -	"nr_shadow_call_stack",
> -#endif
>  	"nr_bounce",
>  #if IS_ENABLED(CONFIG_ZSMALLOC)
>  	"nr_zspages",
> @@ -1212,6 +1208,10 @@ const char * const vmstat_text[] = {
>  	"nr_kernel_misc_reclaimable",
>  	"nr_foll_pin_acquired",
>  	"nr_foll_pin_released",
> +	"nr_kernel_stack",
> +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
> +	"nr_shadow_call_stack",
> +#endif
>  
>  	/* enum writeback_stat_item counters */
>  	"nr_dirty_threshold",
> -- 
> 2.27.0.212.ge8ba1cc988-goog
>
Shakeel Butt June 30, 2020, 2:34 p.m. UTC | #2
On Mon, Jun 29, 2020 at 8:24 PM Roman Gushchin <guro@fb.com> wrote:
>
> On Mon, Jun 29, 2020 at 05:44:13PM -0700, Shakeel Butt wrote:
> > Currently the kernel stack is being accounted per-zone. There is no need
> > to do that. In addition due to being per-zone, memcg has to keep a
> > separate MEMCG_KERNEL_STACK_KB. Make the stat per-node and deprecate
> > MEMCG_KERNEL_STACK_KB as memcg_stat_item is an extension of
> > node_stat_item.
> >
> > Signed-off-by: Shakeel Butt <shakeelb@google.com>
> > ---
> >  drivers/base/node.c        |  4 ++--
> >  fs/proc/meminfo.c          |  4 ++--
> >  include/linux/memcontrol.h |  2 --
> >  include/linux/mmzone.h     |  8 ++++----
> >  kernel/fork.c              | 29 ++++++++++-------------------
> >  kernel/scs.c               |  2 +-
> >  mm/memcontrol.c            |  2 +-
> >  mm/page_alloc.c            | 16 ++++++++--------
> >  mm/vmstat.c                |  8 ++++----
> >  9 files changed, 32 insertions(+), 43 deletions(-)
> >
> > diff --git a/drivers/base/node.c b/drivers/base/node.c
> > index 0cf13e31603c..508b80f6329b 100644
> > --- a/drivers/base/node.c
> > +++ b/drivers/base/node.c
> > @@ -440,9 +440,9 @@ static ssize_t node_read_meminfo(struct device *dev,
> >                      nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
> >                      nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
> >                      nid, K(i.sharedram),
> > -                    nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
> > +                    nid, node_page_state(pgdat, NR_KERNEL_STACK_KB),
> >  #ifdef CONFIG_SHADOW_CALL_STACK
> > -                    nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB),
> > +                    nid, node_page_state(pgdat, NR_KERNEL_SCS_KB),
> >  #endif
> >                      nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
> >                      nid, 0UL,
> > diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
> > index f262bff3ca31..887a5532e449 100644
> > --- a/fs/proc/meminfo.c
> > +++ b/fs/proc/meminfo.c
> > @@ -101,10 +101,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
> >       show_val_kb(m, "SReclaimable:   ", sreclaimable);
> >       show_val_kb(m, "SUnreclaim:     ", sunreclaim);
> >       seq_printf(m, "KernelStack:    %8lu kB\n",
> > -                global_zone_page_state(NR_KERNEL_STACK_KB));
> > +                global_node_page_state(NR_KERNEL_STACK_KB));
> >  #ifdef CONFIG_SHADOW_CALL_STACK
> >       seq_printf(m, "ShadowCallStack:%8lu kB\n",
> > -                global_zone_page_state(NR_KERNEL_SCS_KB));
> > +                global_node_page_state(NR_KERNEL_SCS_KB));
> >  #endif
> >       show_val_kb(m, "PageTables:     ",
> >                   global_zone_page_state(NR_PAGETABLE));
> > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> > index ba1e42715ecf..a3ddb236898e 100644
> > --- a/include/linux/memcontrol.h
> > +++ b/include/linux/memcontrol.h
> > @@ -33,8 +33,6 @@ enum memcg_stat_item {
> >       MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
> >       MEMCG_SOCK,
> >       MEMCG_PERCPU_B,
> > -     /* XXX: why are these zone and not node counters? */
> > -     MEMCG_KERNEL_STACK_KB,
> >       MEMCG_NR_STAT,
> >  };
> >
> > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> > index 8e859444927a..b79f73ce8b57 100644
> > --- a/include/linux/mmzone.h
> > +++ b/include/linux/mmzone.h
> > @@ -153,10 +153,6 @@ enum zone_stat_item {
> >       NR_ZONE_WRITE_PENDING,  /* Count of dirty, writeback and unstable pages */
> >       NR_MLOCK,               /* mlock()ed pages found and moved off LRU */
> >       NR_PAGETABLE,           /* used for pagetables */
> > -     NR_KERNEL_STACK_KB,     /* measured in KiB */
> > -#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
> > -     NR_KERNEL_SCS_KB,       /* measured in KiB */
> > -#endif
> >       /* Second 128 byte cacheline */
> >       NR_BOUNCE,
> >  #if IS_ENABLED(CONFIG_ZSMALLOC)
> > @@ -201,6 +197,10 @@ enum node_stat_item {
> >       NR_KERNEL_MISC_RECLAIMABLE,     /* reclaimable non-slab kernel pages */
> >       NR_FOLL_PIN_ACQUIRED,   /* via: pin_user_page(), gup flag: FOLL_PIN */
> >       NR_FOLL_PIN_RELEASED,   /* pages returned via unpin_user_page() */
> > +     NR_KERNEL_STACK_KB,     /* measured in KiB */
> > +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
> > +     NR_KERNEL_SCS_KB,       /* measured in KiB */
> > +#endif
> >       NR_VM_NODE_STAT_ITEMS
> >  };
> >
> > diff --git a/kernel/fork.c b/kernel/fork.c
> > index 73fdfa9674b5..ee5393350ef7 100644
> > --- a/kernel/fork.c
> > +++ b/kernel/fork.c
> > @@ -278,7 +278,7 @@ static inline void free_thread_stack(struct task_struct *tsk)
> >
> >               for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
> >                       mod_memcg_page_state(vm->pages[i],
> > -                                          MEMCG_KERNEL_STACK_KB,
> > +                                          NR_KERNEL_STACK_KB,
> >                                            -(int)(PAGE_SIZE / 1024));
>
> Hello, Shakeel!
>
> Thank you for the cleanup, it makes total sense to me.
>
> However I have some concerns: mod_memcg_page_state() does change only memcg's counters,
> but not lruvec counters. So to make it per-node per-memcg (aka  lruvec)
> we need to use mod_lruvec_state(), otherwise we won't have global per-node values.
>
> >
> >                       memcg_kmem_uncharge_page(vm->pages[i], 0);
> > @@ -381,32 +381,23 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
> >  {
> >       void *stack = task_stack_page(tsk);
> >       struct vm_struct *vm = task_stack_vm_area(tsk);
> > +     struct page *page;
> >
> >       BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
> >
> >       if (vm) {
> > -             int i;
> > -
> >               BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
> > +             page = vm->pages[0];
> >
> > -             for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
> > -                     mod_zone_page_state(page_zone(vm->pages[i]),
> > -                                         NR_KERNEL_STACK_KB,
> > -                                         PAGE_SIZE / 1024 * account);
> > -             }
> >       } else {
> > -             /*
> > -              * All stack pages are in the same zone and belong to the
> > -              * same memcg.
> > -              */
> > -             struct page *first_page = virt_to_page(stack);
> > -
> > -             mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
> > -                                 THREAD_SIZE / 1024 * account);
> > -
> > -             mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB,
> > +             page = virt_to_page(stack);
> > +             mod_memcg_obj_state(stack, NR_KERNEL_STACK_KB,
> >                                   account * (THREAD_SIZE / 1024));
> >       }
> > +
> > +     /* All stack pages are in the same node. */
> > +     mod_node_page_state(page_pgdat(page), NR_KERNEL_STACK_KB,
> > +                         THREAD_SIZE / 1024 * account);
> >  }
>
> And then we probably don't need a separate change for memcg- and per-node counters.
>

Yes, I thought about combining memcg and per-node counters but got
worried that the cached stacks for CONFIG_VMAP_STACK would not be
accounted for in the per-node global counters but I see that we
already don't account for them in both counters. I will further
simplify these. Thanks for the suggestion.

Shakeel

Patch
diff mbox series

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 0cf13e31603c..508b80f6329b 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -440,9 +440,9 @@  static ssize_t node_read_meminfo(struct device *dev,
 		       nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
 		       nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
 		       nid, K(i.sharedram),
-		       nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+		       nid, node_page_state(pgdat, NR_KERNEL_STACK_KB),
 #ifdef CONFIG_SHADOW_CALL_STACK
-		       nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB),
+		       nid, node_page_state(pgdat, NR_KERNEL_SCS_KB),
 #endif
 		       nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
 		       nid, 0UL,
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index f262bff3ca31..887a5532e449 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -101,10 +101,10 @@  static int meminfo_proc_show(struct seq_file *m, void *v)
 	show_val_kb(m, "SReclaimable:   ", sreclaimable);
 	show_val_kb(m, "SUnreclaim:     ", sunreclaim);
 	seq_printf(m, "KernelStack:    %8lu kB\n",
-		   global_zone_page_state(NR_KERNEL_STACK_KB));
+		   global_node_page_state(NR_KERNEL_STACK_KB));
 #ifdef CONFIG_SHADOW_CALL_STACK
 	seq_printf(m, "ShadowCallStack:%8lu kB\n",
-		   global_zone_page_state(NR_KERNEL_SCS_KB));
+		   global_node_page_state(NR_KERNEL_SCS_KB));
 #endif
 	show_val_kb(m, "PageTables:     ",
 		    global_zone_page_state(NR_PAGETABLE));
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ba1e42715ecf..a3ddb236898e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -33,8 +33,6 @@  enum memcg_stat_item {
 	MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
 	MEMCG_SOCK,
 	MEMCG_PERCPU_B,
-	/* XXX: why are these zone and not node counters? */
-	MEMCG_KERNEL_STACK_KB,
 	MEMCG_NR_STAT,
 };
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 8e859444927a..b79f73ce8b57 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -153,10 +153,6 @@  enum zone_stat_item {
 	NR_ZONE_WRITE_PENDING,	/* Count of dirty, writeback and unstable pages */
 	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
 	NR_PAGETABLE,		/* used for pagetables */
-	NR_KERNEL_STACK_KB,	/* measured in KiB */
-#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
-	NR_KERNEL_SCS_KB,	/* measured in KiB */
-#endif
 	/* Second 128 byte cacheline */
 	NR_BOUNCE,
 #if IS_ENABLED(CONFIG_ZSMALLOC)
@@ -201,6 +197,10 @@  enum node_stat_item {
 	NR_KERNEL_MISC_RECLAIMABLE,	/* reclaimable non-slab kernel pages */
 	NR_FOLL_PIN_ACQUIRED,	/* via: pin_user_page(), gup flag: FOLL_PIN */
 	NR_FOLL_PIN_RELEASED,	/* pages returned via unpin_user_page() */
+	NR_KERNEL_STACK_KB,	/* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+	NR_KERNEL_SCS_KB,	/* measured in KiB */
+#endif
 	NR_VM_NODE_STAT_ITEMS
 };
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 73fdfa9674b5..ee5393350ef7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -278,7 +278,7 @@  static inline void free_thread_stack(struct task_struct *tsk)
 
 		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
 			mod_memcg_page_state(vm->pages[i],
-					     MEMCG_KERNEL_STACK_KB,
+					     NR_KERNEL_STACK_KB,
 					     -(int)(PAGE_SIZE / 1024));
 
 			memcg_kmem_uncharge_page(vm->pages[i], 0);
@@ -381,32 +381,23 @@  static void account_kernel_stack(struct task_struct *tsk, int account)
 {
 	void *stack = task_stack_page(tsk);
 	struct vm_struct *vm = task_stack_vm_area(tsk);
+	struct page *page;
 
 	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
 
 	if (vm) {
-		int i;
-
 		BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+		page = vm->pages[0];
 
-		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
-			mod_zone_page_state(page_zone(vm->pages[i]),
-					    NR_KERNEL_STACK_KB,
-					    PAGE_SIZE / 1024 * account);
-		}
 	} else {
-		/*
-		 * All stack pages are in the same zone and belong to the
-		 * same memcg.
-		 */
-		struct page *first_page = virt_to_page(stack);
-
-		mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
-				    THREAD_SIZE / 1024 * account);
-
-		mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB,
+		page = virt_to_page(stack);
+		mod_memcg_obj_state(stack, NR_KERNEL_STACK_KB,
 				    account * (THREAD_SIZE / 1024));
 	}
+
+	/* All stack pages are in the same node. */
+	mod_node_page_state(page_pgdat(page), NR_KERNEL_STACK_KB,
+			    THREAD_SIZE / 1024 * account);
 }
 
 static int memcg_charge_kernel_stack(struct task_struct *tsk)
@@ -431,7 +422,7 @@  static int memcg_charge_kernel_stack(struct task_struct *tsk)
 				return ret;
 
 			mod_memcg_page_state(vm->pages[i],
-					     MEMCG_KERNEL_STACK_KB,
+					     NR_KERNEL_STACK_KB,
 					     PAGE_SIZE / 1024);
 		}
 	}
diff --git a/kernel/scs.c b/kernel/scs.c
index 5d4d9bbdec36..4ff4a7ba0094 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -17,7 +17,7 @@  static void __scs_account(void *s, int account)
 {
 	struct page *scs_page = virt_to_page(s);
 
-	mod_zone_page_state(page_zone(scs_page), NR_KERNEL_SCS_KB,
+	mod_node_page_state(page_pgdat(scs_page), NR_KERNEL_SCS_KB,
 			    account * (SCS_SIZE / SZ_1K));
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b1a644224383..06de63901f81 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1485,7 +1485,7 @@  static char *memory_stat_format(struct mem_cgroup *memcg)
 		       (u64)memcg_page_state(memcg, NR_FILE_PAGES) *
 		       PAGE_SIZE);
 	seq_buf_printf(&s, "kernel_stack %llu\n",
-		       (u64)memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) *
+		       (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) *
 		       1024);
 	seq_buf_printf(&s, "slab %llu\n",
 		       (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) +
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 174c849ba9f2..0568b126f719 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5402,6 +5402,10 @@  void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 			" anon_thp: %lukB"
 #endif
 			" writeback_tmp:%lukB"
+			" kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+			" shadow_call_stack:%lukB"
+#endif
 			" all_unreclaimable? %s"
 			"\n",
 			pgdat->node_id,
@@ -5423,6 +5427,10 @@  void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 			K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
 #endif
 			K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
+			node_page_state(pgdat, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+			node_page_state(pgdat, NR_KERNEL_SCS_KB),
+#endif
 			pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
 				"yes" : "no");
 	}
@@ -5454,10 +5462,6 @@  void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 			" present:%lukB"
 			" managed:%lukB"
 			" mlocked:%lukB"
-			" kernel_stack:%lukB"
-#ifdef CONFIG_SHADOW_CALL_STACK
-			" shadow_call_stack:%lukB"
-#endif
 			" pagetables:%lukB"
 			" bounce:%lukB"
 			" free_pcp:%lukB"
@@ -5479,10 +5483,6 @@  void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 			K(zone->present_pages),
 			K(zone_managed_pages(zone)),
 			K(zone_page_state(zone, NR_MLOCK)),
-			zone_page_state(zone, NR_KERNEL_STACK_KB),
-#ifdef CONFIG_SHADOW_CALL_STACK
-			zone_page_state(zone, NR_KERNEL_SCS_KB),
-#endif
 			K(zone_page_state(zone, NR_PAGETABLE)),
 			K(zone_page_state(zone, NR_BOUNCE)),
 			K(free_pcp),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 2c5a96694490..96bf8bfffd1d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1158,10 +1158,6 @@  const char * const vmstat_text[] = {
 	"nr_zone_write_pending",
 	"nr_mlock",
 	"nr_page_table_pages",
-	"nr_kernel_stack",
-#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
-	"nr_shadow_call_stack",
-#endif
 	"nr_bounce",
 #if IS_ENABLED(CONFIG_ZSMALLOC)
 	"nr_zspages",
@@ -1212,6 +1208,10 @@  const char * const vmstat_text[] = {
 	"nr_kernel_misc_reclaimable",
 	"nr_foll_pin_acquired",
 	"nr_foll_pin_released",
+	"nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+	"nr_shadow_call_stack",
+#endif
 
 	/* enum writeback_stat_item counters */
 	"nr_dirty_threshold",