Message ID | 20200910084258.22293-1-songmuchun@bytedance.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | mm: memcontrol: Add the missing numa stat of anon and file for cgroup v2 | expand |
On Thu, Sep 10, 2020 at 1:46 AM Muchun Song <songmuchun@bytedance.com> wrote: > > In the cgroup v1, we have a numa_stat interface. This is useful for > providing visibility into the numa locality information within an > memcg since the pages are allowed to be allocated from any physical > node. One of the use cases is evaluating application performance by > combining this information with the application's CPU allocation. > But the cgroup v2 does not. So this patch adds the missing information. > > Signed-off-by: Muchun Song <songmuchun@bytedance.com> > --- I am actually working on exposing this info on v2 as well. > mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 44 insertions(+), 2 deletions(-) > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index 75cd1a1e66c8..c779673f29b2 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg) > return false; > } > > +#ifdef CONFIG_NUMA > +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg, > + unsigned int nid, > + enum node_stat_item idx) > +{ > + long x; > + struct mem_cgroup_per_node *pn; > + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); > + > + VM_BUG_ON(nid >= nr_node_ids); > + > + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); > + x = atomic_long_read(&pn->lruvec_stat[idx]); > +#ifdef CONFIG_SMP > + if (x < 0) > + x = 0; > +#endif > + return x; > +} > +#endif > + > static char *memory_stat_format(struct mem_cgroup *memcg) > { > struct seq_buf s; > int i; > +#ifdef CONFIG_NUMA > + int nid; > +#endif > > seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE); > if (!s.buffer) > @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg) > * Current memory state: > */ > Let's not break the parsers of memory.stat. I would prefer a separate interface like v1 i.e. memory.numa_stat. 
> - seq_buf_printf(&s, "anon %llu\n", > + seq_buf_printf(&s, "anon %llu", > (u64)memcg_page_state(memcg, NR_ANON_MAPPED) * > PAGE_SIZE); > - seq_buf_printf(&s, "file %llu\n", > +#ifdef CONFIG_NUMA > + for_each_node_state(nid, N_MEMORY) > + seq_buf_printf(&s, " N%d=%llu", nid, > + (u64)memcg_node_page_state(memcg, nid, > + NR_ANON_MAPPED) * > + PAGE_SIZE); > +#endif > + seq_buf_putc(&s, '\n'); > + > + seq_buf_printf(&s, "file %llu", > (u64)memcg_page_state(memcg, NR_FILE_PAGES) * > PAGE_SIZE); > +#ifdef CONFIG_NUMA > + for_each_node_state(nid, N_MEMORY) > + seq_buf_printf(&s, " N%d=%llu", nid, > + (u64)memcg_node_page_state(memcg, nid, > + NR_FILE_PAGES) * > + PAGE_SIZE); > +#endif > + seq_buf_putc(&s, '\n'); > + The v1's numa_stat exposes the LRUs, why NR_ANON_MAPPED and NR_FILE_PAGES? Also I think exposing slab_[un]reclaimable per node would be beneficial as well. > seq_buf_printf(&s, "kernel_stack %llu\n", > (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) * > 1024); > -- > 2.20.1 >
On Fri, Sep 11, 2020 at 12:02 AM Shakeel Butt <shakeelb@google.com> wrote: > > On Thu, Sep 10, 2020 at 1:46 AM Muchun Song <songmuchun@bytedance.com> wrote: > > > > In the cgroup v1, we have a numa_stat interface. This is useful for > > providing visibility into the numa locality information within an > > memcg since the pages are allowed to be allocated from any physical > > node. One of the use cases is evaluating application performance by > > combining this information with the application's CPU allocation. > > But the cgroup v2 does not. So this patch adds the missing information. > > > > Signed-off-by: Muchun Song <songmuchun@bytedance.com> > > --- > > I am actually working on exposing this info on v2 as well. > > > mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- > > 1 file changed, 44 insertions(+), 2 deletions(-) > > > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > > index 75cd1a1e66c8..c779673f29b2 100644 > > --- a/mm/memcontrol.c > > +++ b/mm/memcontrol.c > > @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg) > > return false; > > } > > > > +#ifdef CONFIG_NUMA > > +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg, > > + unsigned int nid, > > + enum node_stat_item idx) > > +{ > > + long x; > > + struct mem_cgroup_per_node *pn; > > + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); > > + > > + VM_BUG_ON(nid >= nr_node_ids); > > + > > + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); > > + x = atomic_long_read(&pn->lruvec_stat[idx]); > > +#ifdef CONFIG_SMP > > + if (x < 0) > > + x = 0; > > +#endif > > + return x; > > +} > > +#endif > > + > > static char *memory_stat_format(struct mem_cgroup *memcg) > > { > > struct seq_buf s; > > int i; > > +#ifdef CONFIG_NUMA > > + int nid; > > +#endif > > > > seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE); > > if (!s.buffer) > > @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct 
mem_cgroup *memcg) > > * Current memory state: > > */ > > > > Let's not break the parsers of memory.stat. I would prefer a separate > interface like v1 i.e. memory.numa_stat. It is also a good idea to expose a new interface like memory.numa_stat. > > > - seq_buf_printf(&s, "anon %llu\n", > > + seq_buf_printf(&s, "anon %llu", > > (u64)memcg_page_state(memcg, NR_ANON_MAPPED) * > > PAGE_SIZE); > > - seq_buf_printf(&s, "file %llu\n", > > +#ifdef CONFIG_NUMA > > + for_each_node_state(nid, N_MEMORY) > > + seq_buf_printf(&s, " N%d=%llu", nid, > > + (u64)memcg_node_page_state(memcg, nid, > > + NR_ANON_MAPPED) * > > + PAGE_SIZE); > > +#endif > > + seq_buf_putc(&s, '\n'); > > + > > + seq_buf_printf(&s, "file %llu", > > (u64)memcg_page_state(memcg, NR_FILE_PAGES) * > > PAGE_SIZE); > > +#ifdef CONFIG_NUMA > > + for_each_node_state(nid, N_MEMORY) > > + seq_buf_printf(&s, " N%d=%llu", nid, > > + (u64)memcg_node_page_state(memcg, nid, > > + NR_FILE_PAGES) * > > + PAGE_SIZE); > > +#endif > > + seq_buf_putc(&s, '\n'); > > + > > The v1's numa_stat exposes the LRUs, why NR_ANON_MAPPED and NR_FILE_PAGES? If we want to expose the anon per node, we need to add inactive anon and active anon together. Why not use NR_ANON_MAPPED directly? > > Also I think exposing slab_[un]reclaimable per node would be beneficial as well. Yeah, I agree with you. Maybe kernel_stack and percpu also should be exposed. > > > seq_buf_printf(&s, "kernel_stack %llu\n", > > (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) * > > 1024); > > -- > > 2.20.1 > >
On Thu, Sep 10, 2020 at 8:52 PM Muchun Song <songmuchun@bytedance.com> wrote: > > On Fri, Sep 11, 2020 at 12:02 AM Shakeel Butt <shakeelb@google.com> wrote: > > > > On Thu, Sep 10, 2020 at 1:46 AM Muchun Song <songmuchun@bytedance.com> wrote: > > > > > > In the cgroup v1, we have a numa_stat interface. This is useful for > > > providing visibility into the numa locality information within an > > > memcg since the pages are allowed to be allocated from any physical > > > node. One of the use cases is evaluating application performance by > > > combining this information with the application's CPU allocation. > > > But the cgroup v2 does not. So this patch adds the missing information. > > > > > > Signed-off-by: Muchun Song <songmuchun@bytedance.com> > > > --- > > > > I am actually working on exposing this info on v2 as well. > > > > > mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- > > > 1 file changed, 44 insertions(+), 2 deletions(-) > > > > > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > > > index 75cd1a1e66c8..c779673f29b2 100644 > > > --- a/mm/memcontrol.c > > > +++ b/mm/memcontrol.c > > > @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg) > > > return false; > > > } > > > > > > +#ifdef CONFIG_NUMA > > > +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg, > > > + unsigned int nid, > > > + enum node_stat_item idx) > > > +{ > > > + long x; > > > + struct mem_cgroup_per_node *pn; > > > + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); > > > + > > > + VM_BUG_ON(nid >= nr_node_ids); > > > + > > > + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); > > > + x = atomic_long_read(&pn->lruvec_stat[idx]); > > > +#ifdef CONFIG_SMP > > > + if (x < 0) > > > + x = 0; > > > +#endif > > > + return x; > > > +} > > > +#endif > > > + > > > static char *memory_stat_format(struct mem_cgroup *memcg) > > > { > > > struct seq_buf s; > > > int i; > > > +#ifdef CONFIG_NUMA > > > 
+ int nid; > > > +#endif > > > > > > seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE); > > > if (!s.buffer) > > > @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg) > > > * Current memory state: > > > */ > > > > > > > Let's not break the parsers of memory.stat. I would prefer a separate > > interface like v1 i.e. memory.numa_stat. > > It is also a good idea to expose a new interface like memory.numa_stat. > > > > > > - seq_buf_printf(&s, "anon %llu\n", > > > + seq_buf_printf(&s, "anon %llu", > > > (u64)memcg_page_state(memcg, NR_ANON_MAPPED) * > > > PAGE_SIZE); > > > - seq_buf_printf(&s, "file %llu\n", > > > +#ifdef CONFIG_NUMA > > > + for_each_node_state(nid, N_MEMORY) > > > + seq_buf_printf(&s, " N%d=%llu", nid, > > > + (u64)memcg_node_page_state(memcg, nid, > > > + NR_ANON_MAPPED) * > > > + PAGE_SIZE); > > > +#endif > > > + seq_buf_putc(&s, '\n'); > > > + > > > + seq_buf_printf(&s, "file %llu", > > > (u64)memcg_page_state(memcg, NR_FILE_PAGES) * > > > PAGE_SIZE); > > > +#ifdef CONFIG_NUMA > > > + for_each_node_state(nid, N_MEMORY) > > > + seq_buf_printf(&s, " N%d=%llu", nid, > > > + (u64)memcg_node_page_state(memcg, nid, > > > + NR_FILE_PAGES) * > > > + PAGE_SIZE); > > > +#endif > > > + seq_buf_putc(&s, '\n'); > > > + > > > > The v1's numa_stat exposes the LRUs, why NR_ANON_MAPPED and NR_FILE_PAGES? > > If we want to expose the anon per node, we need to add inactive anon and > active anon together. Why not use NR_ANON_MAPPED directly? > Active anon plus inactive anon is not equal to NR_ANON_MAPPED. The shmem related memory is on anon LRUs but not accounted in NR_ANON_MAPPED. Similarly file LRU can contain MADV_FREE pages which are not accounted in NR_FILE_PAGES. > > > > Also I think exposing slab_[un]reclaimable per node would be beneficial as well. > > Yeah, I agree with you. Maybe kernel_stack and percpu also should > be exposed. 
> > > > > > seq_buf_printf(&s, "kernel_stack %llu\n", > > > (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) * > > > 1024); > > > -- > > > 2.20.1 > > > > > > > -- > Yours, > Muchun
On Fri, Sep 11, 2020 at 10:55 PM Shakeel Butt <shakeelb@google.com> wrote: > > On Thu, Sep 10, 2020 at 8:52 PM Muchun Song <songmuchun@bytedance.com> wrote: > > > > On Fri, Sep 11, 2020 at 12:02 AM Shakeel Butt <shakeelb@google.com> wrote: > > > > > > On Thu, Sep 10, 2020 at 1:46 AM Muchun Song <songmuchun@bytedance.com> wrote: > > > > > > > > In the cgroup v1, we have a numa_stat interface. This is useful for > > > > providing visibility into the numa locality information within an > > > > memcg since the pages are allowed to be allocated from any physical > > > > node. One of the use cases is evaluating application performance by > > > > combining this information with the application's CPU allocation. > > > > But the cgroup v2 does not. So this patch adds the missing information. > > > > > > > > Signed-off-by: Muchun Song <songmuchun@bytedance.com> > > > > --- > > > > > > I am actually working on exposing this info on v2 as well. > > > > > > > mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- > > > > 1 file changed, 44 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > > > > index 75cd1a1e66c8..c779673f29b2 100644 > > > > --- a/mm/memcontrol.c > > > > +++ b/mm/memcontrol.c > > > > @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg) > > > > return false; > > > > } > > > > > > > > +#ifdef CONFIG_NUMA > > > > +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg, > > > > + unsigned int nid, > > > > + enum node_stat_item idx) > > > > +{ > > > > + long x; > > > > + struct mem_cgroup_per_node *pn; > > > > + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); > > > > + > > > > + VM_BUG_ON(nid >= nr_node_ids); > > > > + > > > > + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); > > > > + x = atomic_long_read(&pn->lruvec_stat[idx]); > > > > +#ifdef CONFIG_SMP > > > > + if (x < 0) > > > > + x = 0; > > > > +#endif > > > > + return x; 
> > > > +} > > > > +#endif > > > > + > > > > static char *memory_stat_format(struct mem_cgroup *memcg) > > > > { > > > > struct seq_buf s; > > > > int i; > > > > +#ifdef CONFIG_NUMA > > > > + int nid; > > > > +#endif > > > > > > > > seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE); > > > > if (!s.buffer) > > > > @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg) > > > > * Current memory state: > > > > */ > > > > > > > > > > Let's not break the parsers of memory.stat. I would prefer a separate > > > interface like v1 i.e. memory.numa_stat. > > > > It is also a good idea to expose a new interface like memory.numa_stat. > > > > > > > > > - seq_buf_printf(&s, "anon %llu\n", > > > > + seq_buf_printf(&s, "anon %llu", > > > > (u64)memcg_page_state(memcg, NR_ANON_MAPPED) * > > > > PAGE_SIZE); > > > > - seq_buf_printf(&s, "file %llu\n", > > > > +#ifdef CONFIG_NUMA > > > > + for_each_node_state(nid, N_MEMORY) > > > > + seq_buf_printf(&s, " N%d=%llu", nid, > > > > + (u64)memcg_node_page_state(memcg, nid, > > > > + NR_ANON_MAPPED) * > > > > + PAGE_SIZE); > > > > +#endif > > > > + seq_buf_putc(&s, '\n'); > > > > + > > > > + seq_buf_printf(&s, "file %llu", > > > > (u64)memcg_page_state(memcg, NR_FILE_PAGES) * > > > > PAGE_SIZE); > > > > +#ifdef CONFIG_NUMA > > > > + for_each_node_state(nid, N_MEMORY) > > > > + seq_buf_printf(&s, " N%d=%llu", nid, > > > > + (u64)memcg_node_page_state(memcg, nid, > > > > + NR_FILE_PAGES) * > > > > + PAGE_SIZE); > > > > +#endif > > > > + seq_buf_putc(&s, '\n'); > > > > + > > > > > > The v1's numa_stat exposes the LRUs, why NR_ANON_MAPPED and NR_FILE_PAGES? > > > > If we want to expose the anon per node, we need to add inactive anon and > > active anon together. Why not use NR_ANON_MAPPED directly? > > > > Active anon plus inactive anon is not equal to NR_ANON_MAPPED. The > shmem related memory is on anon LRUs but not accounted in > NR_ANON_MAPPED. 
> > Similarly file LRU can contain MADV_FREE pages which are not accounted > in NR_FILE_PAGES. I got it, thanks. Because the "state" interface exposes the anon and file information. So I think that we also should expose the anon and file for "numa_stat" per node instead of the lru statistics. Maybe it is better that we expose both of all the information. > > > > > > > Also I think exposing slab_[un]reclaimable per node would be beneficial as well. > > > > Yeah, I agree with you. Maybe kernel_stack and percpu also should > > be exposed. > > > > > > > > > seq_buf_printf(&s, "kernel_stack %llu\n", > > > > (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) * > > > > 1024); > > > > -- > > > > 2.20.1 > > > > > > > > > > > > -- > > Yours, > > Muchun
On Fri, Sep 11, 2020 at 8:48 AM Muchun Song <songmuchun@bytedance.com> wrote: > [snip] > > I got it, thanks. Because the "state" interface exposes the anon and > file information. So I think that we also should expose the anon and > file for "numa_stat" per node instead of the lru statistics. Maybe it is > better that we expose both of all the information. > Sure, go ahead and please do update the doc file as well in the next version.
On Fri, Sep 11, 2020 at 11:51:42AM +0800, Muchun Song wrote: > On Fri, Sep 11, 2020 at 12:02 AM Shakeel Butt <shakeelb@google.com> wrote: > > > > On Thu, Sep 10, 2020 at 1:46 AM Muchun Song <songmuchun@bytedance.com> wrote: > > > > > > In the cgroup v1, we have a numa_stat interface. This is useful for > > > providing visibility into the numa locality information within an > > > memcg since the pages are allowed to be allocated from any physical > > > node. One of the use cases is evaluating application performance by > > > combining this information with the application's CPU allocation. > > > But the cgroup v2 does not. So this patch adds the missing information. > > > > > > Signed-off-by: Muchun Song <songmuchun@bytedance.com> > > > --- > > > > I am actually working on exposing this info on v2 as well. > > > > > mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- > > > 1 file changed, 44 insertions(+), 2 deletions(-) > > > > > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > > > index 75cd1a1e66c8..c779673f29b2 100644 > > > --- a/mm/memcontrol.c > > > +++ b/mm/memcontrol.c > > > @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg) > > > return false; > > > } > > > > > > +#ifdef CONFIG_NUMA > > > +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg, > > > + unsigned int nid, > > > + enum node_stat_item idx) > > > +{ > > > + long x; > > > + struct mem_cgroup_per_node *pn; > > > + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); > > > + > > > + VM_BUG_ON(nid >= nr_node_ids); > > > + > > > + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); > > > + x = atomic_long_read(&pn->lruvec_stat[idx]); > > > +#ifdef CONFIG_SMP > > > + if (x < 0) > > > + x = 0; > > > +#endif > > > + return x; > > > +} > > > +#endif > > > + > > > static char *memory_stat_format(struct mem_cgroup *memcg) > > > { > > > struct seq_buf s; > > > int i; > > > +#ifdef CONFIG_NUMA > > > + int nid; > > > 
+#endif > > > > > > seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE); > > > if (!s.buffer) > > > @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg) > > > * Current memory state: > > > */ > > > > > > > Let's not break the parsers of memory.stat. I would prefer a separate > > interface like v1 i.e. memory.numa_stat. > > It is also a good idea to expose a new interface like memory.numa_stat. > > > > > > - seq_buf_printf(&s, "anon %llu\n", > > > + seq_buf_printf(&s, "anon %llu", > > > (u64)memcg_page_state(memcg, NR_ANON_MAPPED) * > > > PAGE_SIZE); > > > - seq_buf_printf(&s, "file %llu\n", > > > +#ifdef CONFIG_NUMA > > > + for_each_node_state(nid, N_MEMORY) > > > + seq_buf_printf(&s, " N%d=%llu", nid, > > > + (u64)memcg_node_page_state(memcg, nid, > > > + NR_ANON_MAPPED) * > > > + PAGE_SIZE); > > > +#endif > > > + seq_buf_putc(&s, '\n'); > > > + > > > + seq_buf_printf(&s, "file %llu", > > > (u64)memcg_page_state(memcg, NR_FILE_PAGES) * > > > PAGE_SIZE); > > > +#ifdef CONFIG_NUMA > > > + for_each_node_state(nid, N_MEMORY) > > > + seq_buf_printf(&s, " N%d=%llu", nid, > > > + (u64)memcg_node_page_state(memcg, nid, > > > + NR_FILE_PAGES) * > > > + PAGE_SIZE); > > > +#endif > > > + seq_buf_putc(&s, '\n'); > > > + > > > > The v1's numa_stat exposes the LRUs, why NR_ANON_MAPPED and NR_FILE_PAGES? > > If we want to expose the anon per node, we need to add inactive anon and > active anon together. Why not use NR_ANON_MAPPED directly? > > > > > Also I think exposing slab_[un]reclaimable per node would be beneficial as well. > > Yeah, I agree with you. Maybe kernel_stack and percpu also should > be exposed. Percpu allocations are usually spread over multiple pages and numa nodes, so there are no per-node percpu counters. Thanks! > > > > > > seq_buf_printf(&s, "kernel_stack %llu\n", > > > (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) * > > > 1024); > > > -- > > > 2.20.1 > > > > > > > -- > Yours, > Muchun
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 75cd1a1e66c8..c779673f29b2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1492,10 +1492,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg) return false; } +#ifdef CONFIG_NUMA +static unsigned long memcg_node_page_state(struct mem_cgroup *memcg, + unsigned int nid, + enum node_stat_item idx) +{ + long x; + struct mem_cgroup_per_node *pn; + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); + + VM_BUG_ON(nid >= nr_node_ids); + + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + x = atomic_long_read(&pn->lruvec_stat[idx]); +#ifdef CONFIG_SMP + if (x < 0) + x = 0; +#endif + return x; +} +#endif + static char *memory_stat_format(struct mem_cgroup *memcg) { struct seq_buf s; int i; +#ifdef CONFIG_NUMA + int nid; +#endif seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE); if (!s.buffer) @@ -1512,12 +1536,30 @@ static char *memory_stat_format(struct mem_cgroup *memcg) * Current memory state: */ - seq_buf_printf(&s, "anon %llu\n", + seq_buf_printf(&s, "anon %llu", (u64)memcg_page_state(memcg, NR_ANON_MAPPED) * PAGE_SIZE); - seq_buf_printf(&s, "file %llu\n", +#ifdef CONFIG_NUMA + for_each_node_state(nid, N_MEMORY) + seq_buf_printf(&s, " N%d=%llu", nid, + (u64)memcg_node_page_state(memcg, nid, + NR_ANON_MAPPED) * + PAGE_SIZE); +#endif + seq_buf_putc(&s, '\n'); + + seq_buf_printf(&s, "file %llu", (u64)memcg_page_state(memcg, NR_FILE_PAGES) * PAGE_SIZE); +#ifdef CONFIG_NUMA + for_each_node_state(nid, N_MEMORY) + seq_buf_printf(&s, " N%d=%llu", nid, + (u64)memcg_node_page_state(memcg, nid, + NR_FILE_PAGES) * + PAGE_SIZE); +#endif + seq_buf_putc(&s, '\n'); + seq_buf_printf(&s, "kernel_stack %llu\n", (u64)memcg_page_state(memcg, NR_KERNEL_STACK_KB) * 1024);
In the cgroup v1, we have a numa_stat interface. This is useful for providing visibility into the numa locality information within a memcg since the pages are allowed to be allocated from any physical node. One of the use cases is evaluating application performance by combining this information with the application's CPU allocation. But the cgroup v2 does not. So this patch adds the missing information. Signed-off-by: Muchun Song <songmuchun@bytedance.com> --- mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-)