[v1,3/5] mm: memcg/percpu: per-memcg percpu memory statistics
diff mbox series

Message ID 20200528232508.1132382-4-guro@fb.com
State New
Headers show
Series
  • mm: memcg accounting of percpu memory
Related show

Commit Message

Roman Gushchin May 28, 2020, 11:25 p.m. UTC
Percpu memory can represent a noticeable chunk of the total
memory consumption, especially on big machines with many CPUs.
Let's track percpu memory usage for each memcg and display
it in memory.stat.

A percpu allocation is usually scattered over multiple pages
(and nodes), and can be significantly smaller than a page.
So let's add a byte-sized counter on the memcg level:
MEMCG_PERCPU_B. The byte-sized vmstat infrastructure created for
slabs can be reused as-is for the percpu case.

Signed-off-by: Roman Gushchin <guro@fb.com>
---
 Documentation/admin-guide/cgroup-v2.rst |  4 ++++
 include/linux/memcontrol.h              |  8 ++++++++
 mm/memcontrol.c                         |  4 +++-
 mm/percpu.c                             | 10 ++++++++++
 4 files changed, 25 insertions(+), 1 deletion(-)

Comments

Dennis Zhou June 5, 2020, 7:53 p.m. UTC | #1
On Thu, May 28, 2020 at 04:25:06PM -0700, Roman Gushchin wrote:
> Percpu memory can represent a noticeable chunk of the total
> memory consumption, especially on big machines with many CPUs.
> Let's track percpu memory usage for each memcg and display
> it in memory.stat.
> 
> A percpu allocation is usually scattered over multiple pages
> (and nodes), and can be significantly smaller than a page.
> So let's add a byte-sized counter on the memcg level:
> MEMCG_PERCPU_B. The byte-sized vmstat infrastructure created for
> slabs can be reused as-is for the percpu case.
> 
> Signed-off-by: Roman Gushchin <guro@fb.com>
> ---
>  Documentation/admin-guide/cgroup-v2.rst |  4 ++++
>  include/linux/memcontrol.h              |  8 ++++++++
>  mm/memcontrol.c                         |  4 +++-
>  mm/percpu.c                             | 10 ++++++++++
>  4 files changed, 25 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
> index fed4e1d2a343..aa8cb6dadadc 100644
> --- a/Documentation/admin-guide/cgroup-v2.rst
> +++ b/Documentation/admin-guide/cgroup-v2.rst
> @@ -1276,6 +1276,10 @@ PAGE_SIZE multiple when read back.
>  		Amount of memory used for storing in-kernel data
>  		structures.
>  
> +	  percpu
> +		Amount of memory used for storing per-cpu kernel
> +		data structures.
> +
>  	  sock
>  		Amount of memory used in network transmission buffers
>  
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 7a84d9164449..f62a95d472f7 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -32,11 +32,19 @@ struct kmem_cache;
>  enum memcg_stat_item {
>  	MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
>  	MEMCG_SOCK,
> +	MEMCG_PERCPU_B,
>  	/* XXX: why are these zone and not node counters? */
>  	MEMCG_KERNEL_STACK_KB,
>  	MEMCG_NR_STAT,
>  };
>  
> +static __always_inline bool memcg_stat_item_in_bytes(enum memcg_stat_item item)
> +{
> +	if (item == MEMCG_PERCPU_B)
> +		return true;
> +	return vmstat_item_in_bytes(item);
> +}
> +
>  enum memcg_memory_event {
>  	MEMCG_LOW,
>  	MEMCG_HIGH,
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 7bc3fd196210..5007d1585a4a 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -783,7 +783,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
>  	if (mem_cgroup_disabled())
>  		return;
>  
> -	if (vmstat_item_in_bytes(idx))
> +	if (memcg_stat_item_in_bytes(idx))
>  		threshold <<= PAGE_SHIFT;
>  
>  	x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]);
> @@ -1490,6 +1490,8 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
>  	seq_buf_printf(&s, "slab %llu\n",
>  		       (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) +
>  			     memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE_B)));
> +	seq_buf_printf(&s, "percpu %llu\n",
> +		       (u64)memcg_page_state(memcg, MEMCG_PERCPU_B));
>  	seq_buf_printf(&s, "sock %llu\n",
>  		       (u64)memcg_page_state(memcg, MEMCG_SOCK) *
>  		       PAGE_SIZE);
> diff --git a/mm/percpu.c b/mm/percpu.c
> index 85f5755c9114..b4b3e9c8a6d1 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -1608,6 +1608,11 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
>  
>  	if (chunk) {
>  		chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg;
> +
> +		rcu_read_lock();
> +		mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
> +				size * num_possible_cpus());
> +		rcu_read_unlock();
>  	} else {
>  		obj_cgroup_uncharge(objcg, size * num_possible_cpus());
>  		obj_cgroup_put(objcg);
> @@ -1626,6 +1631,11 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
>  
>  	obj_cgroup_uncharge(objcg, size * num_possible_cpus());
>  
> +	rcu_read_lock();
> +	mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
> +			-(size * num_possible_cpus()));
> +	rcu_read_unlock();
> +
>  	obj_cgroup_put(objcg);
>  }
>  
> -- 
> 2.25.4
> 

Acked-by: Dennis Zhou <dennis@kernel.org>

Thanks,
Dennis

Patch
diff mbox series

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index fed4e1d2a343..aa8cb6dadadc 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1276,6 +1276,10 @@  PAGE_SIZE multiple when read back.
 		Amount of memory used for storing in-kernel data
 		structures.
 
+	  percpu
+		Amount of memory used for storing per-cpu kernel
+		data structures.
+
 	  sock
 		Amount of memory used in network transmission buffers
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 7a84d9164449..f62a95d472f7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -32,11 +32,19 @@  struct kmem_cache;
 enum memcg_stat_item {
 	MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
 	MEMCG_SOCK,
+	MEMCG_PERCPU_B,
 	/* XXX: why are these zone and not node counters? */
 	MEMCG_KERNEL_STACK_KB,
 	MEMCG_NR_STAT,
 };
 
+static __always_inline bool memcg_stat_item_in_bytes(enum memcg_stat_item item)
+{
+	if (item == MEMCG_PERCPU_B)
+		return true;
+	return vmstat_item_in_bytes(item);
+}
+
 enum memcg_memory_event {
 	MEMCG_LOW,
 	MEMCG_HIGH,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7bc3fd196210..5007d1585a4a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -783,7 +783,7 @@  void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
 	if (mem_cgroup_disabled())
 		return;
 
-	if (vmstat_item_in_bytes(idx))
+	if (memcg_stat_item_in_bytes(idx))
 		threshold <<= PAGE_SHIFT;
 
 	x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]);
@@ -1490,6 +1490,8 @@  static char *memory_stat_format(struct mem_cgroup *memcg)
 	seq_buf_printf(&s, "slab %llu\n",
 		       (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) +
 			     memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE_B)));
+	seq_buf_printf(&s, "percpu %llu\n",
+		       (u64)memcg_page_state(memcg, MEMCG_PERCPU_B));
 	seq_buf_printf(&s, "sock %llu\n",
 		       (u64)memcg_page_state(memcg, MEMCG_SOCK) *
 		       PAGE_SIZE);
diff --git a/mm/percpu.c b/mm/percpu.c
index 85f5755c9114..b4b3e9c8a6d1 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1608,6 +1608,11 @@  static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
 
 	if (chunk) {
 		chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg;
+
+		rcu_read_lock();
+		mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
+				size * num_possible_cpus());
+		rcu_read_unlock();
 	} else {
 		obj_cgroup_uncharge(objcg, size * num_possible_cpus());
 		obj_cgroup_put(objcg);
@@ -1626,6 +1631,11 @@  static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
 
 	obj_cgroup_uncharge(objcg, size * num_possible_cpus());
 
+	rcu_read_lock();
+	mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
+			-(size * num_possible_cpus()));
+	rcu_read_unlock();
+
 	obj_cgroup_put(objcg);
 }