mm, memcg: Report number of memcg caches in slabinfo

Message ID 20190617142149.5245-1-longman@redhat.com (mailing list archive)
State New, archived
Series mm, memcg: Report number of memcg caches in slabinfo

Commit Message

Waiman Long June 17, 2019, 2:21 p.m. UTC
There are concerns about memory leaks from extensive use of memory
cgroups as each memory cgroup creates its own set of kmem caches. There
is a possibility that the memcg kmem caches may remain even after the
memory cgroup removal.

Therefore, it will be useful to show how many memcg caches are present
for each of the kmem caches. As slabinfo reporting code has to iterate
through all the memcg caches to get the final numbers anyway, there is
no additional cost in reporting the number of memcg caches available.

The slabinfo version is bumped up to 2.2 as a new "<num_caches>" column
is added at the end.

Signed-off-by: Waiman Long <longman@redhat.com>
---
 mm/slab_common.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)
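
For illustration, a /proc/slabinfo entry in the proposed 2.2 format could
look like the following, where the cache name and all numbers are invented
and the last slabdata field is the new <num_caches> count:

  # name            <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab> : tunables <limit> <batchcount> <sharedfactor> : slabdata <active_slabs> <num_slabs> <sharedavail> <num_caches>
  dentry            125440 125440    192   21    1 : tunables    0    0    0 : slabdata   5973   5973      0  48

With that layout (and without the CONFIG_DEBUG_SLAB statistics appended), a
quick way to spot root caches with many memcg children might be something
like:

  awk 'NR > 2 { print $1, $17 }' /proc/slabinfo | sort -k2 -nr | head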

Comments

Michal Hocko June 17, 2019, 2:38 p.m. UTC | #1
[Cc linux-api]

On Mon 17-06-19 10:21:49, Waiman Long wrote:
> There are concerns about memory leaks from extensive use of memory
> cgroups as each memory cgroup creates its own set of kmem caches. There
> is a possiblity that the memcg kmem caches may remain even after the
> memory cgroup removal.
> 
> Therefore, it will be useful to show how many memcg caches are present
> for each of the kmem caches.

How is a user going to use that information?  Btw. Don't we have an
interface to display the number of (dead) cgroups?

Keeping the rest of the email for reference.

> As slabinfo reporting code has to iterate
> through all the memcg caches to get the final numbers anyway, there is
> no additional cost in reporting the number of memcg caches available.
> 
> The slabinfo version is bumped up to 2.2 as a new "<num_caches>" column
> is added at the end.
> 
> Signed-off-by: Waiman Long <longman@redhat.com>
> ---
>  mm/slab_common.c | 24 ++++++++++++++++--------
>  1 file changed, 16 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 58251ba63e4a..c7aa47a99b2b 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -1308,13 +1308,13 @@ static void print_slabinfo_header(struct seq_file *m)
>  	 * without _too_ many complaints.
>  	 */
>  #ifdef CONFIG_DEBUG_SLAB
> -	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
> +	seq_puts(m, "slabinfo - version: 2.2 (statistics)\n");
>  #else
> -	seq_puts(m, "slabinfo - version: 2.1\n");
> +	seq_puts(m, "slabinfo - version: 2.2\n");
>  #endif
>  	seq_puts(m, "# name            <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
>  	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
> -	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
> +	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail> <num_caches>");
>  #ifdef CONFIG_DEBUG_SLAB
>  	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
>  	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
> @@ -1338,14 +1338,18 @@ void slab_stop(struct seq_file *m, void *p)
>  	mutex_unlock(&slab_mutex);
>  }
>  
> -static void
> +/*
> + * Return number of memcg caches.
> + */
> +static unsigned int
>  memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
>  {
>  	struct kmem_cache *c;
>  	struct slabinfo sinfo;
> +	unsigned int cnt = 0;
>  
>  	if (!is_root_cache(s))
> -		return;
> +		return 0;
>  
>  	for_each_memcg_cache(c, s) {
>  		memset(&sinfo, 0, sizeof(sinfo));
> @@ -1356,17 +1360,20 @@ memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
>  		info->shared_avail += sinfo.shared_avail;
>  		info->active_objs += sinfo.active_objs;
>  		info->num_objs += sinfo.num_objs;
> +		cnt++;
>  	}
> +	return cnt;
>  }
>  
>  static void cache_show(struct kmem_cache *s, struct seq_file *m)
>  {
>  	struct slabinfo sinfo;
> +	unsigned int nr_memcg_caches;
>  
>  	memset(&sinfo, 0, sizeof(sinfo));
>  	get_slabinfo(s, &sinfo);
>  
> -	memcg_accumulate_slabinfo(s, &sinfo);
> +	nr_memcg_caches = memcg_accumulate_slabinfo(s, &sinfo);
>  
>  	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
>  		   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
> @@ -1374,8 +1381,9 @@ static void cache_show(struct kmem_cache *s, struct seq_file *m)
>  
>  	seq_printf(m, " : tunables %4u %4u %4u",
>  		   sinfo.limit, sinfo.batchcount, sinfo.shared);
> -	seq_printf(m, " : slabdata %6lu %6lu %6lu",
> -		   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
> +	seq_printf(m, " : slabdata %6lu %6lu %6lu %3u",
> +		   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail,
> +		   nr_memcg_caches);
>  	slabinfo_show_stats(m, s);
>  	seq_putc(m, '\n');
>  }
> -- 
> 2.18.1
Waiman Long June 17, 2019, 2:50 p.m. UTC | #2
On 6/17/19 10:38 AM, Michal Hocko wrote:
> [Cc linux-api]
>
> On Mon 17-06-19 10:21:49, Waiman Long wrote:
>> There are concerns about memory leaks from extensive use of memory
>> cgroups as each memory cgroup creates its own set of kmem caches. There
>> is a possiblity that the memcg kmem caches may remain even after the
>> memory cgroup removal.
>>
>> Therefore, it will be useful to show how many memcg caches are present
>> for each of the kmem caches.
> How is a user going to use that information?  Btw. Don't we have an
> interface to display the number of (dead) cgroups?

The interface to report dead cgroups is for cgroup v2 (cgroup.stat)
only. I don't think there is a way to find that for cgroup v1. Also, the
number of memcg kmem caches may not be the same as the number of
memcgs. It can range from zero to more than the number of memcgs, so it
is an interesting number by itself.
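
For reference, the cgroup v2 file mentioned above looks roughly like this
(the path and values here are illustrative):

  $ cat /sys/fs/cgroup/system.slice/cgroup.stat
  nr_descendants 52
  nr_dying_descendants 417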

From the user's perspective, if the number is way above the number of
memcgs, there is probably something wrong there.

Cheers,
Longman
Michal Hocko June 18, 2019, 12:37 p.m. UTC | #3
On Mon 17-06-19 10:50:23, Waiman Long wrote:
> On 6/17/19 10:38 AM, Michal Hocko wrote:
> > [Cc linux-api]
> >
> > On Mon 17-06-19 10:21:49, Waiman Long wrote:
> >> There are concerns about memory leaks from extensive use of memory
> >> cgroups as each memory cgroup creates its own set of kmem caches. There
> >> is a possiblity that the memcg kmem caches may remain even after the
> >> memory cgroup removal.
> >>
> >> Therefore, it will be useful to show how many memcg caches are present
> >> for each of the kmem caches.
> > How is a user going to use that information?  Btw. Don't we have an
> > interface to display the number of (dead) cgroups?
> 
> The interface to report dead cgroups is for cgroup v2 (cgroup.stat)
> only. I don't think there is a way to find that for cgroup v1.

Doesn't debug_legacy_files provide the information for both cgroup
APIs?

> Also the
> number of memcg kmem caches may not be the same as the number of
> memcg's. It can range from 0 to above the number of memcg's.  So it is
> an interesting number by itself.

Is this useful enough to put into slabinfo? Doesn't this sound more like
a debugfs kinda thing?

> From the user perspective, if the numbers is way above the number of
> memcg's, there is probably something wrong there.
Waiman Long June 18, 2019, 4:59 p.m. UTC | #4
On 6/18/19 8:37 AM, Michal Hocko wrote:
> On Mon 17-06-19 10:50:23, Waiman Long wrote:
>> On 6/17/19 10:38 AM, Michal Hocko wrote:
>>> [Cc linux-api]
>>>
>>> On Mon 17-06-19 10:21:49, Waiman Long wrote:
>>>> There are concerns about memory leaks from extensive use of memory
>>>> cgroups as each memory cgroup creates its own set of kmem caches. There
>>>> is a possiblity that the memcg kmem caches may remain even after the
>>>> memory cgroup removal.
>>>>
>>>> Therefore, it will be useful to show how many memcg caches are present
>>>> for each of the kmem caches.
>>> How is a user going to use that information?  Btw. Don't we have an
>>> interface to display the number of (dead) cgroups?
>> The interface to report dead cgroups is for cgroup v2 (cgroup.stat)
>> only. I don't think there is a way to find that for cgroup v1.
> Doesn't debug_legacy_files provide the information for both cgroups
> APIs?

Not really. The debug controller doesn't provide information about the
number of dead cgroups, for instance. Of course, we can always add that
information there. Also, the debug controller is typically not configured
into a production kernel.


>> Also the
>> number of memcg kmem caches may not be the same as the number of
>> memcg's. It can range from 0 to above the number of memcg's.  So it is
>> an interesting number by itself.
> Is this useful enough to put into slabinfo? Doesn't this sound more like
> a debugfs kinda a thing?

I guess it is probably more on the debug side of things. I added it to
slabinfo because the data is readily available there. It will be much more
work if we need to export the data via debugfs.

We are seeing the kmem_cache slab growing continuously over time when
running container-based workloads. Roman's kmem_cache reparenting
patch will hopefully solve a major part of the problem, but we still
need a way to confirm that by looking at how many memcg kmem_caches are
associated with each root kmem_cache.
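
As a rough way to watch that growth, one could periodically sample the
kmem_cache line of slabinfo, for example (illustrative only, needs root):

  # watch -n 60 "grep '^kmem_cache ' /proc/slabinfo"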

Cheers,
Longman
Michal Hocko June 18, 2019, 6:32 p.m. UTC | #5
On Tue 18-06-19 12:59:24, Waiman Long wrote:
> On 6/18/19 8:37 AM, Michal Hocko wrote:
[...]
> > Is this useful enough to put into slabinfo? Doesn't this sound more like
> > a debugfs kinda a thing?
> 
> I guess it is probably more on the debug side of things. I add it to
> slabinfo as the data is readily available. It will be much more work if
> we need to export the data via debugfs.
> 
> We are seeing the kmem_cache slab growing continuously overtime when
> running a container-based workloads. Roman's kmem_cache reparenting
> patch will hopefully solve a major part of the problem, but we still
> need a way to confirm that by looking at how many memcg kmem_caches are
> associated with each root kmem_cache.

I am not disputing usefulness. Dead memcgs have been showing up as a
problem for a long time, and having more debugging information is
definitely useful. I am just not really sure that /proc/slabinfo is the
proper vehicle for that information. It might be easier to just stick it
there, but that is not the best justification for adding something we will
have to maintain forever. Not to mention that the number of dead memcgs
might not be enough to debug further, when we can easily end up needing
to provide more via a less "carved in stone" kind of interface like
debugfs.
Waiman Long June 18, 2019, 7:27 p.m. UTC | #6
On 6/18/19 2:32 PM, Michal Hocko wrote:
> On Tue 18-06-19 12:59:24, Waiman Long wrote:
>> On 6/18/19 8:37 AM, Michal Hocko wrote:
> [...]
>>> Is this useful enough to put into slabinfo? Doesn't this sound more like
>>> a debugfs kinda a thing?
>> I guess it is probably more on the debug side of things. I add it to
>> slabinfo as the data is readily available. It will be much more work if
>> we need to export the data via debugfs.
>>
>> We are seeing the kmem_cache slab growing continuously overtime when
>> running a container-based workloads. Roman's kmem_cache reparenting
>> patch will hopefully solve a major part of the problem, but we still
>> need a way to confirm that by looking at how many memcg kmem_caches are
>> associated with each root kmem_cache.
> I am not disputing usefulness. Dead memcgs are showing up as a problem
> for a longer time and having a more debugging information is definitely
> useful. I am just not really sure that /proc/slabinfo is the proper
> vehicle for that information. It might be just easier to stick it there
> but that is not the best justification for adding something we will have
> to maintain for ever. Not to mention that the number of dead memcgs
> might not be enough to debug further when we can easily end up needing
> to provide more in something less "carved in stone" kinda interface like
> debugfs.
>
Fair enough.

I will rework the patch and expose the information via debugfs then.
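
A minimal sketch of what such a debugfs interface might look like is below.
This is not the reworked patch: the file name and function names are made
up, and it reuses mm-internal helpers (slab_mutex, slab_caches,
is_root_cache(), for_each_memcg_cache(), cache_name()), so it assumes the
code sits somewhere like mm/slab_common.c:

  /* Hypothetical sketch only -- not the actual follow-up patch. */
  #include <linux/debugfs.h>
  #include <linux/init.h>
  #include <linux/seq_file.h>
  #include <linux/slab.h>
  #include "slab.h"	/* slab_mutex, slab_caches, is_root_cache(), ... */

  static int memcg_slab_caches_show(struct seq_file *m, void *unused)
  {
  	struct kmem_cache *s, *c;

  	mutex_lock(&slab_mutex);
  	list_for_each_entry(s, &slab_caches, list) {
  		unsigned int cnt = 0;

  		if (!is_root_cache(s))
  			continue;

  		/* Count the memcg child caches hanging off this root cache. */
  		for_each_memcg_cache(c, s)
  			cnt++;

  		seq_printf(m, "%-17s %u\n", cache_name(s), cnt);
  	}
  	mutex_unlock(&slab_mutex);
  	return 0;
  }
  DEFINE_SHOW_ATTRIBUTE(memcg_slab_caches);

  static int __init memcg_slab_caches_init(void)
  {
  	debugfs_create_file("memcg_slab_caches", 0444, NULL, NULL,
  			    &memcg_slab_caches_fops);
  	return 0;
  }
  late_initcall(memcg_slab_caches_init);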

Cheers,
Longman

Patch

diff --git a/mm/slab_common.c b/mm/slab_common.c
index 58251ba63e4a..c7aa47a99b2b 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1308,13 +1308,13 @@ static void print_slabinfo_header(struct seq_file *m)
 	 * without _too_ many complaints.
 	 */
 #ifdef CONFIG_DEBUG_SLAB
-	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
+	seq_puts(m, "slabinfo - version: 2.2 (statistics)\n");
 #else
-	seq_puts(m, "slabinfo - version: 2.1\n");
+	seq_puts(m, "slabinfo - version: 2.2\n");
 #endif
 	seq_puts(m, "# name            <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
 	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
-	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
+	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail> <num_caches>");
 #ifdef CONFIG_DEBUG_SLAB
 	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
 	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
@@ -1338,14 +1338,18 @@ void slab_stop(struct seq_file *m, void *p)
 	mutex_unlock(&slab_mutex);
 }
 
-static void
+/*
+ * Return number of memcg caches.
+ */
+static unsigned int
 memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
 {
 	struct kmem_cache *c;
 	struct slabinfo sinfo;
+	unsigned int cnt = 0;
 
 	if (!is_root_cache(s))
-		return;
+		return 0;
 
 	for_each_memcg_cache(c, s) {
 		memset(&sinfo, 0, sizeof(sinfo));
@@ -1356,17 +1360,20 @@ memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
 		info->shared_avail += sinfo.shared_avail;
 		info->active_objs += sinfo.active_objs;
 		info->num_objs += sinfo.num_objs;
+		cnt++;
 	}
+	return cnt;
 }
 
 static void cache_show(struct kmem_cache *s, struct seq_file *m)
 {
 	struct slabinfo sinfo;
+	unsigned int nr_memcg_caches;
 
 	memset(&sinfo, 0, sizeof(sinfo));
 	get_slabinfo(s, &sinfo);
 
-	memcg_accumulate_slabinfo(s, &sinfo);
+	nr_memcg_caches = memcg_accumulate_slabinfo(s, &sinfo);
 
 	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
 		   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
@@ -1374,8 +1381,9 @@ static void cache_show(struct kmem_cache *s, struct seq_file *m)
 
 	seq_printf(m, " : tunables %4u %4u %4u",
 		   sinfo.limit, sinfo.batchcount, sinfo.shared);
-	seq_printf(m, " : slabdata %6lu %6lu %6lu",
-		   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
+	seq_printf(m, " : slabdata %6lu %6lu %6lu %3u",
+		   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail,
+		   nr_memcg_caches);
 	slabinfo_show_stats(m, s);
 	seq_putc(m, '\n');
 }