diff mbox series

[v3] mm: slub: move sysfs slab alloc/free interfaces to debugfs

Message ID 1617712064-12264-1-git-send-email-faiyazm@codeaurora.org (mailing list archive)
State New, archived
Headers show
Series [v3] mm: slub: move sysfs slab alloc/free interfaces to debugfs | expand

Commit Message

Faiyaz Mohammed April 6, 2021, 12:27 p.m. UTC
alloc_calls and free_calls implementation in sysfs have two issues,
one is the PAGE_SIZE limitation of sysfs and the other is that it does
not adhere to the "one value per file" rule.

To overcome these issues, move the alloc_calls and free_calls
implementation to debugfs.

Signed-off-by: Faiyaz Mohammed <faiyazm@codeaurora.org>
---
 mm/slub.c | 518 ++++++++++++++++++++++++++++++++++----------------------------
 1 file changed, 286 insertions(+), 232 deletions(-)

Comments

kernel test robot April 6, 2021, 3:03 p.m. UTC | #1
Hi Faiyaz,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v5.12-rc6]
[cannot apply to hnaz-linux-mm/master next-20210406]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Faiyaz-Mohammed/mm-slub-move-sysfs-slab-alloc-free-interfaces-to-debugfs/20210406-202954
base:    e49d033bddf5b565044e2abe4241353959bc9120
config: mips-ath25_defconfig (attached as .config)
compiler: mips-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/00c0bfb05271bdd3e80f4e9c62da9ce5287b8f93
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Faiyaz-Mohammed/mm-slub-move-sysfs-slab-alloc-free-interfaces-to-debugfs/20210406-202954
        git checkout 00c0bfb05271bdd3e80f4e9c62da9ce5287b8f93
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=mips 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   mips-linux-ld: mm/slub.o: in function `__kmem_cache_create':
>> slub.c:(.text+0x2c78): undefined reference to `debugfs_slab_add'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot April 6, 2021, 4:26 p.m. UTC | #2
Hi Faiyaz,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v5.12-rc6]
[cannot apply to hnaz-linux-mm/master next-20210406]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Faiyaz-Mohammed/mm-slub-move-sysfs-slab-alloc-free-interfaces-to-debugfs/20210406-202954
base:    e49d033bddf5b565044e2abe4241353959bc9120
config: sh-randconfig-c024-20210406 (attached as .config)
compiler: sh4-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/00c0bfb05271bdd3e80f4e9c62da9ce5287b8f93
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Faiyaz-Mohammed/mm-slub-move-sysfs-slab-alloc-free-interfaces-to-debugfs/20210406-202954
        git checkout 00c0bfb05271bdd3e80f4e9c62da9ce5287b8f93
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=sh 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from include/linux/printk.h:6,
                    from include/linux/kernel.h:16,
                    from include/asm-generic/bug.h:20,
                    from arch/sh/include/asm/bug.h:112,
                    from include/linux/bug.h:5,
                    from include/linux/mmdebug.h:5,
                    from include/linux/mm.h:9,
                    from mm/slub.c:13:
>> mm/slub.c:5869:12: error: initialization of 'initcall_t' {aka 'int (*)(void)'} from incompatible pointer type 'void (*)(void)' [-Werror=incompatible-pointer-types]
    5869 | __initcall(slab_debugfs_init);
         |            ^~~~~~~~~~~~~~~~~
   include/linux/init.h:249:41: note: in definition of macro '____define_initcall'
     249 |   __attribute__((__section__(__sec))) = fn;
         |                                         ^~
   include/linux/init.h:259:2: note: in expansion of macro '__unique_initcall'
     259 |  __unique_initcall(fn, id, __sec, __initcall_id(fn))
         |  ^~~~~~~~~~~~~~~~~
   include/linux/init.h:261:35: note: in expansion of macro '___define_initcall'
     261 | #define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id)
         |                                   ^~~~~~~~~~~~~~~~~~
   include/linux/init.h:290:30: note: in expansion of macro '__define_initcall'
     290 | #define device_initcall(fn)  __define_initcall(fn, 6)
         |                              ^~~~~~~~~~~~~~~~~
   include/linux/init.h:295:24: note: in expansion of macro 'device_initcall'
     295 | #define __initcall(fn) device_initcall(fn)
         |                        ^~~~~~~~~~~~~~~
   mm/slub.c:5869:1: note: in expansion of macro '__initcall'
    5869 | __initcall(slab_debugfs_init);
         | ^~~~~~~~~~
   cc1: some warnings being treated as errors


vim +5869 mm/slub.c

  5856	
  5857	static void __init slab_debugfs_init(void)
  5858	{
  5859		struct kmem_cache *s;
  5860	
  5861		slab_debugfs_root = debugfs_create_dir("slab", NULL);
  5862		if (!IS_ERR(slab_debugfs_root))
  5863			list_for_each_entry(s, &slab_caches, list)
  5864				debugfs_slab_add(s);
  5865		else
  5866			pr_err("Cannot create slab debugfs.\n");
  5867	
  5868	}
> 5869	__initcall(slab_debugfs_init);
  5870	#endif
  5871	/*
  5872	 * The /proc/slabinfo ABI
  5873	 */
  5874	#ifdef CONFIG_SLUB_DEBUG
  5875	void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
  5876	{
  5877		unsigned long nr_slabs = 0;
  5878		unsigned long nr_objs = 0;
  5879		unsigned long nr_free = 0;
  5880		int node;
  5881		struct kmem_cache_node *n;
  5882	
  5883		for_each_kmem_cache_node(s, node, n) {
  5884			nr_slabs += node_nr_slabs(n);
  5885			nr_objs += node_nr_objs(n);
  5886			nr_free += count_partial(n, count_free);
  5887		}
  5888	
  5889		sinfo->active_objs = nr_objs - nr_free;
  5890		sinfo->num_objs = nr_objs;
  5891		sinfo->active_slabs = nr_slabs;
  5892		sinfo->num_slabs = nr_slabs;
  5893		sinfo->objects_per_slab = oo_objects(s->oo);
  5894		sinfo->cache_order = oo_order(s->oo);
  5895	}
  5896	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Vlastimil Babka April 6, 2021, 5:15 p.m. UTC | #3
On 4/6/21 2:27 PM, Faiyaz Mohammed wrote:
> alloc_calls and free_calls implementation in sysfs have two issues,
> one is the PAGE_SIZE limitation of sysfs and the other is that it does
> not adhere to the "one value per file" rule.
> 
> To overcome these issues, move the alloc_calls and free_calls
> implementation to debugfs.
> 
> Signed-off-by: Faiyaz Mohammed <faiyazm@codeaurora.org>

Good direction, thanks. But I'm afraid we need a bit more:

- I don't see debugfs_remove() (or _recursive) used anywhere. When a cache is
destroyed, do the dirs/files just linger in debugfs referencing removed
kmem_cache objects?
- There's a simple debugfs_create_dir(s->name, ...), for each cache while the
sysfs variant handles merged caches with symlinks etc. For consistency, the
hierarchy should look the same in debugfs as it does in sysfs.

Also could we avoid shifting the tracking code around mm/slub.c by leaving it in
place and adding a nested #ifdef CONFIG_DEBUG_FS where needed?

And possibly we could leave the old files around, but their content would be
something along "Deprecated, use the equivalent under <debugfs>/slab/" ? We'll
see if anyone complains. We can remove them completely after few years.

Thanks!
Vlastimil

> ---
>  mm/slub.c | 518 ++++++++++++++++++++++++++++++++++----------------------------
>  1 file changed, 286 insertions(+), 232 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index 3021ce9..4d20ee0 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -36,6 +36,7 @@
>  #include <linux/memcontrol.h>
>  #include <linux/random.h>
>  
> +#include <linux/debugfs.h>
>  #include <trace/events/kmem.h>
>  
>  #include "internal.h"
> @@ -225,6 +226,12 @@ static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
>  							{ return 0; }
>  #endif
>  
> +#ifdef CONFIG_DEBUG_FS
> +static void debugfs_slab_add(struct kmem_cache *);
> +#else
> +static inline void debugfs_slab_add(struct kmem_cache *) { }
> +#endif
> +
>  static inline void stat(const struct kmem_cache *s, enum stat_item si)
>  {
>  #ifdef CONFIG_SLUB_STATS
> @@ -4542,6 +4549,8 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
>  	if (err)
>  		__kmem_cache_release(s);
>  
> +	debugfs_slab_add(s);
> +
>  	return err;
>  }
>  
> @@ -4682,221 +4691,6 @@ static long validate_slab_cache(struct kmem_cache *s)
>  
>  	return count;
>  }
> -/*
> - * Generate lists of code addresses where slabcache objects are allocated
> - * and freed.
> - */
> -
> -struct location {
> -	unsigned long count;
> -	unsigned long addr;
> -	long long sum_time;
> -	long min_time;
> -	long max_time;
> -	long min_pid;
> -	long max_pid;
> -	DECLARE_BITMAP(cpus, NR_CPUS);
> -	nodemask_t nodes;
> -};
> -
> -struct loc_track {
> -	unsigned long max;
> -	unsigned long count;
> -	struct location *loc;
> -};
> -
> -static void free_loc_track(struct loc_track *t)
> -{
> -	if (t->max)
> -		free_pages((unsigned long)t->loc,
> -			get_order(sizeof(struct location) * t->max));
> -}
> -
> -static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
> -{
> -	struct location *l;
> -	int order;
> -
> -	order = get_order(sizeof(struct location) * max);
> -
> -	l = (void *)__get_free_pages(flags, order);
> -	if (!l)
> -		return 0;
> -
> -	if (t->count) {
> -		memcpy(l, t->loc, sizeof(struct location) * t->count);
> -		free_loc_track(t);
> -	}
> -	t->max = max;
> -	t->loc = l;
> -	return 1;
> -}
> -
> -static int add_location(struct loc_track *t, struct kmem_cache *s,
> -				const struct track *track)
> -{
> -	long start, end, pos;
> -	struct location *l;
> -	unsigned long caddr;
> -	unsigned long age = jiffies - track->when;
> -
> -	start = -1;
> -	end = t->count;
> -
> -	for ( ; ; ) {
> -		pos = start + (end - start + 1) / 2;
> -
> -		/*
> -		 * There is nothing at "end". If we end up there
> -		 * we need to add something to before end.
> -		 */
> -		if (pos == end)
> -			break;
> -
> -		caddr = t->loc[pos].addr;
> -		if (track->addr == caddr) {
> -
> -			l = &t->loc[pos];
> -			l->count++;
> -			if (track->when) {
> -				l->sum_time += age;
> -				if (age < l->min_time)
> -					l->min_time = age;
> -				if (age > l->max_time)
> -					l->max_time = age;
> -
> -				if (track->pid < l->min_pid)
> -					l->min_pid = track->pid;
> -				if (track->pid > l->max_pid)
> -					l->max_pid = track->pid;
> -
> -				cpumask_set_cpu(track->cpu,
> -						to_cpumask(l->cpus));
> -			}
> -			node_set(page_to_nid(virt_to_page(track)), l->nodes);
> -			return 1;
> -		}
> -
> -		if (track->addr < caddr)
> -			end = pos;
> -		else
> -			start = pos;
> -	}
> -
> -	/*
> -	 * Not found. Insert new tracking element.
> -	 */
> -	if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
> -		return 0;
> -
> -	l = t->loc + pos;
> -	if (pos < t->count)
> -		memmove(l + 1, l,
> -			(t->count - pos) * sizeof(struct location));
> -	t->count++;
> -	l->count = 1;
> -	l->addr = track->addr;
> -	l->sum_time = age;
> -	l->min_time = age;
> -	l->max_time = age;
> -	l->min_pid = track->pid;
> -	l->max_pid = track->pid;
> -	cpumask_clear(to_cpumask(l->cpus));
> -	cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
> -	nodes_clear(l->nodes);
> -	node_set(page_to_nid(virt_to_page(track)), l->nodes);
> -	return 1;
> -}
> -
> -static void process_slab(struct loc_track *t, struct kmem_cache *s,
> -		struct page *page, enum track_item alloc)
> -{
> -	void *addr = page_address(page);
> -	void *p;
> -	unsigned long *map;
> -
> -	map = get_map(s, page);
> -	for_each_object(p, s, addr, page->objects)
> -		if (!test_bit(__obj_to_index(s, addr, p), map))
> -			add_location(t, s, get_track(s, p, alloc));
> -	put_map(map);
> -}
> -
> -static int list_locations(struct kmem_cache *s, char *buf,
> -			  enum track_item alloc)
> -{
> -	int len = 0;
> -	unsigned long i;
> -	struct loc_track t = { 0, 0, NULL };
> -	int node;
> -	struct kmem_cache_node *n;
> -
> -	if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
> -			     GFP_KERNEL)) {
> -		return sysfs_emit(buf, "Out of memory\n");
> -	}
> -	/* Push back cpu slabs */
> -	flush_all(s);
> -
> -	for_each_kmem_cache_node(s, node, n) {
> -		unsigned long flags;
> -		struct page *page;
> -
> -		if (!atomic_long_read(&n->nr_slabs))
> -			continue;
> -
> -		spin_lock_irqsave(&n->list_lock, flags);
> -		list_for_each_entry(page, &n->partial, slab_list)
> -			process_slab(&t, s, page, alloc);
> -		list_for_each_entry(page, &n->full, slab_list)
> -			process_slab(&t, s, page, alloc);
> -		spin_unlock_irqrestore(&n->list_lock, flags);
> -	}
> -
> -	for (i = 0; i < t.count; i++) {
> -		struct location *l = &t.loc[i];
> -
> -		len += sysfs_emit_at(buf, len, "%7ld ", l->count);
> -
> -		if (l->addr)
> -			len += sysfs_emit_at(buf, len, "%pS", (void *)l->addr);
> -		else
> -			len += sysfs_emit_at(buf, len, "<not-available>");
> -
> -		if (l->sum_time != l->min_time)
> -			len += sysfs_emit_at(buf, len, " age=%ld/%ld/%ld",
> -					     l->min_time,
> -					     (long)div_u64(l->sum_time,
> -							   l->count),
> -					     l->max_time);
> -		else
> -			len += sysfs_emit_at(buf, len, " age=%ld", l->min_time);
> -
> -		if (l->min_pid != l->max_pid)
> -			len += sysfs_emit_at(buf, len, " pid=%ld-%ld",
> -					     l->min_pid, l->max_pid);
> -		else
> -			len += sysfs_emit_at(buf, len, " pid=%ld",
> -					     l->min_pid);
> -
> -		if (num_online_cpus() > 1 &&
> -		    !cpumask_empty(to_cpumask(l->cpus)))
> -			len += sysfs_emit_at(buf, len, " cpus=%*pbl",
> -					     cpumask_pr_args(to_cpumask(l->cpus)));
> -
> -		if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
> -			len += sysfs_emit_at(buf, len, " nodes=%*pbl",
> -					     nodemask_pr_args(&l->nodes));
> -
> -		len += sysfs_emit_at(buf, len, "\n");
> -	}
> -
> -	free_loc_track(&t);
> -	if (!t.count)
> -		len += sysfs_emit_at(buf, len, "No data\n");
> -
> -	return len;
> -}
>  #endif	/* CONFIG_SLUB_DEBUG */
>  
>  #ifdef SLUB_RESILIENCY_TEST
> @@ -5346,21 +5140,6 @@ static ssize_t validate_store(struct kmem_cache *s,
>  }
>  SLAB_ATTR(validate);
>  
> -static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
> -{
> -	if (!(s->flags & SLAB_STORE_USER))
> -		return -ENOSYS;
> -	return list_locations(s, buf, TRACK_ALLOC);
> -}
> -SLAB_ATTR_RO(alloc_calls);
> -
> -static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
> -{
> -	if (!(s->flags & SLAB_STORE_USER))
> -		return -ENOSYS;
> -	return list_locations(s, buf, TRACK_FREE);
> -}
> -SLAB_ATTR_RO(free_calls);
>  #endif /* CONFIG_SLUB_DEBUG */
>  
>  #ifdef CONFIG_FAILSLAB
> @@ -5524,8 +5303,6 @@ static struct attribute *slab_attrs[] = {
>  	&poison_attr.attr,
>  	&store_user_attr.attr,
>  	&validate_attr.attr,
> -	&alloc_calls_attr.attr,
> -	&free_calls_attr.attr,
>  #endif
>  #ifdef CONFIG_ZONE_DMA
>  	&cache_dma_attr.attr,
> @@ -5814,6 +5591,283 @@ static int __init slab_sysfs_init(void)
>  __initcall(slab_sysfs_init);
>  #endif /* CONFIG_SYSFS */
>  
> +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
> +/*
> + * Generate lists of code addresses where slabcache objects are allocated
> + * and freed.
> + */
> +
> +struct location {
> +	unsigned long count;
> +	unsigned long addr;
> +	long long sum_time;
> +	long min_time;
> +	long max_time;
> +	long min_pid;
> +	long max_pid;
> +	DECLARE_BITMAP(cpus, NR_CPUS);
> +	nodemask_t nodes;
> +};
> +
> +struct loc_track {
> +	unsigned long max;
> +	unsigned long count;
> +	struct location *loc;
> +};
> +
> +static struct dentry *slab_debugfs_root;
> +
> +static struct dentry *slab_debugfs_cache;
> +
> +static void free_loc_track(struct loc_track *t)
> +{
> +	if (t->max)
> +		free_pages((unsigned long)t->loc,
> +			get_order(sizeof(struct location) * t->max));
> +}
> +
> +static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
> +{
> +	struct location *l;
> +	int order;
> +
> +	order = get_order(sizeof(struct location) * max);
> +
> +	l = (void *)__get_free_pages(flags, order);
> +	if (!l)
> +		return 0;
> +
> +	if (t->count) {
> +		memcpy(l, t->loc, sizeof(struct location) * t->count);
> +		free_loc_track(t);
> +	}
> +	t->max = max;
> +	t->loc = l;
> +	return 1;
> +}
> +
> +static int add_location(struct loc_track *t, struct kmem_cache *s,
> +				const struct track *track)
> +{
> +	long start, end, pos;
> +	struct location *l;
> +	unsigned long caddr;
> +	unsigned long age = jiffies - track->when;
> +
> +	start = -1;
> +	end = t->count;
> +
> +	for ( ; ; ) {
> +		pos = start + (end - start + 1) / 2;
> +
> +		/*
> +		 * There is nothing at "end". If we end up there
> +		 * we need to add something to before end.
> +		 */
> +		if (pos == end)
> +			break;
> +
> +		caddr = t->loc[pos].addr;
> +		if (track->addr == caddr) {
> +
> +			l = &t->loc[pos];
> +			l->count++;
> +			if (track->when) {
> +				l->sum_time += age;
> +				if (age < l->min_time)
> +					l->min_time = age;
> +				if (age > l->max_time)
> +					l->max_time = age;
> +
> +				if (track->pid < l->min_pid)
> +					l->min_pid = track->pid;
> +				if (track->pid > l->max_pid)
> +					l->max_pid = track->pid;
> +
> +				cpumask_set_cpu(track->cpu,
> +						to_cpumask(l->cpus));
> +			}
> +			node_set(page_to_nid(virt_to_page(track)), l->nodes);
> +			return 1;
> +		}
> +
> +		if (track->addr < caddr)
> +			end = pos;
> +		else
> +			start = pos;
> +	}
> +
> +	/*
> +	 * Not found. Insert new tracking element.
> +	 */
> +	if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
> +		return 0;
> +
> +	l = t->loc + pos;
> +	if (pos < t->count)
> +		memmove(l + 1, l,
> +			(t->count - pos) * sizeof(struct location));
> +	t->count++;
> +	l->count = 1;
> +	l->addr = track->addr;
> +	l->sum_time = age;
> +	l->min_time = age;
> +	l->max_time = age;
> +	l->min_pid = track->pid;
> +	l->max_pid = track->pid;
> +	cpumask_clear(to_cpumask(l->cpus));
> +	cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
> +	nodes_clear(l->nodes);
> +	node_set(page_to_nid(virt_to_page(track)), l->nodes);
> +	return 1;
> +}
> +
> +static void process_slab(struct loc_track *t, struct kmem_cache *s,
> +		struct page *page, enum track_item alloc)
> +{
> +	void *addr = page_address(page);
> +	void *p;
> +	unsigned long *map;
> +
> +	map = get_map(s, page);
> +	for_each_object(p, s, addr, page->objects)
> +		if (!test_bit(__obj_to_index(s, addr, p), map))
> +			add_location(t, s, get_track(s, p, alloc));
> +	put_map(map);
> +}
> +
> +static int list_locations(struct seq_file *seq, struct kmem_cache *s,
> +						enum track_item alloc)
> +{
> +	unsigned long i;
> +	struct loc_track t = { 0, 0, NULL };
> +	int node;
> +	struct kmem_cache_node *n;
> +
> +	if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
> +			     GFP_KERNEL)) {
> +		seq_puts(seq, "Out of memory\n");
> +		return -ENOMEM;
> +	}
> +	/* Push back cpu slabs */
> +	flush_all(s);
> +
> +	for_each_kmem_cache_node(s, node, n) {
> +		unsigned long flags;
> +		struct page *page;
> +
> +		if (!atomic_long_read(&n->nr_slabs))
> +			continue;
> +
> +		spin_lock_irqsave(&n->list_lock, flags);
> +		list_for_each_entry(page, &n->partial, slab_list)
> +			process_slab(&t, s, page, alloc);
> +		list_for_each_entry(page, &n->full, slab_list)
> +			process_slab(&t, s, page, alloc);
> +		spin_unlock_irqrestore(&n->list_lock, flags);
> +	}
> +
> +	for (i = 0; i < t.count; i++) {
> +		struct location *l = &t.loc[i];
> +
> +		seq_printf(seq, "%7ld ", l->count);
> +
> +		if (l->addr)
> +			seq_printf(seq, "%pS", (void *)l->addr);
> +		else
> +			seq_puts(seq, "<not-available>");
> +
> +		if (l->sum_time != l->min_time) {
> +			seq_printf(seq, " age=%ld/%ld/%ld",
> +				l->min_time,
> +				(long)div_u64(l->sum_time, l->count),
> +				l->max_time);
> +		} else
> +			seq_printf(seq, " age=%ld",
> +				l->min_time);
> +
> +		if (l->min_pid != l->max_pid)
> +			seq_printf(seq, " pid=%ld-%ld",
> +				l->min_pid, l->max_pid);
> +		else
> +			seq_printf(seq, " pid=%ld",
> +				l->min_pid);
> +
> +		if (num_online_cpus() > 1 &&
> +				!cpumask_empty(to_cpumask(l->cpus)))
> +			seq_printf(seq, " cpus=%*pbl",
> +					 cpumask_pr_args(to_cpumask(l->cpus)));
> +
> +		if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
> +			seq_printf(seq, " nodes=%*pbl",
> +					 nodemask_pr_args(&l->nodes));
> +
> +		seq_puts(seq, "\n");
> +	}
> +
> +	free_loc_track(&t);
> +	if (!t.count)
> +		seq_puts(seq, "No data\n");
> +	return 0;
> +}
> +
> +static int slab_debug_trace(struct seq_file *seq, void *ignored)
> +{
> +	struct kmem_cache *s = seq->private;
> +
> +	if (!(s->flags & SLAB_STORE_USER))
> +		return 0;
> +
> +	if (strcmp(seq->file->f_path.dentry->d_name.name, "alloc_trace") == 0)
> +		return list_locations(seq, s, TRACK_ALLOC);
> +	else
> +		return list_locations(seq, s, TRACK_FREE);
> +
> +	return 0;
> +}
> +
> +static int slab_debug_trace_open(struct inode *inode, struct file *filp)
> +{
> +	return single_open(filp, slab_debug_trace,
> +				file_inode(filp)->i_private);
> +}
> +
> +static const struct file_operations slab_debug_fops = {
> +	.open    = slab_debug_trace_open,
> +	.read    = seq_read,
> +	.llseek  = seq_lseek,
> +	.release = single_release,
> +};
> +
> +static void debugfs_slab_add(struct kmem_cache *s)
> +{
> +	if (unlikely(!slab_debugfs_root))
> +		return;
> +
> +	slab_debugfs_cache = debugfs_create_dir(s->name, slab_debugfs_root);
> +	if (!IS_ERR(slab_debugfs_cache)) {
> +		debugfs_create_file("alloc_trace", 0400,
> +			slab_debugfs_cache, s, &slab_debug_fops);
> +
> +		debugfs_create_file("free_trace", 0400,
> +			slab_debugfs_cache, s, &slab_debug_fops);
> +	}
> +}
> +
> +static void __init slab_debugfs_init(void)
> +{
> +	struct kmem_cache *s;
> +
> +	slab_debugfs_root = debugfs_create_dir("slab", NULL);
> +	if (!IS_ERR(slab_debugfs_root))
> +		list_for_each_entry(s, &slab_caches, list)
> +			debugfs_slab_add(s);
> +	else
> +		pr_err("Cannot create slab debugfs.\n");
> +
> +}
> +__initcall(slab_debugfs_init);
> +#endif
>  /*
>   * The /proc/slabinfo ABI
>   */
>
Vlastimil Babka April 7, 2021, 10:30 a.m. UTC | #4
On 4/6/21 7:15 PM, Vlastimil Babka wrote:
> On 4/6/21 2:27 PM, Faiyaz Mohammed wrote:
>> alloc_calls and free_calls implementation in sysfs have two issues,
>> one is the PAGE_SIZE limitation of sysfs and the other is that it does
>> not adhere to the "one value per file" rule.
>> 
>> To overcome these issues, move the alloc_calls and free_calls
>> implementation to debugfs.
>> 
>> Signed-off-by: Faiyaz Mohammed <faiyazm@codeaurora.org>
> 
> Good direction, thanks. But I'm afraid we need a bit more:
> 
> - I don't see debugfs_remove() (or _recursive) used anywhere. When a cache is
> destroyed, do the dirs/files just linger in debugfs referencing removed
> kmem_cache objects?
> - There's a simple debugfs_create_dir(s->name, ...), for each cache while the
> sysfs variant handles merged caches with symlinks etc. For consistency, the
> hierarchy should look the same in debugfs as it does in sysfs.

Oh and one more suggestion. With full seq_file API (unlike sysfs) we should
really do the data gathering (to struct loc_track?) part just once per file
open, and cache it between individual reads.
diff mbox series

Patch

diff --git a/mm/slub.c b/mm/slub.c
index 3021ce9..4d20ee0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -36,6 +36,7 @@ 
 #include <linux/memcontrol.h>
 #include <linux/random.h>
 
+#include <linux/debugfs.h>
 #include <trace/events/kmem.h>
 
 #include "internal.h"
@@ -225,6 +226,12 @@  static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
 							{ return 0; }
 #endif
 
+#ifdef CONFIG_DEBUG_FS
+static void debugfs_slab_add(struct kmem_cache *);
+#else
+static inline void debugfs_slab_add(struct kmem_cache *) { }
+#endif
+
 static inline void stat(const struct kmem_cache *s, enum stat_item si)
 {
 #ifdef CONFIG_SLUB_STATS
@@ -4542,6 +4549,8 @@  int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
 	if (err)
 		__kmem_cache_release(s);
 
+	debugfs_slab_add(s);
+
 	return err;
 }
 
@@ -4682,221 +4691,6 @@  static long validate_slab_cache(struct kmem_cache *s)
 
 	return count;
 }
-/*
- * Generate lists of code addresses where slabcache objects are allocated
- * and freed.
- */
-
-struct location {
-	unsigned long count;
-	unsigned long addr;
-	long long sum_time;
-	long min_time;
-	long max_time;
-	long min_pid;
-	long max_pid;
-	DECLARE_BITMAP(cpus, NR_CPUS);
-	nodemask_t nodes;
-};
-
-struct loc_track {
-	unsigned long max;
-	unsigned long count;
-	struct location *loc;
-};
-
-static void free_loc_track(struct loc_track *t)
-{
-	if (t->max)
-		free_pages((unsigned long)t->loc,
-			get_order(sizeof(struct location) * t->max));
-}
-
-static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
-{
-	struct location *l;
-	int order;
-
-	order = get_order(sizeof(struct location) * max);
-
-	l = (void *)__get_free_pages(flags, order);
-	if (!l)
-		return 0;
-
-	if (t->count) {
-		memcpy(l, t->loc, sizeof(struct location) * t->count);
-		free_loc_track(t);
-	}
-	t->max = max;
-	t->loc = l;
-	return 1;
-}
-
-static int add_location(struct loc_track *t, struct kmem_cache *s,
-				const struct track *track)
-{
-	long start, end, pos;
-	struct location *l;
-	unsigned long caddr;
-	unsigned long age = jiffies - track->when;
-
-	start = -1;
-	end = t->count;
-
-	for ( ; ; ) {
-		pos = start + (end - start + 1) / 2;
-
-		/*
-		 * There is nothing at "end". If we end up there
-		 * we need to add something to before end.
-		 */
-		if (pos == end)
-			break;
-
-		caddr = t->loc[pos].addr;
-		if (track->addr == caddr) {
-
-			l = &t->loc[pos];
-			l->count++;
-			if (track->when) {
-				l->sum_time += age;
-				if (age < l->min_time)
-					l->min_time = age;
-				if (age > l->max_time)
-					l->max_time = age;
-
-				if (track->pid < l->min_pid)
-					l->min_pid = track->pid;
-				if (track->pid > l->max_pid)
-					l->max_pid = track->pid;
-
-				cpumask_set_cpu(track->cpu,
-						to_cpumask(l->cpus));
-			}
-			node_set(page_to_nid(virt_to_page(track)), l->nodes);
-			return 1;
-		}
-
-		if (track->addr < caddr)
-			end = pos;
-		else
-			start = pos;
-	}
-
-	/*
-	 * Not found. Insert new tracking element.
-	 */
-	if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
-		return 0;
-
-	l = t->loc + pos;
-	if (pos < t->count)
-		memmove(l + 1, l,
-			(t->count - pos) * sizeof(struct location));
-	t->count++;
-	l->count = 1;
-	l->addr = track->addr;
-	l->sum_time = age;
-	l->min_time = age;
-	l->max_time = age;
-	l->min_pid = track->pid;
-	l->max_pid = track->pid;
-	cpumask_clear(to_cpumask(l->cpus));
-	cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
-	nodes_clear(l->nodes);
-	node_set(page_to_nid(virt_to_page(track)), l->nodes);
-	return 1;
-}
-
-static void process_slab(struct loc_track *t, struct kmem_cache *s,
-		struct page *page, enum track_item alloc)
-{
-	void *addr = page_address(page);
-	void *p;
-	unsigned long *map;
-
-	map = get_map(s, page);
-	for_each_object(p, s, addr, page->objects)
-		if (!test_bit(__obj_to_index(s, addr, p), map))
-			add_location(t, s, get_track(s, p, alloc));
-	put_map(map);
-}
-
-static int list_locations(struct kmem_cache *s, char *buf,
-			  enum track_item alloc)
-{
-	int len = 0;
-	unsigned long i;
-	struct loc_track t = { 0, 0, NULL };
-	int node;
-	struct kmem_cache_node *n;
-
-	if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
-			     GFP_KERNEL)) {
-		return sysfs_emit(buf, "Out of memory\n");
-	}
-	/* Push back cpu slabs */
-	flush_all(s);
-
-	for_each_kmem_cache_node(s, node, n) {
-		unsigned long flags;
-		struct page *page;
-
-		if (!atomic_long_read(&n->nr_slabs))
-			continue;
-
-		spin_lock_irqsave(&n->list_lock, flags);
-		list_for_each_entry(page, &n->partial, slab_list)
-			process_slab(&t, s, page, alloc);
-		list_for_each_entry(page, &n->full, slab_list)
-			process_slab(&t, s, page, alloc);
-		spin_unlock_irqrestore(&n->list_lock, flags);
-	}
-
-	for (i = 0; i < t.count; i++) {
-		struct location *l = &t.loc[i];
-
-		len += sysfs_emit_at(buf, len, "%7ld ", l->count);
-
-		if (l->addr)
-			len += sysfs_emit_at(buf, len, "%pS", (void *)l->addr);
-		else
-			len += sysfs_emit_at(buf, len, "<not-available>");
-
-		if (l->sum_time != l->min_time)
-			len += sysfs_emit_at(buf, len, " age=%ld/%ld/%ld",
-					     l->min_time,
-					     (long)div_u64(l->sum_time,
-							   l->count),
-					     l->max_time);
-		else
-			len += sysfs_emit_at(buf, len, " age=%ld", l->min_time);
-
-		if (l->min_pid != l->max_pid)
-			len += sysfs_emit_at(buf, len, " pid=%ld-%ld",
-					     l->min_pid, l->max_pid);
-		else
-			len += sysfs_emit_at(buf, len, " pid=%ld",
-					     l->min_pid);
-
-		if (num_online_cpus() > 1 &&
-		    !cpumask_empty(to_cpumask(l->cpus)))
-			len += sysfs_emit_at(buf, len, " cpus=%*pbl",
-					     cpumask_pr_args(to_cpumask(l->cpus)));
-
-		if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
-			len += sysfs_emit_at(buf, len, " nodes=%*pbl",
-					     nodemask_pr_args(&l->nodes));
-
-		len += sysfs_emit_at(buf, len, "\n");
-	}
-
-	free_loc_track(&t);
-	if (!t.count)
-		len += sysfs_emit_at(buf, len, "No data\n");
-
-	return len;
-}
 #endif	/* CONFIG_SLUB_DEBUG */
 
 #ifdef SLUB_RESILIENCY_TEST
@@ -5346,21 +5140,6 @@  static ssize_t validate_store(struct kmem_cache *s,
 }
 SLAB_ATTR(validate);
 
-static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
-{
-	if (!(s->flags & SLAB_STORE_USER))
-		return -ENOSYS;
-	return list_locations(s, buf, TRACK_ALLOC);
-}
-SLAB_ATTR_RO(alloc_calls);
-
-static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
-{
-	if (!(s->flags & SLAB_STORE_USER))
-		return -ENOSYS;
-	return list_locations(s, buf, TRACK_FREE);
-}
-SLAB_ATTR_RO(free_calls);
 #endif /* CONFIG_SLUB_DEBUG */
 
 #ifdef CONFIG_FAILSLAB
@@ -5524,8 +5303,6 @@  static struct attribute *slab_attrs[] = {
 	&poison_attr.attr,
 	&store_user_attr.attr,
 	&validate_attr.attr,
-	&alloc_calls_attr.attr,
-	&free_calls_attr.attr,
 #endif
 #ifdef CONFIG_ZONE_DMA
 	&cache_dma_attr.attr,
@@ -5814,6 +5591,283 @@  static int __init slab_sysfs_init(void)
 __initcall(slab_sysfs_init);
 #endif /* CONFIG_SYSFS */
 
+#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
+/*
+ * Generate lists of code addresses where slabcache objects are allocated
+ * and freed.
+ */
+
+/*
+ * One unique code address that allocated or freed objects of a cache,
+ * with statistics aggregated over every event seen for that address.
+ */
+struct location {
+	unsigned long count;		/* events recorded for this address */
+	unsigned long addr;		/* caller code address (track->addr) */
+	long long sum_time;		/* sum of object ages, for the mean */
+	long min_time;			/* smallest age seen (jiffies) */
+	long max_time;			/* largest age seen (jiffies) */
+	long min_pid;			/* smallest pid that hit this address */
+	long max_pid;			/* largest pid that hit this address */
+	DECLARE_BITMAP(cpus, NR_CPUS);	/* cpus the events occurred on */
+	nodemask_t nodes;		/* nodes of the slab pages involved */
+};
+
+/* Growable, address-sorted array of struct location entries. */
+struct loc_track {
+	unsigned long max;		/* allocated capacity of @loc */
+	unsigned long count;		/* entries currently in use */
+	struct location *loc;		/* array from alloc_loc_track() */
+};
+
+/* /sys/kernel/debug/slab, created by slab_debugfs_init(). */
+static struct dentry *slab_debugfs_root;
+
+/* Per-cache directory most recently created by debugfs_slab_add(). */
+static struct dentry *slab_debugfs_cache;
+
+/*
+ * Release the location array.  A capacity of zero means nothing was ever
+ * allocated.  Note t->count is deliberately left intact; callers (see
+ * list_locations()) still read it after freeing.
+ */
+static void free_loc_track(struct loc_track *t)
+{
+	if (t->max)
+		free_pages((unsigned long)t->loc,
+			get_order(sizeof(struct location) * t->max));
+}
+
+/*
+ * (Re)allocate the location array to hold @max entries, copying over any
+ * existing entries and freeing the old array.  Returns 1 on success, 0 if
+ * the page allocation failed (the old array stays valid in that case).
+ * @flags is GFP_KERNEL for the initial allocation in list_locations() and
+ * GFP_ATOMIC when growing from add_location() under the node list_lock.
+ */
+static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
+{
+	struct location *l;
+	int order;
+
+	order = get_order(sizeof(struct location) * max);
+
+	l = (void *)__get_free_pages(flags, order);
+	if (!l)
+		return 0;
+
+	if (t->count) {
+		/* Preserve existing entries across the grow. */
+		memcpy(l, t->loc, sizeof(struct location) * t->count);
+		free_loc_track(t);
+	}
+	t->max = max;
+	t->loc = l;
+	return 1;
+}
+
+/*
+ * Record one struct track in @t: if track->addr is already present, fold
+ * this event into that entry's statistics; otherwise insert a new entry,
+ * keeping the array sorted by address.  Returns 0 only when growing the
+ * array fails (GFP_ATOMIC -- the caller holds the node list_lock with
+ * interrupts disabled, see list_locations()).
+ */
+static int add_location(struct loc_track *t, struct kmem_cache *s,
+				const struct track *track)
+{
+	long start, end, pos;
+	struct location *l;
+	unsigned long caddr;
+	unsigned long age = jiffies - track->when;	/* object age in jiffies */
+
+	/* Binary search over the address-sorted entries of t->loc. */
+	start = -1;
+	end = t->count;
+
+	for ( ; ; ) {
+		pos = start + (end - start + 1) / 2;
+
+		/*
+		 * There is nothing at "end". If we end up there
+		 * we need to add something to before end.
+		 */
+		if (pos == end)
+			break;
+
+		caddr = t->loc[pos].addr;
+		if (track->addr == caddr) {
+
+			/* Existing entry: merge this event's statistics. */
+			l = &t->loc[pos];
+			l->count++;
+			if (track->when) {	/* skip stats if no timestamp was recorded */
+				l->sum_time += age;
+				if (age < l->min_time)
+					l->min_time = age;
+				if (age > l->max_time)
+					l->max_time = age;
+
+				if (track->pid < l->min_pid)
+					l->min_pid = track->pid;
+				if (track->pid > l->max_pid)
+					l->max_pid = track->pid;
+
+				cpumask_set_cpu(track->cpu,
+						to_cpumask(l->cpus));
+			}
+			node_set(page_to_nid(virt_to_page(track)), l->nodes);
+			return 1;
+		}
+
+		if (track->addr < caddr)
+			end = pos;
+		else
+			start = pos;
+	}
+
+	/*
+	 * Not found. Insert new tracking element.
+	 */
+	if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
+		return 0;
+
+	/* Shift the tail up one slot so the array stays sorted. */
+	l = t->loc + pos;
+	if (pos < t->count)
+		memmove(l + 1, l,
+			(t->count - pos) * sizeof(struct location));
+	t->count++;
+	l->count = 1;
+	l->addr = track->addr;
+	l->sum_time = age;
+	l->min_time = age;
+	l->max_time = age;
+	l->min_pid = track->pid;
+	l->max_pid = track->pid;
+	cpumask_clear(to_cpumask(l->cpus));
+	cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
+	nodes_clear(l->nodes);
+	node_set(page_to_nid(virt_to_page(track)), l->nodes);
+	return 1;
+}
+
+/*
+ * Fold the alloc or free track of every object in @page into @t, skipping
+ * objects whose bit is set in the map returned by get_map() (presumably
+ * the free objects -- confirm against get_map()'s contract).  Called under
+ * the node's list_lock from list_locations().
+ */
+static void process_slab(struct loc_track *t, struct kmem_cache *s,
+		struct page *page, enum track_item alloc)
+{
+	void *addr = page_address(page);
+	void *p;
+	unsigned long *map;
+
+	map = get_map(s, page);
+	for_each_object(p, s, addr, page->objects)
+		if (!test_bit(__obj_to_index(s, addr, p), map))
+			add_location(t, s, get_track(s, p, alloc));
+	put_map(map);
+}
+
+/*
+ * Walk every partial and full slab of @s and emit one line per unique
+ * alloc (TRACK_ALLOC) or free (TRACK_FREE) call site with its aggregated
+ * count/age/pid/cpu/node statistics.  Returns 0, or -ENOMEM when the
+ * initial location array cannot be allocated.
+ *
+ * NOTE(review): with single_open() this show routine is re-run from
+ * scratch -- including flush_all() and the full slab walk -- every time
+ * the seq_file buffer has to grow; confirm that is acceptable for caches
+ * with many call sites.
+ */
+static int list_locations(struct seq_file *seq, struct kmem_cache *s,
+						enum track_item alloc)
+{
+	unsigned long i;
+	struct loc_track t = { 0, 0, NULL };
+	int node;
+	struct kmem_cache_node *n;
+
+	if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
+			     GFP_KERNEL)) {
+		seq_puts(seq, "Out of memory\n");
+		return -ENOMEM;
+	}
+	/* Push back cpu slabs */
+	flush_all(s);
+
+	for_each_kmem_cache_node(s, node, n) {
+		unsigned long flags;
+		struct page *page;
+
+		if (!atomic_long_read(&n->nr_slabs))
+			continue;
+
+		spin_lock_irqsave(&n->list_lock, flags);
+		list_for_each_entry(page, &n->partial, slab_list)
+			process_slab(&t, s, page, alloc);
+		list_for_each_entry(page, &n->full, slab_list)
+			process_slab(&t, s, page, alloc);
+		spin_unlock_irqrestore(&n->list_lock, flags);
+	}
+
+	/* Format one output line per unique call site. */
+	for (i = 0; i < t.count; i++) {
+		struct location *l = &t.loc[i];
+
+		seq_printf(seq, "%7ld ", l->count);
+
+		if (l->addr)
+			seq_printf(seq, "%pS", (void *)l->addr);
+		else
+			seq_puts(seq, "<not-available>");
+
+		/* min/avg/max only when ages actually differ. */
+		if (l->sum_time != l->min_time) {
+			seq_printf(seq, " age=%ld/%ld/%ld",
+				l->min_time,
+				(long)div_u64(l->sum_time, l->count),
+				l->max_time);
+		} else
+			seq_printf(seq, " age=%ld",
+				l->min_time);
+
+		if (l->min_pid != l->max_pid)
+			seq_printf(seq, " pid=%ld-%ld",
+				l->min_pid, l->max_pid);
+		else
+			seq_printf(seq, " pid=%ld",
+				l->min_pid);
+
+		if (num_online_cpus() > 1 &&
+				!cpumask_empty(to_cpumask(l->cpus)))
+			seq_printf(seq, " cpus=%*pbl",
+					 cpumask_pr_args(to_cpumask(l->cpus)));
+
+		if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
+			seq_printf(seq, " nodes=%*pbl",
+					 nodemask_pr_args(&l->nodes));
+
+		seq_puts(seq, "\n");
+	}
+
+	/* free_loc_track() leaves t.count intact, so this check is safe. */
+	free_loc_track(&t);
+	if (!t.count)
+		seq_puts(seq, "No data\n");
+	return 0;
+}
+
+/*
+ * seq_file show routine shared by the alloc_trace and free_trace debugfs
+ * files; the kmem_cache was stashed in seq->private by the open handler.
+ * Returns -ENOSYS when the cache is not tracking callers, matching the
+ * behaviour of the removed sysfs alloc_calls/free_calls attributes
+ * (previously this fell through to an empty file, and the trailing
+ * "return 0;" after the if/else was unreachable).
+ */
+static int slab_debug_trace(struct seq_file *seq, void *ignored)
+{
+	struct kmem_cache *s = seq->private;
+	enum track_item alloc;
+
+	if (!(s->flags & SLAB_STORE_USER))
+		return -ENOSYS;
+
+	/*
+	 * Both files share one fops, so dispatch on the debugfs file name.
+	 * NOTE(review): name-based dispatch is fragile; consider encoding
+	 * the track_item in the per-file private data instead.
+	 */
+	if (strcmp(seq->file->f_path.dentry->d_name.name, "alloc_trace") == 0)
+		alloc = TRACK_ALLOC;
+	else
+		alloc = TRACK_FREE;
+
+	return list_locations(seq, s, alloc);
+}
+
+/*
+ * Open handler: hand the kmem_cache stashed in the inode's i_private to
+ * the single-record seq_file show routine.  The inode is already passed
+ * in, so there is no need for the file_inode(filp) round-trip.
+ */
+static int slab_debug_trace_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, slab_debug_trace, inode->i_private);
+}
+
+/* Shared fops for both the alloc_trace and free_trace debugfs files. */
+static const struct file_operations slab_debug_fops = {
+	.open    = slab_debug_trace_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ * Create /sys/kernel/debug/slab/<cache>/{alloc_trace,free_trace} for @s.
+ * The cache pointer is stored in each file's i_private for the open
+ * handler to retrieve.
+ */
+static void debugfs_slab_add(struct kmem_cache *s)
+{
+	/* debugfs root not (yet) set up -- nothing to attach to. */
+	if (unlikely(!slab_debugfs_root))
+		return;
+
+	/*
+	 * debugfs_create_dir() never returns NULL; on failure it returns an
+	 * ERR_PTR, which debugfs_create_file() accepts and handles itself.
+	 * Per debugfs convention the result is therefore not error-checked
+	 * (the previous IS_ERR() gate here was contrary to that convention).
+	 */
+	slab_debugfs_cache = debugfs_create_dir(s->name, slab_debugfs_root);
+
+	debugfs_create_file("alloc_trace", 0400, slab_debugfs_cache, s,
+			    &slab_debug_fops);
+	debugfs_create_file("free_trace", 0400, slab_debugfs_cache, s,
+			    &slab_debug_fops);
+}
+
+/*
+ * Create the debugfs "slab" root and populate it with a directory per
+ * existing cache.  Must return int: __initcall() requires the initcall_t
+ * signature, so the previous "static void" definition failed to build.
+ */
+static int __init slab_debugfs_init(void)
+{
+	struct kmem_cache *s;
+
+	/*
+	 * On failure debugfs_create_dir() returns an ERR_PTR that the
+	 * debugfs_create_* calls downstream handle transparently, so per
+	 * debugfs convention the result is deliberately not checked.
+	 */
+	slab_debugfs_root = debugfs_create_dir("slab", NULL);
+
+	/* The slab_caches list is protected by slab_mutex. */
+	mutex_lock(&slab_mutex);
+	list_for_each_entry(s, &slab_caches, list)
+		debugfs_slab_add(s);
+	mutex_unlock(&slab_mutex);
+
+	return 0;
+}
+__initcall(slab_debugfs_init);
+#endif
 /*
  * The /proc/slabinfo ABI
  */