
[v4] tracing: add 'accounted' entry into output of allocation tracepoints

Message ID a1e54672-d824-3ee1-cb68-9ceaa46fc70f@openvz.org (mailing list archive)
State New
Series [v4] tracing: add 'accounted' entry into output of allocation tracepoints

Commit Message

Vasily Averin May 21, 2022, 6:36 p.m. UTC
Slab caches marked with SLAB_ACCOUNT force accounting for every
allocation from this cache even if __GFP_ACCOUNT flag is not passed.
Unfortunately, at the moment this flag is not visible in ftrace output,
and this makes it difficult to analyze the accounted allocations.

This patch adds a boolean "accounted" entry into the trace output,
and sets it to 'true' for calls that use the __GFP_ACCOUNT flag and
for allocations from caches marked with SLAB_ACCOUNT.

Signed-off-by: Vasily Averin <vvs@openvz.org>
Acked-by: Shakeel Butt <shakeelb@google.com>
---
v4:
 1) replaced "allocated" with "accounted" in the patch description
 2) added "Acked-by" from Shakeel,
 3) re-addressed to akpm@

v3:
 1) rework kmem_cache_alloc* tracepoints once again,
    added struct kmem_cache argument into existing templates,
	thanks to Matthew Wilcox
 2) updated the corresponding trace_* calls
 3) added boolean "allocated" entry into trace output,
	thanks to Roman
 4) updated patch subject and description

v2:
 1) handle kmem_cache_alloc_node(), thanks to Shakeel
 2) rework kmem_cache_alloc* tracepoints to use cachep instead
    of current cachep->*size parameters.
    NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
        and therefore it was replaced by kmalloc_node.
---
 include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
 mm/slab.c                   | 10 +++++-----
 mm/slab_common.c            |  9 ++++-----
 mm/slob.c                   |  8 ++++----
 mm/slub.c                   | 20 +++++++++----------
 5 files changed, 47 insertions(+), 38 deletions(-)

Comments

Hyeonggon Yoo May 22, 2022, 3:51 a.m. UTC | #1
On Sat, May 21, 2022 at 09:36:54PM +0300, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
> 
> This patch adds boolean "accounted" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> Acked-by: Shakeel Butt <shakeelb@google.com>

May I ask what information you want to collect
using this patch?

I pointed this out in another thread, but I'm not sure
printing SLAB_* flags in these tracepoints is good :(

If we decide to do that, it would be better to print
something like:
slab_flags=SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT|SLAB_STORE_USER
instead of just printing 'accounted=true/false'. This patch is too
specific to SLAB_ACCOUNT.

And if what you want to know is just the total slab memory that is accounted,
what about adding something like SlabAccounted in /proc/meminfo?

Thanks.

> ---
> v4:
>  1) replaced in patch descripion: "accounted" instead of "allocated"
>  2) added "Acked-by" from Shakeel,
>  3) re-addressed to akpm@
> 
> v3:
>  1) rework kmem_cache_alloc* tracepoints once again,
>     added struct kmem_cache argument into existing templates,
> 	thanks to Matthew Wilcox
>  2) updated according trace_* calls
>  3) added boolean "allocated" entry into trace output,
> 	thanks to Roman
>  4) updated patch subject and description
> 
> v2:
>  1) handle kmem_cache_alloc_node(), thanks to Shakeel
>  2) rework kmem_cache_alloc* tracepoints to use cachep instead
>     of current cachep->*size parameters.
>     NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
>         and therefore it was replaced by kmalloc_node.
> ---
>  include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
>  mm/slab.c                   | 10 +++++-----
>  mm/slab_common.c            |  9 ++++-----
>  mm/slob.c                   |  8 ++++----
>  mm/slub.c                   | 20 +++++++++----------
>  5 files changed, 47 insertions(+), 38 deletions(-)
> 
> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> index 71c141804222..5bfeb6f276f1 100644
> --- a/include/trace/events/kmem.h
> +++ b/include/trace/events/kmem.h
> @@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  
>  	TP_PROTO(unsigned long call_site,
>  		 const void *ptr,
> +		 struct kmem_cache *s,
>  		 size_t bytes_req,
>  		 size_t bytes_alloc,
>  		 gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
>  
>  	TP_STRUCT__entry(
>  		__field(	unsigned long,	call_site	)
> @@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  		__field(	size_t,		bytes_req	)
>  		__field(	size_t,		bytes_alloc	)
>  		__field(	unsigned long,	gfp_flags	)
> +		__field(	bool,		accounted	)
>  	),
>  
>  	TP_fast_assign(
> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  		__entry->bytes_req	= bytes_req;
>  		__entry->bytes_alloc	= bytes_alloc;
>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||

I doubt someone will pass __GFP_ACCOUNT in gfpflags
when calling kmem_cache_alloc*().


> +					  (s && s->flags & SLAB_ACCOUNT);
>  	),
>  
> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
>  		(void *)__entry->call_site,
>  		__entry->ptr,
>  		__entry->bytes_req,
>  		__entry->bytes_alloc,
> -		show_gfp_flags(__entry->gfp_flags))
> +		show_gfp_flags(__entry->gfp_flags),
> +		__entry->accounted ? "true" : "false")
>  );
>  
>  DEFINE_EVENT(kmem_alloc, kmalloc,
>  
> -	TP_PROTO(unsigned long call_site, const void *ptr,
> +	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
>  		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
>  );
>  
>  DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
>  
> -	TP_PROTO(unsigned long call_site, const void *ptr,
> +	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
>  		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
>  );
>  
>  DECLARE_EVENT_CLASS(kmem_alloc_node,
>  
>  	TP_PROTO(unsigned long call_site,
>  		 const void *ptr,
> +		 struct kmem_cache *s,
>  		 size_t bytes_req,
>  		 size_t bytes_alloc,
>  		 gfp_t gfp_flags,
>  		 int node),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),
>  
>  	TP_STRUCT__entry(
>  		__field(	unsigned long,	call_site	)
> @@ -77,6 +83,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
>  		__field(	size_t,		bytes_alloc	)
>  		__field(	unsigned long,	gfp_flags	)
>  		__field(	int,		node		)
> +		__field(	bool,		accounted	)
>  	),
>  
>  	TP_fast_assign(
> @@ -86,33 +93,36 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
>  		__entry->bytes_alloc	= bytes_alloc;
>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
>  		__entry->node		= node;
> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
> +					  (s && s->flags & SLAB_ACCOUNT);
>  	),
>  
> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
>  		(void *)__entry->call_site,
>  		__entry->ptr,
>  		__entry->bytes_req,
>  		__entry->bytes_alloc,
>  		show_gfp_flags(__entry->gfp_flags),
> -		__entry->node)
> +		__entry->node,
> +		__entry->accounted ? "true" : "false")
>  );
>  
>  DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
>  
>  	TP_PROTO(unsigned long call_site, const void *ptr,
> -		 size_t bytes_req, size_t bytes_alloc,
> +		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
>  		 gfp_t gfp_flags, int node),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
>  );
>  
>  DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
>  
>  	TP_PROTO(unsigned long call_site, const void *ptr,
> -		 size_t bytes_req, size_t bytes_alloc,
> +		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
>  		 gfp_t gfp_flags, int node),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
>  );
>  
>  TRACE_EVENT(kfree,
> diff --git a/mm/slab.c b/mm/slab.c
> index 0edb474edef1..e5802445c7d6 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3492,7 +3492,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
>  {
>  	void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
>  
> -	trace_kmem_cache_alloc(_RET_IP_, ret,
> +	trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
>  			       cachep->object_size, cachep->size, flags);
>  
>  	return ret;
> @@ -3581,7 +3581,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
>  	ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);
>  
>  	ret = kasan_kmalloc(cachep, ret, size, flags);
> -	trace_kmalloc(_RET_IP_, ret,
> +	trace_kmalloc(_RET_IP_, ret, cachep,
>  		      size, cachep->size, flags);
>  	return ret;
>  }
> @@ -3606,7 +3606,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
>  {
>  	void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
>  
> -	trace_kmem_cache_alloc_node(_RET_IP_, ret,
> +	trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
>  				    cachep->object_size, cachep->size,
>  				    flags, nodeid);
>  
> @@ -3625,7 +3625,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
>  	ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
>  
>  	ret = kasan_kmalloc(cachep, ret, size, flags);
> -	trace_kmalloc_node(_RET_IP_, ret,
> +	trace_kmalloc_node(_RET_IP_, ret, cachep,
>  			   size, cachep->size,
>  			   flags, nodeid);
>  	return ret;
> @@ -3708,7 +3708,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
>  	ret = slab_alloc(cachep, NULL, flags, size, caller);
>  
>  	ret = kasan_kmalloc(cachep, ret, size, flags);
> -	trace_kmalloc(caller, ret,
> +	trace_kmalloc(caller, ret, cachep,
>  		      size, cachep->size, flags);
>  
>  	return ret;
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 2b3206a2c3b5..a345e8600e00 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -25,13 +25,12 @@
>  #include <asm/page.h>
>  #include <linux/memcontrol.h>
>  
> -#define CREATE_TRACE_POINTS
> -#include <trace/events/kmem.h>
> -
>  #include "internal.h"
> -
>  #include "slab.h"
>  
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/kmem.h>
> +
>  enum slab_state slab_state;
>  LIST_HEAD(slab_caches);
>  DEFINE_MUTEX(slab_mutex);
> @@ -967,7 +966,7 @@ EXPORT_SYMBOL(kmalloc_order);
>  void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
>  {
>  	void *ret = kmalloc_order(size, flags, order);
> -	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
> +	trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
>  	return ret;
>  }
>  EXPORT_SYMBOL(kmalloc_order_trace);
> diff --git a/mm/slob.c b/mm/slob.c
> index 40ea6e2d4ccd..dbefa0da0dfc 100644
> --- a/mm/slob.c
> +++ b/mm/slob.c
> @@ -505,7 +505,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
>  		*m = size;
>  		ret = (void *)m + minalign;
>  
> -		trace_kmalloc_node(caller, ret,
> +		trace_kmalloc_node(caller, ret, NULL,
>  				   size, size + minalign, gfp, node);
>  	} else {
>  		unsigned int order = get_order(size);
> @@ -514,7 +514,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
>  			gfp |= __GFP_COMP;
>  		ret = slob_new_pages(gfp, order, node);
>  
> -		trace_kmalloc_node(caller, ret,
> +		trace_kmalloc_node(caller, ret, NULL,
>  				   size, PAGE_SIZE << order, gfp, node);
>  	}
>  
> @@ -610,12 +610,12 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
>  
>  	if (c->size < PAGE_SIZE) {
>  		b = slob_alloc(c->size, flags, c->align, node, 0);
> -		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
> +		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
>  					    SLOB_UNITS(c->size) * SLOB_UNIT,
>  					    flags, node);
>  	} else {
>  		b = slob_new_pages(flags, get_order(c->size), node);
> -		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
> +		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
>  					    PAGE_SIZE << get_order(c->size),
>  					    flags, node);
>  	}
> diff --git a/mm/slub.c b/mm/slub.c
> index ed5c2c03a47a..9b10591646dd 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3231,7 +3231,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
>  {
>  	void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
>  
> -	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
> +	trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
>  				s->size, gfpflags);
>  
>  	return ret;
> @@ -3254,7 +3254,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru);
>  void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
>  {
>  	void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
> -	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
> +	trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
>  	ret = kasan_kmalloc(s, ret, size, gfpflags);
>  	return ret;
>  }
> @@ -3266,7 +3266,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
>  {
>  	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
>  
> -	trace_kmem_cache_alloc_node(_RET_IP_, ret,
> +	trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
>  				    s->object_size, s->size, gfpflags, node);
>  
>  	return ret;
> @@ -3280,7 +3280,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
>  {
>  	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
>  
> -	trace_kmalloc_node(_RET_IP_, ret,
> +	trace_kmalloc_node(_RET_IP_, ret, s,
>  			   size, s->size, gfpflags, node);
>  
>  	ret = kasan_kmalloc(s, ret, size, gfpflags);
> @@ -4409,7 +4409,7 @@ void *__kmalloc(size_t size, gfp_t flags)
>  
>  	ret = slab_alloc(s, NULL, flags, _RET_IP_, size);
>  
> -	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
> +	trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);
>  
>  	ret = kasan_kmalloc(s, ret, size, flags);
>  
> @@ -4443,7 +4443,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
>  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
>  		ret = kmalloc_large_node(size, flags, node);
>  
> -		trace_kmalloc_node(_RET_IP_, ret,
> +		trace_kmalloc_node(_RET_IP_, ret, NULL,
>  				   size, PAGE_SIZE << get_order(size),
>  				   flags, node);
>  
> @@ -4457,7 +4457,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
>  
>  	ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);
>  
> -	trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
> +	trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);
>  
>  	ret = kasan_kmalloc(s, ret, size, flags);
>  
> @@ -4916,7 +4916,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
>  	ret = slab_alloc(s, NULL, gfpflags, caller, size);
>  
>  	/* Honor the call site pointer we received. */
> -	trace_kmalloc(caller, ret, size, s->size, gfpflags);
> +	trace_kmalloc(caller, ret, s, size, s->size, gfpflags);
>  
>  	return ret;
>  }
> @@ -4932,7 +4932,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
>  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
>  		ret = kmalloc_large_node(size, gfpflags, node);
>  
> -		trace_kmalloc_node(caller, ret,
> +		trace_kmalloc_node(caller, ret, NULL,
>  				   size, PAGE_SIZE << get_order(size),
>  				   gfpflags, node);
>  
> @@ -4947,7 +4947,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
>  	ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);
>  
>  	/* Honor the call site pointer we received. */
> -	trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
> +	trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);
>  
>  	return ret;
>  }
> -- 
> 2.31.1
>
Vasily Averin May 22, 2022, 4:33 a.m. UTC | #2
On 5/22/22 06:51, Hyeonggon Yoo wrote:
> On Sat, May 21, 2022 at 09:36:54PM +0300, Vasily Averin wrote:
>> Slab caches marked with SLAB_ACCOUNT force accounting for every
>> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
>> Unfortunately, at the moment this flag is not visible in ftrace output,
>> and this makes it difficult to analyze the accounted allocations.
>>
>> This patch adds boolean "accounted" entry into trace output,
>> and set it to 'true' for calls used __GFP_ACCOUNT flag and
>> for allocations from caches marked with SLAB_ACCOUNT.
>>
>> Signed-off-by: Vasily Averin <vvs@openvz.org>
>> Acked-by: Shakeel Butt <shakeelb@google.com>
> 
> May I ask what information do you want to collect
> using this patch?

I analyze ftrace output to understand which allocations are accounted.
When some userspace operation consumes memory, it's important to account
most of that memory (>2/3 of it) to avoid misuse inside memcg-limited
containers. Otherwise a memcg-limited container can consume a significant
portion of host memory, trigger a global OOM, wake up the OOM-killer and kill
random processes on the host.
If the memory consumers are accounted, only a memcg OOM is triggered.

Now kmem tracing output looks like this:

kmem_cache_alloc:     (getname_flags.part.0+0x2c) call_site=getname_flags.part.0+0x2c ptr=0xffff8fff022e9000 bytes_req=4096 bytes_alloc=4096 gfp_flags=GFP_KERNEL accounted=false
kmalloc:              (alloc_bprm+0x32) call_site=alloc_bprm+0x32 ptr=0xffff8fff2b408a00 bytes_req=416 bytes_alloc=512 gfp_flags=GFP_KERNEL|__GFP_ZERO accounted=false
kmem_cache_alloc:     (mm_alloc+0x16) call_site=mm_alloc+0x16 ptr=0xffff8fff0894d500 bytes_req=1048 bytes_alloc=1088 gfp_flags=GFP_KERNEL accounted=true
mm_page_alloc:        page=0xffffffffa4ab8d42 pfn=0x12ad72 order=1 migratetype=0 gfp_flags=GFP_USER|__GFP_ZERO|__GFP_ACCOUNT
kmem_cache_alloc:     (vm_area_alloc+0x1a) call_site=vm_area_alloc+0x1a ptr=0xffff8fff2af27000 bytes_req=200 bytes_alloc=200 gfp_flags=GFP_KERNEL accounted=true

As you can see, without the new field it is quite hard to tell
whether the last allocation is accounted.

This analysis helps me identify the most important allocations for a given scenario
and enable accounting for the selected allocations.

An example of this analysis can be found here:
https://lore.kernel.org/all/d28233ee-bccb-7bc3-c2ec-461fd7f95e6a@openvz.org/

> I pointed it in another thread but I'm not sure
> printing SLAB_* flags in these tracepoint is good :(

I'm not sure I understand your arguments correctly. Could you please
explain your position in more detail?

> If we decide to do that, it would be better to print
> something like:
> slab_flags=SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT|SLAB_STORE_USER
> instead of just printing 'accounted=true/false'. This patch is too
> specific to SLAB_ACCOUNT.

Any extra output degrades performance.
For my task it's not important to know the SLAB flags, I just need to understand
whether the current allocation is accounted or not.

> And if what you want to know is just total slab memory that is accounted,
> what about adding something like  SlabAccounted in /proc/meminfo?

It is not enough for me. I need to have per-process allocation information.

Thank you,
	Vasily Averin
Hyeonggon Yoo May 22, 2022, 5:19 a.m. UTC | #3
On Sun, May 22, 2022 at 07:33:08AM +0300, Vasily Averin wrote:
> On 5/22/22 06:51, Hyeonggon Yoo wrote:
> > On Sat, May 21, 2022 at 09:36:54PM +0300, Vasily Averin wrote:
> >> Slab caches marked with SLAB_ACCOUNT force accounting for every
> >> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> >> Unfortunately, at the moment this flag is not visible in ftrace output,
> >> and this makes it difficult to analyze the accounted allocations.
> >>
> >> This patch adds boolean "accounted" entry into trace output,
> >> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> >> for allocations from caches marked with SLAB_ACCOUNT.
> >>
> >> Signed-off-by: Vasily Averin <vvs@openvz.org>
> >> Acked-by: Shakeel Butt <shakeelb@google.com>
> > 
> > May I ask what information do you want to collect
> > using this patch?
> 
> I analyze ftrace output to understand which allocations are accounted.
> When some userspace operation consume memory, it's important to account
> most part of memory (>2/3 of all) to avoid misuse inside memcg-limited
> contianers. Otherwise memcg-limited container can consume significant 
> portion of host memory, trigger global OOM, wake up OOM-killer and kill
> random processes on host.
> If memory consumers are accounted, it leads to memcg-OOM only. 
> 
> Now kmem tracing output looks like this:
> 
> kmem_cache_alloc:     (getname_flags.part.0+0x2c) call_site=getname_flags.part.0+0x2c ptr=0xffff8fff022e9000 bytes_req=4096 bytes_alloc=4096 gfp_flags=GFP_KERNEL accounted=false
> kmalloc:              (alloc_bprm+0x32) call_site=alloc_bprm+0x32 ptr=0xffff8fff2b408a00 bytes_req=416 bytes_alloc=512 gfp_flags=GFP_KERNEL|__GFP_ZERO accounted=false
> kmem_cache_alloc:     (mm_alloc+0x16) call_site=mm_alloc+0x16 ptr=0xffff8fff0894d500 bytes_req=1048 bytes_alloc=1088 gfp_flags=GFP_KERNEL accounted=true
> mm_page_alloc:        page=0xffffffffa4ab8d42 pfn=0x12ad72 order=1 migratetype=0 gfp_flags=GFP_USER|__GFP_ZERO|__GFP_ACCOUNT
> kmem_cache_alloc:     (vm_area_alloc+0x1a) call_site=vm_area_alloc+0x1a ptr=0xffff8fff2af27000 bytes_req=200 bytes_alloc=200 gfp_flags=GFP_KERNEL accounted=true
> 
> As you can see, without new field it is quite hard to understand, 
> is last allocation accounted.
>
> This analyze helps me to identify most important allocations for given scenario
> and enable accounting for selected allocations.
> 
> An example of this analyze you can found here:
> https://lore.kernel.org/all/d28233ee-bccb-7bc3-c2ec-461fd7f95e6a@openvz.org/
> 

Thank you for detailed explanation. Makes sense to me.

> > If we decide to do that, it would be better to print
> > something like:
> > slab_flags=SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT|SLAB_STORE_USER
> > instead of just printing 'accounted=true/false'. This patch is too
> > specific to SLAB_ACCOUNT.
> 
> Any extra output degrades performance.

No strong opinion, but just a concern that maybe later someone will want to add
something similar like 'reclaimable=true/false', 'dma=true/false', ...
and I would prefer a more general solution (especially if we will not be able
to change tracepoints after release because of backward compatibility).

> For my task it's not important to know SLAB flags, I just need to understand,
> is current allocation accounted or not.

SLAB_ACCOUNT, SLAB_RECLAIM_ACCOUNT, SLAB_DMA, etc. are SLAB flags.

Whether the current allocation is accounted depends on whether the
SLAB_ACCOUNT flag is set.

Thanks,
Hyeonggon

> > And if what you want to know is just total slab memory that is accounted,
> > what about adding something like  SlabAccounted in /proc/meminfo?
> 
> It is not enough for me. I need to have per-process allocation information.
> 
> Thank you,
> 	Vasily Averin
Shakeel Butt May 22, 2022, 5:42 a.m. UTC | #4
On Sat, May 21, 2022 at 10:19 PM Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
>
[...]
>
> No strong opinion but just a concern that maybe later someone want add
> something similar like 'reclaimable=true/false', 'dma=true/false', ...
> and I would prefer more general solution. (especially if we'll not
> change tracepoints after release because of backward compatibility)
>

There is no contract for tracepoints to be stable; they can be changed.

> > For my task it's not important to know SLAB flags, I just need to understand,
> > is current allocation accounted or not.
>
> SLAB_ACCOUNT, SLAB_RECLAIM_ACCOUNT, SLAB_DMA, ... etc are SLAB flags.
>
> 'if current allocation is accounted or not' depends on SLAB_ACCOUNT
> flag is set or not.
>

An allocation can be accounted due to __GFP_ACCOUNT as well.
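
For example, with this patch both of the allocations below would be reported
with accounted=true (a minimal, hypothetical sketch, not taken from the patch;
"example_cache" is a made-up name):

	/* needs <linux/slab.h>; error handling and freeing omitted */
	struct kmem_cache *c = kmem_cache_create("example_cache", 128, 0,
						  SLAB_ACCOUNT, NULL);
	void *a = kmem_cache_alloc(c, GFP_KERNEL);          /* cache marked SLAB_ACCOUNT */
	void *b = kmalloc(64, GFP_KERNEL | __GFP_ACCOUNT);  /* caller passes __GFP_ACCOUNT */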
Vasily Averin May 22, 2022, 6:53 p.m. UTC | #5
On 5/22/22 08:19, Hyeonggon Yoo wrote:
> On Sun, May 22, 2022 at 07:33:08AM +0300, Vasily Averin wrote:
>> On 5/22/22 06:51, Hyeonggon Yoo wrote:
>>> If we decide to do that, it would be better to print
>>> something like:
>>> slab_flags=SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT|SLAB_STORE_USER
>>> instead of just printing 'accounted=true/false'. This patch is too
>>> specific to SLAB_ACCOUNT.
>>
>> Any extra output degrades performance.
> 
> No strong opinion but just a concern that maybe later someone want add
> something similar like 'reclaimable=true/false', 'dma=true/false', ...
> and I would prefer more general solution. (especially if we'll not
> change tracepoints after release because of backward compatibility)

I would like to quote Steven Rostedt:
https://lore.kernel.org/all/20220515180640.0ae2ead5@gandalf.local.home/
"
> Trace events are not ABI, but if we don't have a strong reason to break it,
> I'd preserve the old order.

Ideally everyone should be using libtraceevent which will parse the format
file for the needed entries.

Nothing (important) should be parsing the raw ascii from the trace files.
It's slow and unreliable. The raw format (trace_pipe_raw) files, along with
libtraceevent will handle fining the fields you are looking for, even if
the fields move around (internally or externally).

Then there's trace-cruncher (a python script that uses libtracefs and
libtraceevent) that will work too.

  https://github.com/vmware/trace-cruncher
"

Thank you,
	Vasily Averin
Steven Rostedt May 22, 2022, 8:09 p.m. UTC | #6
On Sun, 22 May 2022 07:33:08 +0300
Vasily Averin <vvs@openvz.org> wrote:

> > slab_flags=SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT|SLAB_STORE_USER
> > instead of just printing 'accounted=true/false'. This patch is too
> > specific to SLAB_ACCOUNT.  
> 
> Any extra output degrades performance.
> For my task it's not important to know SLAB flags, I just need to understand,
> is current allocation accounted or not.

If you do save the flags in the event, you can report that on output with
the __print_flags() macro:

 TP_fast_assign(
	[..]
	__entry->sflags = s ? s->flags : 0;
	[..]
 )
 TP_printk("... slab_flags=%s ..",
	[..]
	__print_flags(__entry->sflags, "|",
		{ SLAB_CONSISTENCY_CHECKS, "CONSISTENCY_CHECKS" },
		{ SLAB_RED_ZONE, "RED_ZONE" },
		{ SLAB_POISON, "POISON" },
		{ SLAB_HWCACHE_ALIGN, "HWCACHE_ALIGN" },
		{ SLAB_CACHE_DMA, "CACHE_DMA" },
		{ SLAB_CACHE_DMA32, "CACHE_DMA32" },
		{ SLAB_STORE_USER, "STORE_USER" },
		{ SLAB_PANIC, "PANIC" }), ... )


And you get the flag output looking nicely, and all the processing is done
on the reader path.

That's if you find it useful at all.

-- Steve
Vasily Averin May 23, 2022, 4:03 a.m. UTC | #7
On 5/22/22 23:09, Steven Rostedt wrote:
> On Sun, 22 May 2022 07:33:08 +0300
> Vasily Averin <vvs@openvz.org> wrote:
> 
>>> slab_flags=SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT|SLAB_STORE_USER
>>> instead of just printing 'accounted=true/false'. This patch is too
>>> specific to SLAB_ACCOUNT.  
>>
>> Any extra output degrades performance.
>> For my task it's not important to know SLAB flags, I just need to understand,
>> is current allocation accounted or not.
> 
> If you do save the flags in the event, you can report that on output with
> the __print_flags() macro:
> 
>  TP_fast_assign(
> 	[..]
> 	__entry->sflags = s ? s->flags : 0;
> 	[..]
>  )
>  TP_printk("... slab_flags=%s ..",
> 	[..]
> 	__print_flags(__entry->sflags, "|",
> 		{ SLAB_CONSISTENCY_CHECKS, "CONSISTENCY_CHECKS" },
> 		{ SLAB_RED_ZONE, "RED_ZONE" },
> 		{ SLAB_POISON, "POISON" },
> 		{ SLAB_HWCACHE_ALIGN, "HWCACHE_ALIGN" },
> 		{ SLAB_CACHE_DMA, "CACHE_DMA" },
> 		{ SLAB_CACHE_DMA32, "CACHE_DMA32" },
> 		{ SLAB_STORE_USER, "STORE_USER" },
> 		{ SLAB_PANIC, "PANIC" }), ... )
> 
> 
> And you get the flag output looking nicely, and all the processing is done
> on the reader path.
> 
> That's if you find it useful at all.

Thank you for the explanation!
Yes, we can do it, however I really doubt that any other slab flags are of interest to anyone.

Btw, in this form the slab flags array causes sparse warnings,
because the SLAB_* flags are defined as the bitwise slab_flags_t type.
They should be cast to unsigned long, similarly to the gfp_t flags.
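
For example, mirroring how gfp_flags is already stored (an untested sketch;
the SLAB_* constants in the __print_flags() table may need the same cast):

	__field(	unsigned long,	sflags	)
	...
	/* store as unsigned long, like gfp_flags, to keep sparse quiet */
	__entry->sflags = s ? (__force unsigned long)s->flags : 0;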

Thank you,
	Vasily Averin
Vlastimil Babka May 23, 2022, 1:12 p.m. UTC | #8
On 5/21/22 20:36, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
> 
> This patch adds boolean "accounted" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> Acked-by: Shakeel Butt <shakeelb@google.com>
> ---
> v4:
>  1) replaced in patch descripion: "accounted" instead of "allocated"
>  2) added "Acked-by" from Shakeel,
>  3) re-addressed to akpm@
> 
> v3:
>  1) rework kmem_cache_alloc* tracepoints once again,
>     added struct kmem_cache argument into existing templates,
> 	thanks to Matthew Wilcox
>  2) updated according trace_* calls
>  3) added boolean "allocated" entry into trace output,
> 	thanks to Roman
>  4) updated patch subject and description
> 
> v2:
>  1) handle kmem_cache_alloc_node(), thanks to Shakeel
>  2) rework kmem_cache_alloc* tracepoints to use cachep instead
>     of current cachep->*size parameters.
>     NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
>         and therefore it was replaced by kmalloc_node.
> ---
>  include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
>  mm/slab.c                   | 10 +++++-----
>  mm/slab_common.c            |  9 ++++-----
>  mm/slob.c                   |  8 ++++----
>  mm/slub.c                   | 20 +++++++++----------
>  5 files changed, 47 insertions(+), 38 deletions(-)

I'd prefer the slab tree, given the files it touches and the expected conflict
with Hyeonggon Yoo's v3 of the kmalloc unification series. BTW, the suggested
split of kmalloc/kmem_cache_alloc tracepoints [1] will be useful if
implemented, as we won't have to pass NULL kmem_cache pointers from some of
the kmalloc callers.
I would add it there after 5.19-rc1 to avoid conflicts with the kmem.h changes in
mm-stable, which should be part of mainline before rc1.

Thanks!

[1] https://lore.kernel.org/all/bbb97e3c-e597-dd6e-e213-55bc1779d901@suse.cz/

> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> index 71c141804222..5bfeb6f276f1 100644
> --- a/include/trace/events/kmem.h
> +++ b/include/trace/events/kmem.h
> @@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  
>  	TP_PROTO(unsigned long call_site,
>  		 const void *ptr,
> +		 struct kmem_cache *s,
>  		 size_t bytes_req,
>  		 size_t bytes_alloc,
>  		 gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
>  
>  	TP_STRUCT__entry(
>  		__field(	unsigned long,	call_site	)
> @@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  		__field(	size_t,		bytes_req	)
>  		__field(	size_t,		bytes_alloc	)
>  		__field(	unsigned long,	gfp_flags	)
> +		__field(	bool,		accounted	)
>  	),
>  
>  	TP_fast_assign(
> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  		__entry->bytes_req	= bytes_req;
>  		__entry->bytes_alloc	= bytes_alloc;
>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
> +					  (s && s->flags & SLAB_ACCOUNT);
>  	),
>  
> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
>  		(void *)__entry->call_site,
>  		__entry->ptr,
>  		__entry->bytes_req,
>  		__entry->bytes_alloc,
> -		show_gfp_flags(__entry->gfp_flags))
> +		show_gfp_flags(__entry->gfp_flags),
> +		__entry->accounted ? "true" : "false")
>  );
>  
>  DEFINE_EVENT(kmem_alloc, kmalloc,
>  
> -	TP_PROTO(unsigned long call_site, const void *ptr,
> +	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
>  		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
>  );
>  
>  DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
>  
> -	TP_PROTO(unsigned long call_site, const void *ptr,
> +	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
>  		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
>  );
>  
>  DECLARE_EVENT_CLASS(kmem_alloc_node,
>  
>  	TP_PROTO(unsigned long call_site,
>  		 const void *ptr,
> +		 struct kmem_cache *s,
>  		 size_t bytes_req,
>  		 size_t bytes_alloc,
>  		 gfp_t gfp_flags,
>  		 int node),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),
>  
>  	TP_STRUCT__entry(
>  		__field(	unsigned long,	call_site	)
> @@ -77,6 +83,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
>  		__field(	size_t,		bytes_alloc	)
>  		__field(	unsigned long,	gfp_flags	)
>  		__field(	int,		node		)
> +		__field(	bool,		accounted	)
>  	),
>  
>  	TP_fast_assign(
> @@ -86,33 +93,36 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
>  		__entry->bytes_alloc	= bytes_alloc;
>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
>  		__entry->node		= node;
> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
> +					  (s && s->flags & SLAB_ACCOUNT);
>  	),
>  
> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
>  		(void *)__entry->call_site,
>  		__entry->ptr,
>  		__entry->bytes_req,
>  		__entry->bytes_alloc,
>  		show_gfp_flags(__entry->gfp_flags),
> -		__entry->node)
> +		__entry->node,
> +		__entry->accounted ? "true" : "false")
>  );
>  
>  DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
>  
>  	TP_PROTO(unsigned long call_site, const void *ptr,
> -		 size_t bytes_req, size_t bytes_alloc,
> +		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
>  		 gfp_t gfp_flags, int node),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
>  );
>  
>  DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
>  
>  	TP_PROTO(unsigned long call_site, const void *ptr,
> -		 size_t bytes_req, size_t bytes_alloc,
> +		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
>  		 gfp_t gfp_flags, int node),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
>  );
>  
>  TRACE_EVENT(kfree,
> diff --git a/mm/slab.c b/mm/slab.c
> index 0edb474edef1..e5802445c7d6 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3492,7 +3492,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
>  {
>  	void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
>  
> -	trace_kmem_cache_alloc(_RET_IP_, ret,
> +	trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
>  			       cachep->object_size, cachep->size, flags);
>  
>  	return ret;
> @@ -3581,7 +3581,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
>  	ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);
>  
>  	ret = kasan_kmalloc(cachep, ret, size, flags);
> -	trace_kmalloc(_RET_IP_, ret,
> +	trace_kmalloc(_RET_IP_, ret, cachep,
>  		      size, cachep->size, flags);
>  	return ret;
>  }
> @@ -3606,7 +3606,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
>  {
>  	void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
>  
> -	trace_kmem_cache_alloc_node(_RET_IP_, ret,
> +	trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
>  				    cachep->object_size, cachep->size,
>  				    flags, nodeid);
>  
> @@ -3625,7 +3625,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
>  	ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
>  
>  	ret = kasan_kmalloc(cachep, ret, size, flags);
> -	trace_kmalloc_node(_RET_IP_, ret,
> +	trace_kmalloc_node(_RET_IP_, ret, cachep,
>  			   size, cachep->size,
>  			   flags, nodeid);
>  	return ret;
> @@ -3708,7 +3708,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
>  	ret = slab_alloc(cachep, NULL, flags, size, caller);
>  
>  	ret = kasan_kmalloc(cachep, ret, size, flags);
> -	trace_kmalloc(caller, ret,
> +	trace_kmalloc(caller, ret, cachep,
>  		      size, cachep->size, flags);
>  
>  	return ret;
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 2b3206a2c3b5..a345e8600e00 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -25,13 +25,12 @@
>  #include <asm/page.h>
>  #include <linux/memcontrol.h>
>  
> -#define CREATE_TRACE_POINTS
> -#include <trace/events/kmem.h>
> -
>  #include "internal.h"
> -
>  #include "slab.h"
>  
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/kmem.h>
> +
>  enum slab_state slab_state;
>  LIST_HEAD(slab_caches);
>  DEFINE_MUTEX(slab_mutex);
> @@ -967,7 +966,7 @@ EXPORT_SYMBOL(kmalloc_order);
>  void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
>  {
>  	void *ret = kmalloc_order(size, flags, order);
> -	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
> +	trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
>  	return ret;
>  }
>  EXPORT_SYMBOL(kmalloc_order_trace);
> diff --git a/mm/slob.c b/mm/slob.c
> index 40ea6e2d4ccd..dbefa0da0dfc 100644
> --- a/mm/slob.c
> +++ b/mm/slob.c
> @@ -505,7 +505,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
>  		*m = size;
>  		ret = (void *)m + minalign;
>  
> -		trace_kmalloc_node(caller, ret,
> +		trace_kmalloc_node(caller, ret, NULL,
>  				   size, size + minalign, gfp, node);
>  	} else {
>  		unsigned int order = get_order(size);
> @@ -514,7 +514,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
>  			gfp |= __GFP_COMP;
>  		ret = slob_new_pages(gfp, order, node);
>  
> -		trace_kmalloc_node(caller, ret,
> +		trace_kmalloc_node(caller, ret, NULL,
>  				   size, PAGE_SIZE << order, gfp, node);
>  	}
>  
> @@ -610,12 +610,12 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
>  
>  	if (c->size < PAGE_SIZE) {
>  		b = slob_alloc(c->size, flags, c->align, node, 0);
> -		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
> +		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
>  					    SLOB_UNITS(c->size) * SLOB_UNIT,
>  					    flags, node);
>  	} else {
>  		b = slob_new_pages(flags, get_order(c->size), node);
> -		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
> +		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
>  					    PAGE_SIZE << get_order(c->size),
>  					    flags, node);
>  	}
> diff --git a/mm/slub.c b/mm/slub.c
> index ed5c2c03a47a..9b10591646dd 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3231,7 +3231,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
>  {
>  	void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
>  
> -	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
> +	trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
>  				s->size, gfpflags);
>  
>  	return ret;
> @@ -3254,7 +3254,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru);
>  void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
>  {
>  	void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
> -	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
> +	trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
>  	ret = kasan_kmalloc(s, ret, size, gfpflags);
>  	return ret;
>  }
> @@ -3266,7 +3266,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
>  {
>  	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
>  
> -	trace_kmem_cache_alloc_node(_RET_IP_, ret,
> +	trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
>  				    s->object_size, s->size, gfpflags, node);
>  
>  	return ret;
> @@ -3280,7 +3280,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
>  {
>  	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
>  
> -	trace_kmalloc_node(_RET_IP_, ret,
> +	trace_kmalloc_node(_RET_IP_, ret, s,
>  			   size, s->size, gfpflags, node);
>  
>  	ret = kasan_kmalloc(s, ret, size, gfpflags);
> @@ -4409,7 +4409,7 @@ void *__kmalloc(size_t size, gfp_t flags)
>  
>  	ret = slab_alloc(s, NULL, flags, _RET_IP_, size);
>  
> -	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
> +	trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);
>  
>  	ret = kasan_kmalloc(s, ret, size, flags);
>  
> @@ -4443,7 +4443,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
>  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
>  		ret = kmalloc_large_node(size, flags, node);
>  
> -		trace_kmalloc_node(_RET_IP_, ret,
> +		trace_kmalloc_node(_RET_IP_, ret, NULL,
>  				   size, PAGE_SIZE << get_order(size),
>  				   flags, node);
>  
> @@ -4457,7 +4457,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
>  
>  	ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);
>  
> -	trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
> +	trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);
>  
>  	ret = kasan_kmalloc(s, ret, size, flags);
>  
> @@ -4916,7 +4916,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
>  	ret = slab_alloc(s, NULL, gfpflags, caller, size);
>  
>  	/* Honor the call site pointer we received. */
> -	trace_kmalloc(caller, ret, size, s->size, gfpflags);
> +	trace_kmalloc(caller, ret, s, size, s->size, gfpflags);
>  
>  	return ret;
>  }
> @@ -4932,7 +4932,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
>  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
>  		ret = kmalloc_large_node(size, gfpflags, node);
>  
> -		trace_kmalloc_node(caller, ret,
> +		trace_kmalloc_node(caller, ret, NULL,
>  				   size, PAGE_SIZE << get_order(size),
>  				   gfpflags, node);
>  
> @@ -4947,7 +4947,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
>  	ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);
>  
>  	/* Honor the call site pointer we received. */
> -	trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
> +	trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);
>  
>  	return ret;
>  }
Roman Gushchin May 25, 2022, 1:34 a.m. UTC | #9
On Sat, May 21, 2022 at 09:36:54PM +0300, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
> 
> This patch adds boolean "accounted" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> Acked-by: Shakeel Butt <shakeelb@google.com>
> ---
> v4:
>  1) replaced in patch descripion: "accounted" instead of "allocated"
>  2) added "Acked-by" from Shakeel,
>  3) re-addressed to akpm@
> 
> v3:
>  1) rework kmem_cache_alloc* tracepoints once again,
>     added struct kmem_cache argument into existing templates,
> 	thanks to Matthew Wilcox
>  2) updated according trace_* calls
>  3) added boolean "allocated" entry into trace output,
> 	thanks to Roman
>  4) updated patch subject and description
> 
> v2:
>  1) handle kmem_cache_alloc_node(), thanks to Shakeel
>  2) rework kmem_cache_alloc* tracepoints to use cachep instead
>     of current cachep->*size parameters.
>     NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
>         and therefore it was replaced by kmalloc_node.
> ---
>  include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
>  mm/slab.c                   | 10 +++++-----
>  mm/slab_common.c            |  9 ++++-----
>  mm/slob.c                   |  8 ++++----
>  mm/slub.c                   | 20 +++++++++----------
>  5 files changed, 47 insertions(+), 38 deletions(-)

LGTM

Acked-by: Roman Gushchin <roman.gushchin@linux.dev>

Thanks!
Hyeonggon Yoo May 25, 2022, 7:33 a.m. UTC | #10
On Sat, May 21, 2022 at 09:36:54PM +0300, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
> 
> This patch adds boolean "accounted" entry into trace output,
> and set it to 'true' for calls used __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> Acked-by: Shakeel Butt <shakeelb@google.com>
> ---


Maybe I worried a bit too much...

I failed to apply it on top of slab/for-next and v5.18. What tree is it based on?

> v4:
>  1) replaced in patch descripion: "accounted" instead of "allocated"
>  2) added "Acked-by" from Shakeel,
>  3) re-addressed to akpm@
> 
> v3:
>  1) rework kmem_cache_alloc* tracepoints once again,
>     added struct kmem_cache argument into existing templates,
> 	thanks to Matthew Wilcox
>  2) updated according trace_* calls
>  3) added boolean "allocated" entry into trace output,
> 	thanks to Roman
>  4) updated patch subject and description
> 
> v2:
>  1) handle kmem_cache_alloc_node(), thanks to Shakeel
>  2) rework kmem_cache_alloc* tracepoints to use cachep instead
>     of current cachep->*size parameters.
>     NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
>         and therefore it was replaced by kmalloc_node.
> ---
>  include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
>  mm/slab.c                   | 10 +++++-----
>  mm/slab_common.c            |  9 ++++-----
>  mm/slob.c                   |  8 ++++----
>  mm/slub.c                   | 20 +++++++++----------
>  5 files changed, 47 insertions(+), 38 deletions(-)
Vasily Averin May 25, 2022, 8:24 a.m. UTC | #11
On 5/25/22 10:33, Hyeonggon Yoo wrote:
> On Sat, May 21, 2022 at 09:36:54PM +0300, Vasily Averin wrote:
>> Slab caches marked with SLAB_ACCOUNT force accounting for every
>> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
>> Unfortunately, at the moment this flag is not visible in ftrace output,
>> and this makes it difficult to analyze the accounted allocations.
>>
>> This patch adds boolean "accounted" entry into trace output,
>> and set it to 'true' for calls used __GFP_ACCOUNT flag and
>> for allocations from caches marked with SLAB_ACCOUNT.
>>
>> Signed-off-by: Vasily Averin <vvs@openvz.org>
>> Acked-by: Shakeel Butt <shakeelb@google.com>
>> ---
> 
> 
> Maybe I worried a bit much...
> 
> I failed to apply it on slab/for-next and v5.18. What tree is it based on?

Its context depends on changes from my other patches, which are already included in mm-unstable.

>> v4:
>>  1) replaced in patch descripion: "accounted" instead of "allocated"
>>  2) added "Acked-by" from Shakeel,
>>  3) re-addressed to akpm@
>>
>> v3:
>>  1) rework kmem_cache_alloc* tracepoints once again,
>>     added struct kmem_cache argument into existing templates,
>> 	thanks to Matthew Wilcox
>>  2) updated according trace_* calls
>>  3) added boolean "allocated" entry into trace output,
>> 	thanks to Roman
>>  4) updated patch subject and description
>>
>> v2:
>>  1) handle kmem_cache_alloc_node(), thanks to Shakeel
>>  2) rework kmem_cache_alloc* tracepoints to use cachep instead
>>     of current cachep->*size parameters.
>>     NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
>>         and therefore it was replaced by kmalloc_node.
>> ---
>>  include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
>>  mm/slab.c                   | 10 +++++-----
>>  mm/slab_common.c            |  9 ++++-----
>>  mm/slob.c                   |  8 ++++----
>>  mm/slub.c                   | 20 +++++++++----------
>>  5 files changed, 47 insertions(+), 38 deletions(-)

Patch

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 71c141804222..5bfeb6f276f1 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -13,11 +13,12 @@  DECLARE_EVENT_CLASS(kmem_alloc,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
+		 struct kmem_cache *s,
 		 size_t bytes_req,
 		 size_t bytes_alloc,
 		 gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	call_site	)
@@ -25,6 +26,7 @@  DECLARE_EVENT_CLASS(kmem_alloc,
 		__field(	size_t,		bytes_req	)
 		__field(	size_t,		bytes_alloc	)
 		__field(	unsigned long,	gfp_flags	)
+		__field(	bool,		accounted	)
 	),
 
 	TP_fast_assign(
@@ -33,42 +35,46 @@  DECLARE_EVENT_CLASS(kmem_alloc,
 		__entry->bytes_req	= bytes_req;
 		__entry->bytes_alloc	= bytes_alloc;
 		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
+		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
+					  (s && s->flags & SLAB_ACCOUNT);
 	),
 
-	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
 		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		show_gfp_flags(__entry->gfp_flags))
+		show_gfp_flags(__entry->gfp_flags),
+		__entry->accounted ? "true" : "false")
 );
 
 DEFINE_EVENT(kmem_alloc, kmalloc,
 
-	TP_PROTO(unsigned long call_site, const void *ptr,
+	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
 		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
 );
 
 DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
 
-	TP_PROTO(unsigned long call_site, const void *ptr,
+	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
 		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
 );
 
 DECLARE_EVENT_CLASS(kmem_alloc_node,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
+		 struct kmem_cache *s,
 		 size_t bytes_req,
 		 size_t bytes_alloc,
 		 gfp_t gfp_flags,
 		 int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	call_site	)
@@ -77,6 +83,7 @@  DECLARE_EVENT_CLASS(kmem_alloc_node,
 		__field(	size_t,		bytes_alloc	)
 		__field(	unsigned long,	gfp_flags	)
 		__field(	int,		node		)
+		__field(	bool,		accounted	)
 	),
 
 	TP_fast_assign(
@@ -86,33 +93,36 @@  DECLARE_EVENT_CLASS(kmem_alloc_node,
 		__entry->bytes_alloc	= bytes_alloc;
 		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
 		__entry->node		= node;
+		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
+					  (s && s->flags & SLAB_ACCOUNT);
 	),
 
-	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
 		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
 		show_gfp_flags(__entry->gfp_flags),
-		__entry->node)
+		__entry->node,
+		__entry->accounted ? "true" : "false")
 );
 
 DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
 
 	TP_PROTO(unsigned long call_site, const void *ptr,
-		 size_t bytes_req, size_t bytes_alloc,
+		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
 		 gfp_t gfp_flags, int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
 );
 
 DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
 
 	TP_PROTO(unsigned long call_site, const void *ptr,
-		 size_t bytes_req, size_t bytes_alloc,
+		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
 		 gfp_t gfp_flags, int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
 );
 
 TRACE_EVENT(kfree,
diff --git a/mm/slab.c b/mm/slab.c
index 0edb474edef1..e5802445c7d6 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3492,7 +3492,7 @@  void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
 {
 	void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
 
-	trace_kmem_cache_alloc(_RET_IP_, ret,
+	trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
 			       cachep->object_size, cachep->size, flags);
 
 	return ret;
@@ -3581,7 +3581,7 @@  kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
 	ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc(_RET_IP_, ret,
+	trace_kmalloc(_RET_IP_, ret, cachep,
 		      size, cachep->size, flags);
 	return ret;
 }
@@ -3606,7 +3606,7 @@  void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
 	void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
 
-	trace_kmem_cache_alloc_node(_RET_IP_, ret,
+	trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
 				    cachep->object_size, cachep->size,
 				    flags, nodeid);
 
@@ -3625,7 +3625,7 @@  void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
 	ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc_node(_RET_IP_, ret,
+	trace_kmalloc_node(_RET_IP_, ret, cachep,
 			   size, cachep->size,
 			   flags, nodeid);
 	return ret;
@@ -3708,7 +3708,7 @@  static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
 	ret = slab_alloc(cachep, NULL, flags, size, caller);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc(caller, ret,
+	trace_kmalloc(caller, ret, cachep,
 		      size, cachep->size, flags);
 
 	return ret;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2b3206a2c3b5..a345e8600e00 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -25,13 +25,12 @@ 
 #include <asm/page.h>
 #include <linux/memcontrol.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
 #include "internal.h"
-
 #include "slab.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
 enum slab_state slab_state;
 LIST_HEAD(slab_caches);
 DEFINE_MUTEX(slab_mutex);
@@ -967,7 +966,7 @@  EXPORT_SYMBOL(kmalloc_order);
 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
 {
 	void *ret = kmalloc_order(size, flags, order);
-	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
+	trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
 	return ret;
 }
 EXPORT_SYMBOL(kmalloc_order_trace);
diff --git a/mm/slob.c b/mm/slob.c
index 40ea6e2d4ccd..dbefa0da0dfc 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -505,7 +505,7 @@  __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 		*m = size;
 		ret = (void *)m + minalign;
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, size + minalign, gfp, node);
 	} else {
 		unsigned int order = get_order(size);
@@ -514,7 +514,7 @@  __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 			gfp |= __GFP_COMP;
 		ret = slob_new_pages(gfp, order, node);
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, PAGE_SIZE << order, gfp, node);
 	}
 
@@ -610,12 +610,12 @@  static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 
 	if (c->size < PAGE_SIZE) {
 		b = slob_alloc(c->size, flags, c->align, node, 0);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
 					    SLOB_UNITS(c->size) * SLOB_UNIT,
 					    flags, node);
 	} else {
 		b = slob_new_pages(flags, get_order(c->size), node);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
 					    PAGE_SIZE << get_order(c->size),
 					    flags, node);
 	}
diff --git a/mm/slub.c b/mm/slub.c
index ed5c2c03a47a..9b10591646dd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3231,7 +3231,7 @@  void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
 {
 	void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
 
-	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
+	trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
 				s->size, gfpflags);
 
 	return ret;
@@ -3254,7 +3254,7 @@  EXPORT_SYMBOL(kmem_cache_alloc_lru);
 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
 {
 	void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
-	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
+	trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
 	return ret;
 }
@@ -3266,7 +3266,7 @@  void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
 	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
 
-	trace_kmem_cache_alloc_node(_RET_IP_, ret,
+	trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
 				    s->object_size, s->size, gfpflags, node);
 
 	return ret;
@@ -3280,7 +3280,7 @@  void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
 {
 	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
 
-	trace_kmalloc_node(_RET_IP_, ret,
+	trace_kmalloc_node(_RET_IP_, ret, s,
 			   size, s->size, gfpflags, node);
 
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
@@ -4409,7 +4409,7 @@  void *__kmalloc(size_t size, gfp_t flags)
 
 	ret = slab_alloc(s, NULL, flags, _RET_IP_, size);
 
-	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
+	trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);
 
 	ret = kasan_kmalloc(s, ret, size, flags);
 
@@ -4443,7 +4443,7 @@  void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
 		ret = kmalloc_large_node(size, flags, node);
 
-		trace_kmalloc_node(_RET_IP_, ret,
+		trace_kmalloc_node(_RET_IP_, ret, NULL,
 				   size, PAGE_SIZE << get_order(size),
 				   flags, node);
 
@@ -4457,7 +4457,7 @@  void *__kmalloc_node(size_t size, gfp_t flags, int node)
 
 	ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);
 
-	trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
+	trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);
 
 	ret = kasan_kmalloc(s, ret, size, flags);
 
@@ -4916,7 +4916,7 @@  void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 	ret = slab_alloc(s, NULL, gfpflags, caller, size);
 
 	/* Honor the call site pointer we received. */
-	trace_kmalloc(caller, ret, size, s->size, gfpflags);
+	trace_kmalloc(caller, ret, s, size, s->size, gfpflags);
 
 	return ret;
 }
@@ -4932,7 +4932,7 @@  void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
 		ret = kmalloc_large_node(size, gfpflags, node);
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, PAGE_SIZE << get_order(size),
 				   gfpflags, node);
 
@@ -4947,7 +4947,7 @@  void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);
 
 	/* Honor the call site pointer we received. */
-	trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
+	trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);
 
 	return ret;
 }