
[v3] tracing: add 'accounted' entry into output of allocation tracepoints

Message ID f6625cd8-90f9-6d48-50f6-7bb052bf479f@openvz.org (mailing list archive)
State New

Commit Message

Vasily Averin May 18, 2022, 6:24 a.m. UTC
Slab caches marked with SLAB_ACCOUNT force accounting for every
allocation from such caches even if the __GFP_ACCOUNT flag is not passed.
Unfortunately, at the moment this flag is not visible in ftrace output,
and this makes it difficult to analyze the accounted allocations.

This patch adds a boolean "allocated" entry into the trace output,
and sets it to 'true' for calls that use the __GFP_ACCOUNT flag and
for allocations from caches marked with SLAB_ACCOUNT.

Signed-off-by: Vasily Averin <vvs@openvz.org>
---
v3:
 1) reworked kmem_cache_alloc* tracepoints once again:
    added a struct kmem_cache argument into the existing templates,
    thanks to Matthew Wilcox
 2) updated the corresponding trace_* calls
 3) added a boolean "allocated" entry into the trace output,
    thanks to Roman
 4) updated the patch subject and description

v2:
 1) handle kmem_cache_alloc_node(), thanks to Shakeel
 2) rework kmem_cache_alloc* tracepoints to use cachep instead
    of current cachep->*size parameters.
    NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
        and therefore it was replaced by kmalloc_node.
---
Now kmem tracing output looks like this:

kmem_cache_alloc:     (getname_flags.part.0+0x2c) call_site=getname_flags.part.0+0x2c ptr=0xffff8fff022e9000 bytes_req=4096 bytes_alloc=4096 gfp_flags=GFP_KERNEL accounted=false
kmalloc:              (alloc_bprm+0x32) call_site=alloc_bprm+0x32 ptr=0xffff8fff2b408a00 bytes_req=416 bytes_alloc=512 gfp_flags=GFP_KERNEL|__GFP_ZERO accounted=false
kmem_cache_alloc:     (mm_alloc+0x16) call_site=mm_alloc+0x16 ptr=0xffff8fff0894d500 bytes_req=1048 bytes_alloc=1088 gfp_flags=GFP_KERNEL accounted=true
mm_page_alloc:        page=0xffffffffa4ab8d42 pfn=0x12ad72 order=1 migratetype=0 gfp_flags=GFP_USER|__GFP_ZERO|__GFP_ACCOUNT
kmem_cache_alloc:     (vm_area_alloc+0x1a) call_site=vm_area_alloc+0x1a ptr=0xffff8fff2af27000 bytes_req=200 bytes_alloc=200 gfp_flags=GFP_KERNEL accounted=true
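
For reference, the accounted=true lines above come from the two trigger
paths: a cache created with SLAB_ACCOUNT (the mm_struct cache is one such)
or an explicit __GFP_ACCOUNT in the call. A minimal sketch of both paths,
with hypothetical cache and struct names:

#include <linux/slab.h>

struct foo { int x; };	/* hypothetical object */

static void accounted_examples(void)
{
	struct kmem_cache *c;
	void *a, *b, *d;

	/* 1) cache marked SLAB_ACCOUNT: every allocation from it is
	 *    accounted even without __GFP_ACCOUNT, so the tracepoint
	 *    reports accounted=true (as for mm_alloc above) */
	c = kmem_cache_create("foo_cache", sizeof(struct foo), 0,
			      SLAB_ACCOUNT, NULL);
	a = kmem_cache_alloc(c, GFP_KERNEL);		/* accounted=true */

	/* 2) the caller passes __GFP_ACCOUNT explicitly */
	b = kmalloc(64, GFP_KERNEL | __GFP_ACCOUNT);	/* accounted=true */

	/* neither a marked cache nor the flag: accounted=false */
	d = kmalloc(64, GFP_KERNEL);

	kfree(d);
	kfree(b);
	kmem_cache_free(c, a);
	kmem_cache_destroy(c);
}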
---
 include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
 mm/slab.c                   | 10 +++++-----
 mm/slab_common.c            |  9 ++++-----
 mm/slob.c                   |  8 ++++----
 mm/slub.c                   | 20 +++++++++----------
 5 files changed, 47 insertions(+), 38 deletions(-)
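
Once the field is present, it can also drive event filtering from
userspace. A hedged sketch (assumes tracefs mounted at
/sys/kernel/tracing and that the bool field accepts a numeric comparison
in the event filter syntax):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* write a value into a tracefs control file */
static int tracefs_write(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		return -1;
	}
	if (write(fd, val, strlen(val)) < 0) {
		perror(path);
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	const char *ev = "/sys/kernel/tracing/events/kmem/kmem_cache_alloc";
	char path[256];

	/* keep only accounted allocations in the trace buffer */
	snprintf(path, sizeof(path), "%s/filter", ev);
	tracefs_write(path, "accounted == 1");

	snprintf(path, sizeof(path), "%s/enable", ev);
	tracefs_write(path, "1");

	return 0;
}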

Comments

Shakeel Butt May 18, 2022, 3:09 p.m. UTC | #1
On Tue, May 17, 2022 at 11:24 PM Vasily Averin <vvs@openvz.org> wrote:
>
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from such caches even if the __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
>
> This patch adds a boolean "allocated" entry into the trace output,
> and sets it to 'true' for calls that use the __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
>
> Signed-off-by: Vasily Averin <vvs@openvz.org>

Acked-by: Shakeel Butt <shakeelb@google.com>
Vasily Averin May 18, 2022, 3:45 p.m. UTC | #2
On 5/18/22 09:24, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from such caches even if the __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
>
> This patch adds a boolean "allocated" entry into the trace output,

not "allocated" but "accounted"

> and sets it to 'true' for calls that use the __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
Steven Rostedt May 18, 2022, 8:04 p.m. UTC | #3
On Wed, 18 May 2022 09:24:51 +0300
Vasily Averin <vvs@openvz.org> wrote:

FYI, the subject should be something like: mm/tracing:

Because "tracing:" is reserved for tracing infrastructure updates.

> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from such caches even if the __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
>
> This patch adds a boolean "allocated" entry into the trace output,
> and sets it to 'true' for calls that use the __GFP_ACCOUNT flag and
> for allocations from caches marked with SLAB_ACCOUNT.
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> ---
> v3:
>  1) reworked kmem_cache_alloc* tracepoints once again:
>     added a struct kmem_cache argument into the existing templates,
>     thanks to Matthew Wilcox
>  2) updated the corresponding trace_* calls
>  3) added a boolean "allocated" entry into the trace output,
>     thanks to Roman
>  4) updated the patch subject and description
> 
> v2:
>  1) handle kmem_cache_alloc_node(), thanks to Shakeel
>  2) rework kmem_cache_alloc* tracepoints to use cachep instead
>     of current cachep->*size parameters.
>     NB: kmem_cache_alloc_node tracepoint in SLOB cannot use cachep,
>         and therefore it was replaced by kmalloc_node.
> ---
> Now kmem tracing output looks like this:
> 
> kmem_cache_alloc:     (getname_flags.part.0+0x2c) call_site=getname_flags.part.0+0x2c ptr=0xffff8fff022e9000 bytes_req=4096 bytes_alloc=4096 gfp_flags=GFP_KERNEL accounted=false
> kmalloc:              (alloc_bprm+0x32) call_site=alloc_bprm+0x32 ptr=0xffff8fff2b408a00 bytes_req=416 bytes_alloc=512 gfp_flags=GFP_KERNEL|__GFP_ZERO accounted=false
> kmem_cache_alloc:     (mm_alloc+0x16) call_site=mm_alloc+0x16 ptr=0xffff8fff0894d500 bytes_req=1048 bytes_alloc=1088 gfp_flags=GFP_KERNEL accounted=true
> mm_page_alloc:        page=0xffffffffa4ab8d42 pfn=0x12ad72 order=1 migratetype=0 gfp_flags=GFP_USER|__GFP_ZERO|__GFP_ACCOUNT
> kmem_cache_alloc:     (vm_area_alloc+0x1a) call_site=vm_area_alloc+0x1a ptr=0xffff8fff2af27000 bytes_req=200 bytes_alloc=200 gfp_flags=GFP_KERNEL accounted=true
> ---
>  include/trace/events/kmem.h | 38 +++++++++++++++++++++++--------------
>  mm/slab.c                   | 10 +++++-----
>  mm/slab_common.c            |  9 ++++-----
>  mm/slob.c                   |  8 ++++----
>  mm/slub.c                   | 20 +++++++++----------
>  5 files changed, 47 insertions(+), 38 deletions(-)
> 
> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> index 71c141804222..5bfeb6f276f1 100644
> --- a/include/trace/events/kmem.h
> +++ b/include/trace/events/kmem.h
> @@ -13,11 +13,12 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  
>  	TP_PROTO(unsigned long call_site,
>  		 const void *ptr,
> +		 struct kmem_cache *s,
>  		 size_t bytes_req,
>  		 size_t bytes_alloc,
>  		 gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
> +	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
>  
>  	TP_STRUCT__entry(
>  		__field(	unsigned long,	call_site	)
> @@ -25,6 +26,7 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  		__field(	size_t,		bytes_req	)
>  		__field(	size_t,		bytes_alloc	)
>  		__field(	unsigned long,	gfp_flags	)
> +		__field(	bool,		accounted	)
>  	),
>  
>  	TP_fast_assign(
> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  		__entry->bytes_req	= bytes_req;
>  		__entry->bytes_alloc	= bytes_alloc;
>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
> +					  (s && s->flags & SLAB_ACCOUNT);

Now you could make this even faster in the fast path and save just the
s->flags.

	__entry->sflags = s ? s->flags : 0;

>  	),
>  
> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
>  		(void *)__entry->call_site,
>  		__entry->ptr,
>  		__entry->bytes_req,
>  		__entry->bytes_alloc,
> -		show_gfp_flags(__entry->gfp_flags))
> +		show_gfp_flags(__entry->gfp_flags),
> +		__entry->accounted ? "true" : "false")

And then have: "accounted=%s":

		(__entry->gfp_flags & __GFP_ACCOUNT) ||
		(__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"


-- Steve

>  );
>
Vasily Averin May 19, 2022, 11:35 a.m. UTC | #4
On 5/18/22 23:04, Steven Rostedt wrote:
> On Wed, 18 May 2022 09:24:51 +0300
> Vasily Averin <vvs@openvz.org> wrote:
> 
> FYI, the subject should be something like: mm/tracing:
> Because "tracing:" is reserved for tracing infrastructure updates.

Thank you for noticing.

>> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>>  		__entry->bytes_req	= bytes_req;
>>  		__entry->bytes_alloc	= bytes_alloc;
>>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
>> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
>> +					  (s && s->flags & SLAB_ACCOUNT);
> 
> Now you could make this even faster in the fast path and save just the
> s->flags.
> 
> 	__entry->sflags = s ? s->flags : 0;
> 
>>  	),
>>  
>> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
>> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
>>  		(void *)__entry->call_site,
>>  		__entry->ptr,
>>  		__entry->bytes_req,
>>  		__entry->bytes_alloc,
>> -		show_gfp_flags(__entry->gfp_flags))
>> +		show_gfp_flags(__entry->gfp_flags),
>> +		__entry->accounted ? "true" : "false")
> 
> And then have: "accounted=%s":
> 
> 		(__entry->gfp_flags & __GFP_ACCOUNT) ||
> 		(__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"

Unfortunately this brings back the sparse warnings about bitwise gfp_t and slab_flags_t casts.
Could you please explain why your variant is faster?

Thank you,
	Vasily Averin
Steven Rostedt May 19, 2022, 2:03 p.m. UTC | #5
On Thu, 19 May 2022 14:35:46 +0300
Vasily Averin <vvs@openvz.org> wrote:

> >> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
> >>  		__entry->bytes_req	= bytes_req;
> >>  		__entry->bytes_alloc	= bytes_alloc;
> >>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
> >> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
> >> +					  (s && s->flags & SLAB_ACCOUNT);  
> > 
> > Now you could make this even faster in the fast path and save just the
> > s->flags.
> > 
> > 	__entry->sflags = s ? s->flags : 0;
> >   
> >>  	),
> >>  
> >> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
> >> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
> >>  		(void *)__entry->call_site,
> >>  		__entry->ptr,
> >>  		__entry->bytes_req,
> >>  		__entry->bytes_alloc,
> >> -		show_gfp_flags(__entry->gfp_flags))
> >> +		show_gfp_flags(__entry->gfp_flags),
> >> +		__entry->accounted ? "true" : "false")  
> > 
> > And then have: "accounted=%s":
> > 
> > 		(__entry->gfp_flags & __GFP_ACCOUNT) ||
> > 		(__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"  
> 
> Unfortunately this brings back the sparse warnings about bitwise gfp_t and slab_flags_t casts.
> Could you please explain why your variant is faster?

Micro-optimization, grant you, but it is faster because it moves some of
the logic into the slow path (the read side), and takes it out of the fast
path (the write side).

The idea of tracing is to squeeze out every cycle we can to keep the
tracing overhead down.
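
Annotated against the two variants, the cost shift looks like this
(sketch):

/* v3 as posted: the branch and the OR execute on every allocation
 * (write side / fast path) */
__entry->accounted = (gfp_flags & __GFP_ACCOUNT) ||
		     (s && s->flags & SLAB_ACCOUNT);

/* suggested: the write side shrinks to a single conditional store ... */
__entry->sflags = s ? s->flags : 0;
/* ... and the boolean test moves to TP_printk, which runs only when
 * the trace buffer is actually read (read side / slow path) */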

But it's really up to you if you need that. I'm not going to let this be a
blocker. This is more of an FYI than anything else.

-- Steve
Vasily Averin May 19, 2022, 4:29 p.m. UTC | #6
On 5/19/22 17:03, Steven Rostedt wrote:
> On Thu, 19 May 2022 14:35:46 +0300
> Vasily Averin <vvs@openvz.org> wrote:
> 
>>>> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>>>>  		__entry->bytes_req	= bytes_req;
>>>>  		__entry->bytes_alloc	= bytes_alloc;
>>>>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
>>>> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
>>>> +					  (s && s->flags & SLAB_ACCOUNT);  
>>>
>>> Now you could make this even faster in the fast path and save just the
>>> s->flags.
>>>
>>> 	__entry->sflags = s ? s->flags : 0;
>>>   
>>>>  	),
>>>>  
>>>> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
>>>> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
>>>>  		(void *)__entry->call_site,
>>>>  		__entry->ptr,
>>>>  		__entry->bytes_req,
>>>>  		__entry->bytes_alloc,
>>>> -		show_gfp_flags(__entry->gfp_flags))
>>>> +		show_gfp_flags(__entry->gfp_flags),
>>>> +		__entry->accounted ? "true" : "false")  
>>>
>>> And then have: "accounted=%s":
>>>
>>> 		(__entry->gfp_flags & __GFP_ACCOUNT) ||
>>> 		(__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"  
>>
>> Unfortunately this brings back the sparse warnings about bitwise gfp_t and slab_flags_t casts.
>> Could you please explain why your variant is faster?
> 
> Micro-optimization, grant you, but it is faster because it moves some of
> the logic into the slow path (the read side), and takes it out of the fast
> path (the write side).
> 
> The idea of tracing is to squeeze out every cycle we can to keep the
> tracing overhead down.
> 
> But it's really up to you if you need that. I'm not going to let this be a
> blocker. This is more of an FYI than anything else.

Frankly speaking, I vote for performance with both hands.
However I would still like to avoid new sparse warnings.
Christoph Hellwig just recently taught me, "never add '__force' before
thinking hard about them", but in this case I would need to use it three times.

I found that bitwise typecasts can be avoided by using translation unions. 

What do you think about the following trick?

diff --git a/mm/slab.h b/mm/slab.h
index 95eb34174c1b..f676612ca40f 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -882,4 +882,14 @@ void __check_heap_object(const void *ptr, unsigned long n,
 }
 #endif
 
+union gfp_flags_u {
+	unsigned long ulong;
+	gfp_t flags;
+};
+
+union slab_flags_u {
+	unsigned int uint;
+	slab_flags_t sflags;
+};
+
 #endif /* MM_SLAB_H */

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 71c141804222..91632a61e16d 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -13,18 +13,20 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
+		 struct kmem_cache *s,
 		 size_t bytes_req,
 		 size_t bytes_alloc,
 		 gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	call_site	)
 		__field(	const void *,	ptr		)
 		__field(	size_t,		bytes_req	)
 		__field(	size_t,		bytes_alloc	)
-		__field(	unsigned long,	gfp_flags	)
+		__field_struct(	union gfp_flags_u,	gfp	)
+		__field_struct(	union slab_flags_u,	s	)
 	),
 
 	TP_fast_assign(
@@ -32,51 +34,57 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 		__entry->ptr		= ptr;
 		__entry->bytes_req	= bytes_req;
 		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
+		__entry->gfp.flags	= gfp_flags;
+		__entry->s.sflags	= s ? s->flags : 0;
 	),
 
-	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
 		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		show_gfp_flags(__entry->gfp_flags))
+		show_gfp_flags(__entry->gfp.ulong),
+		((__entry->gfp.flags & __GFP_ACCOUNT) ||
+		 (__entry->s.sflags & SLAB_ACCOUNT)) ? "true" : "false")
 );
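
As a standalone illustration (not part of the patch): the union allows a
typed store and a plain-integer read of the same bits, with no __force
cast for sparse to complain about:

/* illustration only: sparse-clean access through the union */
union gfp_flags_u gfp;

gfp.flags = GFP_KERNEL | __GFP_ACCOUNT;	/* typed store, no cast */
pr_info("gfp=%lx accounted=%s\n",
	gfp.ulong,			/* same bits as a plain integer */
	(gfp.flags & __GFP_ACCOUNT) ? "true" : "false");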
 
Thank you,
	Vasily Averin
Steven Rostedt May 19, 2022, 4:32 p.m. UTC | #7
On Thu, 19 May 2022 19:29:36 +0300
Vasily Averin <vvs@openvz.org> wrote:

> Frankly speaking, I vote for performance with both hands.
> However I would still like to avoid new sparse warnings.
> Christoph Hellwig just recently taught me, "never add '__force' before
> thinking hard about them", but in this case I would need to use it three times.
> 
> I found that bitwise typecasts can be avoided by using translation unions. 
> 
> What do you think about the following trick?

It's really up to you memory management folks. Although I may need to
update libtraceevent to handle the union case. That may be a bit tricky.

-- Steve
Vasily Averin May 21, 2022, 6:32 p.m. UTC | #8
On 5/19/22 19:32, Steven Rostedt wrote:
> On Thu, 19 May 2022 19:29:36 +0300
> Vasily Averin <vvs@openvz.org> wrote:
> 
>> Frankly speaking, I vote for performance with both hands.
>> However I would still like to avoid new sparse warnings.
>> Christoph Hellwig just recently taught me, "never add '__force' before
>> thinking hard about them", but in this case I would need to use it three times.
>>
>> I found that bitwise typecasts can be avoided by using translation unions. 
>>
>> What do you think about the following trick?
> 
> It's really up to you memory management folks. Although I may need to
> update libtraceevent to handle the union case. That may be a bit tricky.

In this case, I would like to keep the sub-optimal v3 patch version.
If necessary, we can improve the performance later; for now I will
send v4 with minor patch description changes and ask Andrew Morton
to include it in the mm tree.
Andrew Morton to include it in the mm tree.

Thank you,
	Vasily Averin

Patch

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 71c141804222..5bfeb6f276f1 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -13,11 +13,12 @@  DECLARE_EVENT_CLASS(kmem_alloc,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
+		 struct kmem_cache *s,
 		 size_t bytes_req,
 		 size_t bytes_alloc,
 		 gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	call_site	)
@@ -25,6 +26,7 @@  DECLARE_EVENT_CLASS(kmem_alloc,
 		__field(	size_t,		bytes_req	)
 		__field(	size_t,		bytes_alloc	)
 		__field(	unsigned long,	gfp_flags	)
+		__field(	bool,		accounted	)
 	),
 
 	TP_fast_assign(
@@ -33,42 +35,46 @@  DECLARE_EVENT_CLASS(kmem_alloc,
 		__entry->bytes_req	= bytes_req;
 		__entry->bytes_alloc	= bytes_alloc;
 		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
+		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
+					  (s && s->flags & SLAB_ACCOUNT);
 	),
 
-	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
 		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		show_gfp_flags(__entry->gfp_flags))
+		show_gfp_flags(__entry->gfp_flags),
+		__entry->accounted ? "true" : "false")
 );
 
 DEFINE_EVENT(kmem_alloc, kmalloc,
 
-	TP_PROTO(unsigned long call_site, const void *ptr,
+	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
 		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
 );
 
 DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
 
-	TP_PROTO(unsigned long call_site, const void *ptr,
+	TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
 		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
 );
 
 DECLARE_EVENT_CLASS(kmem_alloc_node,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
+		 struct kmem_cache *s,
 		 size_t bytes_req,
 		 size_t bytes_alloc,
 		 gfp_t gfp_flags,
 		 int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	call_site	)
@@ -77,6 +83,7 @@  DECLARE_EVENT_CLASS(kmem_alloc_node,
 		__field(	size_t,		bytes_alloc	)
 		__field(	unsigned long,	gfp_flags	)
 		__field(	int,		node		)
+		__field(	bool,		accounted	)
 	),
 
 	TP_fast_assign(
@@ -86,33 +93,36 @@  DECLARE_EVENT_CLASS(kmem_alloc_node,
 		__entry->bytes_alloc	= bytes_alloc;
 		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
 		__entry->node		= node;
+		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
+					  (s && s->flags & SLAB_ACCOUNT);
 	),
 
-	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
 		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
 		show_gfp_flags(__entry->gfp_flags),
-		__entry->node)
+		__entry->node,
+		__entry->accounted ? "true" : "false")
 );
 
 DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
 
 	TP_PROTO(unsigned long call_site, const void *ptr,
-		 size_t bytes_req, size_t bytes_alloc,
+		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
 		 gfp_t gfp_flags, int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
 );
 
 DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
 
 	TP_PROTO(unsigned long call_site, const void *ptr,
-		 size_t bytes_req, size_t bytes_alloc,
+		 struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
 		 gfp_t gfp_flags, int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
 );
 
 TRACE_EVENT(kfree,
diff --git a/mm/slab.c b/mm/slab.c
index 0edb474edef1..e5802445c7d6 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3492,7 +3492,7 @@  void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
 {
 	void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
 
-	trace_kmem_cache_alloc(_RET_IP_, ret,
+	trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
 			       cachep->object_size, cachep->size, flags);
 
 	return ret;
@@ -3581,7 +3581,7 @@  kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
 	ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc(_RET_IP_, ret,
+	trace_kmalloc(_RET_IP_, ret, cachep,
 		      size, cachep->size, flags);
 	return ret;
 }
@@ -3606,7 +3606,7 @@  void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
 	void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
 
-	trace_kmem_cache_alloc_node(_RET_IP_, ret,
+	trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
 				    cachep->object_size, cachep->size,
 				    flags, nodeid);
 
@@ -3625,7 +3625,7 @@  void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
 	ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc_node(_RET_IP_, ret,
+	trace_kmalloc_node(_RET_IP_, ret, cachep,
 			   size, cachep->size,
 			   flags, nodeid);
 	return ret;
@@ -3708,7 +3708,7 @@  static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
 	ret = slab_alloc(cachep, NULL, flags, size, caller);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
-	trace_kmalloc(caller, ret,
+	trace_kmalloc(caller, ret, cachep,
 		      size, cachep->size, flags);
 
 	return ret;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2b3206a2c3b5..a345e8600e00 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -25,13 +25,12 @@ 
 #include <asm/page.h>
 #include <linux/memcontrol.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
 #include "internal.h"
-
 #include "slab.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
 enum slab_state slab_state;
 LIST_HEAD(slab_caches);
 DEFINE_MUTEX(slab_mutex);
@@ -967,7 +966,7 @@  EXPORT_SYMBOL(kmalloc_order);
 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
 {
 	void *ret = kmalloc_order(size, flags, order);
-	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
+	trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
 	return ret;
 }
 EXPORT_SYMBOL(kmalloc_order_trace);
diff --git a/mm/slob.c b/mm/slob.c
index 40ea6e2d4ccd..dbefa0da0dfc 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -505,7 +505,7 @@  __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 		*m = size;
 		ret = (void *)m + minalign;
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, size + minalign, gfp, node);
 	} else {
 		unsigned int order = get_order(size);
@@ -514,7 +514,7 @@  __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 			gfp |= __GFP_COMP;
 		ret = slob_new_pages(gfp, order, node);
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, PAGE_SIZE << order, gfp, node);
 	}
 
@@ -610,12 +610,12 @@  static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 
 	if (c->size < PAGE_SIZE) {
 		b = slob_alloc(c->size, flags, c->align, node, 0);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
 					    SLOB_UNITS(c->size) * SLOB_UNIT,
 					    flags, node);
 	} else {
 		b = slob_new_pages(flags, get_order(c->size), node);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
+		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
 					    PAGE_SIZE << get_order(c->size),
 					    flags, node);
 	}
diff --git a/mm/slub.c b/mm/slub.c
index ed5c2c03a47a..9b10591646dd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3231,7 +3231,7 @@  void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
 {
 	void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
 
-	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
+	trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
 				s->size, gfpflags);
 
 	return ret;
@@ -3254,7 +3254,7 @@  EXPORT_SYMBOL(kmem_cache_alloc_lru);
 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
 {
 	void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
-	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
+	trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
 	return ret;
 }
@@ -3266,7 +3266,7 @@  void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
 	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
 
-	trace_kmem_cache_alloc_node(_RET_IP_, ret,
+	trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
 				    s->object_size, s->size, gfpflags, node);
 
 	return ret;
@@ -3280,7 +3280,7 @@  void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
 {
 	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
 
-	trace_kmalloc_node(_RET_IP_, ret,
+	trace_kmalloc_node(_RET_IP_, ret, s,
 			   size, s->size, gfpflags, node);
 
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
@@ -4409,7 +4409,7 @@  void *__kmalloc(size_t size, gfp_t flags)
 
 	ret = slab_alloc(s, NULL, flags, _RET_IP_, size);
 
-	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
+	trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);
 
 	ret = kasan_kmalloc(s, ret, size, flags);
 
@@ -4443,7 +4443,7 @@  void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
 		ret = kmalloc_large_node(size, flags, node);
 
-		trace_kmalloc_node(_RET_IP_, ret,
+		trace_kmalloc_node(_RET_IP_, ret, NULL,
 				   size, PAGE_SIZE << get_order(size),
 				   flags, node);
 
@@ -4457,7 +4457,7 @@  void *__kmalloc_node(size_t size, gfp_t flags, int node)
 
 	ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);
 
-	trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
+	trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);
 
 	ret = kasan_kmalloc(s, ret, size, flags);
 
@@ -4916,7 +4916,7 @@  void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 	ret = slab_alloc(s, NULL, gfpflags, caller, size);
 
 	/* Honor the call site pointer we received. */
-	trace_kmalloc(caller, ret, size, s->size, gfpflags);
+	trace_kmalloc(caller, ret, s, size, s->size, gfpflags);
 
 	return ret;
 }
@@ -4932,7 +4932,7 @@  void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
 		ret = kmalloc_large_node(size, gfpflags, node);
 
-		trace_kmalloc_node(caller, ret,
+		trace_kmalloc_node(caller, ret, NULL,
 				   size, PAGE_SIZE << get_order(size),
 				   gfpflags, node);
 
@@ -4947,7 +4947,7 @@  void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);
 
 	/* Honor the call site pointer we received. */
-	trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
+	trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);
 
 	return ret;
 }