diff mbox series

[-trace] trace: fix TASK_COMM_LEN in trace event format file

Message ID 20230210155921.4610-1-laoar.shao@gmail.com (mailing list archive)
State Superseded
Headers show
Series [-trace] trace: fix TASK_COMM_LEN in trace event format file | expand

Commit Message

Yafang Shao Feb. 10, 2023, 3:59 p.m. UTC
After commit 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN"),
the content of the format file under
/sys/kernel/debug/tracing/events/task/task_newtask was changed from
  field:char comm[16];    offset:12;    size:16;    signed:0;
to
  field:char comm[TASK_COMM_LEN];    offset:12;    size:16;    signed:0;

John reported that this change breaks older versions of perfetto.
Then Mathieu pointed out that this behavioral change was caused by the
use of __stringify(_len), which happens to work on macros, but not on enum
labels. And he also gave the suggestion on how to fix it:
  :One possible solution to make this more robust would be to extend
  :struct trace_event_fields with one more field that indicates the length
  :of an array as an actual integer, without storing it in its stringified
  :form in the type, and do the formatting in f_show where it belongs.

The result as follows after this change,
$ cat /sys/kernel/debug/tracing/events/task/task_newtask/format
        field:char comm[16];    offset:12;      size:16;        signed:0;

Fixes: 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN")
Reported-by: John Stultz <jstultz@google.com>
Debugged-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Suggested-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Kajetan Puchalski <kajetan.puchalski@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: John Stultz <jstultz@google.com>
Cc: stable@vger.kernel.org # v5.17+
---
 include/linux/trace_events.h               |  1 +
 include/trace/stages/stage4_event_fields.h |  3 ++-
 kernel/trace/trace.h                       |  1 +
 kernel/trace/trace_events.c                | 24 ++++++++++++++++--------
 4 files changed, 20 insertions(+), 9 deletions(-)

Comments

Mathieu Desnoyers Feb. 10, 2023, 4:27 p.m. UTC | #1
On 2023-02-10 10:59, Yafang Shao wrote:
> After commit 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN"),
> the content of the format file under
> /sys/kernel/debug/tracing/events/task/task_newtask was changed from
>    field:char comm[16];    offset:12;    size:16;    signed:0;
> to
>    field:char comm[TASK_COMM_LEN];    offset:12;    size:16;    signed:0;
> 
> John reported that this change breaks older versions of perfetto.
> Then Mathieu pointed out that this behavioral change was caused by the
> use of __stringify(_len), which happens to work on macros, but not on enum
> labels. And he also gave the suggestion on how to fix it:
>    :One possible solution to make this more robust would be to extend
>    :struct trace_event_fields with one more field that indicates the length
>    :of an array as an actual integer, without storing it in its stringified
>    :form in the type, and do the formatting in f_show where it belongs.
> 
> The result as follows after this change,
> $ cat /sys/kernel/debug/tracing/events/task/task_newtask/format
>          field:char comm[16];    offset:12;      size:16;        signed:0;
> 
> Fixes: 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN")
> Reported-by: John Stultz <jstultz@google.com>
> Debugged-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Suggested-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> Cc: Kajetan Puchalski <kajetan.puchalski@arm.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: John Stultz <jstultz@google.com>
> Cc: stable@vger.kernel.org # v5.17+
> ---
>   include/linux/trace_events.h               |  1 +
>   include/trace/stages/stage4_event_fields.h |  3 ++-
>   kernel/trace/trace.h                       |  1 +
>   kernel/trace/trace_events.c                | 24 ++++++++++++++++--------
>   4 files changed, 20 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index 4342e99..0e37322 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -270,6 +270,7 @@ struct trace_event_fields {
>   			const int  align;
>   			const int  is_signed;
>   			const int  filter_type;
> +			const int  len;
>   		};
>   		int (*define_fields)(struct trace_event_call *);
>   	};
> diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h
> index affd541..306f39a 100644
> --- a/include/trace/stages/stage4_event_fields.h
> +++ b/include/trace/stages/stage4_event_fields.h
> @@ -26,7 +26,8 @@
>   #define __array(_type, _item, _len) {					\
>   	.type = #_type"["__stringify(_len)"]", .name = #_item,		\

Just out of curiosity, is the content of __stringify(_len) ever used 
after this patch ? Perhaps we could remove it and just leave:

.type = #_type"[]"

considering that f_show appears to use the opening square bracket to 
detect arrays. This would remove a few useless bytes of data.

Thanks,

Mathieu


>   	.size = sizeof(_type[_len]), .align = ALIGN_STRUCTFIELD(_type),	\
> -	.is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER },
> +	.is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER, \
> +	.len = _len},
>   
>   #undef __dynamic_array
>   #define __dynamic_array(_type, _item, _len) {				\
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index e46a492..19caf15 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -1282,6 +1282,7 @@ struct ftrace_event_field {
>   	int			offset;
>   	int			size;
>   	int			is_signed;
> +	int			len;
>   };
>   
>   struct prog_entry;
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index 33e0b4f..70f6725 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -114,7 +114,7 @@ struct ftrace_event_field *
>   
>   static int __trace_define_field(struct list_head *head, const char *type,
>   				const char *name, int offset, int size,
> -				int is_signed, int filter_type)
> +				int is_signed, int filter_type, int len)
>   {
>   	struct ftrace_event_field *field;
>   
> @@ -133,6 +133,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
>   	field->offset = offset;
>   	field->size = size;
>   	field->is_signed = is_signed;
> +	field->len = len;
>   
>   	list_add(&field->link, head);
>   
> @@ -150,14 +151,14 @@ int trace_define_field(struct trace_event_call *call, const char *type,
>   
>   	head = trace_get_fields(call);
>   	return __trace_define_field(head, type, name, offset, size,
> -				    is_signed, filter_type);
> +				    is_signed, filter_type, 0);
>   }
>   EXPORT_SYMBOL_GPL(trace_define_field);
>   
>   #define __generic_field(type, item, filter_type)			\
>   	ret = __trace_define_field(&ftrace_generic_fields, #type,	\
>   				   #item, 0, 0, is_signed_type(type),	\
> -				   filter_type);			\
> +				   filter_type, 0);			\
>   	if (ret)							\
>   		return ret;
>   
> @@ -166,7 +167,7 @@ int trace_define_field(struct trace_event_call *call, const char *type,
>   				   "common_" #item,			\
>   				   offsetof(typeof(ent), item),		\
>   				   sizeof(ent.item),			\
> -				   is_signed_type(type), FILTER_OTHER);	\
> +				   is_signed_type(type), FILTER_OTHER, 0);	\
>   	if (ret)							\
>   		return ret;
>   
> @@ -1589,10 +1590,10 @@ static int f_show(struct seq_file *m, void *v)
>   			   field->type, field->name, field->offset,
>   			   field->size, !!field->is_signed);
>   	else
> -		seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
> +		seq_printf(m, "\tfield:%.*s %s[%d];\toffset:%u;\tsize:%u;\tsigned:%d;\n",
>   			   (int)(array_descriptor - field->type),
>   			   field->type, field->name,
> -			   array_descriptor, field->offset,
> +			   field->len, field->offset,
>   			   field->size, !!field->is_signed);
>   
>   	return 0;
> @@ -2371,6 +2372,7 @@ static int ftrace_event_release(struct inode *inode, struct file *file)
>   	if (list_empty(head)) {
>   		struct trace_event_fields *field = call->class->fields_array;
>   		unsigned int offset = sizeof(struct trace_entry);
> +		struct list_head *head;
>   
>   		for (; field->type; field++) {
>   			if (field->type == TRACE_FUNCTION_TYPE) {
> @@ -2379,9 +2381,15 @@ static int ftrace_event_release(struct inode *inode, struct file *file)
>   			}
>   
>   			offset = ALIGN(offset, field->align);
> -			ret = trace_define_field(call, field->type, field->name,
> +			if (WARN_ON(!call->class)) {
> +				offset += field->size;
> +				continue;
> +			}
> +			head = trace_get_fields(call);
> +			ret = __trace_define_field(head, field->type, field->name,
>   						 offset, field->size,
> -						 field->is_signed, field->filter_type);
> +						 field->is_signed, field->filter_type,
> +						 field->len);
>   			if (WARN_ON_ONCE(ret)) {
>   				pr_err("error code is %d\n", ret);
>   				break;
Kajetan Puchalski Feb. 10, 2023, 6:12 p.m. UTC | #2
Hi Yafang,

> After commit 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN"),
> the content of the format file under
> /sys/kernel/debug/tracing/events/task/task_newtask was changed from
>   field:char comm[16];    offset:12;    size:16;    signed:0;
> to
>   field:char comm[TASK_COMM_LEN];    offset:12;    size:16;    signed:0;
> 
> John reported that this change breaks older versions of perfetto.
> Then Mathieu pointed out that this behavioral change was caused by the
> use of __stringify(_len), which happens to work on macros, but not on enum
> labels. And he also gave the suggestion on how to fix it:
>   :One possible solution to make this more robust would be to extend
>   :struct trace_event_fields with one more field that indicates the length
>   :of an array as an actual integer, without storing it in its stringified
>   :form in the type, and do the formatting in f_show where it belongs.
> 
> The result as follows after this change,
> $ cat /sys/kernel/debug/tracing/events/task/task_newtask/format
>         field:char comm[16];    offset:12;      size:16;        signed:0;
> 
> Fixes: 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN")
> Reported-by: John Stultz <jstultz@google.com>
> Debugged-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Suggested-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> Cc: Kajetan Puchalski <kajetan.puchalski@arm.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: John Stultz <jstultz@google.com>
> Cc: stable@vger.kernel.org # v5.17+

From my testing this works the same with older Perfetto as the previous
diff you sent in the older thread, ie this type of errors:

Name	Value	Type
mismatched_sched_switch_tids	2439	error (analysis)
systrace_parse_failure	3853	error (analysis)

Meaning that applying this patch on top of the old one ends up with
different results than just reverting the old one so not a 100% fix
but as I said before, still an improvement.

Thanks,
Kajetan
Alexei Starovoitov Feb. 10, 2023, 10:32 p.m. UTC | #3
On Fri, Feb 10, 2023 at 10:13 AM Kajetan Puchalski
<kajetan.puchalski@arm.com> wrote:
>
> Hi Yafang,
>
> > After commit 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN"),
> > the content of the format file under
> > /sys/kernel/debug/tracing/events/task/task_newtask was changed from
> >   field:char comm[16];    offset:12;    size:16;    signed:0;
> > to
> >   field:char comm[TASK_COMM_LEN];    offset:12;    size:16;    signed:0;
> >
> > John reported that this change breaks older versions of perfetto.
> > Then Mathieu pointed out that this behavioral change was caused by the
> > use of __stringify(_len), which happens to work on macros, but not on enum
> > labels. And he also gave the suggestion on how to fix it:
> >   :One possible solution to make this more robust would be to extend
> >   :struct trace_event_fields with one more field that indicates the length
> >   :of an array as an actual integer, without storing it in its stringified
> >   :form in the type, and do the formatting in f_show where it belongs.
> >
> > The result as follows after this change,
> > $ cat /sys/kernel/debug/tracing/events/task/task_newtask/format
> >         field:char comm[16];    offset:12;      size:16;        signed:0;
> >
> > Fixes: 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN")
> > Reported-by: John Stultz <jstultz@google.com>
> > Debugged-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> > Suggested-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> > Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> > Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> > Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> > Cc: Kajetan Puchalski <kajetan.puchalski@arm.com>
> > Cc: Steven Rostedt <rostedt@goodmis.org>
> > Cc: John Stultz <jstultz@google.com>
> > Cc: stable@vger.kernel.org # v5.17+
>
> From my testing this works the same with older Perfetto as the previous
> diff you sent in the older thread, ie this type of errors:
>
> Name    Value   Type
> mismatched_sched_switch_tids    2439    error (analysis)
> systrace_parse_failure  3853    error (analysis)
>
> Meaning that applying this patch on top of the old one ends up with
> different results than just reverting the old one so not a 100% fix
> but as I said before, still an improvement.

fwiw I don't mind reverting commit 3087c61ed2c4.
For the record it didn't go through the bpf tree.
It went through mm.

But first we need to define which part of ftrace format is an abi.
I think it's a format of tracing/event/foo/format file
and not the contents of it.
The tracepoints come and go. Their arguments get changed too.
So the contents of ftrace format files change.

In this case Perfetto stumbled on parsing
field:char comm[TASK_COMM_LEN];    offset:8;

We probably should define that 'field: value  offset: value'
is an abi, but value-s can change.
Say offset:8 becomes offset:+8, for whatever reason.
If Perfetto fails to parse it, it's a Perfetto bug.

In this case it failed to parse char comm[TASK_COMM_LEN];
But that's not the only such "field:".
These three were there for a long time.
    field:u32 rates[NUM_NL80211_BANDS];    offset:16;    size:24;
    field:int mcast_rate[NUM_NL80211_BANDS];    offset:60;    size:24;
    field:int mcast_rate[NUM_NL80211_BANDS];    offset:108;    size:24;

I suspect Perfetto didn't have a use case to parse them,
so the bug stayed dormant and a change in TASK_COMM_LEN triggered it.

We can use Yafang's patch to do:
-field:%.*s %s%s;
+field:%.*s %s[%d];
but it will affect both TASK_COMM_LEN and NUM_NL80211_BANDS.

In summary the TASK_COMM_LEN change from #define to enum didn't
introduce anything new in the kind of "value"s being printed
in ftrace files. Hence I'm arguing there is no abi breakage.

There was a question why change from #define to enum is useful
to bpf. The reason is that #define-s are not seen in dwarf
whereas enums and their values are. bpf tooling has ways to extract
that data and auto-adjust bpf programs when enum-s disappear
or their values change.
Steven Rostedt Feb. 11, 2023, 7:28 p.m. UTC | #4
On Fri, 10 Feb 2023 14:32:13 -0800
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> fwiw I don't mind reverting commit 3087c61ed2c4.
> For the record it didn't go through the bpf tree.
> It went through mm.

I think the solution being presented can work, but still needs some work.

> 
> But first we need to define which part of ftrace format is an abi.
> I think it's a format of tracing/event/foo/format file
> and not the contents of it.
> The tracepoints come and go. Their arguments get changed too.
> So the contents of ftrace format files change.

As Linus always says. The abi is what user space uses. ;-)

> 
> In this case Perfetto stumbled on parsing
> field:char comm[TASK_COMM_LEN];    offset:8;

Perfetto always expected that to be a number, so it must remain one.

> 
> We probably should define that 'field: value  offset: value'
> is an abi, but value-s can change.
> Say offset:8 becomes offset:+8, for whatever reason.
> If Perfetto fails to parse it, it's a Perfetto bug.
> 
> In this case it failed to parse char comm[TASK_COMM_LEN];
> But that's not the only such "field:".
> These three were there for a long time.
>     field:u32 rates[NUM_NL80211_BANDS];    offset:16;    size:24;
>     field:int mcast_rate[NUM_NL80211_BANDS];    offset:60;    size:24;
>     field:int mcast_rate[NUM_NL80211_BANDS];    offset:108;    size:24;
> 
> I suspect Perfetto didn't have a use case to parse them,
> so the bug stayed dormant and a change in TASK_COMM_LEN triggered it.

Correct. Perfetto picks and chooses what events it needs. It does not parse
all events. So it wouldn't notice these. In fact, some tools require these
fields to be numbers and have used the TRACE_EVENT_ENUM() to convert them.

But with this change, we can likely also remove the parsing of the fields
on boot up. Which is a good thing.


> 
> We can use Yafang's patch to do:
> -field:%.*s %s%s;
> +field:%.*s %s[%d];
> but it will affect both TASK_COMM_LEN and NUM_NL80211_BANDS.

Nothing appears to care about the NUM_NL80211_BANDS being there. It's
basically useless information. If nothing complains about it changing, then
it isn't a breakage of user space.

Remember, Linus's rule is not "do not modify user space interfaces", it is
"don't break user space". If a user space interface changes and no user
space tool notices, did it really break? The tree in a forest analogy.

> 
> In summary the TASK_COMM_LEN change from #define to enum didn't
> introduce anything new in the kind of "value"s being printed
> in ftrace files. Hence I'm arguing there is no abi breakage.
> 
> There was a question why change from #define to enum is useful
> to bpf. The reason is that #define-s are not seen in dwarf
> whereas enums and their values are. bpf tooling has ways to extract
> that data and auto-adjust bpf programs when enum-s disappear
> or their values change.

In most cases, having all the [xxx] turn into useful numbers is what we
want. There's a few things broken with the current patch, but I can help
fix those.

-- Steve
Steven Rostedt Feb. 11, 2023, 7:34 p.m. UTC | #5
On Fri, 10 Feb 2023 11:27:18 -0500
Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:

> > --- a/include/trace/stages/stage4_event_fields.h
> > +++ b/include/trace/stages/stage4_event_fields.h
> > @@ -26,7 +26,8 @@
> >   #define __array(_type, _item, _len) {					\
> >   	.type = #_type"["__stringify(_len)"]", .name = #_item,		\  
> 
> Just out of curiosity, is the content of __stringify(_len) ever used 
> after this patch ? Perhaps we could remove it and just leave:
> 
> .type = #_type"[]"
> 
> considering that f_show appears to use the opening square bracket to 
> detect arrays. This would remove a few useless bytes of data.

I agree that we can optimize this. But lets make that a separate patch, and
not worry about it for this. That's a clean up that can be handled later.

I'd like to not add any more side effects than it may already have.

-- Steve
Mathieu Desnoyers Feb. 11, 2023, 7:35 p.m. UTC | #6
On 2023-02-11 14:34, Steven Rostedt wrote:
> On Fri, 10 Feb 2023 11:27:18 -0500
> Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
> 
>>> --- a/include/trace/stages/stage4_event_fields.h
>>> +++ b/include/trace/stages/stage4_event_fields.h
>>> @@ -26,7 +26,8 @@
>>>    #define __array(_type, _item, _len) {					\
>>>    	.type = #_type"["__stringify(_len)"]", .name = #_item,		\
>>
>> Just out of curiosity, is the content of __stringify(_len) ever used
>> after this patch ? Perhaps we could remove it and just leave:
>>
>> .type = #_type"[]"
>>
>> considering that f_show appears to use the opening square bracket to
>> detect arrays. This would remove a few useless bytes of data.
> 
> I agree that we can optimize this. But lets make that a separate patch, and
> not worry about it for this. That's a clean up that can be handled later.
> 
> I'd like to not add any more side effects than it may already have.

Fully agreed, hence my "just out of curiosity". :)

Thanks,

Mathieu
Steven Rostedt Feb. 11, 2023, 8:23 p.m. UTC | #7
On Fri, 10 Feb 2023 15:59:21 +0000
Yafang Shao <laoar.shao@gmail.com> wrote:

> After commit 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN"),
> the content of the format file under
> /sys/kernel/debug/tracing/events/task/task_newtask was changed from
>   field:char comm[16];    offset:12;    size:16;    signed:0;
> to
>   field:char comm[TASK_COMM_LEN];    offset:12;    size:16;    signed:0;
> 
> John reported that this change breaks older versions of perfetto.
> Then Mathieu pointed out that this behavioral change was caused by the
> use of __stringify(_len), which happens to work on macros, but not on enum
> labels. And he also gave the suggestion on how to fix it:
>   :One possible solution to make this more robust would be to extend
>   :struct trace_event_fields with one more field that indicates the length
>   :of an array as an actual integer, without storing it in its stringified
>   :form in the type, and do the formatting in f_show where it belongs.
> 
> The result as follows after this change,
> $ cat /sys/kernel/debug/tracing/events/task/task_newtask/format
>         field:char comm[16];    offset:12;      size:16;        signed:0;
> 
> Fixes: 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN")
> Reported-by: John Stultz <jstultz@google.com>
> Debugged-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Suggested-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> Cc: Kajetan Puchalski <kajetan.puchalski@arm.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: John Stultz <jstultz@google.com>
> Cc: stable@vger.kernel.org # v5.17+
> ---

The below patch appears to not break[*] anything, as I did a diff off
all events, before and after, and it looks like a nice clean up!

I had to cover the "ftrace events" which are not trace events (for example,
trace_printk, function, function_graph). They are created by their own
macros that are similar. But your original patch broke them.

Please use this patch, and resubmit. I'll push it through my tree after
it goes through all my normal testing. You can include:

Suggested-by: Steven Rostedt (Google) <rostedt@goodmis.org>

[*] I haven't run it through all my tests yet.

Thanks.

-- Steve

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 4342e996bcdb..0e373222a6df 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -270,6 +270,7 @@ struct trace_event_fields {
 			const int  align;
 			const int  is_signed;
 			const int  filter_type;
+			const int  len;
 		};
 		int (*define_fields)(struct trace_event_call *);
 	};
diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h
index affd541fd25e..306f39aab480 100644
--- a/include/trace/stages/stage4_event_fields.h
+++ b/include/trace/stages/stage4_event_fields.h
@@ -26,7 +26,8 @@
 #define __array(_type, _item, _len) {					\
 	.type = #_type"["__stringify(_len)"]", .name = #_item,		\
 	.size = sizeof(_type[_len]), .align = ALIGN_STRUCTFIELD(_type),	\
-	.is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER },
+	.is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER, \
+	.len = _len },
 
 #undef __dynamic_array
 #define __dynamic_array(_type, _item, _len) {				\
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f10bf804dd2b..f3aae2be1d53 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1286,6 +1286,7 @@ struct ftrace_event_field {
 	int			offset;
 	int			size;
 	int			is_signed;
+	int			len;
 };
 
 struct prog_entry;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index cf3fd74fa675..60ab2f2108fe 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -114,7 +114,7 @@ trace_find_event_field(struct trace_event_call *call, char *name)
 
 static int __trace_define_field(struct list_head *head, const char *type,
 				const char *name, int offset, int size,
-				int is_signed, int filter_type)
+				int is_signed, int filter_type, int len)
 {
 	struct ftrace_event_field *field;
 
@@ -133,6 +133,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
 	field->offset = offset;
 	field->size = size;
 	field->is_signed = is_signed;
+	field->len = len;
 
 	list_add(&field->link, head);
 
@@ -150,14 +151,28 @@ int trace_define_field(struct trace_event_call *call, const char *type,
 
 	head = trace_get_fields(call);
 	return __trace_define_field(head, type, name, offset, size,
-				    is_signed, filter_type);
+				    is_signed, filter_type, 0);
 }
 EXPORT_SYMBOL_GPL(trace_define_field);
 
+int trace_define_field_ext(struct trace_event_call *call, const char *type,
+		       const char *name, int offset, int size, int is_signed,
+		       int filter_type, int len)
+{
+	struct list_head *head;
+
+	if (WARN_ON(!call->class))
+		return 0;
+
+	head = trace_get_fields(call);
+	return __trace_define_field(head, type, name, offset, size,
+				    is_signed, filter_type, len);
+}
+
 #define __generic_field(type, item, filter_type)			\
 	ret = __trace_define_field(&ftrace_generic_fields, #type,	\
 				   #item, 0, 0, is_signed_type(type),	\
-				   filter_type);			\
+				   filter_type, 0);			\
 	if (ret)							\
 		return ret;
 
@@ -166,7 +181,7 @@ EXPORT_SYMBOL_GPL(trace_define_field);
 				   "common_" #item,			\
 				   offsetof(typeof(ent), item),		\
 				   sizeof(ent.item),			\
-				   is_signed_type(type), FILTER_OTHER);	\
+				   is_signed_type(type), FILTER_OTHER, 0);	\
 	if (ret)							\
 		return ret;
 
@@ -1588,12 +1603,17 @@ static int f_show(struct seq_file *m, void *v)
 		seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
 			   field->type, field->name, field->offset,
 			   field->size, !!field->is_signed);
-	else
-		seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+	else if (field->len)
+		seq_printf(m, "\tfield:%.*s %s[%d];\toffset:%u;\tsize:%u;\tsigned:%d;\n",
 			   (int)(array_descriptor - field->type),
 			   field->type, field->name,
-			   array_descriptor, field->offset,
+			   field->len, field->offset,
 			   field->size, !!field->is_signed);
+	else
+		seq_printf(m, "\tfield:%.*s %s[];\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+			   (int)(array_descriptor - field->type),
+			   field->type, field->name,
+			   field->offset, field->size, !!field->is_signed);
 
 	return 0;
 }
@@ -2379,9 +2399,10 @@ event_define_fields(struct trace_event_call *call)
 			}
 
 			offset = ALIGN(offset, field->align);
-			ret = trace_define_field(call, field->type, field->name,
+			ret = trace_define_field_ext(call, field->type, field->name,
 						 offset, field->size,
-						 field->is_signed, field->filter_type);
+						 field->is_signed, field->filter_type,
+						 field->len);
 			if (WARN_ON_ONCE(ret)) {
 				pr_err("error code is %d\n", ret);
 				break;
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d960f6b11b5e..9d0a6ea4c076 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -111,7 +111,8 @@ static void __always_unused ____ftrace_check_##name(void)		\
 #define __array(_type, _item, _len) {					\
 	.type = #_type"["__stringify(_len)"]", .name = #_item,		\
 	.size = sizeof(_type[_len]), .align = __alignof__(_type),	\
-	is_signed_type(_type), .filter_type = FILTER_OTHER },
+	is_signed_type(_type), .filter_type = FILTER_OTHER,		\
+	.len = _len },							\
 
 #undef __array_desc
 #define __array_desc(_type, _container, _item, _len) __array(_type, _item, _len)
Yafang Shao Feb. 12, 2023, 11:42 a.m. UTC | #8
On Sun, Feb 12, 2023 at 4:23 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Fri, 10 Feb 2023 15:59:21 +0000
> Yafang Shao <laoar.shao@gmail.com> wrote:
>
> > After commit 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN"),
> > the content of the format file under
> > /sys/kernel/debug/tracing/events/task/task_newtask was changed from
> >   field:char comm[16];    offset:12;    size:16;    signed:0;
> > to
> >   field:char comm[TASK_COMM_LEN];    offset:12;    size:16;    signed:0;
> >
> > John reported that this change breaks older versions of perfetto.
> > Then Mathieu pointed out that this behavioral change was caused by the
> > use of __stringify(_len), which happens to work on macros, but not on enum
> > labels. And he also gave the suggestion on how to fix it:
> >   :One possible solution to make this more robust would be to extend
> >   :struct trace_event_fields with one more field that indicates the length
> >   :of an array as an actual integer, without storing it in its stringified
> >   :form in the type, and do the formatting in f_show where it belongs.
> >
> > The result as follows after this change,
> > $ cat /sys/kernel/debug/tracing/events/task/task_newtask/format
> >         field:char comm[16];    offset:12;      size:16;        signed:0;
> >
> > Fixes: 3087c61ed2c4 ("tools/testing/selftests/bpf: replace open-coded 16 with TASK_COMM_LEN")
> > Reported-by: John Stultz <jstultz@google.com>
> > Debugged-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> > Suggested-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> > Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> > Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> > Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> > Cc: Kajetan Puchalski <kajetan.puchalski@arm.com>
> > Cc: Steven Rostedt <rostedt@goodmis.org>
> > Cc: John Stultz <jstultz@google.com>
> > Cc: stable@vger.kernel.org # v5.17+
> > ---
>
> The below patch appears to not break[*] anything, as I did a diff off
> all events, before and after, and it looks like a nice clean up!
>
> I had to cover the "ftrace events" which are not trace events (for example,
> trace_printk, function, function_graph). They are created by their own
> macros that are similar. But your original patch broke them.
>
> Please use this patch, and resubmit. I'll push it through my tree after
> it goes through all my normal testing. You can include:
>
> Suggested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
>

Thanks for the suggestion. I will do it.

> [*] I haven't run it through all my tests yet.

I will run selftests/ftrace before sending it.

>
> Thanks.
>
> -- Steve
>
> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index 4342e996bcdb..0e373222a6df 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -270,6 +270,7 @@ struct trace_event_fields {
>                         const int  align;
>                         const int  is_signed;
>                         const int  filter_type;
> +                       const int  len;
>                 };
>                 int (*define_fields)(struct trace_event_call *);
>         };
> diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h
> index affd541fd25e..306f39aab480 100644
> --- a/include/trace/stages/stage4_event_fields.h
> +++ b/include/trace/stages/stage4_event_fields.h
> @@ -26,7 +26,8 @@
>  #define __array(_type, _item, _len) {                                  \
>         .type = #_type"["__stringify(_len)"]", .name = #_item,          \
>         .size = sizeof(_type[_len]), .align = ALIGN_STRUCTFIELD(_type), \
> -       .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER },
> +       .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER, \
> +       .len = _len },
>
>  #undef __dynamic_array
>  #define __dynamic_array(_type, _item, _len) {                          \
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index f10bf804dd2b..f3aae2be1d53 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -1286,6 +1286,7 @@ struct ftrace_event_field {
>         int                     offset;
>         int                     size;
>         int                     is_signed;
> +       int                     len;
>  };
>
>  struct prog_entry;
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index cf3fd74fa675..60ab2f2108fe 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -114,7 +114,7 @@ trace_find_event_field(struct trace_event_call *call, char *name)
>
>  static int __trace_define_field(struct list_head *head, const char *type,
>                                 const char *name, int offset, int size,
> -                               int is_signed, int filter_type)
> +                               int is_signed, int filter_type, int len)
>  {
>         struct ftrace_event_field *field;
>
> @@ -133,6 +133,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
>         field->offset = offset;
>         field->size = size;
>         field->is_signed = is_signed;
> +       field->len = len;
>
>         list_add(&field->link, head);
>
> @@ -150,14 +151,28 @@ int trace_define_field(struct trace_event_call *call, const char *type,
>
>         head = trace_get_fields(call);
>         return __trace_define_field(head, type, name, offset, size,
> -                                   is_signed, filter_type);
> +                                   is_signed, filter_type, 0);
>  }
>  EXPORT_SYMBOL_GPL(trace_define_field);
>
> +int trace_define_field_ext(struct trace_event_call *call, const char *type,
> +                      const char *name, int offset, int size, int is_signed,
> +                      int filter_type, int len)
> +{
> +       struct list_head *head;
> +
> +       if (WARN_ON(!call->class))
> +               return 0;
> +
> +       head = trace_get_fields(call);
> +       return __trace_define_field(head, type, name, offset, size,
> +                                   is_signed, filter_type, len);
> +}
> +
>  #define __generic_field(type, item, filter_type)                       \
>         ret = __trace_define_field(&ftrace_generic_fields, #type,       \
>                                    #item, 0, 0, is_signed_type(type),   \
> -                                  filter_type);                        \
> +                                  filter_type, 0);                     \
>         if (ret)                                                        \
>                 return ret;
>
> @@ -166,7 +181,7 @@ EXPORT_SYMBOL_GPL(trace_define_field);
>                                    "common_" #item,                     \
>                                    offsetof(typeof(ent), item),         \
>                                    sizeof(ent.item),                    \
> -                                  is_signed_type(type), FILTER_OTHER); \
> +                                  is_signed_type(type), FILTER_OTHER, 0);      \
>         if (ret)                                                        \
>                 return ret;
>
> @@ -1588,12 +1603,17 @@ static int f_show(struct seq_file *m, void *v)
>                 seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
>                            field->type, field->name, field->offset,
>                            field->size, !!field->is_signed);
> -       else
> -               seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
> +       else if (field->len)
> +               seq_printf(m, "\tfield:%.*s %s[%d];\toffset:%u;\tsize:%u;\tsigned:%d;\n",
>                            (int)(array_descriptor - field->type),
>                            field->type, field->name,
> -                          array_descriptor, field->offset,
> +                          field->len, field->offset,
>                            field->size, !!field->is_signed);
> +       else
> +               seq_printf(m, "\tfield:%.*s %s[];\toffset:%u;\tsize:%u;\tsigned:%d;\n",
> +                          (int)(array_descriptor - field->type),
> +                          field->type, field->name,
> +                          field->offset, field->size, !!field->is_signed);
>
>         return 0;
>  }
> @@ -2379,9 +2399,10 @@ event_define_fields(struct trace_event_call *call)
>                         }
>
>                         offset = ALIGN(offset, field->align);
> -                       ret = trace_define_field(call, field->type, field->name,
> +                       ret = trace_define_field_ext(call, field->type, field->name,
>                                                  offset, field->size,
> -                                                field->is_signed, field->filter_type);
> +                                                field->is_signed, field->filter_type,
> +                                                field->len);
>                         if (WARN_ON_ONCE(ret)) {
>                                 pr_err("error code is %d\n", ret);
>                                 break;
> diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
> index d960f6b11b5e..9d0a6ea4c076 100644
> --- a/kernel/trace/trace_export.c
> +++ b/kernel/trace/trace_export.c
> @@ -111,7 +111,8 @@ static void __always_unused ____ftrace_check_##name(void)           \
>  #define __array(_type, _item, _len) {                                  \
>         .type = #_type"["__stringify(_len)"]", .name = #_item,          \
>         .size = sizeof(_type[_len]), .align = __alignof__(_type),       \
> -       is_signed_type(_type), .filter_type = FILTER_OTHER },
> +       is_signed_type(_type), .filter_type = FILTER_OTHER,             \
> +       .len = _len },                                                  \
>
>  #undef __array_desc
>  #define __array_desc(_type, _container, _item, _len) __array(_type, _item, _len)
diff mbox series

Patch

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 4342e99..0e37322 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -270,6 +270,7 @@  struct trace_event_fields {
 			const int  align;
 			const int  is_signed;
 			const int  filter_type;
+			const int  len;
 		};
 		int (*define_fields)(struct trace_event_call *);
 	};
diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h
index affd541..306f39a 100644
--- a/include/trace/stages/stage4_event_fields.h
+++ b/include/trace/stages/stage4_event_fields.h
@@ -26,7 +26,8 @@ 
 #define __array(_type, _item, _len) {					\
 	.type = #_type"["__stringify(_len)"]", .name = #_item,		\
 	.size = sizeof(_type[_len]), .align = ALIGN_STRUCTFIELD(_type),	\
-	.is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER },
+	.is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER, \
+	.len = _len},
 
 #undef __dynamic_array
 #define __dynamic_array(_type, _item, _len) {				\
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e46a492..19caf15 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1282,6 +1282,7 @@  struct ftrace_event_field {
 	int			offset;
 	int			size;
 	int			is_signed;
+	int			len;
 };
 
 struct prog_entry;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 33e0b4f..70f6725 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -114,7 +114,7 @@  struct ftrace_event_field *
 
 static int __trace_define_field(struct list_head *head, const char *type,
 				const char *name, int offset, int size,
-				int is_signed, int filter_type)
+				int is_signed, int filter_type, int len)
 {
 	struct ftrace_event_field *field;
 
@@ -133,6 +133,7 @@  static int __trace_define_field(struct list_head *head, const char *type,
 	field->offset = offset;
 	field->size = size;
 	field->is_signed = is_signed;
+	field->len = len;
 
 	list_add(&field->link, head);
 
@@ -150,14 +151,14 @@  int trace_define_field(struct trace_event_call *call, const char *type,
 
 	head = trace_get_fields(call);
 	return __trace_define_field(head, type, name, offset, size,
-				    is_signed, filter_type);
+				    is_signed, filter_type, 0);
 }
 EXPORT_SYMBOL_GPL(trace_define_field);
 
 #define __generic_field(type, item, filter_type)			\
 	ret = __trace_define_field(&ftrace_generic_fields, #type,	\
 				   #item, 0, 0, is_signed_type(type),	\
-				   filter_type);			\
+				   filter_type, 0);			\
 	if (ret)							\
 		return ret;
 
@@ -166,7 +167,7 @@  int trace_define_field(struct trace_event_call *call, const char *type,
 				   "common_" #item,			\
 				   offsetof(typeof(ent), item),		\
 				   sizeof(ent.item),			\
-				   is_signed_type(type), FILTER_OTHER);	\
+				   is_signed_type(type), FILTER_OTHER, 0);	\
 	if (ret)							\
 		return ret;
 
@@ -1589,10 +1590,10 @@  static int f_show(struct seq_file *m, void *v)
 			   field->type, field->name, field->offset,
 			   field->size, !!field->is_signed);
 	else
-		seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+		seq_printf(m, "\tfield:%.*s %s[%d];\toffset:%u;\tsize:%u;\tsigned:%d;\n",
 			   (int)(array_descriptor - field->type),
 			   field->type, field->name,
-			   array_descriptor, field->offset,
+			   field->len, field->offset,
 			   field->size, !!field->is_signed);
 
 	return 0;
@@ -2371,6 +2372,7 @@  static int ftrace_event_release(struct inode *inode, struct file *file)
 	if (list_empty(head)) {
 		struct trace_event_fields *field = call->class->fields_array;
 		unsigned int offset = sizeof(struct trace_entry);
+		struct list_head *head;
 
 		for (; field->type; field++) {
 			if (field->type == TRACE_FUNCTION_TYPE) {
@@ -2379,9 +2381,15 @@  static int ftrace_event_release(struct inode *inode, struct file *file)
 			}
 
 			offset = ALIGN(offset, field->align);
-			ret = trace_define_field(call, field->type, field->name,
+			if (WARN_ON(!call->class)) {
+				offset += field->size;
+				continue;
+			}
+			head = trace_get_fields(call);
+			ret = __trace_define_field(head, field->type, field->name,
 						 offset, field->size,
-						 field->is_signed, field->filter_type);
+						 field->is_signed, field->filter_type,
+						 field->len);
 			if (WARN_ON_ONCE(ret)) {
 				pr_err("error code is %d\n", ret);
 				break;