diff mbox series

[v6,1/2] libtraceevent: sync state char array with the kernel

Message ID 20230803030955.380537-1-zegao@tencent.com (mailing list archive)
State Accepted
Commit 9b2e543f91f7b49f5900b55eb4558bf083b2f163
Headers show
Series [v6,1/2] libtraceevent: sync state char array with the kernel | expand

Commit Message

Ze Gao Aug. 3, 2023, 3:09 a.m. UTC
Update state char array to match the latest kernel
definitions.

Signed-off-by: Ze Gao <zegao@tencent.com>
---
 plugins/plugin_sched_switch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Comments

Steven Rostedt Dec. 6, 2023, 10:55 p.m. UTC | #1
On Wed,  2 Aug 2023 23:09:54 -0400
Ze Gao <zegao2021@gmail.com> wrote:

> Update state char array to match the latest kernel
> definitions.
> 
> Signed-off-by: Ze Gao <zegao@tencent.com>
> ---
>  plugins/plugin_sched_switch.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/plugins/plugin_sched_switch.c b/plugins/plugin_sched_switch.c
> index 8752cae..e0986ac 100644
> --- a/plugins/plugin_sched_switch.c
> +++ b/plugins/plugin_sched_switch.c
> @@ -11,7 +11,7 @@
>  
>  static void write_state(struct trace_seq *s, int val)
>  {
> -	const char states[] = "SDTtZXxW";
> +	const char states[] = "SDTtXZPI";
>  	int found = 0;
>  	int i;
>  

Hi Ze,

As your update never made it into the kernel I never pulled these patches
in. But we do have an issue with the state getting out of sync with what
the kernel actually uses.

I did this "hack" but it seems to work. What I do is to look at the
sched_switch event print parsing arguments, find the __print_flags
arguments for the "prev_state" field. And then parse them, creating a new
output and I use that for the write_state() function.

I guess this should work for you too?

I still need to pull in this patch and use that as the default. I'll do
that before applying this one, and have your above update be the default.

Thanks!

-- Steve

diff --git a/plugins/plugin_sched_switch.c b/plugins/plugin_sched_switch.c
index 8752caea5c38..760d224f5507 100644
--- a/plugins/plugin_sched_switch.c
+++ b/plugins/plugin_sched_switch.c
@@ -9,13 +9,132 @@
 #include "event-parse.h"
 #include "trace-seq.h"
 
-static void write_state(struct trace_seq *s, int val)
+static const char *convert_sym(struct tep_print_flag_sym *sym)
 {
-	const char states[] = "SDTtZXxW";
+	static char save_states[33];
+
+	memset(save_states, 0, sizeof(save_states));
+
+	/* This is the flags for the prev_state_field, now make them into a string */
+	for (; sym; sym = sym->next) {
+		long bitmask = strtoul(sym->value, NULL, 0);
+		int i;
+
+		for (i = 0; !(bitmask & 1); i++)
+			bitmask >>= 1;
+
+		if (i > 32)
+			continue; // warn?
+
+		save_states[i] = sym->str[0];
+	}
+
+	return save_states;
+}
+
+static struct tep_print_arg_field *
+find_arg_field(struct tep_format_field *prev_state_field, struct tep_print_arg *arg)
+{
+	struct tep_print_arg_field *field;
+
+	if (!arg)
+		return NULL;
+
+	if (arg->type == TEP_PRINT_FIELD)
+		return &arg->field;
+
+	if (arg->type == TEP_PRINT_OP) {
+		field = find_arg_field(prev_state_field, arg->op.left);
+		if (field && field->field == prev_state_field)
+			return field;
+		field = find_arg_field(prev_state_field, arg->op.right);
+		if (field && field->field == prev_state_field)
+			return field;
+	}
+	return NULL;
+}
+
+static struct tep_print_flag_sym *
+test_flags(struct tep_format_field *prev_state_field, struct tep_print_arg *arg)
+{
+	struct tep_print_arg_field *field;
+
+	field = find_arg_field(prev_state_field, arg->flags.field);
+	if (!field)
+		return NULL;
+
+	return arg->flags.flags;
+}
+
+static struct tep_print_flag_sym *
+search_op(struct tep_format_field *prev_state_field, struct tep_print_arg *arg)
+{
+	struct tep_print_flag_sym *sym = NULL;
+
+	if (!arg)
+		return NULL;
+
+	if (arg->type == TEP_PRINT_OP) {
+		sym = search_op(prev_state_field, arg->op.left);
+		if (sym)
+			return sym;
+
+		sym = search_op(prev_state_field, arg->op.right);
+		if (sym)
+			return sym;
+	} else if (arg->type == TEP_PRINT_FLAGS) {
+		sym = test_flags(prev_state_field, arg);
+	}
+
+	return sym;
+}
+
+static const char *get_states(struct tep_format_field *prev_state_field)
+{
+	struct tep_print_flag_sym *sym;
+	struct tep_print_arg *arg;
+	struct tep_event *event;
+
+	event = prev_state_field->event;
+
+	/*
+	 * Look at the event format fields, and search for where
+	 * the prev_state is parsed via the format flags.
+	 */
+	for (arg = event->print_fmt.args; arg; arg = arg->next) {
+		/*
+		 * Currently, the __print_flags() for the prev_state
+		 * is embedded in operations, so they too must be
+		 * searched.
+		 */
+		sym = search_op(prev_state_field, arg);
+		if (sym)
+			return convert_sym(sym);
+	}
+	return NULL;
+}
+
+static void write_state(struct trace_seq *s, struct tep_format_field *field,
+			struct tep_record *record)
+{
+	static struct tep_format_field *prev_state_field;
+	static const char *states;
+	unsigned long long val;
 	int found = 0;
+	int len;
 	int i;
 
-	for (i = 0; i < (sizeof(states) - 1); i++) {
+	if (field != prev_state_field) {
+		states = get_states(field);
+		if (!states)
+			states = "SDTtZXxW";
+		prev_state_field = field;
+	}
+
+	tep_read_number_field(field, record->data, &val);
+
+	len = strlen(states);
+	for (i = 0; i < len; i++) {
 		if (!(val & (1 << i)))
 			continue;
 
@@ -99,8 +218,8 @@ static int sched_switch_handler(struct trace_seq *s,
 	if (tep_get_field_val(s, event, "prev_prio", record, &val, 1) == 0)
 		trace_seq_printf(s, "[%d] ", (int) val);
 
-	if (tep_get_field_val(s,  event, "prev_state", record, &val, 1) == 0)
-		write_state(s, val);
+	field = tep_find_any_field(event, "prev_state");
+	write_state(s, field, record);
 
 	trace_seq_puts(s, " ==> ");
Ze Gao Dec. 7, 2023, 2:52 a.m. UTC | #2
On Thu, Dec 7, 2023 at 6:54 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Wed,  2 Aug 2023 23:09:54 -0400
> Ze Gao <zegao2021@gmail.com> wrote:
>
> > Update state char array to match the latest kernel
> > definitions.
> >
> > Signed-off-by: Ze Gao <zegao@tencent.com>
> > ---
> >  plugins/plugin_sched_switch.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/plugins/plugin_sched_switch.c b/plugins/plugin_sched_switch.c
> > index 8752cae..e0986ac 100644
> > --- a/plugins/plugin_sched_switch.c
> > +++ b/plugins/plugin_sched_switch.c
> > @@ -11,7 +11,7 @@
> >
> >  static void write_state(struct trace_seq *s, int val)
> >  {
> > -     const char states[] = "SDTtZXxW";
> > +     const char states[] = "SDTtXZPI";
> >       int found = 0;
> >       int i;
> >
>
> Hi Ze,
>
> As your update never made it into the kernel I never pulled these patches
> in. But we do have an issue with the state getting out of sync with what
> the kernel actually uses.
>
> I did this "hack" but it seems to work. What I do is to look at the
> sched_switch event print parsing arguments, find the __print_flags
> arguments for the "prev_state" field. And then parse them, creating a new
> output and I use that for the write_state() function.

Thanks for your updates, I remembered to reply with my "hacks"  which
fix both libtraceevent and "perf sched timehist --state".
FYI,  for libtraceevent in this thread:
https://lore.kernel.org/all/CAD8CoPDqZrd2HOUCVkrOq9JgJEQwQnqBn9zD1A3M4aKLZ4mLvg@mail.gmail.com
and here for perf:
https://lore.kernel.org/all/CAD8CoPDqZrd2HOUCVkrOq9JgJEQwQnqBn9zD1A3M4aKLZ4mLvg@mail.gmail.com/

My only concern at that time was the very hacky way to pinpoint the
TEP_PRINT_FLAGS due to the lack of knowledge of libtraceevent design.
So I was not actively seeking to push my fixes to the upstream.

> I guess this should work for you too?

I'll give it a try definitely.

> I still need to pull in this patch and use that as the default. I'll do
> that before applying this one, and have your above update be the default.
>
> Thanks!


> -- Steve
>
> diff --git a/plugins/plugin_sched_switch.c b/plugins/plugin_sched_switch.c
> index 8752caea5c38..760d224f5507 100644
> --- a/plugins/plugin_sched_switch.c
> +++ b/plugins/plugin_sched_switch.c
> @@ -9,13 +9,132 @@
>  #include "event-parse.h"
>  #include "trace-seq.h"
>
> -static void write_state(struct trace_seq *s, int val)
> +static const char *convert_sym(struct tep_print_flag_sym *sym)
>  {
> -       const char states[] = "SDTtZXxW";
> +       static char save_states[33];

how about a gobal counter instead of fixed sized constant here,
and we can warn if it outbounds, u know, just in case.

> +       memset(save_states, 0, sizeof(save_states));
> +
> +       /* This is the flags for the prev_state_field, now make them into a string */
> +       for (; sym; sym = sym->next) {
> +               long bitmask = strtoul(sym->value, NULL, 0);
> +               int i;
> +
> +               for (i = 0; !(bitmask & 1); i++)
> +                       bitmask >>= 1;
> +
> +               if (i > 32)
> +                       continue; // warn?
> +
> +               save_states[i] = sym->str[0];
> +       }
> +
> +       return save_states;
> +}
> +
> +static struct tep_print_arg_field *
> +find_arg_field(struct tep_format_field *prev_state_field, struct tep_print_arg *arg)
> +{
> +       struct tep_print_arg_field *field;
> +
> +       if (!arg)
> +               return NULL;
> +
> +       if (arg->type == TEP_PRINT_FIELD)
> +               return &arg->field;
> +
> +       if (arg->type == TEP_PRINT_OP) {
> +               field = find_arg_field(prev_state_field, arg->op.left);
> +               if (field && field->field == prev_state_field)
> +                       return field;
> +               field = find_arg_field(prev_state_field, arg->op.right);
> +               if (field && field->field == prev_state_field)
> +                       return field;
> +       }
> +       return NULL;
> +}
> +
> +static struct tep_print_flag_sym *
> +test_flags(struct tep_format_field *prev_state_field, struct tep_print_arg *arg)
> +{
> +       struct tep_print_arg_field *field;
> +
> +       field = find_arg_field(prev_state_field, arg->flags.field);
> +       if (!field)
> +               return NULL;
> +
> +       return arg->flags.flags;
> +}
> +
> +static struct tep_print_flag_sym *
> +search_op(struct tep_format_field *prev_state_field, struct tep_print_arg *arg)
> +{
> +       struct tep_print_flag_sym *sym = NULL;
> +
> +       if (!arg)
> +               return NULL;
> +
> +       if (arg->type == TEP_PRINT_OP) {
> +               sym = search_op(prev_state_field, arg->op.left);
> +               if (sym)
> +                       return sym;
> +
> +               sym = search_op(prev_state_field, arg->op.right);
> +               if (sym)
> +                       return sym;
> +       } else if (arg->type == TEP_PRINT_FLAGS) {
> +               sym = test_flags(prev_state_field, arg);
> +       }
> +
> +       return sym;
> +}
> +
> +static const char *get_states(struct tep_format_field *prev_state_field)
> +{
> +       struct tep_print_flag_sym *sym;
> +       struct tep_print_arg *arg;
> +       struct tep_event *event;
> +
> +       event = prev_state_field->event;
> +
> +       /*
> +        * Look at the event format fields, and search for where
> +        * the prev_state is parsed via the format flags.
> +        */
> +       for (arg = event->print_fmt.args; arg; arg = arg->next) {
> +               /*
> +                * Currently, the __print_flags() for the prev_state
> +                * is embedded in operations, so they too must be
> +                * searched.
> +                */
> +               sym = search_op(prev_state_field, arg);
> +               if (sym)
> +                       return convert_sym(sym);
> +       }
> +       return NULL;
> +}
> +
> +static void write_state(struct trace_seq *s, struct tep_format_field *field,
> +                       struct tep_record *record)
> +{
> +       static struct tep_format_field *prev_state_field;
> +       static const char *states;
> +       unsigned long long val;
>         int found = 0;
> +       int len;
>         int i;
>
> -       for (i = 0; i < (sizeof(states) - 1); i++) {
> +       if (field != prev_state_field) {
> +               states = get_states(field);

I think we only need to build the task_state_char_array once,
and there are no other possibilities here. Correct me if I'm stupid :)
So here we can check if the char array is built, if so, we can
directly look it up in the mapping, right?

> +               if (!states)
> +                       states = "SDTtZXxW";

I think I've done experiments on the very old kernel in which the TP_print
is brought in the sched_switch tracepoint. The "hack" works for it too,
so this should not happen. Anyway, I'll give it a try and keep this
thread updated
if anything goes wrong.


Regards,
Ze Gao

> +               prev_state_field = field;
> +       }
> +
> +       tep_read_number_field(field, record->data, &val);
> +
> +       len = strlen(states);
> +       for (i = 0; i < len; i++) {
>                 if (!(val & (1 << i)))
>                         continue;
>
> @@ -99,8 +218,8 @@ static int sched_switch_handler(struct trace_seq *s,
>         if (tep_get_field_val(s, event, "prev_prio", record, &val, 1) == 0)
>                 trace_seq_printf(s, "[%d] ", (int) val);
>
> -       if (tep_get_field_val(s,  event, "prev_state", record, &val, 1) == 0)
> -               write_state(s, val);
> +       field = tep_find_any_field(event, "prev_state");
> +       write_state(s, field, record);
>
>         trace_seq_puts(s, " ==> ");
>
Steven Rostedt Dec. 7, 2023, 1:20 p.m. UTC | #3
On Thu, 7 Dec 2023 10:52:35 +0800
Ze Gao <zegao2021@gmail.com> wrote:

> On Thu, Dec 7, 2023 at 6:54 AM Steven Rostedt <rostedt@goodmis.org> wrote:
> >
> > On Wed,  2 Aug 2023 23:09:54 -0400
> > Ze Gao <zegao2021@gmail.com> wrote:
> >  
> > As your update never made it into the kernel I never pulled these patches
> > in. But we do have an issue with the state getting out of sync with what
> > the kernel actually uses.
> >
> > I did this "hack" but it seems to work. What I do is to look at the
> > sched_switch event print parsing arguments, find the __print_flags
> > arguments for the "prev_state" field. And then parse them, creating a new
> > output and I use that for the write_state() function.  
> 
> Thanks for your updates, I remembered to reply with my "hacks"  which
> fix both libtraceevent and "perf sched timehist --state".
> FYI,  for libtraceevent in this thread:
> https://lore.kernel.org/all/CAD8CoPDqZrd2HOUCVkrOq9JgJEQwQnqBn9zD1A3M4aKLZ4mLvg@mail.gmail.com

And somehow I missed this reply :-/

Sorry about that.

> and here for perf:
> https://lore.kernel.org/all/CAD8CoPDqZrd2HOUCVkrOq9JgJEQwQnqBn9zD1A3M4aKLZ4mLvg@mail.gmail.com/
> 
> My only concern at that time was the very hacky way to pinpoint the
> TEP_PRINT_FLAGS due to the lack of knowledge of libtraceevent design.
> So I was not actively seeking to push my fixes to the upstream.
> 
> > I guess this should work for you too?  
> 
> I'll give it a try definitely.
> 
> > I still need to pull in this patch and use that as the default. I'll do
> > that before applying this one, and have your above update be the default.
> >
> > Thanks!  
> 
> 
> > -- Steve
> >
> > diff --git a/plugins/plugin_sched_switch.c b/plugins/plugin_sched_switch.c
> > index 8752caea5c38..760d224f5507 100644
> > --- a/plugins/plugin_sched_switch.c
> > +++ b/plugins/plugin_sched_switch.c
> > @@ -9,13 +9,132 @@
> >  #include "event-parse.h"
> >  #include "trace-seq.h"
> >
> > -static void write_state(struct trace_seq *s, int val)
> > +static const char *convert_sym(struct tep_print_flag_sym *sym)
> >  {
> > -       const char states[] = "SDTtZXxW";
> > +       static char save_states[33];  
> 
> how about a gobal counter instead of fixed sized constant here,
> and we can warn if it outbounds, u know, just in case.

I picked 33 to be 1 greater than 32 as that's the max size of prev_state on
32 bit systems. I should change that to be a macro though.

> 
> > +       memset(save_states, 0, sizeof(save_states));
> > +
> > +       /* This is the flags for the prev_state_field, now make them into a string */
> > +       for (; sym; sym = sym->next) {
> > +               long bitmask = strtoul(sym->value, NULL, 0);
> > +               int i;
> > +
> > +               for (i = 0; !(bitmask & 1); i++)
> > +                       bitmask >>= 1;
> > +
> > +               if (i > 32)
> > +                       continue; // warn?
> > +
> > +               save_states[i] = sym->str[0];
> > +       }
> > +
> > +       return save_states;
> > +}
> > +


> > +static void write_state(struct trace_seq *s, struct tep_format_field *field,
> > +                       struct tep_record *record)
> > +{
> > +       static struct tep_format_field *prev_state_field;
> > +       static const char *states;
> > +       unsigned long long val;
> >         int found = 0;
> > +       int len;
> >         int i;
> >
> > -       for (i = 0; i < (sizeof(states) - 1); i++) {
> > +       if (field != prev_state_field) {
> > +               states = get_states(field);  
> 
> I think we only need to build the task_state_char_array once,
> and there are no other possibilities here. Correct me if I'm stupid :)

libtracecmd can be used for multiple trace.dat files, that in theory can
have more than one version of sched_switch. And yes this can be called with
two different versions of prev_state.

  trace-cmd report -i trace1.dat -i trace2.dat

This is commonly done to see host and guest trace data interleaved, where
the host and guest could have different versions of sched_switch.

> So here we can check if the char array is built, if so, we can
> directly look it up in the mapping, right?
> 
> > +               if (!states)
> > +                       states = "SDTtZXxW";  
> 
> I think I've done experiments on the very old kernel in which the TP_print
> is brought in the sched_switch tracepoint. The "hack" works for it too,
> so this should not happen. Anyway, I'll give it a try and keep this

I agree that "this should not happen" but I prefer to be safe than sorry,
and always try to write robust code. If in never happens we just waste a
branch, if it does happen, it can give some weird result. I don't know what
is in the future.

> thread updated
> if anything goes wrong.

Thanks,

-- Steve
diff mbox series

Patch

diff --git a/plugins/plugin_sched_switch.c b/plugins/plugin_sched_switch.c
index 8752cae..e0986ac 100644
--- a/plugins/plugin_sched_switch.c
+++ b/plugins/plugin_sched_switch.c
@@ -11,7 +11,7 @@ 
 
 static void write_state(struct trace_seq *s, int val)
 {
-	const char states[] = "SDTtZXxW";
+	const char states[] = "SDTtXZPI";
 	int found = 0;
 	int i;