[v3,3/6] perf cs-etm: Support thread stack
diff mbox series

Message ID 20191005091614.11635-4-leo.yan@linaro.org
State New
Headers show
Series
  • perf cs-etm: Support thread stack and callchain
Related show

Commit Message

Leo Yan Oct. 5, 2019, 9:16 a.m. UTC
Since Arm CoreSight doesn't support thread stack, the decoding cannot
display symbols with indented spaces to reflect the stack depth.

This patch adds support thread stack for Arm CoreSight, this allows
'perf script' to display properly for option '-F,+callindent'.

Before:

  # perf script -F,+callindent
            main  2808          1          branches: coresight_test1                      ffff8634f5c8 coresight_test1+0x3c (/root/coresight_test/libcstest.so)
            main  2808          1          branches: printf@plt                           aaaaba8d37ec main+0x28 (/root/coresight_test/main)
            main  2808          1          branches: printf@plt                           aaaaba8d36bc printf@plt+0xc (/root/coresight_test/main)
            main  2808          1          branches: _init                                aaaaba8d3650 _init+0x30 (/root/coresight_test/main)
            main  2808          1          branches: _dl_fixup                            ffff86373b4c _dl_runtime_resolve+0x40 (/lib/aarch64-linux-gnu/ld-2.28.so)
            main  2808          1          branches: _dl_lookup_symbol_x                  ffff8636e078 _dl_fixup+0xb8 (/lib/aarch64-linux-gnu/ld-2.28.so)
  [...]

After:

  # perf script -F,+callindent
            main  2808          1          branches:                 coresight_test1                                      ffff8634f5c8 coresight_test1+0x3c (/root/coresight_test/libcstest.so)
            main  2808          1          branches:                 printf@plt                                           aaaaba8d37ec main+0x28 (/root/coresight_test/main)
            main  2808          1          branches:                     printf@plt                                       aaaaba8d36bc printf@plt+0xc (/root/coresight_test/main)
            main  2808          1          branches:                     _init                                            aaaaba8d3650 _init+0x30 (/root/coresight_test/main)
            main  2808          1          branches:                     _dl_fixup                                        ffff86373b4c _dl_runtime_resolve+0x40 (/lib/aarch64-linux-gnu/ld-2.28.s
            main  2808          1          branches:                         _dl_lookup_symbol_x                          ffff8636e078 _dl_fixup+0xb8 (/lib/aarch64-linux-gnu/ld-2.28.so)
  [...]

Signed-off-by: Leo Yan <leo.yan@linaro.org>
---
 tools/perf/util/cs-etm.c | 44 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

Comments

Mathieu Poirier Oct. 11, 2019, 5:53 p.m. UTC | #1
On Sat, Oct 05, 2019 at 05:16:11PM +0800, Leo Yan wrote:
> Since Arm CoreSight doesn't support thread stack, the decoding cannot
> display symbols with indented spaces to reflect the stack depth.
> 
> This patch adds support thread stack for Arm CoreSight, this allows
> 'perf script' to display properly for option '-F,+callindent'.
> 
> Before:
> 
>   # perf script -F,+callindent
>             main  2808          1          branches: coresight_test1                      ffff8634f5c8 coresight_test1+0x3c (/root/coresight_test/libcstest.so)
>             main  2808          1          branches: printf@plt                           aaaaba8d37ec main+0x28 (/root/coresight_test/main)
>             main  2808          1          branches: printf@plt                           aaaaba8d36bc printf@plt+0xc (/root/coresight_test/main)
>             main  2808          1          branches: _init                                aaaaba8d3650 _init+0x30 (/root/coresight_test/main)
>             main  2808          1          branches: _dl_fixup                            ffff86373b4c _dl_runtime_resolve+0x40 (/lib/aarch64-linux-gnu/ld-2.28.so)
>             main  2808          1          branches: _dl_lookup_symbol_x                  ffff8636e078 _dl_fixup+0xb8 (/lib/aarch64-linux-gnu/ld-2.28.so)
>   [...]
> 
> After:
> 
>   # perf script -F,+callindent
>             main  2808          1          branches:                 coresight_test1                                      ffff8634f5c8 coresight_test1+0x3c (/root/coresight_test/libcstest.so)
>             main  2808          1          branches:                 printf@plt                                           aaaaba8d37ec main+0x28 (/root/coresight_test/main)
>             main  2808          1          branches:                     printf@plt                                       aaaaba8d36bc printf@plt+0xc (/root/coresight_test/main)
>             main  2808          1          branches:                     _init                                            aaaaba8d3650 _init+0x30 (/root/coresight_test/main)
>             main  2808          1          branches:                     _dl_fixup                                        ffff86373b4c _dl_runtime_resolve+0x40 (/lib/aarch64-linux-gnu/ld-2.28.s
>             main  2808          1          branches:                         _dl_lookup_symbol_x                          ffff8636e078 _dl_fixup+0xb8 (/lib/aarch64-linux-gnu/ld-2.28.so)
>   [...]
> 
> Signed-off-by: Leo Yan <leo.yan@linaro.org>
> ---
>  tools/perf/util/cs-etm.c | 44 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 44 insertions(+)
> 
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 58ceba7b91d5..780abbfd1833 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -1117,6 +1117,45 @@ static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
>  			   sample->insn_len, (void *)sample->insn);
>  }
>  
> +static void cs_etm__add_stack_event(struct cs_etm_queue *etmq,
> +				    struct cs_etm_traceid_queue *tidq)
> +{
> +	struct cs_etm_auxtrace *etm = etmq->etm;
> +	u8 trace_chan_id = tidq->trace_chan_id;
> +	int insn_len;
> +	u64 from_ip, to_ip;
> +
> +	if (etm->synth_opts.thread_stack) {
> +		from_ip = cs_etm__last_executed_instr(tidq->prev_packet);
> +		to_ip = cs_etm__first_executed_instr(tidq->packet);
> +
> +		insn_len = cs_etm__instr_size(etmq, trace_chan_id,
> +					      tidq->prev_packet->isa, from_ip);
> +
> +		/*
> +		 * Create thread stacks by keeping track of calls and returns;
> +		 * any call pushes thread stack, return pops the stack, and
> +		 * flush stack when the trace is discontinuous.
> +		 */
> +		thread_stack__event(tidq->thread, tidq->prev_packet->cpu,
> +				    tidq->prev_packet->flags,
> +				    from_ip, to_ip, insn_len,
> +				    etmq->buffer->buffer_nr);

Details are a little fuzzy in my head but I'm pretty sure
we want trace_chan_id here.  


> +	} else {
> +		/*
> +		 * The thread stack can be output via thread_stack__process();
> +		 * thus the detailed information about paired calls and returns
> +		 * will be facilitated by Python script for the db-export.
> +		 *
> +		 * Need to set trace buffer number and flush thread stack if the
> +		 * trace buffer number has been alternate.
> +		 */
> +		thread_stack__set_trace_nr(tidq->thread,
> +					   tidq->prev_packet->cpu,
> +					   etmq->buffer->buffer_nr);

Same here.

> +	}
> +}
> +
>  static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
>  					    struct cs_etm_traceid_queue *tidq,
>  					    u64 addr, u64 period)
> @@ -1393,6 +1432,9 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
>  		tidq->period_instructions = instrs_over;
>  	}
>  
> +	if (tidq->prev_packet->last_instr_taken_branch)
> +		cs_etm__add_stack_event(etmq, tidq);
> +
>  	if (etm->sample_branches) {
>  		bool generate_sample = false;
>  
> @@ -2593,6 +2635,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
>  		itrace_synth_opts__set_default(&etm->synth_opts,
>  				session->itrace_synth_opts->default_no_sample);
>  		etm->synth_opts.callchain = false;
> +		etm->synth_opts.thread_stack =
> +				session->itrace_synth_opts->thread_stack;
>  	}
>  
>  	err = cs_etm__synth_events(etm, session);
> -- 
> 2.17.1
>
Leo Yan Oct. 15, 2019, 3:33 a.m. UTC | #2
Hi Mathieu,

On Fri, Oct 11, 2019 at 11:53:53AM -0600, Mathieu Poirier wrote:
> On Sat, Oct 05, 2019 at 05:16:11PM +0800, Leo Yan wrote:
> > Since Arm CoreSight doesn't support thread stack, the decoding cannot
> > display symbols with indented spaces to reflect the stack depth.
> > 
> > This patch adds support thread stack for Arm CoreSight, this allows
> > 'perf script' to display properly for option '-F,+callindent'.
> > 
> > Before:
> > 
> >   # perf script -F,+callindent
> >             main  2808          1          branches: coresight_test1                      ffff8634f5c8 coresight_test1+0x3c (/root/coresight_test/libcstest.so)
> >             main  2808          1          branches: printf@plt                           aaaaba8d37ec main+0x28 (/root/coresight_test/main)
> >             main  2808          1          branches: printf@plt                           aaaaba8d36bc printf@plt+0xc (/root/coresight_test/main)
> >             main  2808          1          branches: _init                                aaaaba8d3650 _init+0x30 (/root/coresight_test/main)
> >             main  2808          1          branches: _dl_fixup                            ffff86373b4c _dl_runtime_resolve+0x40 (/lib/aarch64-linux-gnu/ld-2.28.so)
> >             main  2808          1          branches: _dl_lookup_symbol_x                  ffff8636e078 _dl_fixup+0xb8 (/lib/aarch64-linux-gnu/ld-2.28.so)
> >   [...]
> > 
> > After:
> > 
> >   # perf script -F,+callindent
> >             main  2808          1          branches:                 coresight_test1                                      ffff8634f5c8 coresight_test1+0x3c (/root/coresight_test/libcstest.so)
> >             main  2808          1          branches:                 printf@plt                                           aaaaba8d37ec main+0x28 (/root/coresight_test/main)
> >             main  2808          1          branches:                     printf@plt                                       aaaaba8d36bc printf@plt+0xc (/root/coresight_test/main)
> >             main  2808          1          branches:                     _init                                            aaaaba8d3650 _init+0x30 (/root/coresight_test/main)
> >             main  2808          1          branches:                     _dl_fixup                                        ffff86373b4c _dl_runtime_resolve+0x40 (/lib/aarch64-linux-gnu/ld-2.28.s
> >             main  2808          1          branches:                         _dl_lookup_symbol_x                          ffff8636e078 _dl_fixup+0xb8 (/lib/aarch64-linux-gnu/ld-2.28.so)
> >   [...]
> > 
> > Signed-off-by: Leo Yan <leo.yan@linaro.org>
> > ---
> >  tools/perf/util/cs-etm.c | 44 ++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 44 insertions(+)
> > 
> > diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> > index 58ceba7b91d5..780abbfd1833 100644
> > --- a/tools/perf/util/cs-etm.c
> > +++ b/tools/perf/util/cs-etm.c
> > @@ -1117,6 +1117,45 @@ static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
> >  			   sample->insn_len, (void *)sample->insn);
> >  }
> >  
> > +static void cs_etm__add_stack_event(struct cs_etm_queue *etmq,
> > +				    struct cs_etm_traceid_queue *tidq)
> > +{
> > +	struct cs_etm_auxtrace *etm = etmq->etm;
> > +	u8 trace_chan_id = tidq->trace_chan_id;
> > +	int insn_len;
> > +	u64 from_ip, to_ip;
> > +
> > +	if (etm->synth_opts.thread_stack) {
> > +		from_ip = cs_etm__last_executed_instr(tidq->prev_packet);
> > +		to_ip = cs_etm__first_executed_instr(tidq->packet);
> > +
> > +		insn_len = cs_etm__instr_size(etmq, trace_chan_id,
> > +					      tidq->prev_packet->isa, from_ip);
> > +
> > +		/*
> > +		 * Create thread stacks by keeping track of calls and returns;
> > +		 * any call pushes thread stack, return pops the stack, and
> > +		 * flush stack when the trace is discontinuous.
> > +		 */
> > +		thread_stack__event(tidq->thread, tidq->prev_packet->cpu,
> > +				    tidq->prev_packet->flags,
> > +				    from_ip, to_ip, insn_len,
> > +				    etmq->buffer->buffer_nr);
> 
> Details are a little fuzzy in my head but I'm pretty sure
> we want trace_chan_id here.  

Thanks a lot for reviewing!

This is good point.  After a quick look for the code, seems
TO_CS_QUEUE_NR() is the right thing to use at here for buffer
number.

Will look more closely to the code and apply changes in next
version.

Thanks,
Leo Yan

> > +	} else {
> > +		/*
> > +		 * The thread stack can be output via thread_stack__process();
> > +		 * thus the detailed information about paired calls and returns
> > +		 * will be facilitated by Python script for the db-export.
> > +		 *
> > +		 * Need to set trace buffer number and flush thread stack if the
> > +		 * trace buffer number has been alternate.
> > +		 */
> > +		thread_stack__set_trace_nr(tidq->thread,
> > +					   tidq->prev_packet->cpu,
> > +					   etmq->buffer->buffer_nr);
> 
> Same here.
> 
> > +	}
> > +}
> > +
> >  static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
> >  					    struct cs_etm_traceid_queue *tidq,
> >  					    u64 addr, u64 period)
> > @@ -1393,6 +1432,9 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
> >  		tidq->period_instructions = instrs_over;
> >  	}
> >  
> > +	if (tidq->prev_packet->last_instr_taken_branch)
> > +		cs_etm__add_stack_event(etmq, tidq);
> > +
> >  	if (etm->sample_branches) {
> >  		bool generate_sample = false;
> >  
> > @@ -2593,6 +2635,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
> >  		itrace_synth_opts__set_default(&etm->synth_opts,
> >  				session->itrace_synth_opts->default_no_sample);
> >  		etm->synth_opts.callchain = false;
> > +		etm->synth_opts.thread_stack =
> > +				session->itrace_synth_opts->thread_stack;
> >  	}
> >  
> >  	err = cs_etm__synth_events(etm, session);
> > -- 
> > 2.17.1
> >
Leo Yan Oct. 22, 2019, 5:03 a.m. UTC | #3
Hi Mathieu,

On Fri, Oct 11, 2019 at 11:53:53AM -0600, Mathieu Poirier wrote:
> On Sat, Oct 05, 2019 at 05:16:11PM +0800, Leo Yan wrote:
> > Since Arm CoreSight doesn't support thread stack, the decoding cannot
> > display symbols with indented spaces to reflect the stack depth.
> > 
> > This patch adds support thread stack for Arm CoreSight, this allows
> > 'perf script' to display properly for option '-F,+callindent'.
> > 
> > Before:
> > 
> >   # perf script -F,+callindent
> >             main  2808          1          branches: coresight_test1                      ffff8634f5c8 coresight_test1+0x3c (/root/coresight_test/libcstest.so)
> >             main  2808          1          branches: printf@plt                           aaaaba8d37ec main+0x28 (/root/coresight_test/main)
> >             main  2808          1          branches: printf@plt                           aaaaba8d36bc printf@plt+0xc (/root/coresight_test/main)
> >             main  2808          1          branches: _init                                aaaaba8d3650 _init+0x30 (/root/coresight_test/main)
> >             main  2808          1          branches: _dl_fixup                            ffff86373b4c _dl_runtime_resolve+0x40 (/lib/aarch64-linux-gnu/ld-2.28.so)
> >             main  2808          1          branches: _dl_lookup_symbol_x                  ffff8636e078 _dl_fixup+0xb8 (/lib/aarch64-linux-gnu/ld-2.28.so)
> >   [...]
> > 
> > After:
> > 
> >   # perf script -F,+callindent
> >             main  2808          1          branches:                 coresight_test1                                      ffff8634f5c8 coresight_test1+0x3c (/root/coresight_test/libcstest.so)
> >             main  2808          1          branches:                 printf@plt                                           aaaaba8d37ec main+0x28 (/root/coresight_test/main)
> >             main  2808          1          branches:                     printf@plt                                       aaaaba8d36bc printf@plt+0xc (/root/coresight_test/main)
> >             main  2808          1          branches:                     _init                                            aaaaba8d3650 _init+0x30 (/root/coresight_test/main)
> >             main  2808          1          branches:                     _dl_fixup                                        ffff86373b4c _dl_runtime_resolve+0x40 (/lib/aarch64-linux-gnu/ld-2.28.s
> >             main  2808          1          branches:                         _dl_lookup_symbol_x                          ffff8636e078 _dl_fixup+0xb8 (/lib/aarch64-linux-gnu/ld-2.28.so)
> >   [...]
> > 
> > Signed-off-by: Leo Yan <leo.yan@linaro.org>
> > ---
> >  tools/perf/util/cs-etm.c | 44 ++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 44 insertions(+)
> > 
> > diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> > index 58ceba7b91d5..780abbfd1833 100644
> > --- a/tools/perf/util/cs-etm.c
> > +++ b/tools/perf/util/cs-etm.c
> > @@ -1117,6 +1117,45 @@ static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
> >  			   sample->insn_len, (void *)sample->insn);
> >  }
> >  
> > +static void cs_etm__add_stack_event(struct cs_etm_queue *etmq,
> > +				    struct cs_etm_traceid_queue *tidq)
> > +{
> > +	struct cs_etm_auxtrace *etm = etmq->etm;
> > +	u8 trace_chan_id = tidq->trace_chan_id;
> > +	int insn_len;
> > +	u64 from_ip, to_ip;
> > +
> > +	if (etm->synth_opts.thread_stack) {
> > +		from_ip = cs_etm__last_executed_instr(tidq->prev_packet);
> > +		to_ip = cs_etm__first_executed_instr(tidq->packet);
> > +
> > +		insn_len = cs_etm__instr_size(etmq, trace_chan_id,
> > +					      tidq->prev_packet->isa, from_ip);
> > +
> > +		/*
> > +		 * Create thread stacks by keeping track of calls and returns;
> > +		 * any call pushes thread stack, return pops the stack, and
> > +		 * flush stack when the trace is discontinuous.
> > +		 */
> > +		thread_stack__event(tidq->thread, tidq->prev_packet->cpu,
> > +				    tidq->prev_packet->flags,
> > +				    from_ip, to_ip, insn_len,
> > +				    etmq->buffer->buffer_nr);
> 
> Details are a little fuzzy in my head but I'm pretty sure
> we want trace_chan_id here.  

I spent some time to look into this question, and I think we don't
need to add extra info for trace_chan_id.

The main reason is for CPU wide tracing, if one task is migrated from
CPU_a to CPU_b, if we append 'trace_chan_id' for the buffer number, then
it will tell the thread_stack that the buffer has been changed (or it
will be considered the trace is discontinuous), then thread stack will
be flushed.  Actually, this is not what we want; if a task is migrated
from one CPU to another, we still need to keep its thread stack if the
trace data comes from the same buffer_nr.

To be honest, I struggled to understand what's the purpose for
'buffer->buffer_nr', from the code, I think 'buffer->buffer_nr' is
mainly used to trace the splitted buffers (e.g. the buffers are splitted
into different queues so the trace data coming from different trace
chunk?).  Now I observe 'buffer->buffer_nr' is always zero since the
buffer is not used with splitted mode.  If later we support 1:1 map
between tracers and sinks, then we need to set 'buffer->buffer_nr' so
can reflect the correct buffer mapping, but we don't need to use
trace_chan_id as extra info at here.

Please let me know what you think about this?  If you agree with this,
I will send out patch v4 soon with addressing other comments.

Thanks,
Leo Yan

> > +	} else {
> > +		/*
> > +		 * The thread stack can be output via thread_stack__process();
> > +		 * thus the detailed information about paired calls and returns
> > +		 * will be facilitated by Python script for the db-export.
> > +		 *
> > +		 * Need to set trace buffer number and flush thread stack if the
> > +		 * trace buffer number has been alternate.
> > +		 */
> > +		thread_stack__set_trace_nr(tidq->thread,
> > +					   tidq->prev_packet->cpu,
> > +					   etmq->buffer->buffer_nr);
> 
> Same here.
> 
> > +	}
> > +}
> > +
> >  static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
> >  					    struct cs_etm_traceid_queue *tidq,
> >  					    u64 addr, u64 period)
> > @@ -1393,6 +1432,9 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
> >  		tidq->period_instructions = instrs_over;
> >  	}
> >  
> > +	if (tidq->prev_packet->last_instr_taken_branch)
> > +		cs_etm__add_stack_event(etmq, tidq);
> > +
> >  	if (etm->sample_branches) {
> >  		bool generate_sample = false;
> >  
> > @@ -2593,6 +2635,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
> >  		itrace_synth_opts__set_default(&etm->synth_opts,
> >  				session->itrace_synth_opts->default_no_sample);
> >  		etm->synth_opts.callchain = false;
> > +		etm->synth_opts.thread_stack =
> > +				session->itrace_synth_opts->thread_stack;
> >  	}
> >  
> >  	err = cs_etm__synth_events(etm, session);
> > -- 
> > 2.17.1
> >
Mathieu Poirier Oct. 28, 2019, 10:43 p.m. UTC | #4
On Mon, 21 Oct 2019 at 23:03, Leo Yan <leo.yan@linaro.org> wrote:
>
> Hi Mathieu,
>
> On Fri, Oct 11, 2019 at 11:53:53AM -0600, Mathieu Poirier wrote:
> > On Sat, Oct 05, 2019 at 05:16:11PM +0800, Leo Yan wrote:
> > > Since Arm CoreSight doesn't support thread stack, the decoding cannot
> > > display symbols with indented spaces to reflect the stack depth.
> > >
> > > This patch adds support thread stack for Arm CoreSight, this allows
> > > 'perf script' to display properly for option '-F,+callindent'.
> > >
> > > Before:
> > >
> > >   # perf script -F,+callindent
> > >             main  2808          1          branches: coresight_test1                      ffff8634f5c8 coresight_test1+0x3c (/root/coresight_test/libcstest.so)
> > >             main  2808          1          branches: printf@plt                           aaaaba8d37ec main+0x28 (/root/coresight_test/main)
> > >             main  2808          1          branches: printf@plt                           aaaaba8d36bc printf@plt+0xc (/root/coresight_test/main)
> > >             main  2808          1          branches: _init                                aaaaba8d3650 _init+0x30 (/root/coresight_test/main)
> > >             main  2808          1          branches: _dl_fixup                            ffff86373b4c _dl_runtime_resolve+0x40 (/lib/aarch64-linux-gnu/ld-2.28.so)
> > >             main  2808          1          branches: _dl_lookup_symbol_x                  ffff8636e078 _dl_fixup+0xb8 (/lib/aarch64-linux-gnu/ld-2.28.so)
> > >   [...]
> > >
> > > After:
> > >
> > >   # perf script -F,+callindent
> > >             main  2808          1          branches:                 coresight_test1                                      ffff8634f5c8 coresight_test1+0x3c (/root/coresight_test/libcstest.so)
> > >             main  2808          1          branches:                 printf@plt                                           aaaaba8d37ec main+0x28 (/root/coresight_test/main)
> > >             main  2808          1          branches:                     printf@plt                                       aaaaba8d36bc printf@plt+0xc (/root/coresight_test/main)
> > >             main  2808          1          branches:                     _init                                            aaaaba8d3650 _init+0x30 (/root/coresight_test/main)
> > >             main  2808          1          branches:                     _dl_fixup                                        ffff86373b4c _dl_runtime_resolve+0x40 (/lib/aarch64-linux-gnu/ld-2.28.s
> > >             main  2808          1          branches:                         _dl_lookup_symbol_x                          ffff8636e078 _dl_fixup+0xb8 (/lib/aarch64-linux-gnu/ld-2.28.so)
> > >   [...]
> > >
> > > Signed-off-by: Leo Yan <leo.yan@linaro.org>
> > > ---
> > >  tools/perf/util/cs-etm.c | 44 ++++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 44 insertions(+)
> > >
> > > diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> > > index 58ceba7b91d5..780abbfd1833 100644
> > > --- a/tools/perf/util/cs-etm.c
> > > +++ b/tools/perf/util/cs-etm.c
> > > @@ -1117,6 +1117,45 @@ static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
> > >                        sample->insn_len, (void *)sample->insn);
> > >  }
> > >
> > > +static void cs_etm__add_stack_event(struct cs_etm_queue *etmq,
> > > +                               struct cs_etm_traceid_queue *tidq)
> > > +{
> > > +   struct cs_etm_auxtrace *etm = etmq->etm;
> > > +   u8 trace_chan_id = tidq->trace_chan_id;
> > > +   int insn_len;
> > > +   u64 from_ip, to_ip;
> > > +
> > > +   if (etm->synth_opts.thread_stack) {
> > > +           from_ip = cs_etm__last_executed_instr(tidq->prev_packet);
> > > +           to_ip = cs_etm__first_executed_instr(tidq->packet);
> > > +
> > > +           insn_len = cs_etm__instr_size(etmq, trace_chan_id,
> > > +                                         tidq->prev_packet->isa, from_ip);
> > > +
> > > +           /*
> > > +            * Create thread stacks by keeping track of calls and returns;
> > > +            * any call pushes thread stack, return pops the stack, and
> > > +            * flush stack when the trace is discontinuous.
> > > +            */
> > > +           thread_stack__event(tidq->thread, tidq->prev_packet->cpu,
> > > +                               tidq->prev_packet->flags,
> > > +                               from_ip, to_ip, insn_len,
> > > +                               etmq->buffer->buffer_nr);
> >
> > Details are a little fuzzy in my head but I'm pretty sure
> > we want trace_chan_id here.
>
> I spent some time to look into this question, and I think we don't
> need to add extra info for trace_chan_id.
>
> The main reason is for CPU wide tracing, if one task is migrated from
> CPU_a to CPU_b, if we append 'trace_chan_id' for the buffer number, then
> it will tell the thread_stack that the buffer has been changed (or it
> will be considered the trace is discontinuous), then thread stack will
> be flushed.  Actually, this is not what we want; if a task is migrated
> from one CPU to another, we still need to keep its thread stack if the
> trace data comes from the same buffer_nr.

After reviewing the code I conclude that using etmq->buffer->buffer_nr
is the correct way to proceed.

That being said you have sent this new set [1], which is a rework of
some of the code you have in the current set.  As such the only way
forward is for you to wait until [1] I has been applied and rebase the
remaining work in this set on top of it.

Let me know if you have questions.

Thanks,
Mathieu

[1]. https://patchwork.kernel.org/cover/11130213/

>
> To be honest, I struggled to understand what's the purpose for
> 'buffer->buffer_nr', from the code, I think 'buffer->buffer_nr' is
> mainly used to trace the splitted buffers (e.g. the buffers are splitted
> into different queues so the trace data coming from different trace
> chunk?).  Now I observe 'buffer->buffer_nr' is always zero since the
> buffer is not used with splitted mode.  If later we support 1:1 map
> between tracers and sinks, then we need to set 'buffer->buffer_nr' so
> can reflect the correct buffer mapping, but we don't need to use
> trace_chan_id as extra info at here.
>
> Please let me know what you think about this?  If you agree with this,
> I will send out patch v4 soon with addressing other comments.
>
> Thanks,
> Leo Yan
>
> > > +   } else {
> > > +           /*
> > > +            * The thread stack can be output via thread_stack__process();
> > > +            * thus the detailed information about paired calls and returns
> > > +            * will be facilitated by Python script for the db-export.
> > > +            *
> > > +            * Need to set trace buffer number and flush thread stack if the
> > > +            * trace buffer number has been alternate.
> > > +            */
> > > +           thread_stack__set_trace_nr(tidq->thread,
> > > +                                      tidq->prev_packet->cpu,
> > > +                                      etmq->buffer->buffer_nr);
> >
> > Same here.
> >
> > > +   }
> > > +}
> > > +
> > >  static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
> > >                                         struct cs_etm_traceid_queue *tidq,
> > >                                         u64 addr, u64 period)
> > > @@ -1393,6 +1432,9 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
> > >             tidq->period_instructions = instrs_over;
> > >     }
> > >
> > > +   if (tidq->prev_packet->last_instr_taken_branch)
> > > +           cs_etm__add_stack_event(etmq, tidq);
> > > +
> > >     if (etm->sample_branches) {
> > >             bool generate_sample = false;
> > >
> > > @@ -2593,6 +2635,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
> > >             itrace_synth_opts__set_default(&etm->synth_opts,
> > >                             session->itrace_synth_opts->default_no_sample);
> > >             etm->synth_opts.callchain = false;
> > > +           etm->synth_opts.thread_stack =
> > > +                           session->itrace_synth_opts->thread_stack;
> > >     }
> > >
> > >     err = cs_etm__synth_events(etm, session);
> > > --
> > > 2.17.1
> > >
Leo Yan Oct. 29, 2019, 4:11 a.m. UTC | #5
Hi Mathieu,

On Mon, Oct 28, 2019 at 04:43:57PM -0600, Mathieu Poirier wrote:
> On Mon, 21 Oct 2019 at 23:03, Leo Yan <leo.yan@linaro.org> wrote:
> >
> > Hi Mathieu,
> >
> > On Fri, Oct 11, 2019 at 11:53:53AM -0600, Mathieu Poirier wrote:
> > > On Sat, Oct 05, 2019 at 05:16:11PM +0800, Leo Yan wrote:
> > > > Since Arm CoreSight doesn't support thread stack, the decoding cannot
> > > > display symbols with indented spaces to reflect the stack depth.
> > > >
> > > > This patch adds support thread stack for Arm CoreSight, this allows
> > > > 'perf script' to display properly for option '-F,+callindent'.
> > > >
> > > > Before:
> > > >
> > > >   # perf script -F,+callindent
> > > >             main  2808          1          branches: coresight_test1                      ffff8634f5c8 coresight_test1+0x3c (/root/coresight_test/libcstest.so)
> > > >             main  2808          1          branches: printf@plt                           aaaaba8d37ec main+0x28 (/root/coresight_test/main)
> > > >             main  2808          1          branches: printf@plt                           aaaaba8d36bc printf@plt+0xc (/root/coresight_test/main)
> > > >             main  2808          1          branches: _init                                aaaaba8d3650 _init+0x30 (/root/coresight_test/main)
> > > >             main  2808          1          branches: _dl_fixup                            ffff86373b4c _dl_runtime_resolve+0x40 (/lib/aarch64-linux-gnu/ld-2.28.so)
> > > >             main  2808          1          branches: _dl_lookup_symbol_x                  ffff8636e078 _dl_fixup+0xb8 (/lib/aarch64-linux-gnu/ld-2.28.so)
> > > >   [...]
> > > >
> > > > After:
> > > >
> > > >   # perf script -F,+callindent
> > > >             main  2808          1          branches:                 coresight_test1                                      ffff8634f5c8 coresight_test1+0x3c (/root/coresight_test/libcstest.so)
> > > >             main  2808          1          branches:                 printf@plt                                           aaaaba8d37ec main+0x28 (/root/coresight_test/main)
> > > >             main  2808          1          branches:                     printf@plt                                       aaaaba8d36bc printf@plt+0xc (/root/coresight_test/main)
> > > >             main  2808          1          branches:                     _init                                            aaaaba8d3650 _init+0x30 (/root/coresight_test/main)
> > > >             main  2808          1          branches:                     _dl_fixup                                        ffff86373b4c _dl_runtime_resolve+0x40 (/lib/aarch64-linux-gnu/ld-2.28.s
> > > >             main  2808          1          branches:                         _dl_lookup_symbol_x                          ffff8636e078 _dl_fixup+0xb8 (/lib/aarch64-linux-gnu/ld-2.28.so)
> > > >   [...]
> > > >
> > > > Signed-off-by: Leo Yan <leo.yan@linaro.org>
> > > > ---
> > > >  tools/perf/util/cs-etm.c | 44 ++++++++++++++++++++++++++++++++++++++++
> > > >  1 file changed, 44 insertions(+)
> > > >
> > > > diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> > > > index 58ceba7b91d5..780abbfd1833 100644
> > > > --- a/tools/perf/util/cs-etm.c
> > > > +++ b/tools/perf/util/cs-etm.c
> > > > @@ -1117,6 +1117,45 @@ static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
> > > >                        sample->insn_len, (void *)sample->insn);
> > > >  }
> > > >
> > > > +static void cs_etm__add_stack_event(struct cs_etm_queue *etmq,
> > > > +                               struct cs_etm_traceid_queue *tidq)
> > > > +{
> > > > +   struct cs_etm_auxtrace *etm = etmq->etm;
> > > > +   u8 trace_chan_id = tidq->trace_chan_id;
> > > > +   int insn_len;
> > > > +   u64 from_ip, to_ip;
> > > > +
> > > > +   if (etm->synth_opts.thread_stack) {
> > > > +           from_ip = cs_etm__last_executed_instr(tidq->prev_packet);
> > > > +           to_ip = cs_etm__first_executed_instr(tidq->packet);
> > > > +
> > > > +           insn_len = cs_etm__instr_size(etmq, trace_chan_id,
> > > > +                                         tidq->prev_packet->isa, from_ip);
> > > > +
> > > > +           /*
> > > > +            * Create thread stacks by keeping track of calls and returns;
> > > > +            * any call pushes thread stack, return pops the stack, and
> > > > +            * flush stack when the trace is discontinuous.
> > > > +            */
> > > > +           thread_stack__event(tidq->thread, tidq->prev_packet->cpu,
> > > > +                               tidq->prev_packet->flags,
> > > > +                               from_ip, to_ip, insn_len,
> > > > +                               etmq->buffer->buffer_nr);
> > >
> > > Details are a little fuzzy in my head but I'm pretty sure
> > > we want trace_chan_id here.
> >
> > I spent some time to look into this question, and I think we don't
> > need to add extra info for trace_chan_id.
> >
> > The main reason is for CPU wide tracing, if one task is migrated from
> > CPU_a to CPU_b, if we append 'trace_chan_id' for the buffer number, then
> > it will tell the thread_stack that the buffer has been changed (or it
> > will be considered the trace is discontinuous), then thread stack will
> > be flushed.  Actually, this is not what we want; if a task is migrated
> > from one CPU to another, we still need to keep its thread stack if the
> > trace data comes from the same buffer_nr.
> 
> After reviewing the code I conclude that using etmq->buffer->buffer_nr
> is the correct way to proceed.

Thanks for reviewing and confirmation.

> That being said you have sent this new set [1], which is a rework of
> some of the code you have in the current set.  As such the only way
> forward is for you to wait until [1] I has been applied and rebase the
> remaining work in this set on top of it.

Right.

Seems the shared link is incorrect :)  Let's firstly focus on the patch
set: 'perf cs-etm: Fix synthesizing instruction samples' [2] and after
it is merged I will send new patch set for cs-etm callchain support as
soon as possible.

Thanks,
Leo Yan

[2] https://patchwork.kernel.org/cover/11209991/

> Let me know if you have questions.
> 
> Thanks,
> Mathieu
> 
> [1]. https://patchwork.kernel.org/cover/11130213/
> 
> >
> > To be honest, I struggled to understand what's the purpose for
> > 'buffer->buffer_nr', from the code, I think 'buffer->buffer_nr' is
> > mainly used to trace the splitted buffers (e.g. the buffers are splitted
> > into different queues so the trace data coming from different trace
> > chunk?).  Now I observe 'buffer->buffer_nr' is always zero since the
> > buffer is not used with splitted mode.  If later we support 1:1 map
> > between tracers and sinks, then we need to set 'buffer->buffer_nr' so
> > can reflect the correct buffer mapping, but we don't need to use
> > trace_chan_id as extra info at here.
> >
> > Please let me know what you think about this?  If you agree with this,
> > I will send out patch v4 soon with addressing other comments.
> >
> > Thanks,
> > Leo Yan
> >
> > > > +   } else {
> > > > +           /*
> > > > +            * The thread stack can be output via thread_stack__process();
> > > > +            * thus the detailed information about paired calls and returns
> > > > +            * will be facilitated by Python script for the db-export.
> > > > +            *
> > > > +            * Need to set trace buffer number and flush thread stack if the
> > > > +            * trace buffer number has been alternate.
> > > > +            */
> > > > +           thread_stack__set_trace_nr(tidq->thread,
> > > > +                                      tidq->prev_packet->cpu,
> > > > +                                      etmq->buffer->buffer_nr);
> > >
> > > Same here.
> > >
> > > > +   }
> > > > +}
> > > > +
> > > >  static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
> > > >                                         struct cs_etm_traceid_queue *tidq,
> > > >                                         u64 addr, u64 period)
> > > > @@ -1393,6 +1432,9 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
> > > >             tidq->period_instructions = instrs_over;
> > > >     }
> > > >
> > > > +   if (tidq->prev_packet->last_instr_taken_branch)
> > > > +           cs_etm__add_stack_event(etmq, tidq);
> > > > +
> > > >     if (etm->sample_branches) {
> > > >             bool generate_sample = false;
> > > >
> > > > @@ -2593,6 +2635,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
> > > >             itrace_synth_opts__set_default(&etm->synth_opts,
> > > >                             session->itrace_synth_opts->default_no_sample);
> > > >             etm->synth_opts.callchain = false;
> > > > +           etm->synth_opts.thread_stack =
> > > > +                           session->itrace_synth_opts->thread_stack;
> > > >     }
> > > >
> > > >     err = cs_etm__synth_events(etm, session);
> > > > --
> > > > 2.17.1
> > > >

Patch
diff mbox series

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 58ceba7b91d5..780abbfd1833 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -1117,6 +1117,45 @@  static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
 			   sample->insn_len, (void *)sample->insn);
 }
 
+static void cs_etm__add_stack_event(struct cs_etm_queue *etmq,
+				    struct cs_etm_traceid_queue *tidq)
+{
+	struct cs_etm_auxtrace *etm = etmq->etm;
+	u8 trace_chan_id = tidq->trace_chan_id;
+	int insn_len;
+	u64 from_ip, to_ip;
+
+	if (etm->synth_opts.thread_stack) {
+		from_ip = cs_etm__last_executed_instr(tidq->prev_packet);
+		to_ip = cs_etm__first_executed_instr(tidq->packet);
+
+		insn_len = cs_etm__instr_size(etmq, trace_chan_id,
+					      tidq->prev_packet->isa, from_ip);
+
+		/*
+		 * Create thread stacks by keeping track of calls and returns;
+		 * any call pushes thread stack, return pops the stack, and
+		 * flush stack when the trace is discontinuous.
+		 */
+		thread_stack__event(tidq->thread, tidq->prev_packet->cpu,
+				    tidq->prev_packet->flags,
+				    from_ip, to_ip, insn_len,
+				    etmq->buffer->buffer_nr);
+	} else {
+		/*
+		 * The thread stack can be output via thread_stack__process();
+		 * thus the detailed information about paired calls and returns
+		 * will be facilitated by Python script for the db-export.
+		 *
+		 * Need to set trace buffer number and flush thread stack if the
+		 * trace buffer number has been alternate.
+		 */
+		thread_stack__set_trace_nr(tidq->thread,
+					   tidq->prev_packet->cpu,
+					   etmq->buffer->buffer_nr);
+	}
+}
+
 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
 					    struct cs_etm_traceid_queue *tidq,
 					    u64 addr, u64 period)
@@ -1393,6 +1432,9 @@  static int cs_etm__sample(struct cs_etm_queue *etmq,
 		tidq->period_instructions = instrs_over;
 	}
 
+	if (tidq->prev_packet->last_instr_taken_branch)
+		cs_etm__add_stack_event(etmq, tidq);
+
 	if (etm->sample_branches) {
 		bool generate_sample = false;
 
@@ -2593,6 +2635,8 @@  int cs_etm__process_auxtrace_info(union perf_event *event,
 		itrace_synth_opts__set_default(&etm->synth_opts,
 				session->itrace_synth_opts->default_no_sample);
 		etm->synth_opts.callchain = false;
+		etm->synth_opts.thread_stack =
+				session->itrace_synth_opts->thread_stack;
 	}
 
 	err = cs_etm__synth_events(etm, session);