diff mbox series

[2/4] perf cs-etm: Use previous thread for branch sample source IP

Message ID 20230524131958.2139331-3-james.clark@arm.com (mailing list archive)
State New, archived
Headers show
Series perf cs-etm: Track exception level | expand

Commit Message

James Clark May 24, 2023, 1:19 p.m. UTC
Branch samples currently use the IP of the previous packet as the from
IP, and the IP of the current packet as the to IP. But it incorrectly
uses the current thread. In some cases like a jump into a different
exception level this will attribute to the incorrect process.

Fix it by tracking the previous thread in the same way the previous
packet is tracked.

Signed-off-by: James Clark <james.clark@arm.com>
---
 tools/perf/util/cs-etm.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

Comments

Mike Leach May 25, 2023, 11:01 a.m. UTC | #1
On Wed, 24 May 2023 at 14:20, James Clark <james.clark@arm.com> wrote:
>
> Branch samples currently use the IP of the previous packet as the from
> IP, and the IP of the current packet as the to IP. But it incorrectly
> uses the current thread. In some cases like a jump into a different
> exception level this will attribute to the incorrect process.
>
> Fix it by tracking the previous thread in the same way the previous
> packet is tracked.
>
> Signed-off-by: James Clark <james.clark@arm.com>
> ---
>  tools/perf/util/cs-etm.c | 9 +++++++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index ebffc9052561..a997fe79d458 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -86,6 +86,7 @@ struct cs_etm_traceid_queue {
>         size_t last_branch_pos;
>         union perf_event *event_buf;
>         struct thread *thread;
> +       struct thread *prev_thread;
>         struct branch_stack *last_branch;
>         struct branch_stack *last_branch_rb;
>         struct cs_etm_packet *prev_packet;
> @@ -480,6 +481,7 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
>         tidq->trace_chan_id = trace_chan_id;
>         tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
>                                                queue->tid);
> +       tidq->prev_thread = machine__idle_thread(&etm->session->machines.host);
>
>         tidq->packet = zalloc(sizeof(struct cs_etm_packet));
>         if (!tidq->packet)
> @@ -616,6 +618,8 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
>                 tmp = tidq->packet;
>                 tidq->packet = tidq->prev_packet;
>                 tidq->prev_packet = tmp;
> +               thread__put(tidq->prev_thread);
> +               tidq->prev_thread = thread__get(tidq->thread);
>         }
>  }
>
> @@ -791,6 +795,7 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
>                 /* Free this traceid_queue from the array */
>                 tidq = etmq->traceid_queues[idx];
>                 thread__zput(tidq->thread);
> +               thread__zput(tidq->prev_thread);
>                 zfree(&tidq->event_buf);
>                 zfree(&tidq->last_branch);
>                 zfree(&tidq->last_branch_rb);
> @@ -1450,8 +1455,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
>         sample.time = cs_etm__resolve_sample_time(etmq, tidq);
>
>         sample.ip = ip;
> -       sample.pid = tidq->thread->pid_;
> -       sample.tid = tidq->thread->tid;
> +       sample.pid = tidq->prev_thread->pid_;
> +       sample.tid = tidq->prev_thread->tid;
>         sample.addr = cs_etm__first_executed_instr(tidq->packet);
>         sample.id = etmq->etm->branches_id;
>         sample.stream_id = etmq->etm->branches_id;
> --
> 2.34.1
>

Reviewed-by: Mike Leach <mike.leach@linaro.org>
Leo Yan May 27, 2023, 9:06 a.m. UTC | #2
On Wed, May 24, 2023 at 02:19:56PM +0100, James Clark wrote:
> Branch samples currently use the IP of the previous packet as the from
> IP, and the IP of the current packet as the to IP. But it incorrectly
> uses the current thread. In some cases like a jump into a different
> exception level this will attribute to the incorrect process.

It's about the timing that branch has taken or not taken :)

If we think the branch sample as 'branch has taken', then current code
is doning right thing, otherwise, we need this fix.

> Fix it by tracking the previous thread in the same way the previous
> packet is tracked.
> 
> Signed-off-by: James Clark <james.clark@arm.com>
> ---
>  tools/perf/util/cs-etm.c | 9 +++++++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index ebffc9052561..a997fe79d458 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -86,6 +86,7 @@ struct cs_etm_traceid_queue {
>  	size_t last_branch_pos;
>  	union perf_event *event_buf;
>  	struct thread *thread;
> +	struct thread *prev_thread;
>  	struct branch_stack *last_branch;
>  	struct branch_stack *last_branch_rb;
>  	struct cs_etm_packet *prev_packet;
> @@ -480,6 +481,7 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
>  	tidq->trace_chan_id = trace_chan_id;
>  	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
>  					       queue->tid);
> +	tidq->prev_thread = machine__idle_thread(&etm->session->machines.host);
>  
>  	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
>  	if (!tidq->packet)
> @@ -616,6 +618,8 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
>  		tmp = tidq->packet;
>  		tidq->packet = tidq->prev_packet;
>  		tidq->prev_packet = tmp;
> +		thread__put(tidq->prev_thread);
> +		tidq->prev_thread = thread__get(tidq->thread);

Maybe cs_etm__packet_swap() is not the best place to update
"tidq->prev_thread", since swapping packet doesn't mean it's necessarily
thread switching; can we move this change into the cs_etm__set_thread()?

Thanks,
Leo

>  	}
>  }
>  
> @@ -791,6 +795,7 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
>  		/* Free this traceid_queue from the array */
>  		tidq = etmq->traceid_queues[idx];
>  		thread__zput(tidq->thread);
> +		thread__zput(tidq->prev_thread);
>  		zfree(&tidq->event_buf);
>  		zfree(&tidq->last_branch);
>  		zfree(&tidq->last_branch_rb);
> @@ -1450,8 +1455,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
>  	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
>  
>  	sample.ip = ip;
> -	sample.pid = tidq->thread->pid_;
> -	sample.tid = tidq->thread->tid;
> +	sample.pid = tidq->prev_thread->pid_;
> +	sample.tid = tidq->prev_thread->tid;
>  	sample.addr = cs_etm__first_executed_instr(tidq->packet);
>  	sample.id = etmq->etm->branches_id;
>  	sample.stream_id = etmq->etm->branches_id;
> -- 
> 2.34.1
>
James Clark May 30, 2023, 2:28 p.m. UTC | #3
On 27/05/2023 10:06, Leo Yan wrote:
> On Wed, May 24, 2023 at 02:19:56PM +0100, James Clark wrote:
>> Branch samples currently use the IP of the previous packet as the from
>> IP, and the IP of the current packet as the to IP. But it incorrectly
>> uses the current thread. In some cases like a jump into a different
>> exception level this will attribute to the incorrect process.
> 
> It's about the timing that branch has taken or not taken :)
> 
> If we think the branch sample as 'branch has taken', then current code
> is doning right thing, otherwise, we need this fix.
> 

If you diff the outputs side by side you can see it mainly has an effect
where there is a discontinuity. At this point we set either the from or
the to IPs to 0.

For example here is a before and after perf script output. Without the
change it looks like stress was running before it actually was. The
schedule function that was attributed to ls on the first line hasn't
finished running yet. But it's attributed to stress on the second line
even though the destination IP is 0 meaning we don't even know where it
went.

Before:

    ls  8350 [006] ... __schedule+0x394 => schedule+0x5c
stress  8357 [006] ... schedule+0x84 => 0 [unknown]
stress  8357 [006] ... 0 [unknown] => __unix_dgram_recvmsg+0x130

After:

    ls  8350 [006] ... __schedule+0x394 => schedule+0x5c
    ls  8357 [006] ... schedule+0x84 => 0 [unknown]
stress  8357 [006] ... 0 [unknown] => __unix_dgram_recvmsg+0x130

I didn't see any decode differences that weren't around these
discontinuity points, so it seems like a low risk change.

>> Fix it by tracking the previous thread in the same way the previous
>> packet is tracked.
>>
>> Signed-off-by: James Clark <james.clark@arm.com>
>> ---
>>  tools/perf/util/cs-etm.c | 9 +++++++--
>>  1 file changed, 7 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
>> index ebffc9052561..a997fe79d458 100644
>> --- a/tools/perf/util/cs-etm.c
>> +++ b/tools/perf/util/cs-etm.c
>> @@ -86,6 +86,7 @@ struct cs_etm_traceid_queue {
>>  	size_t last_branch_pos;
>>  	union perf_event *event_buf;
>>  	struct thread *thread;
>> +	struct thread *prev_thread;
>>  	struct branch_stack *last_branch;
>>  	struct branch_stack *last_branch_rb;
>>  	struct cs_etm_packet *prev_packet;
>> @@ -480,6 +481,7 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
>>  	tidq->trace_chan_id = trace_chan_id;
>>  	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
>>  					       queue->tid);
>> +	tidq->prev_thread = machine__idle_thread(&etm->session->machines.host);
>>  
>>  	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
>>  	if (!tidq->packet)
>> @@ -616,6 +618,8 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
>>  		tmp = tidq->packet;
>>  		tidq->packet = tidq->prev_packet;
>>  		tidq->prev_packet = tmp;
>> +		thread__put(tidq->prev_thread);
>> +		tidq->prev_thread = thread__get(tidq->thread);
> 
> Maybe cs_etm__packet_swap() is not the best place to update
> "tidq->prev_thread", since swapping packet doesn't mean it's necessarily
> thread switching; can we move this change into the cs_etm__set_thread()?
> 

Yeah that might make more sense. I can move it there if we decide to
keep this change.

> Thanks,
> Leo
> 
>>  	}
>>  }
>>  
>> @@ -791,6 +795,7 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
>>  		/* Free this traceid_queue from the array */
>>  		tidq = etmq->traceid_queues[idx];
>>  		thread__zput(tidq->thread);
>> +		thread__zput(tidq->prev_thread);
>>  		zfree(&tidq->event_buf);
>>  		zfree(&tidq->last_branch);
>>  		zfree(&tidq->last_branch_rb);
>> @@ -1450,8 +1455,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
>>  	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
>>  
>>  	sample.ip = ip;
>> -	sample.pid = tidq->thread->pid_;
>> -	sample.tid = tidq->thread->tid;
>> +	sample.pid = tidq->prev_thread->pid_;
>> +	sample.tid = tidq->prev_thread->tid;
>>  	sample.addr = cs_etm__first_executed_instr(tidq->packet);
>>  	sample.id = etmq->etm->branches_id;
>>  	sample.stream_id = etmq->etm->branches_id;
>> -- 
>> 2.34.1
>>
Leo Yan June 6, 2023, 12:44 a.m. UTC | #4
On Tue, May 30, 2023 at 03:28:09PM +0100, James Clark wrote:

[...]

> > On Wed, May 24, 2023 at 02:19:56PM +0100, James Clark wrote:
> >> Branch samples currently use the IP of the previous packet as the from
> >> IP, and the IP of the current packet as the to IP. But it incorrectly
> >> uses the current thread. In some cases like a jump into a different
> >> exception level this will attribute to the incorrect process.
> > 
> > It's about the timing that branch has taken or not taken :)
> > 
> > If we think the branch sample as 'branch has taken', then current code
> > is doning right thing, otherwise, we need this fix.
> > 
> 
> If you diff the outputs side by side you can see it mainly has an effect
> where there is a discontinuity. At this point we set either the from or
> the to IPs to 0.
> 
> For example here is a before and after perf script output. Without the
> change it looks like stress was running before it actually was. The
> schedule function that was attributed to ls on the first line hasn't
> finished running yet. But it's attributed to stress on the second line
> even though the destination IP is 0 meaning we don't even know where it
> went.

Yeah, this is a good improvement for me.  Thanks for sharing the
detailed comparison result.

> Before:
> 
>     ls  8350 [006] ... __schedule+0x394 => schedule+0x5c
> stress  8357 [006] ... schedule+0x84 => 0 [unknown]
> stress  8357 [006] ... 0 [unknown] => __unix_dgram_recvmsg+0x130
> 
> After:
> 
>     ls  8350 [006] ... __schedule+0x394 => schedule+0x5c
>     ls  8357 [006] ... schedule+0x84 => 0 [unknown]
> stress  8357 [006] ... 0 [unknown] => __unix_dgram_recvmsg+0x130
> 
> I didn't see any decode differences that weren't around these
> discontinuity points, so it seems like a low risk change.

[...]

> >> @@ -480,6 +481,7 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
> >>  	tidq->trace_chan_id = trace_chan_id;
> >>  	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
> >>  					       queue->tid);
> >> +	tidq->prev_thread = machine__idle_thread(&etm->session->machines.host);
> >>  
> >>  	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
> >>  	if (!tidq->packet)
> >> @@ -616,6 +618,8 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
> >>  		tmp = tidq->packet;
> >>  		tidq->packet = tidq->prev_packet;
> >>  		tidq->prev_packet = tmp;
> >> +		thread__put(tidq->prev_thread);
> >> +		tidq->prev_thread = thread__get(tidq->thread);
> > 
> > Maybe cs_etm__packet_swap() is not the best place to update
> > "tidq->prev_thread", since swapping packet doesn't mean it's necessarily
> > thread switching; can we move this change into the cs_etm__set_thread()?
> > 
> 
> Yeah that might make more sense. I can move it there if we decide to
> keep this change.

Please refine the patch for this.  Thanks and sorry my late replying.

Leo
James Clark June 8, 2023, 9:34 a.m. UTC | #5
On 30/05/2023 15:28, James Clark wrote:
> 
> 
> On 27/05/2023 10:06, Leo Yan wrote:
>> On Wed, May 24, 2023 at 02:19:56PM +0100, James Clark wrote:
>>> Branch samples currently use the IP of the previous packet as the from
>>> IP, and the IP of the current packet as the to IP. But it incorrectly
>>> uses the current thread. In some cases like a jump into a different
>>> exception level this will attribute to the incorrect process.
>>
>> It's about the timing that branch has taken or not taken :)
>>
>> If we think the branch sample as 'branch has taken', then current code
>> is doning right thing, otherwise, we need this fix.
>>
> 
> If you diff the outputs side by side you can see it mainly has an effect
> where there is a discontinuity. At this point we set either the from or
> the to IPs to 0.
> 
> For example here is a before and after perf script output. Without the
> change it looks like stress was running before it actually was. The
> schedule function that was attributed to ls on the first line hasn't
> finished running yet. But it's attributed to stress on the second line
> even though the destination IP is 0 meaning we don't even know where it
> went.
> 
> Before:
> 
>     ls  8350 [006] ... __schedule+0x394 => schedule+0x5c
> stress  8357 [006] ... schedule+0x84 => 0 [unknown]
> stress  8357 [006] ... 0 [unknown] => __unix_dgram_recvmsg+0x130
> 
> After:
> 
>     ls  8350 [006] ... __schedule+0x394 => schedule+0x5c
>     ls  8357 [006] ... schedule+0x84 => 0 [unknown]
> stress  8357 [006] ... 0 [unknown] => __unix_dgram_recvmsg+0x130
> 
> I didn't see any decode differences that weren't around these
> discontinuity points, so it seems like a low risk change.
> 
>>> Fix it by tracking the previous thread in the same way the previous
>>> packet is tracked.
>>>
>>> Signed-off-by: James Clark <james.clark@arm.com>
>>> ---
>>>  tools/perf/util/cs-etm.c | 9 +++++++--
>>>  1 file changed, 7 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
>>> index ebffc9052561..a997fe79d458 100644
>>> --- a/tools/perf/util/cs-etm.c
>>> +++ b/tools/perf/util/cs-etm.c
>>> @@ -86,6 +86,7 @@ struct cs_etm_traceid_queue {
>>>  	size_t last_branch_pos;
>>>  	union perf_event *event_buf;
>>>  	struct thread *thread;
>>> +	struct thread *prev_thread;
>>>  	struct branch_stack *last_branch;
>>>  	struct branch_stack *last_branch_rb;
>>>  	struct cs_etm_packet *prev_packet;
>>> @@ -480,6 +481,7 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
>>>  	tidq->trace_chan_id = trace_chan_id;
>>>  	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
>>>  					       queue->tid);
>>> +	tidq->prev_thread = machine__idle_thread(&etm->session->machines.host);
>>>  
>>>  	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
>>>  	if (!tidq->packet)
>>> @@ -616,6 +618,8 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
>>>  		tmp = tidq->packet;
>>>  		tidq->packet = tidq->prev_packet;
>>>  		tidq->prev_packet = tmp;
>>> +		thread__put(tidq->prev_thread);
>>> +		tidq->prev_thread = thread__get(tidq->thread);
>>
>> Maybe cs_etm__packet_swap() is not the best place to update
>> "tidq->prev_thread", since swapping packet doesn't mean it's necessarily
>> thread switching; can we move this change into the cs_etm__set_thread()?
>>
> 
> Yeah that might make more sense. I can move it there if we decide to
> keep this change.
> 

Unfortunately I don't think I can make this change. It seems like
putting the previous thread swap in  cs_etm__set_thread() has different
semantics to keeping all the swaps together in cs_etm__packet_swap().

This is because if you swap the thread in cs_etm__packet_swap() the
previous packet and next packet can have the _same_ thread if there
happened to be no change. However if you only swap previous thread in
cs_etm__set_thread(), that means that the previous thread is always
different to the next one. This has a huge difference on the decoding
because two adjacent packets on the same thread will say they branched
from the previous thread that ran, not the previous thread on the
previous packet.

>> Thanks,
>> Leo
>>
>>>  	}
>>>  }
>>>  
>>> @@ -791,6 +795,7 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
>>>  		/* Free this traceid_queue from the array */
>>>  		tidq = etmq->traceid_queues[idx];
>>>  		thread__zput(tidq->thread);
>>> +		thread__zput(tidq->prev_thread);
>>>  		zfree(&tidq->event_buf);
>>>  		zfree(&tidq->last_branch);
>>>  		zfree(&tidq->last_branch_rb);
>>> @@ -1450,8 +1455,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
>>>  	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
>>>  
>>>  	sample.ip = ip;
>>> -	sample.pid = tidq->thread->pid_;
>>> -	sample.tid = tidq->thread->tid;
>>> +	sample.pid = tidq->prev_thread->pid_;
>>> +	sample.tid = tidq->prev_thread->tid;
>>>  	sample.addr = cs_etm__first_executed_instr(tidq->packet);
>>>  	sample.id = etmq->etm->branches_id;
>>>  	sample.stream_id = etmq->etm->branches_id;
>>> -- 
>>> 2.34.1
>>>
Leo Yan June 8, 2023, 10:25 a.m. UTC | #6
On Thu, Jun 08, 2023 at 10:34:42AM +0100, James Clark wrote:

[...]

> >>> @@ -616,6 +618,8 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
> >>>  		tmp = tidq->packet;
> >>>  		tidq->packet = tidq->prev_packet;
> >>>  		tidq->prev_packet = tmp;
> >>> +		thread__put(tidq->prev_thread);
> >>> +		tidq->prev_thread = thread__get(tidq->thread);
> >>
> >> Maybe cs_etm__packet_swap() is not the best place to update
> >> "tidq->prev_thread", since swapping packet doesn't mean it's necessarily
> >> thread switching; can we move this change into the cs_etm__set_thread()?
> >>
> > 
> > Yeah that might make more sense. I can move it there if we decide to
> > keep this change.
> > 
> 
> Unfortunately I don't think I can make this change. It seems like
> putting the previous thread swap in  cs_etm__set_thread() has different
> semantics to keeping all the swaps together in cs_etm__packet_swap().

Thanks for trying this.

> This is because if you swap the thread in cs_etm__packet_swap() the
> previous packet and next packet can have the _same_ thread if there
> happened to be no change. However if you only swap previous thread in
> cs_etm__set_thread(), that means that the previous thread is always
> different to the next one. This has a huge difference on the decoding
> because two adjacent packets on the same thread will say they branched
> from the previous thread that ran, not the previous thread on the
> previous packet.

Seems to me, this is a synchronization issue between the field
'tidq->prev_thread' and 'tidq->prev_packet'.

It's still hard for me to understand "two adjacent packets on the same
thread will say they branched from the previous thread that ran", IIUC,
even we move thread swapping into cs_etm__set_thread(), if the two
adjacent packets are in the same thread context, we can skip to update
fields 'tidq->prev_thread' and 'tidq->prev_packet'.

So I am curious if below cs_etm__set_thread() works or not?

static void cs_etm__set_thread(struct cs_etm_auxtrace *etm,
			       struct cs_etm_traceid_queue *tidq, pid_t tid)
{
	struct machine *machine = &etm->session->machines.host;

	/* No context switching, bail out */
	if ((tidq->thread->tid != tid)
		return;

	/* If tid is -1, we simply use idle thread context */
	if (tid == -1)
		goto find_idle_thread;

	/*
	 * The new incoming tid is different from current thread,
	 * so it's to switch to the next thread context.
	 */

	/* Swap thread contexts */
	thread__put(tidq->prev_thread);
	tidq->prev_thread = thread__get(tidq->thread);

	/* Find thread context for new tid */
	thread__zput(tidq->thread);
	tidq->thread = machine__find_thread(machine, -1, tid);

find_idle_thread:
	/* Couldn't find a known thread */
	if (!tidq->thread)
		tidq->thread = machine__idle_thread(machine);
}

Thanks,
Leo
Leo Yan June 8, 2023, 10:32 a.m. UTC | #7
On Thu, Jun 08, 2023 at 06:25:55PM +0800, Leo Yan wrote:

[...]


> Seems to me, this is a synchronization issue between the field
> 'tidq->prev_thread' and 'tidq->prev_packet'.
> 
> It's still hard for me to understand "two adjacent packets on the same
> thread will say they branched from the previous thread that ran", IIUC,
> even we move thread swapping into cs_etm__set_thread(), if the two
> adjacent packets are in the same thread context, we can skip to update
> fields 'tidq->prev_thread' and 'tidq->prev_packet'.

Sorry for typo, here should be:

... skip to update fields 'tidq->prev_thread' and 'tidq->thread'.

> So I am curious if below cs_etm__set_thread() works or not?
> 
> static void cs_etm__set_thread(struct cs_etm_auxtrace *etm,
> 			       struct cs_etm_traceid_queue *tidq, pid_t tid)
> {
> 	struct machine *machine = &etm->session->machines.host;
> 
> 	/* No context switching, bail out */
> 	if ((tidq->thread->tid != tid)
> 		return;
> 
> 	/* If tid is -1, we simply use idle thread context */
> 	if (tid == -1)
> 		goto find_idle_thread;
> 
> 	/*
> 	 * The new incoming tid is different from current thread,
> 	 * so it's to switch to the next thread context.
> 	 */
> 
> 	/* Swap thread contexts */
> 	thread__put(tidq->prev_thread);
> 	tidq->prev_thread = thread__get(tidq->thread);
> 
> 	/* Find thread context for new tid */
> 	thread__zput(tidq->thread);
> 	tidq->thread = machine__find_thread(machine, -1, tid);
> 
> find_idle_thread:
> 	/* Couldn't find a known thread */
> 	if (!tidq->thread)
> 		tidq->thread = machine__idle_thread(machine);
> }
> 
> Thanks,
> Leo
James Clark June 9, 2023, 11 a.m. UTC | #8
On 08/06/2023 11:25, Leo Yan wrote:
> On Thu, Jun 08, 2023 at 10:34:42AM +0100, James Clark wrote:
> 
> [...]
> 
>>>>> @@ -616,6 +618,8 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
>>>>>  		tmp = tidq->packet;
>>>>>  		tidq->packet = tidq->prev_packet;
>>>>>  		tidq->prev_packet = tmp;
>>>>> +		thread__put(tidq->prev_thread);
>>>>> +		tidq->prev_thread = thread__get(tidq->thread);
>>>>
>>>> Maybe cs_etm__packet_swap() is not the best place to update
>>>> "tidq->prev_thread", since swapping packet doesn't mean it's necessarily
>>>> thread switching; can we move this change into the cs_etm__set_thread()?
>>>>
>>>
>>> Yeah that might make more sense. I can move it there if we decide to
>>> keep this change.
>>>
>>
>> Unfortunately I don't think I can make this change. It seems like
>> putting the previous thread swap in  cs_etm__set_thread() has different
>> semantics to keeping all the swaps together in cs_etm__packet_swap().
> 
> Thanks for trying this.
> 
>> This is because if you swap the thread in cs_etm__packet_swap() the
>> previous packet and next packet can have the _same_ thread if there
>> happened to be no change. However if you only swap previous thread in
>> cs_etm__set_thread(), that means that the previous thread is always
>> different to the next one. This has a huge difference on the decoding
>> because two adjacent packets on the same thread will say they branched
>> from the previous thread that ran, not the previous thread on the
>> previous packet.
> 
> Seems to me, this is a synchronization issue between the field
> 'tidq->prev_thread' and 'tidq->prev_packet'.
> 
> It's still hard for me to understand "two adjacent packets on the same
> thread will say they branched from the previous thread that ran", IIUC,
> even we move thread swapping into cs_etm__set_thread(), if the two
> adjacent packets are in the same thread context, we can skip to update
> fields 'tidq->prev_thread' and 'tidq->prev_packet'.
> 
> So I am curious if below cs_etm__set_thread() works or not?
> 
> static void cs_etm__set_thread(struct cs_etm_auxtrace *etm,
> 			       struct cs_etm_traceid_queue *tidq, pid_t tid)
> {
> 	struct machine *machine = &etm->session->machines.host;
> 
> 	/* No context switching, bail out */
> 	if ((tidq->thread->tid != tid)
> 		return;
> 
> 	/* If tid is -1, we simply use idle thread context */
> 	if (tid == -1)
> 		goto find_idle_thread;
> 
> 	/*
> 	 * The new incoming tid is different from current thread,
> 	 * so it's to switch to the next thread context.
> 	 */
> 
> 	/* Swap thread contexts */
> 	thread__put(tidq->prev_thread);
> 	tidq->prev_thread = thread__get(tidq->thread);
> 
> 	/* Find thread context for new tid */
> 	thread__zput(tidq->thread);
> 	tidq->thread = machine__find_thread(machine, -1, tid);
> 
> find_idle_thread:
> 	/* Couldn't find a known thread */
> 	if (!tidq->thread)
> 		tidq->thread = machine__idle_thread(machine);
> }
> 

I tried this change but I still don't think it's giving the right
results. Tracking previous thread in cs_etm__set_thread() changes the
semantics of being "the thread for the previous packet" to being "the
previous different thread of an unknown old packet". If you imagine the
packets and thread changes are like this (where <d> is a discontinuity
packet):

    <--thread 1--> <--thread 2-------------------> <------thread 3-->
<d> <--packet 1--> <d> <--packet 2--> <packet 3--> <d> <--packet 4-->

Branches are generated using the last IP of the previous packet, and the
first IP of the next packet, ignoring everything in between as they are
just sequential instructions.

So assuming there are discontinuity packets between the thread switches
we should get:

  thread 1 branches from packet 1 to 0x0
  thread 2 branches from 0x0 to packet 2
  thread 2 branches from packet 2 to packet 3
  thread 2 branches from packet 3 to 0x0
  thread 3 branches from 0x0 to packet 4

By tracking the previous thread for each packet, packet 2 and 3 stay in
thread 2.

If we track the previous thread instead, then both packet 2 and 3 would
continue to look like they branch from thread 1 like this:

  thread ? branches from packet 1 to 0x0
  thread 1 branches from 0x0 to packet 2
  thread 1 branches from packet 2 to packet 3
  thread 1 branches from packet 3 to 0x0
  thread 2 branches from 0x0 to packet 4

Everything gets shifted back by 1 thread. I don't see the issue of
keeping all the swap stuff together in one place. Maybe there is an
issue with the naming of prev_thread? It's not really the previous
thread, it's the previous _packets_ thread. It might be the same thread
as the current one if there was no switch:

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index ca01109c3fc4..f3c73c86010a 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -86,8 +86,8 @@ struct cs_etm_traceid_queue {
        size_t last_branch_pos;
        union perf_event *event_buf;
        struct thread *thread;
-       struct thread *prev_thread;
-       ocsd_ex_level prev_el;
+       struct thread *prev_packet_thread;
+       ocsd_ex_level prev_packet_el;
        ocsd_ex_level el;
        struct branch_stack *last_branch;

> Thanks,
> Leo
Leo Yan June 10, 2023, 1:20 a.m. UTC | #9
On Fri, Jun 09, 2023 at 12:00:27PM +0100, James Clark wrote:
> On 08/06/2023 11:25, Leo Yan wrote:
> > On Thu, Jun 08, 2023 at 10:34:42AM +0100, James Clark wrote:

[...]

> >>>>> @@ -616,6 +618,8 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
> >>>>>  		tmp = tidq->packet;
> >>>>>  		tidq->packet = tidq->prev_packet;
> >>>>>  		tidq->prev_packet = tmp;
> >>>>> +		thread__put(tidq->prev_thread);
> >>>>> +		tidq->prev_thread = thread__get(tidq->thread);
> >>>>
> >>>> Maybe cs_etm__packet_swap() is not the best place to update
> >>>> "tidq->prev_thread", since swapping packet doesn't mean it's necessarily
> >>>> thread switching; can we move this change into the cs_etm__set_thread()?
> >>>>
> >>>
> >>> Yeah that might make more sense. I can move it there if we decide to
> >>> keep this change.
> >>>
> >>
> >> Unfortunately I don't think I can make this change. It seems like
> >> putting the previous thread swap in  cs_etm__set_thread() has different
> >> semantics to keeping all the swaps together in cs_etm__packet_swap().
> > 
> > Thanks for trying this.
> > 
> >> This is because if you swap the thread in cs_etm__packet_swap() the
> >> previous packet and next packet can have the _same_ thread if there
> >> happened to be no change. However if you only swap previous thread in
> >> cs_etm__set_thread(), that means that the previous thread is always
> >> different to the next one. This has a huge difference on the decoding
> >> because two adjacent packets on the same thread will say they branched
> >> from the previous thread that ran, not the previous thread on the
> >> previous packet.
> > 
> > Seems to me, this is a synchronization issue between the field
> > 'tidq->prev_thread' and 'tidq->prev_packet'.
> > 
> > It's still hard for me to understand "two adjacent packets on the same
> > thread will say they branched from the previous thread that ran", IIUC,
> > even we move thread swapping into cs_etm__set_thread(), if the two
> > adjacent packets are in the same thread context, we can skip to update
> > fields 'tidq->prev_thread' and 'tidq->prev_packet'.
> > 
> > So I am curious if below cs_etm__set_thread() works or not?
> > 
> > static void cs_etm__set_thread(struct cs_etm_auxtrace *etm,
> > 			       struct cs_etm_traceid_queue *tidq, pid_t tid)
> > {
> > 	struct machine *machine = &etm->session->machines.host;
> > 
> > 	/* No context switching, bail out */
> > 	if ((tidq->thread->tid != tid)
> > 		return;
> > 
> > 	/* If tid is -1, we simply use idle thread context */
> > 	if (tid == -1)
> > 		goto find_idle_thread;
> > 
> > 	/*
> > 	 * The new incoming tid is different from current thread,
> > 	 * so it's to switch to the next thread context.
> > 	 */
> > 
> > 	/* Swap thread contexts */
> > 	thread__put(tidq->prev_thread);
> > 	tidq->prev_thread = thread__get(tidq->thread);
> > 
> > 	/* Find thread context for new tid */
> > 	thread__zput(tidq->thread);
> > 	tidq->thread = machine__find_thread(machine, -1, tid);
> > 
> > find_idle_thread:
> > 	/* Couldn't find a known thread */
> > 	if (!tidq->thread)
> > 		tidq->thread = machine__idle_thread(machine);
> > }
> > 
> 
> I tried this change but I still don't think it's giving the right
> results. Tracking previous thread in cs_etm__set_thread() changes the
> semantics of being "the thread for the previous packet" to being "the
> previous different thread of an unknown old packet". If you imagine the
> packets and thread changes are like this (where <d> is a discontinuity
> packet):
> 
>     <--thread 1--> <--thread 2-------------------> <------thread 3-->
> <d> <--packet 1--> <d> <--packet 2--> <packet 3--> <d> <--packet 4-->
> 
> Branches are generated using the last IP of the previous packet, and the
> first IP of the next packet, ignoring everything in between as they are
> just sequential instructions.
> 
> So assuming there are discontinuity packets between the thread switches
> we should get:
> 
>   thread 1 branches from packet 1 to 0x0
>   thread 2 branches from 0x0 to packet 2
>   thread 2 branches from packet 2 to packet 3
>   thread 2 branches from packet 3 to 0x0
>   thread 3 branches from 0x0 to packet 4
> 
> By tracking the previous thread for each packet, packet 2 and 3 stay in
> thread 2.
> 
> If we track the previous thread instead, then both packet 2 and 3 would
> continue to look like they branch from thread 1 like this:
> 
>   thread ? branches from packet 1 to 0x0
>   thread 1 branches from 0x0 to packet 2
>   thread 1 branches from packet 2 to packet 3
>   thread 1 branches from packet 3 to 0x0
>   thread 2 branches from 0x0 to packet 4

Thanks a lot for writing very details, James!

Now it's much clear for me to understand the issue.

> Everything gets shifted back by 1 thread. I don't see the issue of
> keeping all the swap stuff together in one place. Maybe there is an
> issue with the naming of prev_thread? It's not really the previous
> thread, it's the previous _packets_ thread. It might be the same thread
> as the current one if there was no switch:

Agreed.  It makes sense for me to rename the thread variable as
"prev_packet_thread", this would be better for reflecting the purpose.

Here you are trying to change how to track thread contexts: rather than
tracking thread context as a global variable and sharing it cross packets,
we track the thread context as an associated info for every packet to
avoid any mismatching between packets and threads (e.g. caused by
discontinuity packets).

My feeling is this part would be a bit difficult for maintenance, maybe
you could add a comment when spin a new patch?  Thanks!

Leo

> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index ca01109c3fc4..f3c73c86010a 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -86,8 +86,8 @@ struct cs_etm_traceid_queue {
>         size_t last_branch_pos;
>         union perf_event *event_buf;
>         struct thread *thread;
> -       struct thread *prev_thread;
> -       ocsd_ex_level prev_el;
> +       struct thread *prev_packet_thread;
> +       ocsd_ex_level prev_packet_el;
>         ocsd_ex_level el;
>         struct branch_stack *last_branch;
> 
> > Thanks,
> > Leo
diff mbox series

Patch

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index ebffc9052561..a997fe79d458 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -86,6 +86,7 @@  struct cs_etm_traceid_queue {
 	size_t last_branch_pos;
 	union perf_event *event_buf;
 	struct thread *thread;
+	struct thread *prev_thread;
 	struct branch_stack *last_branch;
 	struct branch_stack *last_branch_rb;
 	struct cs_etm_packet *prev_packet;
@@ -480,6 +481,7 @@  static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
 	tidq->trace_chan_id = trace_chan_id;
 	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
 					       queue->tid);
+	tidq->prev_thread = machine__idle_thread(&etm->session->machines.host);
 
 	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
 	if (!tidq->packet)
@@ -616,6 +618,8 @@  static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
 		tmp = tidq->packet;
 		tidq->packet = tidq->prev_packet;
 		tidq->prev_packet = tmp;
+		thread__put(tidq->prev_thread);
+		tidq->prev_thread = thread__get(tidq->thread);
 	}
 }
 
@@ -791,6 +795,7 @@  static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
 		/* Free this traceid_queue from the array */
 		tidq = etmq->traceid_queues[idx];
 		thread__zput(tidq->thread);
+		thread__zput(tidq->prev_thread);
 		zfree(&tidq->event_buf);
 		zfree(&tidq->last_branch);
 		zfree(&tidq->last_branch_rb);
@@ -1450,8 +1455,8 @@  static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
 	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
 
 	sample.ip = ip;
-	sample.pid = tidq->thread->pid_;
-	sample.tid = tidq->thread->tid;
+	sample.pid = tidq->prev_thread->pid_;
+	sample.tid = tidq->prev_thread->tid;
 	sample.addr = cs_etm__first_executed_instr(tidq->packet);
 	sample.id = etmq->etm->branches_id;
 	sample.stream_id = etmq->etm->branches_id;