Message ID | 20211109115020.31623-5-german.gomez@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | perf arm-spe: Track pid/tid for Arm SPE samples | expand |
On Tue, Nov 9, 2021 at 3:50 AM German Gomez <german.gomez@arm.com> wrote: > > If Arm SPE traces contain CONTEXT packets with TID info, use these > values for tracking tid of samples. Otherwise fall back to using context > switch events and display a message warning the user of possible timing > inaccuracies [1]. > > [1] https://lore.kernel.org/lkml/f877cfa6-9b25-6445-3806-ca44a4042eaf@arm.com/ > > Signed-off-by: German Gomez <german.gomez@arm.com> > --- > tools/perf/util/arm-spe.c | 99 +++++++++++++++++++++++++++------------ > 1 file changed, 70 insertions(+), 29 deletions(-) > > diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c > index 230bc7ab2..30b8bb48a 100644 > --- a/tools/perf/util/arm-spe.c > +++ b/tools/perf/util/arm-spe.c > @@ -71,6 +71,7 @@ struct arm_spe { > u64 kernel_start; > > unsigned long num_events; > + u8 use_ctx_pkt_for_pid; > }; > > struct arm_spe_queue { > @@ -226,6 +227,44 @@ static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip) > PERF_RECORD_MISC_USER; > } > > +static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, > + struct auxtrace_queue *queue) > +{ > + struct arm_spe_queue *speq = queue->priv; > + pid_t tid; > + > + tid = machine__get_current_tid(spe->machine, speq->cpu); > + if (tid != -1) { > + speq->tid = tid; > + thread__zput(speq->thread); > + } else > + speq->tid = queue->tid; > + > + if ((!speq->thread) && (speq->tid != -1)) { > + speq->thread = machine__find_thread(spe->machine, -1, > + speq->tid); > + } > + > + if (speq->thread) { > + speq->pid = speq->thread->pid_; > + if (queue->cpu == -1) > + speq->cpu = speq->thread->cpu; > + } > +} > + > +static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) > +{ > + struct arm_spe *spe = speq->spe; > + int err = machine__set_current_tid(spe->machine, speq->cpu, tid, tid); I think we should pass -1 as pid as we don't know the real pid. Thanks, Namhyung > + > + if (err) > + return err; > + > + arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]); > + > + return 0; > +} > +
On Wed, Nov 10, 2021 at 11:28:48PM -0800, Namhyung Kim wrote: [...] > > +static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, > > + struct auxtrace_queue *queue) > > +{ > > + struct arm_spe_queue *speq = queue->priv; > > + pid_t tid; > > + > > + tid = machine__get_current_tid(spe->machine, speq->cpu); > > + if (tid != -1) { > > + speq->tid = tid; > > + thread__zput(speq->thread); > > + } else > > + speq->tid = queue->tid; > > + > > + if ((!speq->thread) && (speq->tid != -1)) { > > + speq->thread = machine__find_thread(spe->machine, -1, > > + speq->tid); > > + } > > + > > + if (speq->thread) { > > + speq->pid = speq->thread->pid_; > > + if (queue->cpu == -1) > > + speq->cpu = speq->thread->cpu; > > + } > > +} > > + > > +static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) > > +{ > > + struct arm_spe *spe = speq->spe; > > + int err = machine__set_current_tid(spe->machine, speq->cpu, tid, tid); > > I think we should pass -1 as pid as we don't know the real pid. AFAICT, I observe one case for machine__set_current_tid() returning error is 'speq->cpu' is -1 (this is the case for per-thread tracing). In this case, if pass '-1' for pid/tid, it still will return failure. So here should return the error as it is. Am I missing anything? Thanks, Leo
Hi Leo, On Wed, Nov 10, 2021 at 11:41 PM Leo Yan <leo.yan@linaro.org> wrote: > > On Wed, Nov 10, 2021 at 11:28:48PM -0800, Namhyung Kim wrote: > > [...] > > > > +static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, > > > + struct auxtrace_queue *queue) > > > +{ > > > + struct arm_spe_queue *speq = queue->priv; > > > + pid_t tid; > > > + > > > + tid = machine__get_current_tid(spe->machine, speq->cpu); > > > + if (tid != -1) { > > > + speq->tid = tid; > > > + thread__zput(speq->thread); > > > + } else > > > + speq->tid = queue->tid; > > > + > > > + if ((!speq->thread) && (speq->tid != -1)) { > > > + speq->thread = machine__find_thread(spe->machine, -1, > > > + speq->tid); > > > + } > > > + > > > + if (speq->thread) { > > > + speq->pid = speq->thread->pid_; > > > + if (queue->cpu == -1) > > > + speq->cpu = speq->thread->cpu; > > > + } > > > +} > > > + > > > +static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) > > > +{ > > > + struct arm_spe *spe = speq->spe; > > > + int err = machine__set_current_tid(spe->machine, speq->cpu, tid, tid); > > > > I think we should pass -1 as pid as we don't know the real pid. > > AFAICT, I observe one case for machine__set_current_tid() returning error > is 'speq->cpu' is -1 (this is the case for per-thread tracing). In > this case, if pass '-1' for pid/tid, it still will return failure. > > So here should return the error as it is. Am I missing anything? I'm not saying about the error. It's about thread status. In the machine__set_current_tid(), it calls machine__findnew_thread() with given pid and tid. I suspect it can set pid to a wrong value if the thread has no pid value at the moment. Thanks, Namhyung
Hi Namhyung, On Wed, Nov 10, 2021 at 11:59:05PM -0800, Namhyung Kim wrote: [...] > > > > +static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, > > > > + struct auxtrace_queue *queue) > > > > +{ > > > > + struct arm_spe_queue *speq = queue->priv; > > > > + pid_t tid; > > > > + > > > > + tid = machine__get_current_tid(spe->machine, speq->cpu); > > > > + if (tid != -1) { > > > > + speq->tid = tid; > > > > + thread__zput(speq->thread); > > > > + } else > > > > + speq->tid = queue->tid; > > > > + > > > > + if ((!speq->thread) && (speq->tid != -1)) { > > > > + speq->thread = machine__find_thread(spe->machine, -1, > > > > + speq->tid); > > > > + } > > > > + > > > > + if (speq->thread) { > > > > + speq->pid = speq->thread->pid_; > > > > + if (queue->cpu == -1) > > > > + speq->cpu = speq->thread->cpu; > > > > + } > > > > +} > > > > + > > > > +static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) > > > > +{ > > > > + struct arm_spe *spe = speq->spe; > > > > + int err = machine__set_current_tid(spe->machine, speq->cpu, tid, tid); > > > > > > I think we should pass -1 as pid as we don't know the real pid. > > > > AFAICT, I observe one case for machine__set_current_tid() returning error > > is 'speq->cpu' is -1 (this is the case for per-thread tracing). In > > this case, if pass '-1' for pid/tid, it still will return failure. > > > > So here should return the error as it is. Am I missing anything? > > I'm not saying about the error. It's about thread status. > In the machine__set_current_tid(), it calls > machine__findnew_thread() with given pid and tid. > > I suspect it can set pid to a wrong value if the thread has > no pid value at the moment. Here we should avoid to write pid '-1' with machine__set_current_tid(). The function arm_spe_set_tid() is invoked when SPE trace data contains context packet and it passes pid coming from the context packet. On the other hand, when SPE trace data doesn't contain context packet, we relies on context switch event to set pid value. So if we pass pid '-1' in arm_spe_set_tid(), it will overwrite the pid value which has been set by context switch event. Simply say, if SPE trace data contains context packet with valid pid, perf invokes arm_spe_set_tid() to set the pid value. Otherwise, it should skip this operation and roll back to use the pid value from the context switch event. Thanks, Leo
Hi Leo, Namhyung, On 11/11/2021 08:30, Leo Yan wrote: > Hi Namhyung, > > On Wed, Nov 10, 2021 at 11:59:05PM -0800, Namhyung Kim wrote: > > [...] > >>>>> +static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, >>>>> + struct auxtrace_queue *queue) >>>>> +{ >>>>> + struct arm_spe_queue *speq = queue->priv; >>>>> + pid_t tid; >>>>> + >>>>> + tid = machine__get_current_tid(spe->machine, speq->cpu); >>>>> + if (tid != -1) { >>>>> + speq->tid = tid; >>>>> + thread__zput(speq->thread); >>>>> + } else >>>>> + speq->tid = queue->tid; >>>>> + >>>>> + if ((!speq->thread) && (speq->tid != -1)) { >>>>> + speq->thread = machine__find_thread(spe->machine, -1, >>>>> + speq->tid); >>>>> + } >>>>> + >>>>> + if (speq->thread) { >>>>> + speq->pid = speq->thread->pid_; >>>>> + if (queue->cpu == -1) >>>>> + speq->cpu = speq->thread->cpu; >>>>> + } >>>>> +} >>>>> + >>>>> +static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) >>>>> +{ >>>>> + struct arm_spe *spe = speq->spe; >>>>> + int err = machine__set_current_tid(spe->machine, speq->cpu, tid, tid); >>>> I think we should pass -1 as pid as we don't know the real pid. >>> AFAICT, I observe one case for machine__set_current_tid() returning error >>> is 'speq->cpu' is -1 (this is the case for per-thread tracing). In >>> this case, if pass '-1' for pid/tid, it still will return failure. >>> >>> So here should return the error as it is. Am I missing anything? >> I'm not saying about the error. It's about thread status. >> In the machine__set_current_tid(), it calls >> machine__findnew_thread() with given pid and tid. >> >> I suspect it can set pid to a wrong value if the thread has >> no pid value at the moment. > Here we should avoid to write pid '-1' with > machine__set_current_tid(). If the kernel is writing the tids to the contextidr, isn't it wrong to assume tid == pid when decoding the context packets here? I haven't observed any impact in the built-in commands though, so there must be something I'm not seeing. Thanks, German > > The function arm_spe_set_tid() is invoked when SPE trace data contains > context packet and it passes pid coming from the context packet. On > the other hand, when SPE trace data doesn't contain context packet, we > relies on context switch event to set pid value. So if we pass pid > '-1' in arm_spe_set_tid(), it will overwrite the pid value which has > been set by context switch event. > > Simply say, if SPE trace data contains context packet with valid pid, > perf invokes arm_spe_set_tid() to set the pid value. Otherwise, it > should skip this operation and roll back to use the pid value from > the context switch event. > > Thanks, > Leo
On Thu, Nov 11, 2021 at 12:23:08PM +0000, German Gomez wrote: > On 11/11/2021 08:30, Leo Yan wrote: > > On Wed, Nov 10, 2021 at 11:59:05PM -0800, Namhyung Kim wrote: [...] > >>>>> +static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) > >>>>> +{ > >>>>> + struct arm_spe *spe = speq->spe; > >>>>> + int err = machine__set_current_tid(spe->machine, speq->cpu, tid, tid); > >>>> > >>>> I think we should pass -1 as pid as we don't know the real pid. > >>>> > >>> AFAICT, I observe one case for machine__set_current_tid() returning error > >>> is 'speq->cpu' is -1 (this is the case for per-thread tracing). In > >>> this case, if pass '-1' for pid/tid, it still will return failure. > >>> > >>> So here should return the error as it is. Am I missing anything? > >> > >> I'm not saying about the error. It's about thread status. > >> In the machine__set_current_tid(), it calls > >> machine__findnew_thread() with given pid and tid. > >> > >> I suspect it can set pid to a wrong value if the thread has > >> no pid value at the moment. > > > > Here we should avoid to write pid '-1' with > > machine__set_current_tid(). > > If the kernel is writing the tids to the contextidr, isn't it wrong to > assume tid == pid when decoding the context packets here? I haven't > observed any impact in the built-in commands though, so there must be > something I'm not seeing. Okay, let me correct myself :) I checked Intel-pt's implementation, I understand now that we need to distinguish the cases for pid/tid from context switch event and only tid from SPE context packet. Since the context switch event contains the correct pid and tid values, we should set both of them, see Intel-PT's implementation [1]. As Namhyung pointed, we need to set pid as '-1' when we only know the tid value from SPE context packet; see [2]. So we should use the same with Intel-pt. Sorry for I didn't really understand well Namhyung's suggestion and thanks you both pointed out the issue. Leo P.s. an offline topic is we should send a patch to fix cs-etm issue as well [3]. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/util/intel-pt.c#n2920 [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/util/intel-pt.c#n2215 [3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/util/cs-etm.c#n1121
Hi, thanks for looking into it On 11/11/2021 12:42, Leo Yan wrote: > On Thu, Nov 11, 2021 at 12:23:08PM +0000, German Gomez wrote: >> On 11/11/2021 08:30, Leo Yan wrote: >>> On Wed, Nov 10, 2021 at 11:59:05PM -0800, Namhyung Kim wrote: > [...] > >>>>>>> +static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) >>>>>>> +{ >>>>>>> + struct arm_spe *spe = speq->spe; >>>>>>> + int err = machine__set_current_tid(spe->machine, speq->cpu, tid, tid); >>>>>> I think we should pass -1 as pid as we don't know the real pid. >>>>>> >>>>> AFAICT, I observe one case for machine__set_current_tid() returning error >>>>> is 'speq->cpu' is -1 (this is the case for per-thread tracing). In >>>>> this case, if pass '-1' for pid/tid, it still will return failure. >>>>> >>>>> So here should return the error as it is. Am I missing anything? >>>> I'm not saying about the error. It's about thread status. >>>> In the machine__set_current_tid(), it calls >>>> machine__findnew_thread() with given pid and tid. >>>> >>>> I suspect it can set pid to a wrong value if the thread has >>>> no pid value at the moment. >>> Here we should avoid to write pid '-1' with >>> machine__set_current_tid(). >> If the kernel is writing the tids to the contextidr, isn't it wrong to >> assume tid == pid when decoding the context packets here? I haven't >> observed any impact in the built-in commands though, so there must be >> something I'm not seeing. > Okay, let me correct myself :) > > I checked Intel-pt's implementation, I understand now that we need to > distinguish the cases for pid/tid from context switch event and only tid > from SPE context packet. > > Since the context switch event contains the correct pid and tid > values, we should set both of them, see Intel-PT's implementation [1]. > > As Namhyung pointed, we need to set pid as '-1' when we only know the > tid value from SPE context packet; see [2]. I will correct this and resend the patch for SPE. Thanks, German > > So we should use the same with Intel-pt. > > Sorry for I didn't really understand well Namhyung's suggestion and > thanks you both pointed out the issue. > > Leo > > P.s. an offline topic is we should send a patch to fix cs-etm issue > as well [3]. > > [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/util/intel-pt.c#n2920 > [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/util/intel-pt.c#n2215 > [3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/util/cs-etm.c#n1121
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 230bc7ab2..30b8bb48a 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -71,6 +71,7 @@ struct arm_spe { u64 kernel_start; unsigned long num_events; + u8 use_ctx_pkt_for_pid; }; struct arm_spe_queue { @@ -226,6 +227,44 @@ static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip) PERF_RECORD_MISC_USER; } +static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, + struct auxtrace_queue *queue) +{ + struct arm_spe_queue *speq = queue->priv; + pid_t tid; + + tid = machine__get_current_tid(spe->machine, speq->cpu); + if (tid != -1) { + speq->tid = tid; + thread__zput(speq->thread); + } else + speq->tid = queue->tid; + + if ((!speq->thread) && (speq->tid != -1)) { + speq->thread = machine__find_thread(spe->machine, -1, + speq->tid); + } + + if (speq->thread) { + speq->pid = speq->thread->pid_; + if (queue->cpu == -1) + speq->cpu = speq->thread->cpu; + } +} + +static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) +{ + struct arm_spe *spe = speq->spe; + int err = machine__set_current_tid(spe->machine, speq->cpu, tid, tid); + + if (err) + return err; + + arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]); + + return 0; +} + static void arm_spe_prep_sample(struct arm_spe *spe, struct arm_spe_queue *speq, union perf_event *event, @@ -460,6 +499,19 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp) * can correlate samples between Arm SPE trace data and other * perf events with correct time ordering. */ + + /* + * Update pid/tid info. + */ + record = &speq->decoder->record; + if (!spe->timeless_decoding && record->context_id != (u64)-1) { + ret = arm_spe_set_tid(speq, record->context_id); + if (ret) + return ret; + + spe->use_ctx_pkt_for_pid = true; + } + ret = arm_spe_sample(speq); if (ret) return ret; @@ -586,31 +638,6 @@ static bool arm_spe__is_timeless_decoding(struct arm_spe *spe) return timeless_decoding; } -static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, - struct auxtrace_queue *queue) -{ - struct arm_spe_queue *speq = queue->priv; - pid_t tid; - - tid = machine__get_current_tid(spe->machine, speq->cpu); - if (tid != -1) { - speq->tid = tid; - thread__zput(speq->thread); - } else - speq->tid = queue->tid; - - if ((!speq->thread) && (speq->tid != -1)) { - speq->thread = machine__find_thread(spe->machine, -1, - speq->tid); - } - - if (speq->thread) { - speq->pid = speq->thread->pid_; - if (queue->cpu == -1) - speq->cpu = speq->thread->cpu; - } -} - static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) { unsigned int queue_nr; @@ -641,7 +668,12 @@ static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) ts = timestamp; } - arm_spe_set_pid_tid_cpu(spe, queue); + /* + * A previous context-switch event has set pid/tid in the machine's context, so + * here we need to update the pid/tid in the thread and SPE queue. + */ + if (!spe->use_ctx_pkt_for_pid) + arm_spe_set_pid_tid_cpu(spe, queue); ret = arm_spe_run_decoder(speq, &ts); if (ret < 0) { @@ -740,8 +772,9 @@ static int arm_spe_process_event(struct perf_session *session, if (err) return err; - if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE || - event->header.type == PERF_RECORD_SWITCH) + if (!spe->use_ctx_pkt_for_pid && + (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE || + event->header.type == PERF_RECORD_SWITCH)) err = arm_spe_context_switch(spe, event, sample); } @@ -808,7 +841,15 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused, return arm_spe_process_timeless_queues(spe, -1, MAX_TIMESTAMP - 1); - return arm_spe_process_queues(spe, MAX_TIMESTAMP); + ret = arm_spe_process_queues(spe, MAX_TIMESTAMP); + if (ret) + return ret; + + if (!spe->use_ctx_pkt_for_pid) + ui__warning("Arm SPE CONTEXT packets not found in the traces.\n" + "Matching of TIDs to SPE events could be inaccurate.\n"); + + return 0; } static void arm_spe_free_queue(void *priv)
If Arm SPE traces contain CONTEXT packets with TID info, use these values for tracking tid of samples. Otherwise fall back to using context switch events and display a message warning the user of possible timing inaccuracies [1]. [1] https://lore.kernel.org/lkml/f877cfa6-9b25-6445-3806-ca44a4042eaf@arm.com/ Signed-off-by: German Gomez <german.gomez@arm.com> --- tools/perf/util/arm-spe.c | 99 +++++++++++++++++++++++++++------------ 1 file changed, 70 insertions(+), 29 deletions(-)