diff mbox series

[RFC] perf cs-etm: Handle valid-but-zero timestamps

Message ID 20210507095814.17933-1-james.clark@arm.com (mailing list archive)
State New, archived
Headers show
Series [RFC] perf cs-etm: Handle valid-but-zero timestamps | expand

Commit Message

James Clark May 7, 2021, 9:58 a.m. UTC
There is an intermittent issue on Trogdor devices that
results in all Coresight timestamps having a value of zero.
Because zero is currently treated as "not found" rather
than "found but zero", this breaks the decoding flow
when it would otherwise work.

This patch adds an out parameter and return code so
the difference between zero and non-existent timestamps
can be determined.

There is also a change to fix an underflow.

Although this is a workaround, the change is technically
a valid way of writing the cs_etm__etmq_get_timestamp()
function. It could have been written similarly to this
without trying to work around this issue, so it's no less
correct. But, because it's a workaround to a problem
elsewhere, I will submit this as an RFC for feedback.

This patch applies on top of the "[PATCH v2 0/2] perf
cs-etm: Set time on synthesised samples to preserve ordering"
patchset.

Co-developed-by: Denis Nikitin <denik@chromium.org>
Signed-off-by: Denis Nikitin <denik@chromium.org>
Signed-off-by: James Clark <james.clark@arm.com>
---
 .../perf/util/cs-etm-decoder/cs-etm-decoder.c |  5 ++++-
 tools/perf/util/cs-etm.c                      | 22 +++++++++----------
 2 files changed, 15 insertions(+), 12 deletions(-)

Comments

Mike Leach May 7, 2021, 2:09 p.m. UTC | #1
Hi James,

1) Not sure about this particular target - but previously all 0
timestamps have been the result of not enabling a clock or the
coresight timestamp generator.

2) Given that you can synthesise timestamps when all the values are
all 0 - does this not imply that they could be synthesised when they
are not present at all?

Cheers

Mike

On Fri, 7 May 2021 at 11:02, James Clark <james.clark@arm.com> wrote:
>
>
>
> On 07/05/2021 12:58, James Clark wrote:
> > There is an intermittent issue on Trogdor devices that
> > results in all Coresight timestamps having a value of zero.
>
> I've attached a file here that has the issue. From the dump you
> can see the zero timestamps:
>
>         Idx:69; ID:10;  I_TIMESTAMP : Timestamp.; Updated val = 0x0
>         Idx:71; ID:10;  I_ATOM_F1 : Atom format 1.; E
>         Idx:72; ID:10;  I_ADDR_S_IS0 : Address, Short, IS0.; Addr=0xFFFFFFE723C65824 ~[0x5824]
>
> This doesn't have an impact on decoding as they end up being
> decoded in file order as in with timeless mode.
>
> James
>
> > Because zero is currently treated as "not found" rather
> > than "found but zero", this breaks the decoding flow
> > when it would otherwise work.
> >
> > This patch adds an out parameter and return code so
> > the difference between zero and non-existent timestamps
> > can be determined.
> >
> > There is also a change to fix an underflow.
> >
> > Although this is a workaround, the change is technically
> > a valid way of writing the cs_etm__etmq_get_timestamp()
> > function. It could have been written similarly to this
> > without trying to work around this issue, so it's no less
> > correct. But, because it's a workaround to a problem
> > elsewhere, I will submit this as an RFC for feedback.
> >
> > This patch applies on top of the "[PATCH v2 0/2] perf
> > cs-etm: Set time on synthesised samples to preserve ordering"
> > patchset.
> >
> > Co-developed-by: Denis Nikitin <denik@chromium.org>
> > Signed-off-by: Denis Nikitin <denik@chromium.org>
> > Signed-off-by: James Clark <james.clark@arm.com>
> > ---
> >  .../perf/util/cs-etm-decoder/cs-etm-decoder.c |  5 ++++-
> >  tools/perf/util/cs-etm.c                      | 22 +++++++++----------
> >  2 files changed, 15 insertions(+), 12 deletions(-)
> >
> > diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> > index b01d363b9301..947e44413c6e 100644
> > --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> > +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> > @@ -320,7 +320,10 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
> >        * which instructions started by subtracting the number of instructions
> >        * executed to the timestamp.
> >        */
> > -     packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
> > +     if (packet_queue->instr_count >= elem->timestamp)
> > +             packet_queue->cs_timestamp = 0;
> > +     else
> > +             packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
> >       packet_queue->next_cs_timestamp = elem->timestamp;
> >       packet_queue->instr_count = 0;
> >
> > diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> > index e5c1a1b22a2a..1969921c406a 100644
> > --- a/tools/perf/util/cs-etm.c
> > +++ b/tools/perf/util/cs-etm.c
> > @@ -210,13 +210,14 @@ void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
> >       etmq->pending_timestamp_chan_id = trace_chan_id;
> >  }
> >
> > -static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
> > +static int cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
> > +                                   u64 *timestamp,
> >                                     u8 *trace_chan_id)
> >  {
> >       struct cs_etm_packet_queue *packet_queue;
> >
> >       if (!etmq->pending_timestamp_chan_id)
> > -             return 0;
> > +             return -ENODATA;
> >
> >       if (trace_chan_id)
> >               *trace_chan_id = etmq->pending_timestamp_chan_id;
> > @@ -224,13 +225,15 @@ static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
> >       packet_queue = cs_etm__etmq_get_packet_queue(etmq,
> >                                                    etmq->pending_timestamp_chan_id);
> >       if (!packet_queue)
> > -             return 0;
> > +             return -ENODATA;
> >
> >       /* Acknowledge pending status */
> >       etmq->pending_timestamp_chan_id = 0;
> >
> >       /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
> > -     return packet_queue->cs_timestamp;
> > +     if (timestamp)
> > +             *timestamp = packet_queue->cs_timestamp;
> > +     return 0;
> >  }
> >
> >  static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
> > @@ -864,11 +867,10 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
> >                * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
> >                * the timestamp calculation for us.
> >                */
> > -             cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
> > -
> > -             /* We found a timestamp, no need to continue. */
> > -             if (cs_timestamp)
> > +             if (!cs_etm__etmq_get_timestamp(etmq, &cs_timestamp, &trace_chan_id)) {
> > +                     /* We found a timestamp, no need to continue. */
> >                       break;
> > +             }
> >
> >               /*
> >                * We didn't find a timestamp so empty all the traceid packet
> > @@ -2286,9 +2288,7 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
> >               if (ret)
> >                       goto out;
> >
> > -             cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
> > -
> > -             if (!cs_timestamp) {
> > +             if (cs_etm__etmq_get_timestamp(etmq, &cs_timestamp, &trace_chan_id)) {
> >                       /*
> >                        * Function cs_etm__decode_data_block() returns when
> >                        * there is no more traces to decode in the current
> >
Leo Yan May 10, 2021, 5:39 a.m. UTC | #2
Hi James,

On Fri, May 07, 2021 at 01:02:35PM +0300, James Clark wrote:
> 
> 
> On 07/05/2021 12:58, James Clark wrote:
> > There is an intermittent issue on Trogdor devices that
> > results in all Coresight timestamps having a value of zero.
> 
> I've attached a file here that has the issue. From the dump you 
> can see the zero timestamps:
> 
>         Idx:69; ID:10;  I_TIMESTAMP : Timestamp.; Updated val = 0x0
>         Idx:71; ID:10;  I_ATOM_F1 : Atom format 1.; E
>         Idx:72; ID:10;  I_ADDR_S_IS0 : Address, Short, IS0.; Addr=0xFFFFFFE723C65824 ~[0x5824]
> 
> This doesn't have an impact on decoding as they end up being
> decoded in file order as in with timeless mode.

Just remind, as Mike has mentioned that if the timestamp is zero, it
means the hardware setting for timestamp is not enabled properly.  So
for system wide or per CPU mode tracing, it's better to double check
what's the reason the timestamp is not enabled properly.

IIUC, this patch breaks the existed rational in the code.  Let's think
about there have 4 CPUs, every CPU has its own AUX trace buffer, and
when decode the trace data, it will use 4 queues to track the packets
and every queue has its timestamp.

  CPU0: cs_etm_queue -> ... -> packet_queue->timestamp
  CPU1: cs_etm_queue -> ... -> packet_queue->timestamp
  CPU2: cs_etm_queue -> ... -> packet_queue->timestamp
  CPU3: cs_etm_queue -> ... -> packet_queue->timestamp

The issue is if all CPUs' timestamp are zero, it's impossible to find
a way to synthesize samples in the right time order.

[...]

> > diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> > index b01d363b9301..947e44413c6e 100644
> > --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> > +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> > @@ -320,7 +320,10 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
> >  	 * which instructions started by subtracting the number of instructions
> >  	 * executed to the timestamp.
> >  	 */
> > -	packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
> > +	if (packet_queue->instr_count >= elem->timestamp)
> > +		packet_queue->cs_timestamp = 0;
> > +	else
> > +		packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;

Actually here have two situations: one case is "elem->timestamp" is zero,
another case is the overflow for "elem->timestamp".

So the change should be like:

   if (!elem->timestamp)
       packet_queue->cs_timestamp = 0;
   else if (packet_queue->instr_count >= elem->timestamp)
       /* handle overflow? */
   else
      packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;

It's better to think about how to handle the overflow in this case.

Thanks,
Leo
Denis Nikitin May 11, 2021, 7:39 a.m. UTC | #3
Hi Mike,

1. This is a different issue and it's already confirmed as the hw bug.
Reset of the coresight generator may happen at any time and
it goes to normal only after the device reboot.

Thanks,
Denis


On Fri, May 7, 2021 at 7:09 AM Mike Leach <mike.leach@linaro.org> wrote:
>
> Hi James,
>
> 1) Not sure about this particular target - but previously all 0
> timestamps have been the result of not enabling a clock or the
> coresight timestamp generator.
>
> 2) Given that you can synthesise timestamps when all the values are
> all 0 - does this not imply that they could be synthesised when they
> are not present at all?
>
> Cheers
>
> Mike
>
> On Fri, 7 May 2021 at 11:02, James Clark <james.clark@arm.com> wrote:
> >
> >
> >
> > On 07/05/2021 12:58, James Clark wrote:
> > > There is an intermittent issue on Trogdor devices that
> > > results in all Coresight timestamps having a value of zero.
> >
> > I've attached a file here that has the issue. From the dump you
> > can see the zero timestamps:
> >
> >         Idx:69; ID:10;  I_TIMESTAMP : Timestamp.; Updated val = 0x0
> >         Idx:71; ID:10;  I_ATOM_F1 : Atom format 1.; E
> >         Idx:72; ID:10;  I_ADDR_S_IS0 : Address, Short, IS0.; Addr=0xFFFFFFE723C65824 ~[0x5824]
> >
> > This doesn't have an impact on decoding as they end up being
> > decoded in file order as in with timeless mode.
> >
> > James
> >
> > > Because zero is currently treated as "not found" rather
> > > than "found but zero", this breaks the decoding flow
> > > when it would otherwise work.
> > >
> > > This patch adds an out parameter and return code so
> > > the difference between zero and non-existent timestamps
> > > can be determined.
> > >
> > > There is also a change to fix an underflow.
> > >
> > > Although this is a workaround, the change is technically
> > > a valid way of writing the cs_etm__etmq_get_timestamp()
> > > function. It could have been written similarly to this
> > > without trying to work around this issue, so it's no less
> > > correct. But, because it's a workaround to a problem
> > > elsewhere, I will submit this as an RFC for feedback.
> > >
> > > This patch applies on top of the "[PATCH v2 0/2] perf
> > > cs-etm: Set time on synthesised samples to preserve ordering"
> > > patchset.
> > >
> > > Co-developed-by: Denis Nikitin <denik@chromium.org>
> > > Signed-off-by: Denis Nikitin <denik@chromium.org>
> > > Signed-off-by: James Clark <james.clark@arm.com>
> > > ---
> > >  .../perf/util/cs-etm-decoder/cs-etm-decoder.c |  5 ++++-
> > >  tools/perf/util/cs-etm.c                      | 22 +++++++++----------
> > >  2 files changed, 15 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> > > index b01d363b9301..947e44413c6e 100644
> > > --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> > > +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> > > @@ -320,7 +320,10 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
> > >        * which instructions started by subtracting the number of instructions
> > >        * executed to the timestamp.
> > >        */
> > > -     packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
> > > +     if (packet_queue->instr_count >= elem->timestamp)
> > > +             packet_queue->cs_timestamp = 0;
> > > +     else
> > > +             packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
> > >       packet_queue->next_cs_timestamp = elem->timestamp;
> > >       packet_queue->instr_count = 0;
> > >
> > > diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> > > index e5c1a1b22a2a..1969921c406a 100644
> > > --- a/tools/perf/util/cs-etm.c
> > > +++ b/tools/perf/util/cs-etm.c
> > > @@ -210,13 +210,14 @@ void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
> > >       etmq->pending_timestamp_chan_id = trace_chan_id;
> > >  }
> > >
> > > -static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
> > > +static int cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
> > > +                                   u64 *timestamp,
> > >                                     u8 *trace_chan_id)
> > >  {
> > >       struct cs_etm_packet_queue *packet_queue;
> > >
> > >       if (!etmq->pending_timestamp_chan_id)
> > > -             return 0;
> > > +             return -ENODATA;
> > >
> > >       if (trace_chan_id)
> > >               *trace_chan_id = etmq->pending_timestamp_chan_id;
> > > @@ -224,13 +225,15 @@ static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
> > >       packet_queue = cs_etm__etmq_get_packet_queue(etmq,
> > >                                                    etmq->pending_timestamp_chan_id);
> > >       if (!packet_queue)
> > > -             return 0;
> > > +             return -ENODATA;
> > >
> > >       /* Acknowledge pending status */
> > >       etmq->pending_timestamp_chan_id = 0;
> > >
> > >       /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
> > > -     return packet_queue->cs_timestamp;
> > > +     if (timestamp)
> > > +             *timestamp = packet_queue->cs_timestamp;
> > > +     return 0;
> > >  }
> > >
> > >  static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
> > > @@ -864,11 +867,10 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
> > >                * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
> > >                * the timestamp calculation for us.
> > >                */
> > > -             cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
> > > -
> > > -             /* We found a timestamp, no need to continue. */
> > > -             if (cs_timestamp)
> > > +             if (!cs_etm__etmq_get_timestamp(etmq, &cs_timestamp, &trace_chan_id)) {
> > > +                     /* We found a timestamp, no need to continue. */
> > >                       break;
> > > +             }
> > >
> > >               /*
> > >                * We didn't find a timestamp so empty all the traceid packet
> > > @@ -2286,9 +2288,7 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
> > >               if (ret)
> > >                       goto out;
> > >
> > > -             cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
> > > -
> > > -             if (!cs_timestamp) {
> > > +             if (cs_etm__etmq_get_timestamp(etmq, &cs_timestamp, &trace_chan_id)) {
> > >                       /*
> > >                        * Function cs_etm__decode_data_block() returns when
> > >                        * there is no more traces to decode in the current
> > >
>
>
>
> --
> Mike Leach
> Principal Engineer, ARM Ltd.
> Manchester Design Centre. UK
Leo Yan May 11, 2021, 8:05 a.m. UTC | #4
On Mon, May 10, 2021 at 01:39:04PM +0800, Leo Yan wrote:
> On Fri, May 07, 2021 at 01:02:35PM +0300, James Clark wrote:
> > 
> > 
> > On 07/05/2021 12:58, James Clark wrote:
> > > There is an intermittent issue on Trogdor devices that
> > > results in all Coresight timestamps having a value of zero.
> > 
> > I've attached a file here that has the issue. From the dump you 
> > can see the zero timestamps:
> > 
> >         Idx:69; ID:10;  I_TIMESTAMP : Timestamp.; Updated val = 0x0
> >         Idx:71; ID:10;  I_ATOM_F1 : Atom format 1.; E
> >         Idx:72; ID:10;  I_ADDR_S_IS0 : Address, Short, IS0.; Addr=0xFFFFFFE723C65824 ~[0x5824]
> > 
> > This doesn't have an impact on decoding as they end up being
> > decoded in file order as in with timeless mode.
> 
> Just remind, as Mike has mentioned that if the timestamp is zero, it
> means the hardware setting for timestamp is not enabled properly.  So
> for system wide or per CPU mode tracing, it's better to double check
> what's the reason the timestamp is not enabled properly.
> 
> IIUC, this patch breaks the existed rational in the code.  Let's think
> about there have 4 CPUs, every CPU has its own AUX trace buffer, and
> when decode the trace data, it will use 4 queues to track the packets
> and every queue has its timestamp.
> 
>   CPU0: cs_etm_queue -> ... -> packet_queue->timestamp
>   CPU1: cs_etm_queue -> ... -> packet_queue->timestamp
>   CPU2: cs_etm_queue -> ... -> packet_queue->timestamp
>   CPU3: cs_etm_queue -> ... -> packet_queue->timestamp
> 
> The issue is if all CPUs' timestamp are zero, it's impossible to find
> a way to synthesize samples in the right time order.

I saw Denis's replying for the hardware issue for timestamp, wander if
can add a new option "--force-aux-ts-zero" to override the hardware
timestamp issue.  Without the option "--force-aux-ts-zero", the
developers still have chance to observe the failure case caused by the
abnormal timestamps.

Thanks,
Leo
Denis Nikitin May 11, 2021, 8:06 a.m. UTC | #5
Hi Leo,



> Just remind, as Mike has mentioned that if the timestamp is zero, it
> means the hardware setting for timestamp is not enabled properly.  So
> for system wide or per CPU mode tracing, it's better to double check
> what's the reason the timestamp is not enabled properly.

The bug is confirmed by HW verification.

>
> IIUC, this patch breaks the existed rational in the code.  Let's think
> about there have 4 CPUs, every CPU has its own AUX trace buffer, and
> when decode the trace data, it will use 4 queues to track the packets
> and every queue has its timestamp.
>
>   CPU0: cs_etm_queue -> ... -> packet_queue->timestamp
>   CPU1: cs_etm_queue -> ... -> packet_queue->timestamp
>   CPU2: cs_etm_queue -> ... -> packet_queue->timestamp
>   CPU3: cs_etm_queue -> ... -> packet_queue->timestamp
>
> The issue is if all CPUs' timestamp are zero, it's impossible to find
> a way to synthesize samples in the right time order.

Is it really impossible or it just can lead to incorrect decoding?
I verified the profiles generated with zero timestamps and this patch
on Trogdor (8 CPU cores) and I don't see any significant difference
in the quality of the AutoFDO profiles.

If mixed packets don't cause errors in reconstructing the branches
but instead mess up with their timeline then it probably won't matter
for AutoFDO which just collects statistics of the branches.
What do you think?

>
> [...]
>
> Thanks,
> Leo

Thanks,
Denis
Leo Yan May 11, 2021, 8:26 a.m. UTC | #6
Hi Denis,

On Tue, May 11, 2021 at 01:06:03AM -0700, Denis Nikitin wrote:
> Hi Leo,
> 
> > Just remind, as Mike has mentioned that if the timestamp is zero, it
> > means the hardware setting for timestamp is not enabled properly.  So
> > for system wide or per CPU mode tracing, it's better to double check
> > what's the reason the timestamp is not enabled properly.
> 
> The bug is confirmed by HW verification.

Yeah.

> > IIUC, this patch breaks the existed rational in the code.  Let's think
> > about there have 4 CPUs, every CPU has its own AUX trace buffer, and
> > when decode the trace data, it will use 4 queues to track the packets
> > and every queue has its timestamp.
> >
> >   CPU0: cs_etm_queue -> ... -> packet_queue->timestamp
> >   CPU1: cs_etm_queue -> ... -> packet_queue->timestamp
> >   CPU2: cs_etm_queue -> ... -> packet_queue->timestamp
> >   CPU3: cs_etm_queue -> ... -> packet_queue->timestamp
> >
> > The issue is if all CPUs' timestamp are zero, it's impossible to find
> > a way to synthesize samples in the right time order.
> 
> Is it really impossible or it just can lead to incorrect decoding?

Thanks for correcting.  Just clarifying: with this change, perf can
decode and synthesize samples, but the sequence of samples is not
time-based ordering.

> I verified the profiles generated with zero timestamps and this patch
> on Trogdor (8 CPU cores) and I don't see any significant difference
> in the quality of the AutoFDO profiles.
> 
> If mixed packets don't cause errors in reconstructing the branches
> but instead mess up with their timeline then it probably won't matter
> for AutoFDO which just collects statistics of the branches.
> What do you think?

Understand.

CoreSight trace data can be used for two purposes: tracing and
profiling.  For AutoFDO profiling, it's okay for with zero timestamps
based on your conclusion; on the other hand, the change can introduce
confusion if any user wants to use CoreSight for tracing and review the
program flow (like use "perf script") command.

The change in this patch is valid for me, but it's better to connect
with a new option (like "--force-aux-ts-zero" mentioned in my another
replying), this can allow users to explictly to force AUX trace
timestamp as zero (or in other word, users can use this option to ignore
AUX timestamp if the timestamp is not reliable).

Thanks,
Leo
James Clark May 11, 2021, 10 a.m. UTC | #7
On 11/05/2021 11:05, Leo Yan wrote:
> On Mon, May 10, 2021 at 01:39:04PM +0800, Leo Yan wrote:
>> On Fri, May 07, 2021 at 01:02:35PM +0300, James Clark wrote:
>>>
>>>
>>> On 07/05/2021 12:58, James Clark wrote:
>>>> There is an intermittent issue on Trogdor devices that
>>>> results in all Coresight timestamps having a value of zero.
>>>
>>> I've attached a file here that has the issue. From the dump you 
>>> can see the zero timestamps:
>>>
>>>         Idx:69; ID:10;  I_TIMESTAMP : Timestamp.; Updated val = 0x0
>>>         Idx:71; ID:10;  I_ATOM_F1 : Atom format 1.; E
>>>         Idx:72; ID:10;  I_ADDR_S_IS0 : Address, Short, IS0.; Addr=0xFFFFFFE723C65824 ~[0x5824]
>>>
>>> This doesn't have an impact on decoding as they end up being
>>> decoded in file order as in with timeless mode.
>>
>> Just remind, as Mike has mentioned that if the timestamp is zero, it
>> means the hardware setting for timestamp is not enabled properly.  So
>> for system wide or per CPU mode tracing, it's better to double check
>> what's the reason the timestamp is not enabled properly.
>>
>> IIUC, this patch breaks the existed rational in the code.  Let's think
>> about there have 4 CPUs, every CPU has its own AUX trace buffer, and
>> when decode the trace data, it will use 4 queues to track the packets
>> and every queue has its timestamp.
>>
>>   CPU0: cs_etm_queue -> ... -> packet_queue->timestamp
>>   CPU1: cs_etm_queue -> ... -> packet_queue->timestamp
>>   CPU2: cs_etm_queue -> ... -> packet_queue->timestamp
>>   CPU3: cs_etm_queue -> ... -> packet_queue->timestamp
>>
>> The issue is if all CPUs' timestamp are zero, it's impossible to find
>> a way to synthesize samples in the right time order.
> 
> I saw Denis's replying for the hardware issue for timestamp, wander if
> can add a new option "--force-aux-ts-zero" to override the hardware
> timestamp issue.  Without the option "--force-aux-ts-zero", the
> developers still have chance to observe the failure case caused by the
> abnormal timestamps.
> 

Hi Leo,

Now that you mention arguments, I remembered that you already can force something like that
with --disable-order.

There is also a recent commit "perf intel-pt: Support Z itrace option for timeless decoding"
which introduces this option:

	@@ -869,6 +869,7 @@ The letters are:
	     	L	synthesize last branch entries on existing event records
	 	s	skip initial number of events
	 	q	quicker (less detailed) decoding
	+	Z	prefer to ignore timestamps (so-called "timeless" decoding)

I will investigate if these are more relevant to fix this issue.


Thanks
James

> Thanks,
> Leo
>
James Clark May 11, 2021, 10:07 a.m. UTC | #8
On 07/05/2021 17:09, Mike Leach wrote:
> Hi James,
> 
> 1) Not sure about this particular target - but previously all 0
> timestamps have been the result of not enabling a clock or the
> coresight timestamp generator.
> 
> 2) Given that you can synthesise timestamps when all the values are
> all 0 - does this not imply that they could be synthesised when they
> are not present at all?

Yes they could, perhaps we could change our command line arguments to remove
recording of timestamps completely. But I think that the auto enabling of
timestamps for per-cpu mode is only additive so there is no way to disable
it on the command line.

Perhaps we could make a change there instead to make the event configuration more flexible.

There is a similar concept with the --timestamp argument where it has both
"not set" and "set, but set to false" variables to distinguish between when the user hasn't
provided anything and when they've provided the negative:

	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,

opts.sample_time is whether timestamps are on or not and
opts.sample_time_set is whether the user provided any argument, true or false.

Thanks
James

> 
> Cheers
> 
> Mike
> 
> On Fri, 7 May 2021 at 11:02, James Clark <james.clark@arm.com> wrote:
>>
>>
>>
>> On 07/05/2021 12:58, James Clark wrote:
>>> There is an intermittent issue on Trogdor devices that
>>> results in all Coresight timestamps having a value of zero.
>>
>> I've attached a file here that has the issue. From the dump you
>> can see the zero timestamps:
>>
>>         Idx:69; ID:10;  I_TIMESTAMP : Timestamp.; Updated val = 0x0
>>         Idx:71; ID:10;  I_ATOM_F1 : Atom format 1.; E
>>         Idx:72; ID:10;  I_ADDR_S_IS0 : Address, Short, IS0.; Addr=0xFFFFFFE723C65824 ~[0x5824]
>>
>> This doesn't have an impact on decoding as they end up being
>> decoded in file order as in with timeless mode.
>>
>> James
>>
>>> Because zero is currently treated as "not found" rather
>>> than "found but zero", this breaks the decoding flow
>>> when it would otherwise work.
>>>
>>> This patch adds an out parameter and return code so
>>> the difference between zero and non-existent timestamps
>>> can be determined.
>>>
>>> There is also a change to fix an underflow.
>>>
>>> Although this is a workaround, the change is technically
>>> a valid way of writing the cs_etm__etmq_get_timestamp()
>>> function. It could have been written similarly to this
>>> without trying to work around this issue, so it's no less
>>> correct. But, because it's a workaround to a problem
>>> elsewhere, I will submit this as an RFC for feedback.
>>>
>>> This patch applies on top of the "[PATCH v2 0/2] perf
>>> cs-etm: Set time on synthesised samples to preserve ordering"
>>> patchset.
>>>
>>> Co-developed-by: Denis Nikitin <denik@chromium.org>
>>> Signed-off-by: Denis Nikitin <denik@chromium.org>
>>> Signed-off-by: James Clark <james.clark@arm.com>
>>> ---
>>>  .../perf/util/cs-etm-decoder/cs-etm-decoder.c |  5 ++++-
>>>  tools/perf/util/cs-etm.c                      | 22 +++++++++----------
>>>  2 files changed, 15 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>>> index b01d363b9301..947e44413c6e 100644
>>> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>>> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>>> @@ -320,7 +320,10 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
>>>        * which instructions started by subtracting the number of instructions
>>>        * executed to the timestamp.
>>>        */
>>> -     packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
>>> +     if (packet_queue->instr_count >= elem->timestamp)
>>> +             packet_queue->cs_timestamp = 0;
>>> +     else
>>> +             packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
>>>       packet_queue->next_cs_timestamp = elem->timestamp;
>>>       packet_queue->instr_count = 0;
>>>
>>> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
>>> index e5c1a1b22a2a..1969921c406a 100644
>>> --- a/tools/perf/util/cs-etm.c
>>> +++ b/tools/perf/util/cs-etm.c
>>> @@ -210,13 +210,14 @@ void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
>>>       etmq->pending_timestamp_chan_id = trace_chan_id;
>>>  }
>>>
>>> -static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
>>> +static int cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
>>> +                                   u64 *timestamp,
>>>                                     u8 *trace_chan_id)
>>>  {
>>>       struct cs_etm_packet_queue *packet_queue;
>>>
>>>       if (!etmq->pending_timestamp_chan_id)
>>> -             return 0;
>>> +             return -ENODATA;
>>>
>>>       if (trace_chan_id)
>>>               *trace_chan_id = etmq->pending_timestamp_chan_id;
>>> @@ -224,13 +225,15 @@ static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
>>>       packet_queue = cs_etm__etmq_get_packet_queue(etmq,
>>>                                                    etmq->pending_timestamp_chan_id);
>>>       if (!packet_queue)
>>> -             return 0;
>>> +             return -ENODATA;
>>>
>>>       /* Acknowledge pending status */
>>>       etmq->pending_timestamp_chan_id = 0;
>>>
>>>       /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
>>> -     return packet_queue->cs_timestamp;
>>> +     if (timestamp)
>>> +             *timestamp = packet_queue->cs_timestamp;
>>> +     return 0;
>>>  }
>>>
>>>  static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
>>> @@ -864,11 +867,10 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
>>>                * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
>>>                * the timestamp calculation for us.
>>>                */
>>> -             cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
>>> -
>>> -             /* We found a timestamp, no need to continue. */
>>> -             if (cs_timestamp)
>>> +             if (!cs_etm__etmq_get_timestamp(etmq, &cs_timestamp, &trace_chan_id)) {
>>> +                     /* We found a timestamp, no need to continue. */
>>>                       break;
>>> +             }
>>>
>>>               /*
>>>                * We didn't find a timestamp so empty all the traceid packet
>>> @@ -2286,9 +2288,7 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
>>>               if (ret)
>>>                       goto out;
>>>
>>> -             cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
>>> -
>>> -             if (!cs_timestamp) {
>>> +             if (cs_etm__etmq_get_timestamp(etmq, &cs_timestamp, &trace_chan_id)) {
>>>                       /*
>>>                        * Function cs_etm__decode_data_block() returns when
>>>                        * there is no more traces to decode in the current
>>>
> 
> 
>
James Clark May 11, 2021, 1:53 p.m. UTC | #9
On 10/05/2021 08:39, Leo Yan wrote:
> Hi James,
> 

[...]
> 
>>> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>>> index b01d363b9301..947e44413c6e 100644
>>> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>>> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>>> @@ -320,7 +320,10 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
>>>  	 * which instructions started by subtracting the number of instructions
>>>  	 * executed to the timestamp.
>>>  	 */
>>> -	packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
>>> +	if (packet_queue->instr_count >= elem->timestamp)
>>> +		packet_queue->cs_timestamp = 0;
>>> +	else
>>> +		packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
> 
> Actually here have two situations: one case is "elem->timestamp" is zero,
> another case is the overflow for "elem->timestamp".
> 
> So the change should be like:
> 
>    if (!elem->timestamp)
>        packet_queue->cs_timestamp = 0;
>    else if (packet_queue->instr_count >= elem->timestamp)
>        /* handle overflow? */
>    else
>       packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
> 
> It's better to think about how to handle the overflow in this case.

Do you have any idea about what to do in the overflow case? I think I will submit a
new patchset that makes the new 'Z' timeless --itrace option work, because that also
fixes this issue, without having to do the original workaround change in this RFC.

But I'd also like to fix this overflow because it masks the issue by making non-zero
timestamps appear even though they aren't valid ones.

I was thinking that printing a warning in the overflow case would work, but then I would
also print a warning for the zero timestamps, and that would make just a single case,
rather than two. Unless we just have slightly different warning text?

Something like this? Without the zero timestamp issue, the underflow issue probably wouldn't
be encountered. But at least there would be some visibility if it did:

diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 059bcec3f651..5d8abccd34ab 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -17,6 +17,7 @@
 
 #include "cs-etm.h"
 #include "cs-etm-decoder.h"
+#include "debug.h"
 #include "intlist.h"
 
 /* use raw logging */
@@ -294,9 +295,11 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
 static ocsd_datapath_resp_t
 cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
                                  const ocsd_generic_trace_elem *elem,
-                                 const uint8_t trace_chan_id)
+                                 const uint8_t trace_chan_id,
+                                 const ocsd_trc_index_t indx)
 {
        struct cs_etm_packet_queue *packet_queue;
+       static bool warned_timestamp_zero = false;
 
        /* First get the packet queue for this traceID */
        packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
@@ -320,7 +323,20 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
         * which instructions started by subtracting the number of instructions
         * executed to the timestamp.
         */
-       packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;
+       if (!elem->timestamp) {
+               packet_queue->timestamp = 0;
+               if (!warned_timestamp_zero) {
+                       pr_err("Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR
+                               ". Decoding may be improved with --itrace=Z...\n", indx);
+                       warned_timestamp_zero = true;
+               }
+       }
+       else if (packet_queue->instr_count >= elem->timestamp) {
+               packet_queue->timestamp = 0;
+               pr_err("Timestamp calculation underflow at Idx:%" OCSD_TRC_IDX_STR "\n", indx);
+       }
+       else
+               packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;
        packet_queue->next_timestamp = elem->timestamp;
        packet_queue->instr_count = 0;
 
@@ -542,7 +558,7 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq,
 
 static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
                                const void *context,
-                               const ocsd_trc_index_t indx __maybe_unused,
+                               const ocsd_trc_index_t indx,
                                const u8 trace_chan_id __maybe_unused,
                                const ocsd_generic_trace_elem *elem)
 {
@@ -579,7 +595,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
                break;
        case OCSD_GEN_TRC_ELEM_TIMESTAMP:
                resp = cs_etm_decoder__do_hard_timestamp(etmq, elem,
-                                                        trace_chan_id);
+                                                        trace_chan_id,
+                                                         indx);
                break;
        case OCSD_GEN_TRC_ELEM_PE_CONTEXT:
                resp = cs_etm_decoder__set_tid(etmq, packet_queue,


James

> 
> Thanks,
> Leo
>
Leo Yan May 12, 2021, 1:20 a.m. UTC | #10
On Tue, May 11, 2021 at 04:53:35PM +0300, James Clark wrote:

[...]

> Do you have any idea about what to do in the overflow case?

A quick thinking is to connect the kernel timestamp and correlate the
overflow case for CoreSight's timestamp, but this approach will cause
complexity.  And considering if the overflow occurs for not only once
before the new kernel timestamp is updated, it's hard to handle for
this case.  So seems to me, printing warning is a better choice.

> I think I will submit a
> new patchset that makes the new 'Z' timeless --itrace option work, because that also
> fixes this issue, without having to do the original workaround change in this RFC.

Good finding for these options for zero timestamps!

> But I'd also like to fix this overflow because it masks the issue by making non-zero
> timestamps appear even though they aren't valid ones.
> 
> I was thinking that printing a warning in the overflow case would work, but then I would
> also print a warning for the zero timestamps, and that would make just a single case,
> rather than two. Unless we just have slightly different warning text?

Printing two different warnings is okay for me, which is more clear
for users.

> Something like this? Without the zero timestamp issue, the underflow issue probably wouldn't
> be encountered. But at least there would be some visibility if it did:
> 
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> index 059bcec3f651..5d8abccd34ab 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> @@ -17,6 +17,7 @@
>  
>  #include "cs-etm.h"
>  #include "cs-etm-decoder.h"
> +#include "debug.h"
>  #include "intlist.h"
>  
>  /* use raw logging */
> @@ -294,9 +295,11 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
>  static ocsd_datapath_resp_t
>  cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
>                                   const ocsd_generic_trace_elem *elem,
> -                                 const uint8_t trace_chan_id)
> +                                 const uint8_t trace_chan_id,
> +                                 const ocsd_trc_index_t indx)

Do we really need the new argument "indx"?  If print "trace_chan_id",
can it give the info that the timestamp is attached to which tracer?

>  {
>         struct cs_etm_packet_queue *packet_queue;
> +       static bool warned_timestamp_zero = false;
>  
>         /* First get the packet queue for this traceID */
>         packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
> @@ -320,7 +323,20 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
>          * which instructions started by subtracting the number of instructions
>          * executed to the timestamp.
>          */
> -       packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;
> +       if (!elem->timestamp) {
> +               packet_queue->timestamp = 0;
> +               if (!warned_timestamp_zero) {
> +                       pr_err("Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR
> +                               ". Decoding may be improved with --itrace=Z...\n", indx);
> +                       warned_timestamp_zero = true;
> +               }

I think this warning and the next warning for overflow, both can use
the macro WARN_ONCE(), so you can avoid to add new variable
"warned_timestamp_zero".

Thanks,
Leo

> +       }
> +       else if (packet_queue->instr_count >= elem->timestamp) {
> +               packet_queue->timestamp = 0;
> +               pr_err("Timestamp calculation underflow at Idx:%" OCSD_TRC_IDX_STR "\n", indx);
> +       }
> +       else
> +               packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;
>         packet_queue->next_timestamp = elem->timestamp;
>         packet_queue->instr_count = 0;
>  
> @@ -542,7 +558,7 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq,
>  
>  static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
>                                 const void *context,
> -                               const ocsd_trc_index_t indx __maybe_unused,
> +                               const ocsd_trc_index_t indx,
>                                 const u8 trace_chan_id __maybe_unused,
>                                 const ocsd_generic_trace_elem *elem)
>  {
> @@ -579,7 +595,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
>                 break;
>         case OCSD_GEN_TRC_ELEM_TIMESTAMP:
>                 resp = cs_etm_decoder__do_hard_timestamp(etmq, elem,
> -                                                        trace_chan_id);
> +                                                        trace_chan_id,
> +                                                         indx);
>                 break;
>         case OCSD_GEN_TRC_ELEM_PE_CONTEXT:
>                 resp = cs_etm_decoder__set_tid(etmq, packet_queue,
> 
> 
> James
> 
> > 
> > Thanks,
> > Leo
> >
Leo Yan May 12, 2021, 2:08 a.m. UTC | #11
On Tue, May 11, 2021 at 04:53:35PM +0300, James Clark wrote:

[...]

>         /* First get the packet queue for this traceID */
>         packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
> @@ -320,7 +323,20 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
>          * which instructions started by subtracting the number of instructions
>          * executed to the timestamp.
>          */
> -       packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;
> +       if (!elem->timestamp) {
> +               packet_queue->timestamp = 0;
> +               if (!warned_timestamp_zero) {
> +                       pr_err("Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR
> +                               ". Decoding may be improved with --itrace=Z...\n", indx);
> +                       warned_timestamp_zero = true;
> +               }
> +       }
> +       else if (packet_queue->instr_count >= elem->timestamp) {

Nitpick: I personally think should use the condition ">" rather than ">=".

> +               packet_queue->timestamp = 0;
> +               pr_err("Timestamp calculation underflow at Idx:%" OCSD_TRC_IDX_STR "\n", indx);
> +       }
> +       else
> +               packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;

Nitpick for coding style, as described in
Documentation/process/coding-style.rst, section "3) Placing Braces and
Spaces", so here should use braces with the format:

  if (!elem->timestamp) {
    /* print warning */
  } else if {packet_queue->instr_count >= elem->timestamp) {
    /* print warning */
  } else {
    packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;
  }

Thanks,
Leo

>         packet_queue->next_timestamp = elem->timestamp;
>         packet_queue->instr_count = 0;
>  
> @@ -542,7 +558,7 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq,
>  
>  static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
>                                 const void *context,
> -                               const ocsd_trc_index_t indx __maybe_unused,
> +                               const ocsd_trc_index_t indx,
>                                 const u8 trace_chan_id __maybe_unused,
>                                 const ocsd_generic_trace_elem *elem)
>  {
> @@ -579,7 +595,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
>                 break;
>         case OCSD_GEN_TRC_ELEM_TIMESTAMP:
>                 resp = cs_etm_decoder__do_hard_timestamp(etmq, elem,
> -                                                        trace_chan_id);
> +                                                        trace_chan_id,
> +                                                         indx);
>                 break;
>         case OCSD_GEN_TRC_ELEM_PE_CONTEXT:
>                 resp = cs_etm_decoder__set_tid(etmq, packet_queue,
> 
> 
> James
> 
> > 
> > Thanks,
> > Leo
> >
James Clark May 13, 2021, 1:10 p.m. UTC | #12
On 12/05/2021 05:08, Leo Yan wrote:
> On Tue, May 11, 2021 at 04:53:35PM +0300, James Clark wrote:
> 
> [...]
> 
>>         /* First get the packet queue for this traceID */
>>         packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
>> @@ -320,7 +323,20 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
>>          * which instructions started by subtracting the number of instructions
>>          * executed to the timestamp.
>>          */
>> -       packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;
>> +       if (!elem->timestamp) {
>> +               packet_queue->timestamp = 0;
>> +               if (!warned_timestamp_zero) {
>> +                       pr_err("Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR
>> +                               ". Decoding may be improved with --itrace=Z...\n", indx);
>> +                       warned_timestamp_zero = true;
>> +               }
>> +       }
>> +       else if (packet_queue->instr_count >= elem->timestamp) {
> 
> Nitpick: I personally think should use the condition ">" rather than ">=".

Yes, good catch. I actually changed this because I realised that
if they are equal it shouldn't print an error.

> 
>> +               packet_queue->timestamp = 0;
>> +               pr_err("Timestamp calculation underflow at Idx:%" OCSD_TRC_IDX_STR "\n", indx);
>> +       }
>> +       else
>> +               packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;
> 
> Nitpick for coding style, as described in
> Documentation/process/coding-style.rst, section "3) Placing Braces and
> Spaces", so here should use braces with the format:

Ok I will update and run it through checkpatch.pl before posting.

[...]
James Clark May 13, 2021, 1:57 p.m. UTC | #13
On 12/05/2021 04:20, Leo Yan wrote:
> On Tue, May 11, 2021 at 04:53:35PM +0300, James Clark wrote:
> 
> [...]
> 
>> Do you have any idea about what to do in the overflow case?
> 
> A quick thinking is to connect the kernel timestamp and correlate the
> overflow case for CoreSight's timestamp, but this approach will cause
> complexity.  And considering if the overflow occurs for not only once
> before the new kernel timestamp is updated, it's hard to handle for
> this case.  So seems to me, printing warning is a better choice.
> 
>> I think I will submit a
>> new patchset that makes the new 'Z' timeless --itrace option work, because that also
>> fixes this issue, without having to do the original workaround change in this RFC.
> 
> Good finding for these options for zero timestamps!
> 
>> But I'd also like to fix this overflow because it masks the issue by making non-zero
>> timestamps appear even though they aren't valid ones.
>>
>> I was thinking that printing a warning in the overflow case would work, but then I would
>> also print a warning for the zero timestamps, and that would make just a single case,
>> rather than two. Unless we just have slightly different warning text?
> 
> Printing two different warnings is okay for me, which is more clear
> for users.
> 
>> Something like this? Without the zero timestamp issue, the underflow issue probably wouldn't
>> be encountered. But at least there would be some visibility if it did:
>>
>> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>> index 059bcec3f651..5d8abccd34ab 100644
>> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>> @@ -17,6 +17,7 @@
>>  
>>  #include "cs-etm.h"
>>  #include "cs-etm-decoder.h"
>> +#include "debug.h"
>>  #include "intlist.h"
>>  
>>  /* use raw logging */
>> @@ -294,9 +295,11 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
>>  static ocsd_datapath_resp_t
>>  cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
>>                                   const ocsd_generic_trace_elem *elem,
>> -                                 const uint8_t trace_chan_id)
>> +                                 const uint8_t trace_chan_id,
>> +                                 const ocsd_trc_index_t indx)
> 
> Do we really need the new argument "indx"?  If print "trace_chan_id",
> can it give the info that the timestamp is attached to which tracer?

I thought that just the channel ID wouldn't be very useful for locating where the
issue is when doing --dump-raw-trace.

By printing "Idx:..." it can be pasted straight into the search in perf and you'll
jump straight to the part where the error happened. If you only have the channel
ID then you'd still need to get a debugger out and find out the index if you want
to look into the problem.

I will include the index in the new patch I will submit now, but I don't insist on
keeping it so let me know what you think.

James
diff mbox series

Patch

diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index b01d363b9301..947e44413c6e 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -320,7 +320,10 @@  cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
 	 * which instructions started by subtracting the number of instructions
 	 * executed to the timestamp.
 	 */
-	packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
+	if (packet_queue->instr_count >= elem->timestamp)
+		packet_queue->cs_timestamp = 0;
+	else
+		packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
 	packet_queue->next_cs_timestamp = elem->timestamp;
 	packet_queue->instr_count = 0;
 
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index e5c1a1b22a2a..1969921c406a 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -210,13 +210,14 @@  void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
 	etmq->pending_timestamp_chan_id = trace_chan_id;
 }
 
-static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
+static int cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
+				      u64 *timestamp,
 				      u8 *trace_chan_id)
 {
 	struct cs_etm_packet_queue *packet_queue;
 
 	if (!etmq->pending_timestamp_chan_id)
-		return 0;
+		return -ENODATA;
 
 	if (trace_chan_id)
 		*trace_chan_id = etmq->pending_timestamp_chan_id;
@@ -224,13 +225,15 @@  static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
 	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
 						     etmq->pending_timestamp_chan_id);
 	if (!packet_queue)
-		return 0;
+		return -ENODATA;
 
 	/* Acknowledge pending status */
 	etmq->pending_timestamp_chan_id = 0;
 
 	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
-	return packet_queue->cs_timestamp;
+	if (timestamp)
+		*timestamp = packet_queue->cs_timestamp;
+	return 0;
 }
 
 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
@@ -864,11 +867,10 @@  static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
 		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
 		 * the timestamp calculation for us.
 		 */
-		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
-
-		/* We found a timestamp, no need to continue. */
-		if (cs_timestamp)
+		if (!cs_etm__etmq_get_timestamp(etmq, &cs_timestamp, &trace_chan_id)) {
+			/* We found a timestamp, no need to continue. */
 			break;
+		}
 
 		/*
 		 * We didn't find a timestamp so empty all the traceid packet
@@ -2286,9 +2288,7 @@  static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
 		if (ret)
 			goto out;
 
-		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
-
-		if (!cs_timestamp) {
+		if (cs_etm__etmq_get_timestamp(etmq, &cs_timestamp, &trace_chan_id)) {
 			/*
 			 * Function cs_etm__decode_data_block() returns when
 			 * there is no more traces to decode in the current