diff mbox series

[RESEND,1/1] perf arm-spe: report all SPE records as "all" events

Message ID 20211117142833.226629-1-german.gomez@arm.com (mailing list archive)
State New, archived
Headers show
Series [RESEND,1/1] perf arm-spe: report all SPE records as "all" events | expand

Commit Message

German Gomez Nov. 17, 2021, 2:28 p.m. UTC
From: James Clark <james.clark@arm.com>

Currently perf-report and perf-inject are dropping a large number of SPE
records because they don't contain any of the existing events, but the
contextual information of the records is still useful to keep.

The synthesized event "all" is generated for every SPE record that is
processed, regardless of whether the record contains interesting events
or not. The event can be filtered with the flag "--itrace=o".

Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: German Gomez <german.gomez@arm.com>
---
 tools/perf/Documentation/itrace.txt |  2 +-
 tools/perf/util/arm-spe.c           | 36 +++++++++++++++++++++++++++++
 tools/perf/util/auxtrace.h          |  2 +-
 3 files changed, 38 insertions(+), 2 deletions(-)

Comments

Leo Yan Nov. 25, 2021, 7:53 a.m. UTC | #1
On Wed, Nov 17, 2021 at 02:28:32PM +0000, German Gomez wrote:
> From: James Clark <james.clark@arm.com>
> 
> Currently perf-report and perf-inject are dropping a large number of SPE
> records because they don't contain any of the existing events, but the
> contextual information of the records is still useful to keep.
> 
> The synthesized event "all" is generated for every SPE record that is
> processed, regardless of whether the record contains interesting events
> or not. The event can be filtered with the flag "--itrace=o".
> 
> Signed-off-by: James Clark <james.clark@arm.com>
> Signed-off-by: German Gomez <german.gomez@arm.com>
> ---
>  tools/perf/Documentation/itrace.txt |  2 +-
>  tools/perf/util/arm-spe.c           | 36 +++++++++++++++++++++++++++++
>  tools/perf/util/auxtrace.h          |  2 +-
>  3 files changed, 38 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
> index c52755481..57dc12b83 100644
> --- a/tools/perf/Documentation/itrace.txt
> +++ b/tools/perf/Documentation/itrace.txt
> @@ -6,7 +6,7 @@
>  		w	synthesize ptwrite events
>  		p	synthesize power events (incl. PSB events for Intel PT)
>  		o	synthesize other events recorded due to the use
> -			of aux-output (refer to perf record)
> +			of aux-output (refer to perf record) (all events for Arm SPE)
>  		e	synthesize error events
>  		d	create a debug log
>  		f	synthesize first level cache events
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index ce77abf90..6428351db 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -58,6 +58,7 @@ struct arm_spe {
>  	u8				sample_branch;
>  	u8				sample_remote_access;
>  	u8				sample_memory;
> +	u8				sample_other;
>  
>  	u64				l1d_miss_id;
>  	u64				l1d_access_id;
> @@ -68,6 +69,7 @@ struct arm_spe {
>  	u64				branch_miss_id;
>  	u64				remote_access_id;
>  	u64				memory_id;
> +	u64				all_id;
>  
>  	u64				kernel_start;
>  
> @@ -351,6 +353,23 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
>  	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
>  }
>  
> +static int arm_spe__synth_other_sample(struct arm_spe_queue *speq,
> +				       u64 spe_events_id)
> +{
> +	struct arm_spe *spe = speq->spe;
> +	struct arm_spe_record *record = &speq->decoder->record;
> +	union perf_event *event = speq->event_buf;
> +	struct perf_sample sample = { .ip = 0, };
> +
> +	arm_spe_prep_sample(spe, speq, event, &sample);
> +
> +	sample.id = spe_events_id;
> +	sample.stream_id = spe_events_id;
> +	sample.addr = record->to_ip;

After checking the event types, I think "other" samples would include
the raw event types below:

  EV_EXCEPTION_GEN
  EV_RETIRED
  EV_NOT_TAKEN
  EV_ALIGNMENT
  EV_PARTIAL_PREDICATE
  EV_EMPTY_PREDICATE

I am just wondering if we can use sample.transaction to store these event
types; otherwise, we cannot distinguish the event type for the samples.

It would also be good to fill in more sample fields for complete info, like:

  sample.addr = record->virt_addr;
  sample.phys_addr = record->phys_addr;
  sample.data_src = data_src;

Thanks,
Leo

> +
> +	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
> +}
> +
>  #define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
>  			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
>  			 ARM_SPE_REMOTE_ACCESS)
> @@ -480,6 +499,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
>  			return err;
>  	}
>  
> +	if (spe->sample_other) {
> +		err = arm_spe__synth_other_sample(speq, spe->all_id);
> +		if (err)
> +			return err;
> +	}
> +
>  	return 0;
>  }
>  
> @@ -1107,6 +1132,17 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
>  			return err;
>  		spe->memory_id = id;
>  		arm_spe_set_event_name(evlist, id, "memory");
> +		id += 1;
> +	}
> +
> +	if (spe->synth_opts.other_events) {
> +		spe->sample_other = true;
> +
> +		err = arm_spe_synth_event(session, &attr, id);
> +		if (err)
> +			return err;
> +		spe->all_id = id;
> +		arm_spe_set_event_name(evlist, id, "all");
>  	}
>  
>  	return 0;
> diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
> index bbf0d78c6..efe1bdc06 100644
> --- a/tools/perf/util/auxtrace.h
> +++ b/tools/perf/util/auxtrace.h
> @@ -74,7 +74,7 @@ enum itrace_period_type {
>   * @ptwrites: whether to synthesize events for ptwrites
>   * @pwr_events: whether to synthesize power events
>   * @other_events: whether to synthesize other events recorded due to the use of
> - *                aux_output
> + *                aux_output (all events for Arm SPE)
>   * @errors: whether to synthesize decoder error events
>   * @dont_decode: whether to skip decoding entirely
>   * @log: write a decoding log
> -- 
> 2.25.1
>
James Clark Nov. 25, 2021, 10:21 a.m. UTC | #2
On 25/11/2021 07:53, Leo Yan wrote:
> On Wed, Nov 17, 2021 at 02:28:32PM +0000, German Gomez wrote:
>> From: James Clark <james.clark@arm.com>
>>
>> Currently perf-report and perf-inject are dropping a large number of SPE
>> records because they don't contain any of the existing events, but the
>> contextual information of the records is still useful to keep.
>>
>> The synthesized event "all" is generated for every SPE record that is
>> processed, regardless of whether the record contains interesting events
>> or not. The event can be filtered with the flag "--itrace=o".
>>
>> Signed-off-by: James Clark <james.clark@arm.com>
>> Signed-off-by: German Gomez <german.gomez@arm.com>
>> ---
>>  tools/perf/Documentation/itrace.txt |  2 +-
>>  tools/perf/util/arm-spe.c           | 36 +++++++++++++++++++++++++++++
>>  tools/perf/util/auxtrace.h          |  2 +-
>>  3 files changed, 38 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
>> index c52755481..57dc12b83 100644
>> --- a/tools/perf/Documentation/itrace.txt
>> +++ b/tools/perf/Documentation/itrace.txt
>> @@ -6,7 +6,7 @@
>>  		w	synthesize ptwrite events
>>  		p	synthesize power events (incl. PSB events for Intel PT)
>>  		o	synthesize other events recorded due to the use
>> -			of aux-output (refer to perf record)
>> +			of aux-output (refer to perf record) (all events for Arm SPE)
>>  		e	synthesize error events
>>  		d	create a debug log
>>  		f	synthesize first level cache events
>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>> index ce77abf90..6428351db 100644
>> --- a/tools/perf/util/arm-spe.c
>> +++ b/tools/perf/util/arm-spe.c
>> @@ -58,6 +58,7 @@ struct arm_spe {
>>  	u8				sample_branch;
>>  	u8				sample_remote_access;
>>  	u8				sample_memory;
>> +	u8				sample_other;
>>  
>>  	u64				l1d_miss_id;
>>  	u64				l1d_access_id;
>> @@ -68,6 +69,7 @@ struct arm_spe {
>>  	u64				branch_miss_id;
>>  	u64				remote_access_id;
>>  	u64				memory_id;
>> +	u64				all_id;
>>  
>>  	u64				kernel_start;
>>  
>> @@ -351,6 +353,23 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
>>  	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
>>  }
>>  
>> +static int arm_spe__synth_other_sample(struct arm_spe_queue *speq,
>> +				       u64 spe_events_id)
>> +{
>> +	struct arm_spe *spe = speq->spe;
>> +	struct arm_spe_record *record = &speq->decoder->record;
>> +	union perf_event *event = speq->event_buf;
>> +	struct perf_sample sample = { .ip = 0, };
>> +
>> +	arm_spe_prep_sample(spe, speq, event, &sample);
>> +
>> +	sample.id = spe_events_id;
>> +	sample.stream_id = spe_events_id;
>> +	sample.addr = record->to_ip;
> 
> After checked the event types, I think "other" samples would include
> below raw event types:

Maybe we should rename some of the functions and variables if there is
confusion, but I think this new group is "all" rather than "other" because
it also includes all the events that would be put in other groups.

> 
>   EV_EXCEPTION_GEN
>   EV_RETIRED
>   EV_NOT_TAKEN
>   EV_ALIGNMENT
>   EV_PARTIAL_PREDICATE
>   EV_EMPTY_PREDICATE
> 
> I am just wander if we can use sample.transaction to store these event
> types, otherwise, we cannot distinguish the event type for the samples.

If we can use the transaction field to distinguish sample types, I'm
wondering why we need the separate groups at all. If this new group
includes all sample types, and they're all labelled, do we need to
continue with the other groups like "tlb-access" and "branch-miss"?

Or does the perf GUI not allow filtering by transaction type?

James

> 
> And it's good fill more sample fields for complete info, like:
> 
>   sample.addr = record->virt_addr;
>   sample.phys_addr = record->phys_addr;
>   sample.data_src = data_src;
> 
> Thanks,
> Leo
> 
>> +
>> +	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
>> +}
>> +
>>  #define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
>>  			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
>>  			 ARM_SPE_REMOTE_ACCESS)
>> @@ -480,6 +499,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
>>  			return err;
>>  	}
>>  
>> +	if (spe->sample_other) {
>> +		err = arm_spe__synth_other_sample(speq, spe->all_id);
>> +		if (err)
>> +			return err;
>> +	}
>> +
>>  	return 0;
>>  }
>>  
>> @@ -1107,6 +1132,17 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
>>  			return err;
>>  		spe->memory_id = id;
>>  		arm_spe_set_event_name(evlist, id, "memory");
>> +		id += 1;
>> +	}
>> +
>> +	if (spe->synth_opts.other_events) {
>> +		spe->sample_other = true;
>> +
>> +		err = arm_spe_synth_event(session, &attr, id);
>> +		if (err)
>> +			return err;
>> +		spe->all_id = id;
>> +		arm_spe_set_event_name(evlist, id, "all");
>>  	}
>>  
>>  	return 0;
>> diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
>> index bbf0d78c6..efe1bdc06 100644
>> --- a/tools/perf/util/auxtrace.h
>> +++ b/tools/perf/util/auxtrace.h
>> @@ -74,7 +74,7 @@ enum itrace_period_type {
>>   * @ptwrites: whether to synthesize events for ptwrites
>>   * @pwr_events: whether to synthesize power events
>>   * @other_events: whether to synthesize other events recorded due to the use of
>> - *                aux_output
>> + *                aux_output (all events for Arm SPE)
>>   * @errors: whether to synthesize decoder error events
>>   * @dont_decode: whether to skip decoding entirely
>>   * @log: write a decoding log
>> -- 
>> 2.25.1
>>
German Gomez Nov. 25, 2021, 11:54 a.m. UTC | #3
Hi Leo,

On 25/11/2021 07:53, Leo Yan wrote:
> On Wed, Nov 17, 2021 at 02:28:32PM +0000, German Gomez wrote:
>> [...]
> After checked the event types, I think "other" samples would include
> below raw event types:
>
>   EV_EXCEPTION_GEN
>   EV_RETIRED
>   EV_NOT_TAKEN
>   EV_ALIGNMENT
>   EV_PARTIAL_PREDICATE
>   EV_EMPTY_PREDICATE
>
> I am just wander if we can use sample.transaction to store these event
> types, otherwise, we cannot distinguish the event type for the samples.

I'm not familiar with the meaning of transactions in this context, but
I agree giving visibility of these remaining events is a good idea. I'm
just unsure where to place them from the available --itrace options.

Regarding the "all" events, we thought having quick access to a global
histogram of all the SPE events is useful, and from all the --itrace
options it seemed to fit best under --itrace=o.

Thanks,
German

>
> And it's good fill more sample fields for complete info, like:
>
>   sample.addr = record->virt_addr;
>   sample.phys_addr = record->phys_addr;
>   sample.data_src = data_src;
>
> Thanks,
> Leo
>
>> [...]
Leo Yan Nov. 25, 2021, 12:30 p.m. UTC | #4
On Thu, Nov 25, 2021 at 10:21:48AM +0000, James Clark wrote:
> On 25/11/2021 07:53, Leo Yan wrote:

[...]

> >> +static int arm_spe__synth_other_sample(struct arm_spe_queue *speq,
> >> +				       u64 spe_events_id)
> >> +{
> >> +	struct arm_spe *spe = speq->spe;
> >> +	struct arm_spe_record *record = &speq->decoder->record;
> >> +	union perf_event *event = speq->event_buf;
> >> +	struct perf_sample sample = { .ip = 0, };
> >> +
> >> +	arm_spe_prep_sample(spe, speq, event, &sample);
> >> +
> >> +	sample.id = spe_events_id;
> >> +	sample.stream_id = spe_events_id;
> >> +	sample.addr = record->to_ip;
> > 
> > After checked the event types, I think "other" samples would include
> > below raw event types:
> 
> Maybe we should rename some of the functions and variables if there is
> confusion, but I think this new group is "all" rather than "other" because
> it also includes all the events that would be put in other groups.
> 
> > 
> >   EV_EXCEPTION_GEN
> >   EV_RETIRED
> >   EV_NOT_TAKEN
> >   EV_ALIGNMENT
> >   EV_PARTIAL_PREDICATE
> >   EV_EMPTY_PREDICATE
> > 
> > I am just wander if we can use sample.transaction to store these event
> > types, otherwise, we cannot distinguish the event type for the samples.
> 
> If we can use the transaction field to distinguish sample types, I'm
> wondering why we need the separate groups at all. If this new group
> includes all sample types, and they're all labelled, do we need to
> continue with the other groups like "tlb-access" and "branch-miss"?

I admit the samples for "tlb-access" and "branch-miss" might not be
good practice.  At the time when I was upstreaming the Arm SPE patches
(mainly based on the Hisilicon patches), the main idea was to use some
events to output samples; this is why the "tlb-access" and "branch-miss"
events were introduced.

But when I worked on Arm SPE for enabling "perf mem" and "perf c2c", I
recognized that _consuming_ hardware trace data is much more important
than merely outputting samples.  A better way of _consuming_ the Arm SPE
trace data is to synthesize samples with a prominent type and use an
extra field in the sample for the associated attribution.  E.g. we can
synthesize memory samples and use the field "sample.data_src" to
distinguish different memory attributions, thus the events
"tlb-access" and "branch-miss" are not useful.  This approach can also
be applied to instruction events and branch events, both of which use
the field "sample.flags" to indicate the type of instruction or
branch.

If we follow this approach, the records below can be used to
synthesize instruction or branch samples:

  EV_EXCEPTION_GEN
  EV_RETIRED
  EV_NOT_TAKEN

The records below can be used to generate memory samples:

  EV_ALIGNMENT
  EV_PARTIAL_PREDICATE
  EV_EMPTY_PREDICATE

We can consider extending three of the sample's fields:
sample::flags for instruction/branch samples
sample::data_src for memory samples
sample::transaction for memory transactions (see macros with
prefix PERF_TXN_).

> Or does the perf GUI not allow filtering by transaction type?

To be honest, when I introduced the events "tlb-access" and
"branch-miss", I didn't consider transaction type at all.

Thanks,
Leo
Leo Yan Nov. 25, 2021, 12:49 p.m. UTC | #5
On Thu, Nov 25, 2021 at 11:54:16AM +0000, German Gomez wrote:
> Hi Leo,
> 
> On 25/11/2021 07:53, Leo Yan wrote:
> > On Wed, Nov 17, 2021 at 02:28:32PM +0000, German Gomez wrote:
> >> [...]
> > After checked the event types, I think "other" samples would include
> > below raw event types:
> >
> >   EV_EXCEPTION_GEN
> >   EV_RETIRED
> >   EV_NOT_TAKEN
> >   EV_ALIGNMENT
> >   EV_PARTIAL_PREDICATE
> >   EV_EMPTY_PREDICATE
> >
> > I am just wander if we can use sample.transaction to store these event
> > types, otherwise, we cannot distinguish the event type for the samples.
> 
> I'm not familiar with the meaning of transactions in this context, but
> I agree giving visibility of these remaining events is a good idea. I'm
> just unsure where to place them from the available --itrace options.

Please take a look at my other reply; I think it's good to consider
using event type + sample fields (flags/data_src/transaction) for
better expressing samples.

> Regarding the "all" events, we thought having quick access to a  global
> histogram of all the spe events is useful, and from all the --itrace
> options it seemed to fit best under --itrace=o.

Okay, I agree this is a distinct requirement and we can use this way
to generate samples for all SPE records.  But the question is, if a user
reviews "all" events, how can the user easily understand the samples based
only on the field sample::addr?  E.g. we will have no idea if a sample is a
memory operation or a branch operation, and I think the samples for
memory operations will always set sample.addr as 0.

Thanks,
Leo
diff mbox series

Patch

diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index c52755481..57dc12b83 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -6,7 +6,7 @@ 
 		w	synthesize ptwrite events
 		p	synthesize power events (incl. PSB events for Intel PT)
 		o	synthesize other events recorded due to the use
-			of aux-output (refer to perf record)
+			of aux-output (refer to perf record) (all events for Arm SPE)
 		e	synthesize error events
 		d	create a debug log
 		f	synthesize first level cache events
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index ce77abf90..6428351db 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -58,6 +58,7 @@  struct arm_spe {
 	u8				sample_branch;
 	u8				sample_remote_access;
 	u8				sample_memory;
+	u8				sample_other;
 
 	u64				l1d_miss_id;
 	u64				l1d_access_id;
@@ -68,6 +69,7 @@  struct arm_spe {
 	u64				branch_miss_id;
 	u64				remote_access_id;
 	u64				memory_id;
+	u64				all_id;
 
 	u64				kernel_start;
 
@@ -351,6 +353,23 @@  static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 }
 
+static int arm_spe__synth_other_sample(struct arm_spe_queue *speq,
+				       u64 spe_events_id)
+{
+	struct arm_spe *spe = speq->spe;
+	struct arm_spe_record *record = &speq->decoder->record;
+	union perf_event *event = speq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+
+	arm_spe_prep_sample(spe, speq, event, &sample);
+
+	sample.id = spe_events_id;
+	sample.stream_id = spe_events_id;
+	sample.addr = record->to_ip;
+
+	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
 #define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
 			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
 			 ARM_SPE_REMOTE_ACCESS)
@@ -480,6 +499,12 @@  static int arm_spe_sample(struct arm_spe_queue *speq)
 			return err;
 	}
 
+	if (spe->sample_other) {
+		err = arm_spe__synth_other_sample(speq, spe->all_id);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
 
@@ -1107,6 +1132,17 @@  arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
 			return err;
 		spe->memory_id = id;
 		arm_spe_set_event_name(evlist, id, "memory");
+		id += 1;
+	}
+
+	if (spe->synth_opts.other_events) {
+		spe->sample_other = true;
+
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->all_id = id;
+		arm_spe_set_event_name(evlist, id, "all");
 	}
 
 	return 0;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index bbf0d78c6..efe1bdc06 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -74,7 +74,7 @@  enum itrace_period_type {
  * @ptwrites: whether to synthesize events for ptwrites
  * @pwr_events: whether to synthesize power events
  * @other_events: whether to synthesize other events recorded due to the use of
- *                aux_output
+ *                aux_output (all events for Arm SPE)
  * @errors: whether to synthesize decoder error events
  * @dont_decode: whether to skip decoding entirely
  * @log: write a decoding log