diff mbox series

[v2,4/5] perf arm-spe: Support metadata version 2

Message ID 20240914215458.751802-5-leo.yan@arm.com (mailing list archive)
State New, archived
Headers show
Series perf arm-spe: Introduce metadata version 2 | expand

Commit Message

Leo Yan Sept. 14, 2024, 9:54 p.m. UTC
This commit is to support metadata version 2 and at the meantime it is
backward compatible for version 1's format.

The metadata version 1 doesn't include the ARM_SPE_HEADER_VERSION field.
As version 1 is fixed with two u64 fields, by checking the metadata
size, it distinguishes the metadata is version 1 or version 2 (and any
new versions if later will have). For version 2, it reads out CPU number
and retrieves the metadata info for every CPU.

Signed-off-by: Leo Yan <leo.yan@arm.com>
---
 tools/perf/util/arm-spe.c | 95 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 92 insertions(+), 3 deletions(-)

Comments

Namhyung Kim Sept. 27, 2024, 6:35 a.m. UTC | #1
On Sat, Sep 14, 2024 at 10:54:57PM +0100, Leo Yan wrote:
> This commit is to support metadata version 2 and at the meantime it is
> backward compatible for version 1's format.
> 
> The metadata version 1 doesn't include the ARM_SPE_HEADER_VERSION field.
> As version 1 is fixed with two u64 fields, by checking the metadata
> size, it distinguishes the metadata is version 1 or version 2 (and any
> new versions if later will have). For version 2, it reads out CPU number
> and retrieves the metadata info for every CPU.
> 
> Signed-off-by: Leo Yan <leo.yan@arm.com>
> ---
>  tools/perf/util/arm-spe.c | 95 +++++++++++++++++++++++++++++++++++++--
>  1 file changed, 92 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index 70989b1bae47..17782cb40fb5 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -78,6 +78,10 @@ struct arm_spe {
>  
>  	unsigned long			num_events;
>  	u8				use_ctx_pkt_for_pid;
> +
> +	u64				**metadata;
> +	u64				metadata_ver;
> +	u64				metadata_nr_cpu;
>  };
>  
>  struct arm_spe_queue {
> @@ -1016,6 +1020,73 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused,
>  	return 0;
>  }
>  
> +static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int cpu_size)
> +{
> +	u64 *metadata;
> +
> +	metadata = zalloc(sizeof(*metadata) * cpu_size);

Maybe calloc() is slightly better here, but not a strong opinion.


> +	if (!metadata)
> +		return NULL;
> +
> +	memcpy(metadata, buf, cpu_size);

I'm not sure if it's correct since you allocated cpu_size * 8 and copies
only cpu_size.


> +	return metadata;
> +}
> +
> +static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
> +{
> +	int i;
> +
> +	for (i = 0; i < nr_cpu; i++)
> +		zfree(&metadata[i]);
> +	free(metadata);
> +}
> +
> +static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
> +				     u64 *ver, int *nr_cpu)
> +{
> +	u64 *ptr = (u64 *)info->priv;
> +	u64 metadata_size;
> +	u64 **metadata = NULL;
> +	int hdr_sz, cpu_sz, i;
> +
> +	metadata_size = info->header.size -
> +		sizeof(struct perf_record_auxtrace_info);
> +
> +	/* Metadata version 1 */
> +	if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
> +		*ver = 1;
> +		*nr_cpu = 0;
> +		/* No per CPU metadata */
> +		return NULL;
> +	}
> +
> +	*ver = ptr[ARM_SPE_HEADER_VERSION];
> +	hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
> +	*nr_cpu = ptr[ARM_SPE_CPUS_NUM];
> +
> +	metadata = zalloc(sizeof(*metadata) * (*nr_cpu));

calloc() instead?  But probably better defining a struct for metadata.

Thanks,
Namhyung


> +	if (!metadata)
> +		return NULL;
> +
> +	/* Locate the start address of per CPU metadata */
> +	ptr += hdr_sz;
> +	cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu);
> +
> +	for (i = 0; i < *nr_cpu; i++) {
> +		metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, cpu_sz);
> +		if (!metadata[i])
> +			goto err_per_cpu_metadata;
> +
> +		ptr += cpu_sz / sizeof(u64);
> +	}
> +
> +	return metadata;
> +
> +err_per_cpu_metadata:
> +	arm_spe__free_metadata(metadata, *nr_cpu);
> +	return NULL;
> +}
> +
>  static void arm_spe_free_queue(void *priv)
>  {
>  	struct arm_spe_queue *speq = priv;
> @@ -1050,6 +1121,7 @@ static void arm_spe_free(struct perf_session *session)
>  	auxtrace_heap__free(&spe->heap);
>  	arm_spe_free_events(session);
>  	session->auxtrace = NULL;
> +	arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
>  	free(spe);
>  }
>  
> @@ -1267,15 +1339,24 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
>  	const char *cpuid = perf_env__cpuid(session->evlist->env);
>  	u64 midr = strtol(cpuid, NULL, 16);
>  	struct arm_spe *spe;
> -	int err;
> +	u64 **metadata = NULL;
> +	u64 metadata_ver;
> +	int nr_cpu, err;
>  
>  	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
>  					min_sz)
>  		return -EINVAL;
>  
> +	metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
> +					   &nr_cpu);
> +	if (!metadata && metadata_ver != 1) {
> +		pr_err("Failed to parse Arm SPE metadata.\n");
> +		return -EINVAL;
> +	}
> +
>  	spe = zalloc(sizeof(struct arm_spe));
>  	if (!spe)
> -		return -ENOMEM;
> +		goto err_free_metadata;
>  
>  	err = auxtrace_queues__init(&spe->queues);
>  	if (err)
> @@ -1284,8 +1365,14 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
>  	spe->session = session;
>  	spe->machine = &session->machines.host; /* No kvm support */
>  	spe->auxtrace_type = auxtrace_info->type;
> -	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
> +	if (metadata_ver == 1)
> +		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
> +	else
> +		spe->pmu_type = auxtrace_info->priv[ARM_SPE_SHARED_PMU_TYPE];
>  	spe->midr = midr;
> +	spe->metadata = metadata;
> +	spe->metadata_ver = metadata_ver;
> +	spe->metadata_nr_cpu = nr_cpu;
>  
>  	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
>  
> @@ -1346,5 +1433,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
>  	session->auxtrace = NULL;
>  err_free:
>  	free(spe);
> +err_free_metadata:
> +	arm_spe__free_metadata(metadata, nr_cpu);
>  	return err;
>  }
> -- 
> 2.34.1
>
Leo Yan Sept. 27, 2024, 8:32 a.m. UTC | #2
On 9/27/24 07:35, Namhyung Kim wrote:
> On Sat, Sep 14, 2024 at 10:54:57PM +0100, Leo Yan wrote:
>> This commit is to support metadata version 2 and at the meantime it is
>> backward compatible for version 1's format.
>>
>> The metadata version 1 doesn't include the ARM_SPE_HEADER_VERSION field.
>> As version 1 is fixed with two u64 fields, by checking the metadata
>> size, it distinguishes the metadata is version 1 or version 2 (and any
>> new versions if later will have). For version 2, it reads out CPU number
>> and retrieves the metadata info for every CPU.
>>
>> Signed-off-by: Leo Yan <leo.yan@arm.com>
>> ---
>>   tools/perf/util/arm-spe.c | 95 +++++++++++++++++++++++++++++++++++++--
>>   1 file changed, 92 insertions(+), 3 deletions(-)
>>
>> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
>> index 70989b1bae47..17782cb40fb5 100644
>> --- a/tools/perf/util/arm-spe.c
>> +++ b/tools/perf/util/arm-spe.c
>> @@ -78,6 +78,10 @@ struct arm_spe {
>>
>>        unsigned long                   num_events;
>>        u8                              use_ctx_pkt_for_pid;
>> +
>> +     u64                             **metadata;
>> +     u64                             metadata_ver;
>> +     u64                             metadata_nr_cpu;
>>   };
>>
>>   struct arm_spe_queue {
>> @@ -1016,6 +1020,73 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused,
>>        return 0;
>>   }
>>
>> +static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int cpu_size)
>> +{
>> +     u64 *metadata;
>> +
>> +     metadata = zalloc(sizeof(*metadata) * cpu_size);
> 
> Maybe calloc() is slightly better here, but not a strong opinion.

Ah, here 'cpu_size' is the size of per CPU's metadata in bytes. It
should not multiply sizeof(*metadata), though this will not lead buffer
overwrite issue as it allocates a bigger buffer than used.

I will rename the 'cpu_size' argument to 'per_cpu_size'.

>> +     if (!metadata)
>> +             return NULL;
>> +
>> +     memcpy(metadata, buf, cpu_size);
> 
> I'm not sure if it's correct since you allocated cpu_size * 8 and copies
> only cpu_size.

As described above, here it is correct as the unit of 'cpu_size' is in
bytes.

>> +     return metadata;
>> +}
>> +
>> +static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
>> +{
>> +     int i;
>> +
>> +     for (i = 0; i < nr_cpu; i++)
>> +             zfree(&metadata[i]);
>> +     free(metadata);
>> +}
>> +
>> +static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
>> +                                  u64 *ver, int *nr_cpu)
>> +{
>> +     u64 *ptr = (u64 *)info->priv;
>> +     u64 metadata_size;
>> +     u64 **metadata = NULL;
>> +     int hdr_sz, cpu_sz, i;
>> +
>> +     metadata_size = info->header.size -
>> +             sizeof(struct perf_record_auxtrace_info);
>> +
>> +     /* Metadata version 1 */
>> +     if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
>> +             *ver = 1;
>> +             *nr_cpu = 0;
>> +             /* No per CPU metadata */
>> +             return NULL;
>> +     }
>> +
>> +     *ver = ptr[ARM_SPE_HEADER_VERSION];
>> +     hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
>> +     *nr_cpu = ptr[ARM_SPE_CPUS_NUM];
>> +
>> +     metadata = zalloc(sizeof(*metadata) * (*nr_cpu));
> 
> calloc() instead?  But probably better defining a struct for metadata.

I will change to calloc(). It allocates a pointer array, so I will
try to change to 'u64 *metadata[] = NULL;' for clear definition.

Thanks,
Leo
diff mbox series

Patch

diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 70989b1bae47..17782cb40fb5 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -78,6 +78,10 @@  struct arm_spe {
 
 	unsigned long			num_events;
 	u8				use_ctx_pkt_for_pid;
+
+	u64				**metadata;
+	u64				metadata_ver;
+	u64				metadata_nr_cpu;
 };
 
 struct arm_spe_queue {
@@ -1016,6 +1020,73 @@  static int arm_spe_flush(struct perf_session *session __maybe_unused,
 	return 0;
 }
 
+static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int cpu_size)
+{
+	u64 *metadata;
+
+	metadata = zalloc(sizeof(*metadata) * cpu_size);
+	if (!metadata)
+		return NULL;
+
+	memcpy(metadata, buf, cpu_size);
+	return metadata;
+}
+
+static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
+{
+	int i;
+
+	for (i = 0; i < nr_cpu; i++)
+		zfree(&metadata[i]);
+	free(metadata);
+}
+
+static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
+				     u64 *ver, int *nr_cpu)
+{
+	u64 *ptr = (u64 *)info->priv;
+	u64 metadata_size;
+	u64 **metadata = NULL;
+	int hdr_sz, cpu_sz, i;
+
+	metadata_size = info->header.size -
+		sizeof(struct perf_record_auxtrace_info);
+
+	/* Metadata version 1 */
+	if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
+		*ver = 1;
+		*nr_cpu = 0;
+		/* No per CPU metadata */
+		return NULL;
+	}
+
+	*ver = ptr[ARM_SPE_HEADER_VERSION];
+	hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
+	*nr_cpu = ptr[ARM_SPE_CPUS_NUM];
+
+	metadata = zalloc(sizeof(*metadata) * (*nr_cpu));
+	if (!metadata)
+		return NULL;
+
+	/* Locate the start address of per CPU metadata */
+	ptr += hdr_sz;
+	cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu);
+
+	for (i = 0; i < *nr_cpu; i++) {
+		metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, cpu_sz);
+		if (!metadata[i])
+			goto err_per_cpu_metadata;
+
+		ptr += cpu_sz / sizeof(u64);
+	}
+
+	return metadata;
+
+err_per_cpu_metadata:
+	arm_spe__free_metadata(metadata, *nr_cpu);
+	return NULL;
+}
+
 static void arm_spe_free_queue(void *priv)
 {
 	struct arm_spe_queue *speq = priv;
@@ -1050,6 +1121,7 @@  static void arm_spe_free(struct perf_session *session)
 	auxtrace_heap__free(&spe->heap);
 	arm_spe_free_events(session);
 	session->auxtrace = NULL;
+	arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
 	free(spe);
 }
 
@@ -1267,15 +1339,24 @@  int arm_spe_process_auxtrace_info(union perf_event *event,
 	const char *cpuid = perf_env__cpuid(session->evlist->env);
 	u64 midr = strtol(cpuid, NULL, 16);
 	struct arm_spe *spe;
-	int err;
+	u64 **metadata = NULL;
+	u64 metadata_ver;
+	int nr_cpu, err;
 
 	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
 					min_sz)
 		return -EINVAL;
 
+	metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
+					   &nr_cpu);
+	if (!metadata && metadata_ver != 1) {
+		pr_err("Failed to parse Arm SPE metadata.\n");
+		return -EINVAL;
+	}
+
 	spe = zalloc(sizeof(struct arm_spe));
 	if (!spe)
-		return -ENOMEM;
+		goto err_free_metadata;
 
 	err = auxtrace_queues__init(&spe->queues);
 	if (err)
@@ -1284,8 +1365,14 @@  int arm_spe_process_auxtrace_info(union perf_event *event,
 	spe->session = session;
 	spe->machine = &session->machines.host; /* No kvm support */
 	spe->auxtrace_type = auxtrace_info->type;
-	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
+	if (metadata_ver == 1)
+		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
+	else
+		spe->pmu_type = auxtrace_info->priv[ARM_SPE_SHARED_PMU_TYPE];
 	spe->midr = midr;
+	spe->metadata = metadata;
+	spe->metadata_ver = metadata_ver;
+	spe->metadata_nr_cpu = nr_cpu;
 
 	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
 
@@ -1346,5 +1433,7 @@  int arm_spe_process_auxtrace_info(union perf_event *event,
 	session->auxtrace = NULL;
 err_free:
 	free(spe);
+err_free_metadata:
+	arm_spe__free_metadata(metadata, nr_cpu);
 	return err;
 }