diff mbox series

[v2,3/5] perf arm-spe: Save per CPU information in metadata

Message ID 20240914215458.751802-4-leo.yan@arm.com (mailing list archive)
State New, archived
Headers show
Series perf arm-spe: Introduce metadata version 2 | expand

Commit Message

Leo Yan Sept. 14, 2024, 9:54 p.m. UTC
Save the Arm SPE information on a per-CPU basis. This approach is easier
in the decoding phase for retrieving metadata based on the CPU number of
every Arm SPE record.

Signed-off-by: Leo Yan <leo.yan@arm.com>
---
 tools/perf/arch/arm64/util/arm-spe.c | 71 +++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

Comments

Namhyung Kim Sept. 27, 2024, 6:21 a.m. UTC | #1
On Sat, Sep 14, 2024 at 10:54:56PM +0100, Leo Yan wrote:
> Save the Arm SPE information on a per-CPU basis. This approach is easier
> in the decoding phase for retrieving metadata based on the CPU number of
> every Arm SPE record.
> 
> Signed-off-by: Leo Yan <leo.yan@arm.com>
> ---
>  tools/perf/arch/arm64/util/arm-spe.c | 71 +++++++++++++++++++++++++++-
>  1 file changed, 70 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
> index 15478989ef30..2790a37709a5 100644
> --- a/tools/perf/arch/arm64/util/arm-spe.c
> +++ b/tools/perf/arch/arm64/util/arm-spe.c
> @@ -26,6 +26,8 @@
>  #include "../../../util/arm-spe.h"
>  #include <tools/libc_compat.h> // reallocarray
>  
> +#define ARM_SPE_CPU_MAGIC		0x1010101010101010ULL
> +
>  #define KiB(x) ((x) * 1024)
>  #define MiB(x) ((x) * 1024 * 1024)
>  
> @@ -73,14 +75,66 @@ arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
>  	return size;
>  }
>  
> +static int arm_spe_save_cpu_header(struct auxtrace_record *itr,
> +				   struct perf_cpu cpu, __u64 data[])
> +{
> +	struct arm_spe_recording *sper =
> +			container_of(itr, struct arm_spe_recording, itr);
> +	struct perf_pmu *pmu = NULL;
> +	struct perf_pmu tmp_pmu;
> +	char cpu_id_str[16];
> +	char *cpuid = NULL;
> +	u64 val;
> +
> +	snprintf(cpu_id_str, sizeof(cpu_id_str), "%d", cpu.cpu);
> +	tmp_pmu.cpus = perf_cpu_map__new(cpu_id_str);
> +	if (!tmp_pmu.cpus)
> +		return -ENOMEM;
> +
> +	/* Read CPU MIDR */
> +	cpuid = perf_pmu__getcpuid(&tmp_pmu);
> +	if (!cpuid)
> +		return -ENOMEM;

You'd better call perf_cpu_map__put() before return.


> +	val = strtol(cpuid, NULL, 16);
> +	perf_cpu_map__put(tmp_pmu.cpus);
> +
> +	data[ARM_SPE_MAGIC] = ARM_SPE_CPU_MAGIC;
> +	data[ARM_SPE_CPU] = cpu.cpu;
> +	data[ARM_SPE_CPU_NR_PARAMS] = ARM_SPE_CPU_PRIV_MAX - ARM_SPE_CPU_MIDR;
> +	data[ARM_SPE_CPU_MIDR] = val;
> +
> +	/* Find the associate Arm SPE PMU for the CPU */
> +	if (perf_cpu_map__has(sper->arm_spe_pmu->cpus, cpu))
> +		pmu = sper->arm_spe_pmu;
> +
> +	if (!pmu) {
> +		/* No Arm SPE PMU is found */
> +		data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX;
> +		data[ARM_SPE_CAP_MIN_IVAL] = 0;
> +	} else {
> +		data[ARM_SPE_CPU_PMU_TYPE] = pmu->type;
> +
> +		if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1)
> +			val = 0;
> +		data[ARM_SPE_CAP_MIN_IVAL] = val;
> +	}
> +
> +	return ARM_SPE_CPU_PRIV_MAX;
> +}
> +
>  static int arm_spe_info_fill(struct auxtrace_record *itr,
>  			     struct perf_session *session,
>  			     struct perf_record_auxtrace_info *auxtrace_info,
>  			     size_t priv_size)
>  {
> +	int i, ret;
> +	size_t offset;
>  	struct arm_spe_recording *sper =
>  			container_of(itr, struct arm_spe_recording, itr);
>  	struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
> +	struct perf_cpu_map *cpu_map = arm_spe_find_cpus(session->evlist);

Maybe you can move this to later in the function to make the error
handling easier.  Otherwise it should call perf_cpu_map__put().


> +	struct perf_cpu cpu;
> +	__u64 *data;
>  
>  	if (priv_size != arm_spe_info_priv_size(itr, session->evlist))
>  		return -EINVAL;
> @@ -89,8 +143,23 @@ static int arm_spe_info_fill(struct auxtrace_record *itr,
>  		return -EINVAL;
>  
>  	auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
> -	auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type;
> +	auxtrace_info->priv[ARM_SPE_HEADER_VERSION] = ARM_SPE_HEADER_CURRENT_VERSION;
> +	auxtrace_info->priv[ARM_SPE_HEADER_SIZE] =
> +		ARM_SPE_AUXTRACE_PRIV_MAX - ARM_SPE_HEADER_VERSION;
> +	auxtrace_info->priv[ARM_SPE_SHARED_PMU_TYPE] = arm_spe_pmu->type;
> +	auxtrace_info->priv[ARM_SPE_CPUS_NUM] = perf_cpu_map__nr(cpu_map);
> +
> +	offset = ARM_SPE_AUXTRACE_PRIV_MAX;
> +	perf_cpu_map__for_each_cpu(cpu, i, cpu_map) {
> +		assert(offset < priv_size);
> +		data = &auxtrace_info->priv[offset];
> +		ret = arm_spe_save_cpu_header(itr, cpu, data);
> +		if (ret < 0)
> +			return ret;

Please break the loop and release the cpu map.

Thanks,
Namhyung


> +		offset += ret;
> +	}
>  
> +	perf_cpu_map__put(cpu_map);
>  	return 0;
>  }
>  
> -- 
> 2.34.1
>
Leo Yan Sept. 27, 2024, 8:16 a.m. UTC | #2
On 9/27/24 07:21, Namhyung Kim wrote:
> On Sat, Sep 14, 2024 at 10:54:56PM +0100, Leo Yan wrote:
>> Save the Arm SPE information on a per-CPU basis. This approach is easier
>> in the decoding phase for retrieving metadata based on the CPU number of
>> every Arm SPE record.
>>
>> Signed-off-by: Leo Yan <leo.yan@arm.com>
>> ---
>>   tools/perf/arch/arm64/util/arm-spe.c | 71 +++++++++++++++++++++++++++-
>>   1 file changed, 70 insertions(+), 1 deletion(-)
>>
>> diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
>> index 15478989ef30..2790a37709a5 100644
>> --- a/tools/perf/arch/arm64/util/arm-spe.c
>> +++ b/tools/perf/arch/arm64/util/arm-spe.c
>> @@ -26,6 +26,8 @@
>>   #include "../../../util/arm-spe.h"
>>   #include <tools/libc_compat.h> // reallocarray
>>
>> +#define ARM_SPE_CPU_MAGIC            0x1010101010101010ULL
>> +
>>   #define KiB(x) ((x) * 1024)
>>   #define MiB(x) ((x) * 1024 * 1024)
>>
>> @@ -73,14 +75,66 @@ arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
>>        return size;
>>   }
>>
>> +static int arm_spe_save_cpu_header(struct auxtrace_record *itr,
>> +                                struct perf_cpu cpu, __u64 data[])
>> +{
>> +     struct arm_spe_recording *sper =
>> +                     container_of(itr, struct arm_spe_recording, itr);
>> +     struct perf_pmu *pmu = NULL;
>> +     struct perf_pmu tmp_pmu;
>> +     char cpu_id_str[16];
>> +     char *cpuid = NULL;
>> +     u64 val;
>> +
>> +     snprintf(cpu_id_str, sizeof(cpu_id_str), "%d", cpu.cpu);
>> +     tmp_pmu.cpus = perf_cpu_map__new(cpu_id_str);
>> +     if (!tmp_pmu.cpus)
>> +             return -ENOMEM;
>> +
>> +     /* Read CPU MIDR */
>> +     cpuid = perf_pmu__getcpuid(&tmp_pmu);
>> +     if (!cpuid)
>> +             return -ENOMEM;
> 
> You'd better call perf_cpu_map__put() before return.

Will do.

Just for the record, 'cpuid' should also be released at the end of the function.

>> +     val = strtol(cpuid, NULL, 16);
>> +     perf_cpu_map__put(tmp_pmu.cpus);
>> +
>> +     data[ARM_SPE_MAGIC] = ARM_SPE_CPU_MAGIC;
>> +     data[ARM_SPE_CPU] = cpu.cpu;
>> +     data[ARM_SPE_CPU_NR_PARAMS] = ARM_SPE_CPU_PRIV_MAX - ARM_SPE_CPU_MIDR;
>> +     data[ARM_SPE_CPU_MIDR] = val;
>> +
>> +     /* Find the associate Arm SPE PMU for the CPU */
>> +     if (perf_cpu_map__has(sper->arm_spe_pmu->cpus, cpu))
>> +             pmu = sper->arm_spe_pmu;
>> +
>> +     if (!pmu) {
>> +             /* No Arm SPE PMU is found */
>> +             data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX;
>> +             data[ARM_SPE_CAP_MIN_IVAL] = 0;
>> +     } else {
>> +             data[ARM_SPE_CPU_PMU_TYPE] = pmu->type;
>> +
>> +             if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1)
>> +                     val = 0;
>> +             data[ARM_SPE_CAP_MIN_IVAL] = val;
>> +     }
>> +
>> +     return ARM_SPE_CPU_PRIV_MAX;
>> +}
>> +
>>   static int arm_spe_info_fill(struct auxtrace_record *itr,
>>                             struct perf_session *session,
>>                             struct perf_record_auxtrace_info *auxtrace_info,
>>                             size_t priv_size)
>>   {
>> +     int i, ret;
>> +     size_t offset;
>>        struct arm_spe_recording *sper =
>>                        container_of(itr, struct arm_spe_recording, itr);
>>        struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
>> +     struct perf_cpu_map *cpu_map = arm_spe_find_cpus(session->evlist);
> 
> Maybe you can move this to later in the function to make the error
> handling easier.  Otherwise it should call perf_cpu_map__put().

Good point. Will do.

>> +     struct perf_cpu cpu;
>> +     __u64 *data;
>>
>>        if (priv_size != arm_spe_info_priv_size(itr, session->evlist))
>>                return -EINVAL;
>> @@ -89,8 +143,23 @@ static int arm_spe_info_fill(struct auxtrace_record *itr,
>>                return -EINVAL;
>>
>>        auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
>> -     auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type;
>> +     auxtrace_info->priv[ARM_SPE_HEADER_VERSION] = ARM_SPE_HEADER_CURRENT_VERSION;
>> +     auxtrace_info->priv[ARM_SPE_HEADER_SIZE] =
>> +             ARM_SPE_AUXTRACE_PRIV_MAX - ARM_SPE_HEADER_VERSION;
>> +     auxtrace_info->priv[ARM_SPE_SHARED_PMU_TYPE] = arm_spe_pmu->type;
>> +     auxtrace_info->priv[ARM_SPE_CPUS_NUM] = perf_cpu_map__nr(cpu_map);
>> +
>> +     offset = ARM_SPE_AUXTRACE_PRIV_MAX;
>> +     perf_cpu_map__for_each_cpu(cpu, i, cpu_map) {
>> +             assert(offset < priv_size);
>> +             data = &auxtrace_info->priv[offset];
>> +             ret = arm_spe_save_cpu_header(itr, cpu, data);
>> +             if (ret < 0)
>> +                     return ret;
> 
> Please break the loop and release the cpu map.

Will do.

Thanks for the good catches!

Leo
diff mbox series

Patch

diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 15478989ef30..2790a37709a5 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -26,6 +26,8 @@ 
 #include "../../../util/arm-spe.h"
 #include <tools/libc_compat.h> // reallocarray
 
+#define ARM_SPE_CPU_MAGIC		0x1010101010101010ULL
+
 #define KiB(x) ((x) * 1024)
 #define MiB(x) ((x) * 1024 * 1024)
 
@@ -73,14 +75,84 @@  arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
 	return size;
 }
 
+/*
+ * Fill the per-CPU metadata block for @cpu into @data.
+ *
+ * The block holds ARM_SPE_CPU_MAGIC, the CPU number, the parameter count
+ * (ARM_SPE_CPU_PRIV_MAX - ARM_SPE_CPU_MIDR) and the CPU's MIDR, followed by
+ * the type and "caps/min_interval" value of the Arm SPE PMU when its CPU map
+ * contains @cpu. Otherwise the PMU type is ULLONG_MAX and the interval is 0;
+ * the interval is also 0 when "caps/min_interval" cannot be read.
+ *
+ * Return: ARM_SPE_CPU_PRIV_MAX on success, or -ENOMEM if the temporary CPU
+ * map or the CPU ID string cannot be obtained.
+ */
+static int arm_spe_save_cpu_header(struct auxtrace_record *itr,
+				   struct perf_cpu cpu, __u64 data[])
+{
+	struct arm_spe_recording *sper =
+			container_of(itr, struct arm_spe_recording, itr);
+	struct perf_pmu *pmu = NULL;
+	struct perf_pmu tmp_pmu;
+	char cpu_id_str[16];
+	char *cpuid = NULL;
+	u64 val;
+
+	snprintf(cpu_id_str, sizeof(cpu_id_str), "%d", cpu.cpu);
+	tmp_pmu.cpus = perf_cpu_map__new(cpu_id_str);
+	if (!tmp_pmu.cpus)
+		return -ENOMEM;
+
+	/*
+	 * Read CPU MIDR. The temporary map is only used for this lookup, so
+	 * drop the reference before checking the result: that way the failure
+	 * path does not leak it.
+	 */
+	cpuid = perf_pmu__getcpuid(&tmp_pmu);
+	perf_cpu_map__put(tmp_pmu.cpus);
+	if (!cpuid)
+		return -ENOMEM;
+
+	val = strtol(cpuid, NULL, 16);
+	/* The string is not needed once parsed; release it to avoid a leak. */
+	free(cpuid);
+
+	data[ARM_SPE_MAGIC] = ARM_SPE_CPU_MAGIC;
+	data[ARM_SPE_CPU] = cpu.cpu;
+	data[ARM_SPE_CPU_NR_PARAMS] = ARM_SPE_CPU_PRIV_MAX - ARM_SPE_CPU_MIDR;
+	data[ARM_SPE_CPU_MIDR] = val;
+
+	/* Find the associated Arm SPE PMU for the CPU */
+	if (perf_cpu_map__has(sper->arm_spe_pmu->cpus, cpu))
+		pmu = sper->arm_spe_pmu;
+
+	if (!pmu) {
+		/* No Arm SPE PMU is found */
+		data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX;
+		data[ARM_SPE_CAP_MIN_IVAL] = 0;
+	} else {
+		data[ARM_SPE_CPU_PMU_TYPE] = pmu->type;
+
+		if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1)
+			val = 0;
+		data[ARM_SPE_CAP_MIN_IVAL] = val;
+	}
+
+	return ARM_SPE_CPU_PRIV_MAX;
+}
+
 static int arm_spe_info_fill(struct auxtrace_record *itr,
 			     struct perf_session *session,
 			     struct perf_record_auxtrace_info *auxtrace_info,
 			     size_t priv_size)
 {
+	int i, ret;
+	size_t offset;
 	struct arm_spe_recording *sper =
 			container_of(itr, struct arm_spe_recording, itr);
 	struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
+	struct perf_cpu_map *cpu_map = arm_spe_find_cpus(session->evlist);
+	struct perf_cpu cpu;
+	__u64 *data;
 
 	if (priv_size != arm_spe_info_priv_size(itr, session->evlist))
 		return -EINVAL;
@@ -89,8 +143,23 @@  static int arm_spe_info_fill(struct auxtrace_record *itr,
 		return -EINVAL;
 
 	auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
-	auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type;
+	auxtrace_info->priv[ARM_SPE_HEADER_VERSION] = ARM_SPE_HEADER_CURRENT_VERSION;
+	auxtrace_info->priv[ARM_SPE_HEADER_SIZE] =
+		ARM_SPE_AUXTRACE_PRIV_MAX - ARM_SPE_HEADER_VERSION;
+	auxtrace_info->priv[ARM_SPE_SHARED_PMU_TYPE] = arm_spe_pmu->type;
+	auxtrace_info->priv[ARM_SPE_CPUS_NUM] = perf_cpu_map__nr(cpu_map);
+
+	offset = ARM_SPE_AUXTRACE_PRIV_MAX;
+	perf_cpu_map__for_each_cpu(cpu, i, cpu_map) {
+		assert(offset < priv_size);
+		data = &auxtrace_info->priv[offset];
+		ret = arm_spe_save_cpu_header(itr, cpu, data);
+		if (ret < 0)
+			return ret;
+		offset += ret;
+	}
 
+	perf_cpu_map__put(cpu_map);
 	return 0;
 }