Message ID | 20240914215458.751802-4-leo.yan@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | perf arm-spe: Introduce metadata version 2 | expand |
On Sat, Sep 14, 2024 at 10:54:56PM +0100, Leo Yan wrote: > Save the Arm SPE information on a per-CPU basis. This approach is easier > in the decoding phase for retrieving metadata based on the CPU number of > every Arm SPE record. > > Signed-off-by: Leo Yan <leo.yan@arm.com> > --- > tools/perf/arch/arm64/util/arm-spe.c | 71 +++++++++++++++++++++++++++- > 1 file changed, 70 insertions(+), 1 deletion(-) > > diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c > index 15478989ef30..2790a37709a5 100644 > --- a/tools/perf/arch/arm64/util/arm-spe.c > +++ b/tools/perf/arch/arm64/util/arm-spe.c > @@ -26,6 +26,8 @@ > #include "../../../util/arm-spe.h" > #include <tools/libc_compat.h> // reallocarray > > +#define ARM_SPE_CPU_MAGIC 0x1010101010101010ULL > + > #define KiB(x) ((x) * 1024) > #define MiB(x) ((x) * 1024 * 1024) > > @@ -73,14 +75,66 @@ arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused, > return size; > } > > +static int arm_spe_save_cpu_header(struct auxtrace_record *itr, > + struct perf_cpu cpu, __u64 data[]) > +{ > + struct arm_spe_recording *sper = > + container_of(itr, struct arm_spe_recording, itr); > + struct perf_pmu *pmu = NULL; > + struct perf_pmu tmp_pmu; > + char cpu_id_str[16]; > + char *cpuid = NULL; > + u64 val; > + > + snprintf(cpu_id_str, sizeof(cpu_id_str), "%d", cpu.cpu); > + tmp_pmu.cpus = perf_cpu_map__new(cpu_id_str); > + if (!tmp_pmu.cpus) > + return -ENOMEM; > + > + /* Read CPU MIDR */ > + cpuid = perf_pmu__getcpuid(&tmp_pmu); > + if (!cpuid) > + return -ENOMEM; You'd better call perf_cpu_map__put() before return. > + val = strtol(cpuid, NULL, 16); > + perf_cpu_map__put(tmp_pmu.cpus); > + > + data[ARM_SPE_MAGIC] = ARM_SPE_CPU_MAGIC; > + data[ARM_SPE_CPU] = cpu.cpu; > + data[ARM_SPE_CPU_NR_PARAMS] = ARM_SPE_CPU_PRIV_MAX - ARM_SPE_CPU_MIDR; > + data[ARM_SPE_CPU_MIDR] = val; > + > + /* Find the associate Arm SPE PMU for the CPU */ > + if (perf_cpu_map__has(sper->arm_spe_pmu->cpus, cpu)) > + pmu = sper->arm_spe_pmu; > + > + if (!pmu) { > + /* No Arm SPE PMU is found */ > + data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX; > + data[ARM_SPE_CAP_MIN_IVAL] = 0; > + } else { > + data[ARM_SPE_CPU_PMU_TYPE] = pmu->type; > + > + if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1) > + val = 0; > + data[ARM_SPE_CAP_MIN_IVAL] = val; > + } > + > + return ARM_SPE_CPU_PRIV_MAX; > +} > + > static int arm_spe_info_fill(struct auxtrace_record *itr, > struct perf_session *session, > struct perf_record_auxtrace_info *auxtrace_info, > size_t priv_size) > { > + int i, ret; > + size_t offset; > struct arm_spe_recording *sper = > container_of(itr, struct arm_spe_recording, itr); > struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; > + struct perf_cpu_map *cpu_map = arm_spe_find_cpus(session->evlist); Maybe you can move this to later in the function to make the error handling easier. Otherwise it should call perf_cpu_map__put(). > + struct perf_cpu cpu; > + __u64 *data; > > if (priv_size != arm_spe_info_priv_size(itr, session->evlist)) > return -EINVAL; > @@ -89,8 +143,23 @@ static int arm_spe_info_fill(struct auxtrace_record *itr, > return -EINVAL; > > auxtrace_info->type = PERF_AUXTRACE_ARM_SPE; > - auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type; > + auxtrace_info->priv[ARM_SPE_HEADER_VERSION] = ARM_SPE_HEADER_CURRENT_VERSION; > + auxtrace_info->priv[ARM_SPE_HEADER_SIZE] = > + ARM_SPE_AUXTRACE_PRIV_MAX - ARM_SPE_HEADER_VERSION; > + auxtrace_info->priv[ARM_SPE_SHARED_PMU_TYPE] = arm_spe_pmu->type; > + auxtrace_info->priv[ARM_SPE_CPUS_NUM] = perf_cpu_map__nr(cpu_map); > + > + offset = ARM_SPE_AUXTRACE_PRIV_MAX; > + perf_cpu_map__for_each_cpu(cpu, i, cpu_map) { > + assert(offset < priv_size); > + data = &auxtrace_info->priv[offset]; > + ret = arm_spe_save_cpu_header(itr, cpu, data); > + if (ret < 0) > + return ret; Please break the loop and release the cpu map. Thanks, Namhyung > + offset += ret; > + } > > + perf_cpu_map__put(cpu_map); > return 0; > } > > -- > 2.34.1 >
On 9/27/24 07:21, Namhyung Kim wrote:> > On Sat, Sep 14, 2024 at 10:54:56PM +0100, Leo Yan wrote: >> Save the Arm SPE information on a per-CPU basis. This approach is easier >> in the decoding phase for retrieving metadata based on the CPU number of >> every Arm SPE record. >> >> Signed-off-by: Leo Yan <leo.yan@arm.com> >> --- >> tools/perf/arch/arm64/util/arm-spe.c | 71 +++++++++++++++++++++++++++- >> 1 file changed, 70 insertions(+), 1 deletion(-) >> >> diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c >> index 15478989ef30..2790a37709a5 100644 >> --- a/tools/perf/arch/arm64/util/arm-spe.c >> +++ b/tools/perf/arch/arm64/util/arm-spe.c >> @@ -26,6 +26,8 @@ >> #include "../../../util/arm-spe.h" >> #include <tools/libc_compat.h> // reallocarray >> >> +#define ARM_SPE_CPU_MAGIC 0x1010101010101010ULL >> + >> #define KiB(x) ((x) * 1024) >> #define MiB(x) ((x) * 1024 * 1024) >> >> @@ -73,14 +75,66 @@ arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused, >> return size; >> } >> >> +static int arm_spe_save_cpu_header(struct auxtrace_record *itr, >> + struct perf_cpu cpu, __u64 data[]) >> +{ >> + struct arm_spe_recording *sper = >> + container_of(itr, struct arm_spe_recording, itr); >> + struct perf_pmu *pmu = NULL; >> + struct perf_pmu tmp_pmu; >> + char cpu_id_str[16]; >> + char *cpuid = NULL; >> + u64 val; >> + >> + snprintf(cpu_id_str, sizeof(cpu_id_str), "%d", cpu.cpu); >> + tmp_pmu.cpus = perf_cpu_map__new(cpu_id_str); >> + if (!tmp_pmu.cpus) >> + return -ENOMEM; >> + >> + /* Read CPU MIDR */ >> + cpuid = perf_pmu__getcpuid(&tmp_pmu); >> + if (!cpuid) >> + return -ENOMEM; > > You'd better call perf_cpu_map__put() before return. Will do. Just for recording, 'cpuid' should be released at the end of function. >> + val = strtol(cpuid, NULL, 16); >> + perf_cpu_map__put(tmp_pmu.cpus); >> + >> + data[ARM_SPE_MAGIC] = ARM_SPE_CPU_MAGIC; >> + data[ARM_SPE_CPU] = cpu.cpu; >> + data[ARM_SPE_CPU_NR_PARAMS] = ARM_SPE_CPU_PRIV_MAX - ARM_SPE_CPU_MIDR; >> + data[ARM_SPE_CPU_MIDR] = val; >> + >> + /* Find the associate Arm SPE PMU for the CPU */ >> + if (perf_cpu_map__has(sper->arm_spe_pmu->cpus, cpu)) >> + pmu = sper->arm_spe_pmu; >> + >> + if (!pmu) { >> + /* No Arm SPE PMU is found */ >> + data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX; >> + data[ARM_SPE_CAP_MIN_IVAL] = 0; >> + } else { >> + data[ARM_SPE_CPU_PMU_TYPE] = pmu->type; >> + >> + if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1) >> + val = 0; >> + data[ARM_SPE_CAP_MIN_IVAL] = val; >> + } >> + >> + return ARM_SPE_CPU_PRIV_MAX; >> +} >> + >> static int arm_spe_info_fill(struct auxtrace_record *itr, >> struct perf_session *session, >> struct perf_record_auxtrace_info *auxtrace_info, >> size_t priv_size) >> { >> + int i, ret; >> + size_t offset; >> struct arm_spe_recording *sper = >> container_of(itr, struct arm_spe_recording, itr); >> struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; >> + struct perf_cpu_map *cpu_map = arm_spe_find_cpus(session->evlist); > > Maybe you can move this to later in the function to make the error > handling easier. Otherwise it should call perf_cpu_map__put(). Good point. Will do. >> + struct perf_cpu cpu; >> + __u64 *data; >> >> if (priv_size != arm_spe_info_priv_size(itr, session->evlist)) >> return -EINVAL; >> @@ -89,8 +143,23 @@ static int arm_spe_info_fill(struct auxtrace_record *itr, >> return -EINVAL; >> >> auxtrace_info->type = PERF_AUXTRACE_ARM_SPE; >> - auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type; >> + auxtrace_info->priv[ARM_SPE_HEADER_VERSION] = ARM_SPE_HEADER_CURRENT_VERSION; >> + auxtrace_info->priv[ARM_SPE_HEADER_SIZE] = >> + ARM_SPE_AUXTRACE_PRIV_MAX - ARM_SPE_HEADER_VERSION; >> + auxtrace_info->priv[ARM_SPE_SHARED_PMU_TYPE] = arm_spe_pmu->type; >> + auxtrace_info->priv[ARM_SPE_CPUS_NUM] = perf_cpu_map__nr(cpu_map); >> + >> + offset = ARM_SPE_AUXTRACE_PRIV_MAX; >> + perf_cpu_map__for_each_cpu(cpu, i, cpu_map) { >> + assert(offset < priv_size); >> + data = &auxtrace_info->priv[offset]; >> + ret = arm_spe_save_cpu_header(itr, cpu, data); >> + if (ret < 0) >> + return ret; > > Please break the loop and release the cpu map. Will do. Thanks for good catchings! Leo
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 15478989ef30..2790a37709a5 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -26,6 +26,8 @@ #include "../../../util/arm-spe.h" #include <tools/libc_compat.h> // reallocarray +#define ARM_SPE_CPU_MAGIC 0x1010101010101010ULL + #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) @@ -73,14 +75,66 @@ arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused, return size; } +static int arm_spe_save_cpu_header(struct auxtrace_record *itr, + struct perf_cpu cpu, __u64 data[]) +{ + struct arm_spe_recording *sper = + container_of(itr, struct arm_spe_recording, itr); + struct perf_pmu *pmu = NULL; + struct perf_pmu tmp_pmu; + char cpu_id_str[16]; + char *cpuid = NULL; + u64 val; + + snprintf(cpu_id_str, sizeof(cpu_id_str), "%d", cpu.cpu); + tmp_pmu.cpus = perf_cpu_map__new(cpu_id_str); + if (!tmp_pmu.cpus) + return -ENOMEM; + + /* Read CPU MIDR */ + cpuid = perf_pmu__getcpuid(&tmp_pmu); + if (!cpuid) + return -ENOMEM; + val = strtol(cpuid, NULL, 16); + perf_cpu_map__put(tmp_pmu.cpus); + + data[ARM_SPE_MAGIC] = ARM_SPE_CPU_MAGIC; + data[ARM_SPE_CPU] = cpu.cpu; + data[ARM_SPE_CPU_NR_PARAMS] = ARM_SPE_CPU_PRIV_MAX - ARM_SPE_CPU_MIDR; + data[ARM_SPE_CPU_MIDR] = val; + + /* Find the associate Arm SPE PMU for the CPU */ + if (perf_cpu_map__has(sper->arm_spe_pmu->cpus, cpu)) + pmu = sper->arm_spe_pmu; + + if (!pmu) { + /* No Arm SPE PMU is found */ + data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX; + data[ARM_SPE_CAP_MIN_IVAL] = 0; + } else { + data[ARM_SPE_CPU_PMU_TYPE] = pmu->type; + + if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1) + val = 0; + data[ARM_SPE_CAP_MIN_IVAL] = val; + } + + return ARM_SPE_CPU_PRIV_MAX; +} + static int arm_spe_info_fill(struct auxtrace_record *itr, struct perf_session *session, struct perf_record_auxtrace_info *auxtrace_info, size_t priv_size) { + int i, ret; + size_t offset; struct arm_spe_recording *sper = container_of(itr, struct arm_spe_recording, itr); struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; + struct perf_cpu_map *cpu_map = arm_spe_find_cpus(session->evlist); + struct perf_cpu cpu; + __u64 *data; if (priv_size != arm_spe_info_priv_size(itr, session->evlist)) return -EINVAL; @@ -89,8 +143,23 @@ static int arm_spe_info_fill(struct auxtrace_record *itr, return -EINVAL; auxtrace_info->type = PERF_AUXTRACE_ARM_SPE; - auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type; + auxtrace_info->priv[ARM_SPE_HEADER_VERSION] = ARM_SPE_HEADER_CURRENT_VERSION; + auxtrace_info->priv[ARM_SPE_HEADER_SIZE] = + ARM_SPE_AUXTRACE_PRIV_MAX - ARM_SPE_HEADER_VERSION; + auxtrace_info->priv[ARM_SPE_SHARED_PMU_TYPE] = arm_spe_pmu->type; + auxtrace_info->priv[ARM_SPE_CPUS_NUM] = perf_cpu_map__nr(cpu_map); + + offset = ARM_SPE_AUXTRACE_PRIV_MAX; + perf_cpu_map__for_each_cpu(cpu, i, cpu_map) { + assert(offset < priv_size); + data = &auxtrace_info->priv[offset]; + ret = arm_spe_save_cpu_header(itr, cpu, data); + if (ret < 0) + return ret; + offset += ret; + } + perf_cpu_map__put(cpu_map); return 0; }
Save the Arm SPE information on a per-CPU basis. This approach is easier in the decoding phase for retrieving metadata based on the CPU number of every Arm SPE record. Signed-off-by: Leo Yan <leo.yan@arm.com> --- tools/perf/arch/arm64/util/arm-spe.c | 71 +++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-)