Message ID | 20240914215458.751802-5-leo.yan@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | perf arm-spe: Introduce metadata version 2 | expand |
On Sat, Sep 14, 2024 at 10:54:57PM +0100, Leo Yan wrote: > This commit is to support metadata version 2 and at the meantime it is > backward compatible for version 1's format. > > The metadata version 1 doesn't include the ARM_SPE_HEADER_VERSION field. > As version 1 is fixed with two u64 fields, by checking the metadata > size, it distinguishes the metadata is version 1 or version 2 (and any > new versions if later will have). For version 2, it reads out CPU number > and retrieves the metadata info for every CPU. > > Signed-off-by: Leo Yan <leo.yan@arm.com> > --- > tools/perf/util/arm-spe.c | 95 +++++++++++++++++++++++++++++++++++++-- > 1 file changed, 92 insertions(+), 3 deletions(-) > > diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c > index 70989b1bae47..17782cb40fb5 100644 > --- a/tools/perf/util/arm-spe.c > +++ b/tools/perf/util/arm-spe.c > @@ -78,6 +78,10 @@ struct arm_spe { > > unsigned long num_events; > u8 use_ctx_pkt_for_pid; > + > + u64 **metadata; > + u64 metadata_ver; > + u64 metadata_nr_cpu; > }; > > struct arm_spe_queue { > @@ -1016,6 +1020,73 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused, > return 0; > } > > +static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int cpu_size) > +{ > + u64 *metadata; > + > + metadata = zalloc(sizeof(*metadata) * cpu_size); Maybe calloc() is slightly better here, but not a strong opinion. > + if (!metadata) > + return NULL; > + > + memcpy(metadata, buf, cpu_size); I'm not sure if it's correct since you allocated cpu_size * 8 and copies only cpu_size. > + return metadata; > +} > + > +static void arm_spe__free_metadata(u64 **metadata, int nr_cpu) > +{ > + int i; > + > + for (i = 0; i < nr_cpu; i++) > + zfree(&metadata[i]); > + free(metadata); > +} > + > +static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info, > + u64 *ver, int *nr_cpu) > +{ > + u64 *ptr = (u64 *)info->priv; > + u64 metadata_size; > + u64 **metadata = NULL; > + int hdr_sz, cpu_sz, i; > + > + metadata_size = info->header.size - > + sizeof(struct perf_record_auxtrace_info); > + > + /* Metadata version 1 */ > + if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) { > + *ver = 1; > + *nr_cpu = 0; > + /* No per CPU metadata */ > + return NULL; > + } > + > + *ver = ptr[ARM_SPE_HEADER_VERSION]; > + hdr_sz = ptr[ARM_SPE_HEADER_SIZE]; > + *nr_cpu = ptr[ARM_SPE_CPUS_NUM]; > + > + metadata = zalloc(sizeof(*metadata) * (*nr_cpu)); calloc() instead? But probably better defining a struct for metadata. Thanks, Namhyung > + if (!metadata) > + return NULL; > + > + /* Locate the start address of per CPU metadata */ > + ptr += hdr_sz; > + cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu); > + > + for (i = 0; i < *nr_cpu; i++) { > + metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, cpu_sz); > + if (!metadata[i]) > + goto err_per_cpu_metadata; > + > + ptr += cpu_sz / sizeof(u64); > + } > + > + return metadata; > + > +err_per_cpu_metadata: > + arm_spe__free_metadata(metadata, *nr_cpu); > + return NULL; > +} > + > static void arm_spe_free_queue(void *priv) > { > struct arm_spe_queue *speq = priv; > @@ -1050,6 +1121,7 @@ static void arm_spe_free(struct perf_session *session) > auxtrace_heap__free(&spe->heap); > arm_spe_free_events(session); > session->auxtrace = NULL; > + arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu); > free(spe); > } > > @@ -1267,15 +1339,24 @@ int arm_spe_process_auxtrace_info(union perf_event *event, > const char *cpuid = perf_env__cpuid(session->evlist->env); > u64 midr = strtol(cpuid, NULL, 16); > struct arm_spe *spe; > - int err; > + u64 **metadata = NULL; > + u64 metadata_ver; > + int nr_cpu, err; > > if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + > min_sz) > return -EINVAL; > > + metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver, > + &nr_cpu); > + if (!metadata && metadata_ver != 1) { > + pr_err("Failed to parse Arm SPE metadata.\n"); > + return -EINVAL; > + } > + > spe = zalloc(sizeof(struct arm_spe)); > if (!spe) > - return -ENOMEM; > + goto err_free_metadata; > > err = auxtrace_queues__init(&spe->queues); > if (err) > @@ -1284,8 +1365,14 @@ int arm_spe_process_auxtrace_info(union perf_event *event, > spe->session = session; > spe->machine = &session->machines.host; /* No kvm support */ > spe->auxtrace_type = auxtrace_info->type; > - spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; > + if (metadata_ver == 1) > + spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; > + else > + spe->pmu_type = auxtrace_info->priv[ARM_SPE_SHARED_PMU_TYPE]; > spe->midr = midr; > + spe->metadata = metadata; > + spe->metadata_ver = metadata_ver; > + spe->metadata_nr_cpu = nr_cpu; > > spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); > > @@ -1346,5 +1433,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event, > session->auxtrace = NULL; > err_free: > free(spe); > +err_free_metadata: > + arm_spe__free_metadata(metadata, nr_cpu); > return err; > } > -- > 2.34.1 >
On 9/27/24 07:35, Namhyung Kim wrote: > On Sat, Sep 14, 2024 at 10:54:57PM +0100, Leo Yan wrote: >> This commit is to support metadata version 2 and at the meantime it is >> backward compatible for version 1's format. >> >> The metadata version 1 doesn't include the ARM_SPE_HEADER_VERSION field. >> As version 1 is fixed with two u64 fields, by checking the metadata >> size, it distinguishes the metadata is version 1 or version 2 (and any >> new versions if later will have). For version 2, it reads out CPU number >> and retrieves the metadata info for every CPU. >> >> Signed-off-by: Leo Yan <leo.yan@arm.com> >> --- >> tools/perf/util/arm-spe.c | 95 +++++++++++++++++++++++++++++++++++++-- >> 1 file changed, 92 insertions(+), 3 deletions(-) >> >> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c >> index 70989b1bae47..17782cb40fb5 100644 >> --- a/tools/perf/util/arm-spe.c >> +++ b/tools/perf/util/arm-spe.c >> @@ -78,6 +78,10 @@ struct arm_spe { >> >> unsigned long num_events; >> u8 use_ctx_pkt_for_pid; >> + >> + u64 **metadata; >> + u64 metadata_ver; >> + u64 metadata_nr_cpu; >> }; >> >> struct arm_spe_queue { >> @@ -1016,6 +1020,73 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused, >> return 0; >> } >> >> +static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int cpu_size) >> +{ >> + u64 *metadata; >> + >> + metadata = zalloc(sizeof(*metadata) * cpu_size); > > Maybe calloc() is slightly better here, but not a strong opinion. Ah, here 'cpu_size' is the size of per CPU's metadata in bytes. It should not multiply sizeof(*metadata), though this will not lead buffer overwrite issue as it allocates a bigger buffer than used. I will rename the 'cpu_size' argument to 'per_cpu_size'. >> + if (!metadata) >> + return NULL; >> + >> + memcpy(metadata, buf, cpu_size); > > I'm not sure if it's correct since you allocated cpu_size * 8 and copies > only cpu_size. As described above, here it is correct as the unit of 'cpu_size' is in bytes. >> + return metadata; >> +} >> + >> +static void arm_spe__free_metadata(u64 **metadata, int nr_cpu) >> +{ >> + int i; >> + >> + for (i = 0; i < nr_cpu; i++) >> + zfree(&metadata[i]); >> + free(metadata); >> +} >> + >> +static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info, >> + u64 *ver, int *nr_cpu) >> +{ >> + u64 *ptr = (u64 *)info->priv; >> + u64 metadata_size; >> + u64 **metadata = NULL; >> + int hdr_sz, cpu_sz, i; >> + >> + metadata_size = info->header.size - >> + sizeof(struct perf_record_auxtrace_info); >> + >> + /* Metadata version 1 */ >> + if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) { >> + *ver = 1; >> + *nr_cpu = 0; >> + /* No per CPU metadata */ >> + return NULL; >> + } >> + >> + *ver = ptr[ARM_SPE_HEADER_VERSION]; >> + hdr_sz = ptr[ARM_SPE_HEADER_SIZE]; >> + *nr_cpu = ptr[ARM_SPE_CPUS_NUM]; >> + >> + metadata = zalloc(sizeof(*metadata) * (*nr_cpu)); > > calloc() instead? But probably better defining a struct for metadata. I will change to calloc(). It allocates a pointer array, so I will try to change to 'u64 *metadata[] = NULL;' for clear definition. Thanks, Leo
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 70989b1bae47..17782cb40fb5 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -78,6 +78,10 @@ struct arm_spe { unsigned long num_events; u8 use_ctx_pkt_for_pid; + + u64 **metadata; + u64 metadata_ver; + u64 metadata_nr_cpu; }; struct arm_spe_queue { @@ -1016,6 +1020,73 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused, return 0; } +static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int cpu_size) +{ + u64 *metadata; + + metadata = zalloc(sizeof(*metadata) * cpu_size); + if (!metadata) + return NULL; + + memcpy(metadata, buf, cpu_size); + return metadata; +} + +static void arm_spe__free_metadata(u64 **metadata, int nr_cpu) +{ + int i; + + for (i = 0; i < nr_cpu; i++) + zfree(&metadata[i]); + free(metadata); +} + +static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info, + u64 *ver, int *nr_cpu) +{ + u64 *ptr = (u64 *)info->priv; + u64 metadata_size; + u64 **metadata = NULL; + int hdr_sz, cpu_sz, i; + + metadata_size = info->header.size - + sizeof(struct perf_record_auxtrace_info); + + /* Metadata version 1 */ + if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) { + *ver = 1; + *nr_cpu = 0; + /* No per CPU metadata */ + return NULL; + } + + *ver = ptr[ARM_SPE_HEADER_VERSION]; + hdr_sz = ptr[ARM_SPE_HEADER_SIZE]; + *nr_cpu = ptr[ARM_SPE_CPUS_NUM]; + + metadata = zalloc(sizeof(*metadata) * (*nr_cpu)); + if (!metadata) + return NULL; + + /* Locate the start address of per CPU metadata */ + ptr += hdr_sz; + cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu); + + for (i = 0; i < *nr_cpu; i++) { + metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, cpu_sz); + if (!metadata[i]) + goto err_per_cpu_metadata; + + ptr += cpu_sz / sizeof(u64); + } + + return metadata; + +err_per_cpu_metadata: + arm_spe__free_metadata(metadata, *nr_cpu); + return NULL; +} + static void arm_spe_free_queue(void *priv) { struct arm_spe_queue *speq = priv; @@ -1050,6 +1121,7 @@ static void arm_spe_free(struct perf_session *session) auxtrace_heap__free(&spe->heap); arm_spe_free_events(session); session->auxtrace = NULL; + arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu); free(spe); } @@ -1267,15 +1339,24 @@ int arm_spe_process_auxtrace_info(union perf_event *event, const char *cpuid = perf_env__cpuid(session->evlist->env); u64 midr = strtol(cpuid, NULL, 16); struct arm_spe *spe; - int err; + u64 **metadata = NULL; + u64 metadata_ver; + int nr_cpu, err; if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + min_sz) return -EINVAL; + metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver, + &nr_cpu); + if (!metadata && metadata_ver != 1) { + pr_err("Failed to parse Arm SPE metadata.\n"); + return -EINVAL; + } + spe = zalloc(sizeof(struct arm_spe)); if (!spe) - return -ENOMEM; + goto err_free_metadata; err = auxtrace_queues__init(&spe->queues); if (err) @@ -1284,8 +1365,14 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->session = session; spe->machine = &session->machines.host; /* No kvm support */ spe->auxtrace_type = auxtrace_info->type; - spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; + if (metadata_ver == 1) + spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; + else + spe->pmu_type = auxtrace_info->priv[ARM_SPE_SHARED_PMU_TYPE]; spe->midr = midr; + spe->metadata = metadata; + spe->metadata_ver = metadata_ver; + spe->metadata_nr_cpu = nr_cpu; spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); @@ -1346,5 +1433,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event, session->auxtrace = NULL; err_free: free(spe); +err_free_metadata: + arm_spe__free_metadata(metadata, nr_cpu); return err; }
This commit is to support metadata version 2 and at the meantime it is backward compatible for version 1's format. The metadata version 1 doesn't include the ARM_SPE_HEADER_VERSION field. As version 1 is fixed with two u64 fields, by checking the metadata size, it distinguishes the metadata is version 1 or version 2 (and any new versions if later will have). For version 2, it reads out CPU number and retrieves the metadata info for every CPU. Signed-off-by: Leo Yan <leo.yan@arm.com> --- tools/perf/util/arm-spe.c | 95 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 3 deletions(-)