Message ID | 20230913153355.138331-2-james.clark@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | perf: strcmp_cpuid_str() expression fixups | expand |
On Wed, Sep 13, 2023 at 8:34 AM James Clark <james.clark@arm.com> wrote: > > pmu__find_core_pmu() more logically belongs in pmus.c because it > iterates over all PMUs, so move it to pmus.c > > At the same time rename it to perf_pmus__find_core_pmu() to match the > naming convention in this file. > > list_prepare_entry() can't be used in perf_pmus__scan_core() anymore now > that it's called from the same compilation unit. This is with -O2 > (specifically -O1 -ftree-vrp -finline-functions > -finline-small-functions) which allow the bounds of the array > access to be determined at compile time. list_prepare_entry() subtracts > the offset of the 'list' member in struct perf_pmu from &core_pmus, > which isn't a struct perf_pmu. The compiler sees that pmu results in > &core_pmus - 8 and refuses to compile. At runtime this works because > list_for_each_entry_continue() always adds the offset back again before > dereferencing ->next, but it's technically undefined behavior. With > -fsanitize=undefined an additional warning is generated. > > Using list_first_entry_or_null() to get the first entry here avoids > doing &core_pmus - 8 but has the same result and fixes both the compile > warning and the undefined behavior warning. There are other uses of > list_prepare_entry() in pmus.c, but the compiler doesn't seem to be > able to see that they can also be called with &core_pmus, so I won't > change any at this time. > > Signed-off-by: James Clark <james.clark@arm.com> Reviewed-by: Ian Rogers <irogers@google.com> Thanks, Ian > --- > tools/perf/arch/arm64/util/pmu.c | 6 +++--- > tools/perf/tests/expr.c | 2 +- > tools/perf/util/expr.c | 2 +- > tools/perf/util/pmu.c | 17 ----------------- > tools/perf/util/pmu.h | 2 +- > tools/perf/util/pmus.c | 20 +++++++++++++++++++- > 6 files changed, 25 insertions(+), 24 deletions(-) > > diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c > index 615084eb88d8..3d9330feebd2 100644 > --- a/tools/perf/arch/arm64/util/pmu.c > +++ b/tools/perf/arch/arm64/util/pmu.c > @@ -10,7 +10,7 @@ > > const struct pmu_metrics_table *pmu_metrics_table__find(void) > { > - struct perf_pmu *pmu = pmu__find_core_pmu(); > + struct perf_pmu *pmu = perf_pmus__find_core_pmu(); > > if (pmu) > return perf_pmu__find_metrics_table(pmu); > @@ -20,7 +20,7 @@ const struct pmu_metrics_table *pmu_metrics_table__find(void) > > const struct pmu_events_table *pmu_events_table__find(void) > { > - struct perf_pmu *pmu = pmu__find_core_pmu(); > + struct perf_pmu *pmu = perf_pmus__find_core_pmu(); > > if (pmu) > return perf_pmu__find_events_table(pmu); > @@ -32,7 +32,7 @@ double perf_pmu__cpu_slots_per_cycle(void) > { > char path[PATH_MAX]; > unsigned long long slots = 0; > - struct perf_pmu *pmu = pmu__find_core_pmu(); > + struct perf_pmu *pmu = perf_pmus__find_core_pmu(); > > if (pmu) { > perf_pmu__pathname_scnprintf(path, sizeof(path), > diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c > index 78544092ef0c..e3aa9d4fcf3a 100644 > --- a/tools/perf/tests/expr.c > +++ b/tools/perf/tests/expr.c > @@ -76,7 +76,7 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u > struct expr_parse_ctx *ctx; > bool is_intel = false; > char strcmp_cpuid_buf[256]; > - struct perf_pmu *pmu = pmu__find_core_pmu(); > + struct perf_pmu *pmu = perf_pmus__find_core_pmu(); > char *cpuid = perf_pmu__getcpuid(pmu); > char *escaped_cpuid1, *escaped_cpuid2; > > diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c > index 4488f306de78..7be23b3ac082 100644 > --- a/tools/perf/util/expr.c > +++ b/tools/perf/util/expr.c > @@ -509,7 +509,7 @@ double expr__strcmp_cpuid_str(const struct expr_parse_ctx *ctx __maybe_unused, > bool compute_ids __maybe_unused, const char *test_id) > { > double ret; > - struct perf_pmu *pmu = pmu__find_core_pmu(); > + struct perf_pmu *pmu = perf_pmus__find_core_pmu(); > char *cpuid = perf_pmu__getcpuid(pmu); > > if (!cpuid) > diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c > index e2159854ab26..f50a5636633f 100644 > --- a/tools/perf/util/pmu.c > +++ b/tools/perf/util/pmu.c > @@ -2050,20 +2050,3 @@ void perf_pmu__delete(struct perf_pmu *pmu) > zfree(&pmu->id); > free(pmu); > } > - > -struct perf_pmu *pmu__find_core_pmu(void) > -{ > - struct perf_pmu *pmu = NULL; > - > - while ((pmu = perf_pmus__scan_core(pmu))) { > - /* > - * The cpumap should cover all CPUs. Otherwise, some CPUs may > - * not support some events or have different event IDs. > - */ > - if (RC_CHK_ACCESS(pmu->cpus)->nr != cpu__max_cpu().cpu) > - return NULL; > - > - return pmu; > - } > - return NULL; > -} > diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h > index bd5d804a6736..d7b46085642d 100644 > --- a/tools/perf/util/pmu.h > +++ b/tools/perf/util/pmu.h > @@ -264,6 +264,6 @@ int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, > struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name); > struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus); > void perf_pmu__delete(struct perf_pmu *pmu); > -struct perf_pmu *pmu__find_core_pmu(void); > +struct perf_pmu *perf_pmus__find_core_pmu(void); > > #endif /* __PMU_H */ > diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c > index 6631367c756f..cec869cbe163 100644 > --- a/tools/perf/util/pmus.c > +++ b/tools/perf/util/pmus.c > @@ -10,6 +10,7 @@ > #include <pthread.h> > #include <string.h> > #include <unistd.h> > +#include "cpumap.h" > #include "debug.h" > #include "evsel.h" > #include "pmus.h" > @@ -268,7 +269,7 @@ struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu) > { > if (!pmu) { > pmu_read_sysfs(/*core_only=*/true); > - pmu = list_prepare_entry(pmu, &core_pmus, list); > + return list_first_entry_or_null(&core_pmus, typeof(*pmu), list); > } > list_for_each_entry_continue(pmu, &core_pmus, list) > return pmu; > @@ -592,3 +593,20 @@ struct perf_pmu *evsel__find_pmu(const struct evsel *evsel) > } > return pmu; > } > + > +struct perf_pmu *perf_pmus__find_core_pmu(void) > +{ > + struct perf_pmu *pmu = NULL; > + > + while ((pmu = perf_pmus__scan_core(pmu))) { > + /* > + * The cpumap should cover all CPUs. Otherwise, some CPUs may > + * not support some events or have different event IDs. > + */ > + if (RC_CHK_ACCESS(pmu->cpus)->nr != cpu__max_cpu().cpu) > + return NULL; > + > + return pmu; > + } > + return NULL; > +} > -- > 2.34.1 >
On 13/09/2023 16:33, James Clark wrote: > pmu__find_core_pmu() more logically belongs in pmus.c because it > iterates over all PMUs, so move it to pmus.c > > At the same time rename it to perf_pmus__find_core_pmu() to match the > naming convention in this file. > > list_prepare_entry() can't be used in perf_pmus__scan_core() anymore now > that it's called from the same compilation unit. This is with -O2 > (specifically -O1 -ftree-vrp -finline-functions > -finline-small-functions) which allow the bounds of the array > access to be determined at compile time. list_prepare_entry() subtracts > the offset of the 'list' member in struct perf_pmu from &core_pmus, > which isn't a struct perf_pmu. The compiler sees that pmu results in > &core_pmus - 8 and refuses to compile. At runtime this works because > list_for_each_entry_continue() always adds the offset back again before > dereferencing ->next, but it's technically undefined behavior. With > -fsanitize=undefined an additional warning is generated. > > Using list_first_entry_or_null() to get the first entry here avoids > doing &core_pmus - 8 but has the same result and fixes both the compile > warning and the undefined behavior warning. There are other uses of > list_prepare_entry() in pmus.c, but the compiler doesn't seem to be > able to see that they can also be called with &core_pmus, so I won't > change any at this time. > > Signed-off-by: James Clark<james.clark@arm.com> > --- Reviewed-by: John Garry <john.g.garry@oracle.com>
diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index 615084eb88d8..3d9330feebd2 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -10,7 +10,7 @@ const struct pmu_metrics_table *pmu_metrics_table__find(void) { - struct perf_pmu *pmu = pmu__find_core_pmu(); + struct perf_pmu *pmu = perf_pmus__find_core_pmu(); if (pmu) return perf_pmu__find_metrics_table(pmu); @@ -20,7 +20,7 @@ const struct pmu_metrics_table *pmu_metrics_table__find(void) const struct pmu_events_table *pmu_events_table__find(void) { - struct perf_pmu *pmu = pmu__find_core_pmu(); + struct perf_pmu *pmu = perf_pmus__find_core_pmu(); if (pmu) return perf_pmu__find_events_table(pmu); @@ -32,7 +32,7 @@ double perf_pmu__cpu_slots_per_cycle(void) { char path[PATH_MAX]; unsigned long long slots = 0; - struct perf_pmu *pmu = pmu__find_core_pmu(); + struct perf_pmu *pmu = perf_pmus__find_core_pmu(); if (pmu) { perf_pmu__pathname_scnprintf(path, sizeof(path), diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 78544092ef0c..e3aa9d4fcf3a 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -76,7 +76,7 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u struct expr_parse_ctx *ctx; bool is_intel = false; char strcmp_cpuid_buf[256]; - struct perf_pmu *pmu = pmu__find_core_pmu(); + struct perf_pmu *pmu = perf_pmus__find_core_pmu(); char *cpuid = perf_pmu__getcpuid(pmu); char *escaped_cpuid1, *escaped_cpuid2; diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 4488f306de78..7be23b3ac082 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -509,7 +509,7 @@ double expr__strcmp_cpuid_str(const struct expr_parse_ctx *ctx __maybe_unused, bool compute_ids __maybe_unused, const char *test_id) { double ret; - struct perf_pmu *pmu = pmu__find_core_pmu(); + struct perf_pmu *pmu = perf_pmus__find_core_pmu(); char *cpuid = perf_pmu__getcpuid(pmu); if (!cpuid) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index e2159854ab26..f50a5636633f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -2050,20 +2050,3 @@ void perf_pmu__delete(struct perf_pmu *pmu) zfree(&pmu->id); free(pmu); } - -struct perf_pmu *pmu__find_core_pmu(void) -{ - struct perf_pmu *pmu = NULL; - - while ((pmu = perf_pmus__scan_core(pmu))) { - /* - * The cpumap should cover all CPUs. Otherwise, some CPUs may - * not support some events or have different event IDs. - */ - if (RC_CHK_ACCESS(pmu->cpus)->nr != cpu__max_cpu().cpu) - return NULL; - - return pmu; - } - return NULL; -} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index bd5d804a6736..d7b46085642d 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -264,6 +264,6 @@ int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name); struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus); void perf_pmu__delete(struct perf_pmu *pmu); -struct perf_pmu *pmu__find_core_pmu(void); +struct perf_pmu *perf_pmus__find_core_pmu(void); #endif /* __PMU_H */ diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index 6631367c756f..cec869cbe163 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -10,6 +10,7 @@ #include <pthread.h> #include <string.h> #include <unistd.h> +#include "cpumap.h" #include "debug.h" #include "evsel.h" #include "pmus.h" @@ -268,7 +269,7 @@ struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu) { if (!pmu) { pmu_read_sysfs(/*core_only=*/true); - pmu = list_prepare_entry(pmu, &core_pmus, list); + return list_first_entry_or_null(&core_pmus, typeof(*pmu), list); } list_for_each_entry_continue(pmu, &core_pmus, list) return pmu; @@ -592,3 +593,20 @@ struct perf_pmu *evsel__find_pmu(const struct evsel *evsel) } return pmu; } + +struct perf_pmu *perf_pmus__find_core_pmu(void) +{ + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmus__scan_core(pmu))) { + /* + * The cpumap should cover all CPUs. Otherwise, some CPUs may + * not support some events or have different event IDs. + */ + if (RC_CHK_ACCESS(pmu->cpus)->nr != cpu__max_cpu().cpu) + return NULL; + + return pmu; + } + return NULL; +}
pmu__find_core_pmu() more logically belongs in pmus.c because it iterates over all PMUs, so move it to pmus.c At the same time rename it to perf_pmus__find_core_pmu() to match the naming convention in this file. list_prepare_entry() can't be used in perf_pmus__scan_core() anymore now that it's called from the same compilation unit. This is with -O2 (specifically -O1 -ftree-vrp -finline-functions -finline-small-functions) which allow the bounds of the array access to be determined at compile time. list_prepare_entry() subtracts the offset of the 'list' member in struct perf_pmu from &core_pmus, which isn't a struct perf_pmu. The compiler sees that pmu results in &core_pmus - 8 and refuses to compile. At runtime this works because list_for_each_entry_continue() always adds the offset back again before dereferencing ->next, but it's technically undefined behavior. With -fsanitize=undefined an additional warning is generated. Using list_first_entry_or_null() to get the first entry here avoids doing &core_pmus - 8 but has the same result and fixes both the compile warning and the undefined behavior warning. There are other uses of list_prepare_entry() in pmus.c, but the compiler doesn't seem to be able to see that they can also be called with &core_pmus, so I won't change any at this time. Signed-off-by: James Clark <james.clark@arm.com> --- tools/perf/arch/arm64/util/pmu.c | 6 +++--- tools/perf/tests/expr.c | 2 +- tools/perf/util/expr.c | 2 +- tools/perf/util/pmu.c | 17 ----------------- tools/perf/util/pmu.h | 2 +- tools/perf/util/pmus.c | 20 +++++++++++++++++++- 6 files changed, 25 insertions(+), 24 deletions(-)