diff mbox series

[v2,6/7] perf pmu-events: Remember the perf_events_map for a PMU

Message ID 20231012175645.1849503-7-irogers@google.com (mailing list archive)
State New, archived
Headers show
Series PMU performance improvements | expand

Commit Message

Ian Rogers Oct. 12, 2023, 5:56 p.m. UTC
strcmp_cpuid_str performs regular expression comparisons, so the
per-CPUID linear searches over the pmu_events_map are expensive. Add a
helper function called map_for_pmu that does the search but also
caches the map specific to a PMU. As the PMU may differ, also cache
the CPUID string so that PMUs with the same CPUID string don't require
the linear search and regular expression comparisons. This speeds up
loading PMUs as the search is done once per PMU to find the
appropriate tables.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/pmu-events/jevents.py | 109 ++++++++++++++++++++-----------
 1 file changed, 70 insertions(+), 39 deletions(-)

Comments

Yang Jihong Oct. 16, 2023, 8:48 a.m. UTC | #1
Hello,

On 2023/10/13 1:56, Ian Rogers wrote:
> strcmp_cpuid_str performs regular expression comparisons and so per
> CPUID linear searches over the perf_events_map are expensive. Add a
> helper function called map_for_pmu that does the search but also
> caches the map specific to a PMU. As the PMU may differ, also cache
> the CPUID string so that PMUs with the same CPUID string don't require
> the linear search and regular expression comparisons. This speeds
> loading PMUs as the search is done once per PMU to find the
> appropriate tables.
> 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>   tools/perf/pmu-events/jevents.py | 109 ++++++++++++++++++++-----------
>   1 file changed, 70 insertions(+), 39 deletions(-)
> 
> diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
> index 96dc74c90b20..3c091ab75305 100755
> --- a/tools/perf/pmu-events/jevents.py
> +++ b/tools/perf/pmu-events/jevents.py
> @@ -976,68 +976,99 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
>           return 0;
>   }
>   
> -const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
> +static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu)
>   {
> -        const struct pmu_events_table *table = NULL;
> -        char *cpuid = perf_pmu__getcpuid(pmu);
> +        static struct {
> +                const struct pmu_events_map *map;
> +                struct perf_pmu *pmu;
> +        } last_result;
> +        static struct {
> +                const struct pmu_events_map *map;
> +                char *cpuid;
> +        } last_map_search;
> +        static bool has_last_result, has_last_map_search;
> +        const struct pmu_events_map *map = NULL;
> +        char *cpuid = NULL;
>           size_t i;
>   
> -        /* on some platforms which uses cpus map, cpuid can be NULL for
> +        if (has_last_result && last_result.pmu == pmu)
> +                return last_result.map;
> +
Currently, perf_pmu__find_events_table() is invoked only by
perf_pmu__lookup(). Because `pmu` is newly allocated on each call (see
perf_pmu__lookup()), the `pmu` pointer is different every time
perf_pmu__find_events_table() is called. Therefore, the condition is false.

IIUC, has_last_result is introduced to avoid reading the cpuid every time.
Given the above, this variable has no effect. Therefore, can we remove it?

Apart from the question above, I have tested the patch:
Tested-by: Yang Jihong <yangjihong1@huawei.com>


Thanks,
Yang
Yang Jihong Oct. 16, 2023, 9:50 a.m. UTC | #2
Hello,

On 2023/10/13 1:56, Ian Rogers wrote:
> strcmp_cpuid_str performs regular expression comparisons and so per
> CPUID linear searches over the perf_events_map are expensive. Add a
> helper function called map_for_pmu that does the search but also
> caches the map specific to a PMU. As the PMU may differ, also cache
> the CPUID string so that PMUs with the same CPUID string don't require
> the linear search and regular expression comparisons. This speeds
> loading PMUs as the search is done once per PMU to find the
> appropriate tables.
> 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>   tools/perf/pmu-events/jevents.py | 109 ++++++++++++++++++++-----------
>   1 file changed, 70 insertions(+), 39 deletions(-)
> 
> diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
> index 96dc74c90b20..3c091ab75305 100755
> --- a/tools/perf/pmu-events/jevents.py
> +++ b/tools/perf/pmu-events/jevents.py
> @@ -976,68 +976,99 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
>           return 0;
>   }
>   
> -const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
> +static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu)
>   {
> -        const struct pmu_events_table *table = NULL;
> -        char *cpuid = perf_pmu__getcpuid(pmu);
> +        static struct {
> +                const struct pmu_events_map *map;
> +                struct perf_pmu *pmu;
> +        } last_result;
> +        static struct {
> +                const struct pmu_events_map *map;
> +                char *cpuid;
> +        } last_map_search;
> +        static bool has_last_result, has_last_map_search;
> +        const struct pmu_events_map *map = NULL;
> +        char *cpuid = NULL;
>           size_t i;
>   
> -        /* on some platforms which uses cpus map, cpuid can be NULL for
> +        if (has_last_result && last_result.pmu == pmu)
> +                return last_result.map;
> +
> +        cpuid = perf_pmu__getcpuid(pmu);
For software PMUs, we do not need to look for the events table.
It seems that software PMUs can be filtered out in perf_pmu__lookup()
to avoid unnecessary perf_pmu__find_events_table() calls.

I have submitted a patch for this; please see if it helps:
https://lore.kernel.org/all/20231016093309.726436-1-yangjihong1@huawei.com/

Thanks,
Yang
diff mbox series

Patch

diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index 96dc74c90b20..3c091ab75305 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -976,68 +976,99 @@  int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
         return 0;
 }
 
-const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
+static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu)
 {
-        const struct pmu_events_table *table = NULL;
-        char *cpuid = perf_pmu__getcpuid(pmu);
+        static struct {
+                const struct pmu_events_map *map;
+                struct perf_pmu *pmu;
+        } last_result;
+        static struct {
+                const struct pmu_events_map *map;
+                char *cpuid;
+        } last_map_search;
+        static bool has_last_result, has_last_map_search;
+        const struct pmu_events_map *map = NULL;
+        char *cpuid = NULL;
         size_t i;
 
-        /* on some platforms which uses cpus map, cpuid can be NULL for
+        if (has_last_result && last_result.pmu == pmu)
+                return last_result.map;
+
+        cpuid = perf_pmu__getcpuid(pmu);
+
+        /*
+         * On some platforms which uses cpus map, cpuid can be NULL for
          * PMUs other than CORE PMUs.
          */
         if (!cpuid)
-                return NULL;
+                goto out_update_last_result;
+
+        if (has_last_map_search && !strcmp(last_map_search.cpuid, cpuid)) {
+                map = last_map_search.map;
+                free(cpuid);
+        } else {
+                i = 0;
+                for (;;) {
+                        map = &pmu_events_map[i++];
+
+                        if (!map->arch) {
+                                map = NULL;
+                                break;
+                        }
+
+                        if (!strcmp_cpuid_str(map->cpuid, cpuid))
+                                break;
+               }
+               free(last_map_search.cpuid);
+               last_map_search.cpuid = cpuid;
+               last_map_search.map = map;
+               has_last_map_search = true;
+        }
+out_update_last_result:
+        last_result.pmu = pmu;
+        last_result.map = map;
+        has_last_result = true;
+        return map;
+}
 
-        i = 0;
-        for (;;) {
-                const struct pmu_events_map *map = &pmu_events_map[i++];
-                if (!map->arch)
-                        break;
+const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
+{
+        const struct pmu_events_map *map = map_for_pmu(pmu);
 
-                if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
-                        table = &map->event_table;
-                        break;
-                }
-        }
-        free(cpuid);
-        if (!pmu || !table)
-                return table;
+        if (!map)
+                return NULL;
 
-        for (i = 0; i < table->num_pmus; i++) {
-                const struct pmu_table_entry *table_pmu = &table->pmus[i];
+        if (!pmu)
+                return &map->event_table;
+
+        for (size_t i = 0; i < map->event_table.num_pmus; i++) {
+                const struct pmu_table_entry *table_pmu = &map->event_table.pmus[i];
                 const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
 
                 if (pmu__name_match(pmu, pmu_name))
-                        return table;
+                         return &map->event_table;
         }
         return NULL;
 }
 
 const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu)
 {
-        const struct pmu_metrics_table *table = NULL;
-        char *cpuid = perf_pmu__getcpuid(pmu);
-        int i;
+        const struct pmu_events_map *map = map_for_pmu(pmu);
 
-        /* on some platforms which uses cpus map, cpuid can be NULL for
-         * PMUs other than CORE PMUs.
-         */
-        if (!cpuid)
+        if (!map)
                 return NULL;
 
-        i = 0;
-        for (;;) {
-                const struct pmu_events_map *map = &pmu_events_map[i++];
-                if (!map->arch)
-                        break;
+        if (!pmu)
+                return &map->metric_table;
 
-                if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
-                        table = &map->metric_table;
-                        break;
-                }
+        for (size_t i = 0; i < map->metric_table.num_pmus; i++) {
+                const struct pmu_table_entry *table_pmu = &map->metric_table.pmus[i];
+                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
+
+                if (pmu__name_match(pmu, pmu_name))
+                           return &map->metric_table;
         }
-        free(cpuid);
-        return table;
+        return NULL;
 }
 
 const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid)