diff mbox series

[2/5] perf cpumap: More cpu map reuse by merge.

Message ID 20220328062414.1893550-3-irogers@google.com (mailing list archive)
State New, archived
Headers show
Series Make evlist CPUs more accurate | expand

Commit Message

Ian Rogers March 28, 2022, 6:24 a.m. UTC
perf_cpu_map__merge will reuse one of its arguments if they are equal or
the other argument is NULL. The arguments could be reused if it is known
one set of values is a subset of the other. For example, a map of 0-1
and a map of just 0 when merged yields the map of 0-1. Currently a new
map is created rather than adding a reference count to the original 0-1
map.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/lib/perf/cpumap.c | 38 ++++++++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 10 deletions(-)

Comments

Arnaldo Carvalho de Melo March 28, 2022, 8:26 p.m. UTC | #1
Em Sun, Mar 27, 2022 at 11:24:11PM -0700, Ian Rogers escreveu:
> perf_cpu_map__merge will reuse one of its arguments if they are equal or
> the other argument is NULL. The arguments could be reused if it is known
> one set of values is a subset of the other. For example, a map of 0-1
> and a map of just 0 when merged yields the map of 0-1. Currently a new
> map is created rather than adding a reference count to the original 0-1
> map.
> 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/lib/perf/cpumap.c | 38 ++++++++++++++++++++++++++++----------
>  1 file changed, 28 insertions(+), 10 deletions(-)
> 
> diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
> index ee66760f1e63..953bc50b0e41 100644
> --- a/tools/lib/perf/cpumap.c
> +++ b/tools/lib/perf/cpumap.c
> @@ -319,6 +319,29 @@ struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map)
>  	return map->nr > 0 ? map->map[map->nr - 1] : result;
>  }
>  
> +/** Is 'b' a subset of 'a'. */
> +static bool perf_cpu_map__is_subset(const struct perf_cpu_map *a,
> +				    const struct perf_cpu_map *b)
> +{
> +	int i, j;
> +
> +	if (a == b || !b)
> +		return true;
> +	if (!a || b->nr > a->nr)
> +		return false;
> +	j = 0;
> +	for (i = 0; i < a->nr; i++) {

Since the kernel bumped the minimum gcc version to one that supports
declaring loop variables locally and that perf has been using this since
forever:

⬢[acme@toolbox perf]$ grep -r '(int [[:alpha:]] = 0;' tools/perf
tools/perf/util/block-info.c:	for (int i = 0; i < nr_hpps; i++)
tools/perf/util/block-info.c:	for (int i = 0; i < nr_hpps; i++) {
tools/perf/util/block-info.c:	for (int i = 0; i < nr_reps; i++)
tools/perf/util/stream.c:	for (int i = 0; i < nr_evsel; i++)
tools/perf/util/stream.c:	for (int i = 0; i < nr_evsel; i++) {
tools/perf/util/stream.c:	for (int i = 0; i < els->nr_evsel; i++) {
tools/perf/util/stream.c:	for (int i = 0; i < es_pair->nr_streams; i++) {
tools/perf/util/stream.c:	for (int i = 0; i < es_base->nr_streams; i++) {
tools/perf/util/cpumap.c:		for (int j = 0; j < c->nr; j++) {
tools/perf/util/mem-events.c:	for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
tools/perf/util/header.c:	for (int i = 0; i < ff->ph->env.nr_hybrid_cpc_nodes; i++) {
tools/perf/builtin-diff.c:	for (int i = 0; i < num; i++)
tools/perf/builtin-diff.c:		for (int i = 0; i < pair->block_info->num; i++) {
tools/perf/builtin-stat.c:	for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) {
⬢[acme@toolbox perf]$

And this builds on all my test containers, please use:

	for (int i = 0, j = 0; i < a->nr; i++)

In this case to make the source code more compact.

> +		if (a->map[i].cpu > b->map[j].cpu)
> +			return false;
> +		if (a->map[i].cpu == b->map[j].cpu) {
> +			j++;
> +			if (j == b->nr)
> +				return true;

Ok, as its guaranteed that cpu_maps are ordered.

> +		}
> +	}
> +	return false;
> +}
> +
>  /*
>   * Merge two cpumaps
>   *
> @@ -335,17 +358,12 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
>  	int i, j, k;
>  	struct perf_cpu_map *merged;
>  
> -	if (!orig && !other)
> -		return NULL;
> -	if (!orig) {
> -		perf_cpu_map__get(other);
> -		return other;
> -	}
> -	if (!other)
> -		return orig;
> -	if (orig->nr == other->nr &&
> -	    !memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu)))
> +	if (perf_cpu_map__is_subset(orig, other))
>  		return orig;

Can't we have first the introduction of perf_cpu_map__is_subset() and
then another patch that gets the refcount, i.e. the four lines below?

> +	if (perf_cpu_map__is_subset(other, orig)) {
> +		perf_cpu_map__put(orig);
> +		return perf_cpu_map__get(other);
> +	}
>  
>  	tmp_len = orig->nr + other->nr;
>  	tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
> -- 
> 2.35.1.1021.g381101b075-goog
Ian Rogers March 28, 2022, 8:50 p.m. UTC | #2
On Mon, Mar 28, 2022 at 1:26 PM Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
>
> Em Sun, Mar 27, 2022 at 11:24:11PM -0700, Ian Rogers escreveu:
> > perf_cpu_map__merge will reuse one of its arguments if they are equal or
> > the other argument is NULL. The arguments could be reused if it is known
> > one set of values is a subset of the other. For example, a map of 0-1
> > and a map of just 0 when merged yields the map of 0-1. Currently a new
> > map is created rather than adding a reference count to the original 0-1
> > map.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/lib/perf/cpumap.c | 38 ++++++++++++++++++++++++++++----------
> >  1 file changed, 28 insertions(+), 10 deletions(-)
> >
> > diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
> > index ee66760f1e63..953bc50b0e41 100644
> > --- a/tools/lib/perf/cpumap.c
> > +++ b/tools/lib/perf/cpumap.c
> > @@ -319,6 +319,29 @@ struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map)
> >       return map->nr > 0 ? map->map[map->nr - 1] : result;
> >  }
> >
> > +/** Is 'b' a subset of 'a'. */
> > +static bool perf_cpu_map__is_subset(const struct perf_cpu_map *a,
> > +                                 const struct perf_cpu_map *b)
> > +{
> > +     int i, j;
> > +
> > +     if (a == b || !b)
> > +             return true;
> > +     if (!a || b->nr > a->nr)
> > +             return false;
> > +     j = 0;
> > +     for (i = 0; i < a->nr; i++) {
>
> Since the kernel bumped the minimum gcc version to one that supports
> declaring loop variables locally and that perf has been using this since
> forever:
>
> ⬢[acme@toolbox perf]$ grep -r '(int [[:alpha:]] = 0;' tools/perf
> tools/perf/util/block-info.c:   for (int i = 0; i < nr_hpps; i++)
> tools/perf/util/block-info.c:   for (int i = 0; i < nr_hpps; i++) {
> tools/perf/util/block-info.c:   for (int i = 0; i < nr_reps; i++)
> tools/perf/util/stream.c:       for (int i = 0; i < nr_evsel; i++)
> tools/perf/util/stream.c:       for (int i = 0; i < nr_evsel; i++) {
> tools/perf/util/stream.c:       for (int i = 0; i < els->nr_evsel; i++) {
> tools/perf/util/stream.c:       for (int i = 0; i < es_pair->nr_streams; i++) {
> tools/perf/util/stream.c:       for (int i = 0; i < es_base->nr_streams; i++) {
> tools/perf/util/cpumap.c:               for (int j = 0; j < c->nr; j++) {
> tools/perf/util/mem-events.c:   for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
> tools/perf/util/header.c:       for (int i = 0; i < ff->ph->env.nr_hybrid_cpc_nodes; i++) {
> tools/perf/builtin-diff.c:      for (int i = 0; i < num; i++)
> tools/perf/builtin-diff.c:              for (int i = 0; i < pair->block_info->num; i++) {
> tools/perf/builtin-stat.c:      for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) {
> ⬢[acme@toolbox perf]$
>
> And this builds on all my test containers, please use:
>
>         for (int i = 0, j = 0; i < a->nr; i++)
>
> In this case to make the source code more compact.

Ack. We still need to declare 'j' and it is a bit weird to declare j
before i. Fwiw, Making.config has the CORE_CFLAGS set to gnu99, but
declaring in the loop is clearly valid in c99.

> > +             if (a->map[i].cpu > b->map[j].cpu)
> > +                     return false;
> > +             if (a->map[i].cpu == b->map[j].cpu) {
> > +                     j++;
> > +                     if (j == b->nr)
> > +                             return true;
>
> Ok, as its guaranteed that cpu_maps are ordered.
>
> > +             }
> > +     }
> > +     return false;
> > +}
> > +
> >  /*
> >   * Merge two cpumaps
> >   *
> > @@ -335,17 +358,12 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
> >       int i, j, k;
> >       struct perf_cpu_map *merged;
> >
> > -     if (!orig && !other)
> > -             return NULL;
> > -     if (!orig) {
> > -             perf_cpu_map__get(other);
> > -             return other;
> > -     }
> > -     if (!other)
> > -             return orig;
> > -     if (orig->nr == other->nr &&
> > -         !memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu)))
> > +     if (perf_cpu_map__is_subset(orig, other))
> >               return orig;
>
> Can't we have first the introduction of perf_cpu_map__is_subset() and
> then another patch that gets the refcount, i.e. the four lines below?

I believe that will fail as it'd be an unused static function warning
and werror.

Thanks,
Ian

> > +     if (perf_cpu_map__is_subset(other, orig)) {
> > +             perf_cpu_map__put(orig);
> > +             return perf_cpu_map__get(other);
> > +     }
> >
> >       tmp_len = orig->nr + other->nr;
> >       tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
> > --
> > 2.35.1.1021.g381101b075-goog
>
> --
>
> - Arnaldo
Arnaldo Carvalho de Melo March 28, 2022, 8:56 p.m. UTC | #3
On March 28, 2022 5:50:21 PM GMT-03:00, Ian Rogers <irogers@google.com> wrote:
>On Mon, Mar 28, 2022 at 1:26 PM Arnaldo Carvalho de Melo
><acme@kernel.org> wrote:
>>
>> Em Sun, Mar 27, 2022 at 11:24:11PM -0700, Ian Rogers escreveu:
>> > perf_cpu_map__merge will reuse one of its arguments if they are equal or
>> > the other argument is NULL. The arguments could be reused if it is known
>> > one set of values is a subset of the other. For example, a map of 0-1
>> > and a map of just 0 when merged yields the map of 0-1. Currently a new
>> > map is created rather than adding a reference count to the original 0-1
>> > map.
>> >
>> > Signed-off-by: Ian Rogers <irogers@google.com>
>> > ---
>> >  tools/lib/perf/cpumap.c | 38 ++++++++++++++++++++++++++++----------
>> >  1 file changed, 28 insertions(+), 10 deletions(-)
>> >
>> > diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
>> > index ee66760f1e63..953bc50b0e41 100644
>> > --- a/tools/lib/perf/cpumap.c
>> > +++ b/tools/lib/perf/cpumap.c
>> > @@ -319,6 +319,29 @@ struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map)
>> >       return map->nr > 0 ? map->map[map->nr - 1] : result;
>> >  }
>> >
>> > +/** Is 'b' a subset of 'a'. */
>> > +static bool perf_cpu_map__is_subset(const struct perf_cpu_map *a,
>> > +                                 const struct perf_cpu_map *b)
>> > +{
>> > +     int i, j;
>> > +
>> > +     if (a == b || !b)
>> > +             return true;
>> > +     if (!a || b->nr > a->nr)
>> > +             return false;
>> > +     j = 0;
>> > +     for (i = 0; i < a->nr; i++) {
>>
>> Since the kernel bumped the minimum gcc version to one that supports
>> declaring loop variables locally and that perf has been using this since
>> forever:
>>
>> ⬢[acme@toolbox perf]$ grep -r '(int [[:alpha:]] = 0;' tools/perf
>> tools/perf/util/block-info.c:   for (int i = 0; i < nr_hpps; i++)
>> tools/perf/util/block-info.c:   for (int i = 0; i < nr_hpps; i++) {
>> tools/perf/util/block-info.c:   for (int i = 0; i < nr_reps; i++)
>> tools/perf/util/stream.c:       for (int i = 0; i < nr_evsel; i++)
>> tools/perf/util/stream.c:       for (int i = 0; i < nr_evsel; i++) {
>> tools/perf/util/stream.c:       for (int i = 0; i < els->nr_evsel; i++) {
>> tools/perf/util/stream.c:       for (int i = 0; i < es_pair->nr_streams; i++) {
>> tools/perf/util/stream.c:       for (int i = 0; i < es_base->nr_streams; i++) {
>> tools/perf/util/cpumap.c:               for (int j = 0; j < c->nr; j++) {
>> tools/perf/util/mem-events.c:   for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
>> tools/perf/util/header.c:       for (int i = 0; i < ff->ph->env.nr_hybrid_cpc_nodes; i++) {
>> tools/perf/builtin-diff.c:      for (int i = 0; i < num; i++)
>> tools/perf/builtin-diff.c:              for (int i = 0; i < pair->block_info->num; i++) {
>> tools/perf/builtin-stat.c:      for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) {
>> ⬢[acme@toolbox perf]$
>>
>> And this builds on all my test containers, please use:
>>
>>         for (int i = 0, j = 0; i < a->nr; i++)
>>
>> In this case to make the source code more compact.
>
>Ack. We still need to declare 'j' and it is a bit weird to declare j
>before i. Fwiw, Making.config has the CORE_CFLAGS set to gnu99, but
>declaring in the loop is clearly valid in c99.
>
>> > +             if (a->map[i].cpu > b->map[j].cpu)
>> > +                     return false;
>> > +             if (a->map[i].cpu == b->map[j].cpu) {
>> > +                     j++;
>> > +                     if (j == b->nr)
>> > +                             return true;
>>
>> Ok, as its guaranteed that cpu_maps are ordered.
>>
>> > +             }
>> > +     }
>> > +     return false;
>> > +}
>> > +
>> >  /*
>> >   * Merge two cpumaps
>> >   *
>> > @@ -335,17 +358,12 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
>> >       int i, j, k;
>> >       struct perf_cpu_map *merged;
>> >
>> > -     if (!orig && !other)
>> > -             return NULL;
>> > -     if (!orig) {
>> > -             perf_cpu_map__get(other);
>> > -             return other;
>> > -     }
>> > -     if (!other)
>> > -             return orig;
>> > -     if (orig->nr == other->nr &&
>> > -         !memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu)))
>> > +     if (perf_cpu_map__is_subset(orig, other))
>> >               return orig;
>>
>> Can't we have first the introduction of perf_cpu_map__is_subset() and
>> then another patch that gets the refcount, i.e. the four lines below?
>
>I believe that will fail as it'd be an unused static function warning
>and werror.

I thought that it seemed useful enough not to be a static, even if we don't at first export it, i.e. keep it as internal to libperf

>
>Thanks,
>Ian
>
>> > +     if (perf_cpu_map__is_subset(other, orig)) {
>> > +             perf_cpu_map__put(orig);
>> > +             return perf_cpu_map__get(other);
>> > +     }
>> >
>> >       tmp_len = orig->nr + other->nr;
>> >       tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
>> > --
>> > 2.35.1.1021.g381101b075-goog
>>
>> --
>>
>> - Arnaldo
diff mbox series

Patch

diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index ee66760f1e63..953bc50b0e41 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -319,6 +319,29 @@  struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map)
 	return map->nr > 0 ? map->map[map->nr - 1] : result;
 }
 
+/** Is 'b' a subset of 'a'. */
+static bool perf_cpu_map__is_subset(const struct perf_cpu_map *a,
+				    const struct perf_cpu_map *b)
+{
+	int i, j;
+
+	if (a == b || !b)
+		return true;
+	if (!a || b->nr > a->nr)
+		return false;
+	j = 0;
+	for (i = 0; i < a->nr; i++) {
+		if (a->map[i].cpu > b->map[j].cpu)
+			return false;
+		if (a->map[i].cpu == b->map[j].cpu) {
+			j++;
+			if (j == b->nr)
+				return true;
+		}
+	}
+	return false;
+}
+
 /*
  * Merge two cpumaps
  *
@@ -335,17 +358,12 @@  struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
 	int i, j, k;
 	struct perf_cpu_map *merged;
 
-	if (!orig && !other)
-		return NULL;
-	if (!orig) {
-		perf_cpu_map__get(other);
-		return other;
-	}
-	if (!other)
-		return orig;
-	if (orig->nr == other->nr &&
-	    !memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu)))
+	if (perf_cpu_map__is_subset(orig, other))
 		return orig;
+	if (perf_cpu_map__is_subset(other, orig)) {
+		perf_cpu_map__put(orig);
+		return perf_cpu_map__get(other);
+	}
 
 	tmp_len = orig->nr + other->nr;
 	tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));