diff mbox

perf evsel: Fix to perf-stat malloc corruption on arm64 platforms

Message ID CAFpQJXWjx3uVG-6UNiJzmdc4FFw8kea2A5aJZquxVRXq8BR1VQ@mail.gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ganapatrao Kulkarni April 27, 2017, 3:46 p.m. UTC
On Thu, Apr 27, 2017 at 8:04 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> On Wed, Apr 26, 2017 at 11:49:46PM +0530, Ganapatrao Kulkarni wrote:
>> On Wed, Apr 26, 2017 at 10:42 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>> > diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
>> > index 13b5499..638aefa 100644
>> > --- a/tools/perf/builtin-stat.c
>> > +++ b/tools/perf/builtin-stat.c
>> > @@ -346,6 +346,28 @@ static void read_counters(void)
>> >         }
>> >  }
>> >
>> > +/*
>> > + * Close all evnt FDs we open in __run_perf_stat() and
>> > + * create_perf_stat_counter(), taking care to match the number of threads and CPUs.
>> > + *
>> > + * Note that perf_evlist__close(evsel_list) is not equivalent, as it doesn't
>> > + * take the target into account.
>> > + */
>> > +static void close_counters(void)
>> > +{
>> > +       bool per_cpu = target__has_cpu(&target);
>> > +       struct perf_evsel *evsel;
>> > +
>> > +       evlist__for_each_entry(evsel_list, evsel) {
>> > +               if (per_cpu)
>> > +                       perf_evsel__close_per_cpu(evsel,
>> > +                                                 perf_evsel__cpus(evsel));
>> > +               else
>> > +                       perf_evsel__close_per_thread(evsel,
>> > +                                                    evsel_list->threads);
>> > +       }
>> > +}
>> > +
>> >  static void process_interval(void)
>> >  {
>> >         struct timespec ts, rs;
>> > @@ -686,7 +708,7 @@ static int __run_perf_stat(int argc, const char **argv)
>> >          * group leaders.
>> >          */
>> >         read_counters();
>> > -       perf_evlist__close(evsel_list);
>> > +       close_counters();
>> >
>> >         return WEXITSTATUS(status);
>> >  }
>> > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
>> > index ac59710..726ceca 100644
>> > --- a/tools/perf/util/evsel.c
>> > +++ b/tools/perf/util/evsel.c
>> > @@ -1670,6 +1670,18 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
>> >         return err;
>> >  }
>> >
>> > +int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
>> > +                            struct cpu_map *cpus)
>> > +{
>> > +       return perf_evsel__open(evsel, cpus, NULL);
>> > +}
>> > +
>> > +int perf_evsel__open_per_thread(struct perf_evsel *evsel,
>> > +                               struct thread_map *threads)
>> > +{
>> > +       return perf_evsel__open(evsel, NULL, threads);
>> > +}
>> > +
>> >  void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
>> >  {
>> >         if (evsel->fd == NULL)
>> > @@ -1679,16 +1691,18 @@ void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
>> >         perf_evsel__free_fd(evsel);
>> >  }
>> >
>> > -int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
>> > -                            struct cpu_map *cpus)
>> > +void perf_evsel__close_per_cpu(struct perf_evsel *evsel,
>> > +                              struct cpu_map *cpus)
>> >  {
>> > -       return perf_evsel__open(evsel, cpus, NULL);
>> > +       int ncpus = cpus ? cpus->nr : 1;
>> > +       perf_evsel__close(evsel, ncpus, 1);
>> >  }
>> >
>> > -int perf_evsel__open_per_thread(struct perf_evsel *evsel,
>> > -                               struct thread_map *threads)
>> > +void perf_evsel__close_per_thread(struct perf_evsel *evsel,
>> > +                                 struct thread_map *threads)
>> >  {
>> > -       return perf_evsel__open(evsel, NULL, threads);
>> > +       int nthreads = threads ? threads->nr : 1;
>> > +       perf_evsel__close(evsel, 1, nthreads);
>> >  }
>> >
>> >  static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel,
>> > diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
>> > index 06ef6f2..02bea43 100644
>> > --- a/tools/perf/util/evsel.h
>> > +++ b/tools/perf/util/evsel.h
>> > @@ -252,6 +252,10 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel,
>> >                                 struct thread_map *threads);
>> >  int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
>> >                      struct thread_map *threads);
>> > +void perf_evsel__close_per_cpu(struct perf_evsel *evsel,
>> > +                              struct cpu_map *cpus);
>> > +void perf_evsel__close_per_thread(struct perf_evsel *evsel,
>> > +                                 struct thread_map *threads);
>> >  void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads);
>> >
>> >  struct perf_sample;
>>
>> this diff looks to me doing same as mine.
>
> Be careful when reading the diff above; the open functions have been
> moved, but have not changed.
>
> I've only changed the close path, whereas your proposal changed the open
> path. Those are not equivalent changes.
>
>> i think below diff should be more appropriate fix to this issue?
>>
>> when open allocates and uses dummy cpus, there is no point in holding
>> old unused one.  instead it should free and link to dummy cpus which
>> is created with 1 CPU.  same will be used by close.
>>
>> i did quick testing on both x86 and arm64.  testing looks ok, may need
>> more testing!
>>
>> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
>> index ac59710..b1aab0a 100644
>> --- a/tools/perf/util/evsel.c
>> +++ b/tools/perf/util/evsel.c
>> @@ -1466,9 +1466,13 @@ int perf_evsel__open(struct perf_evsel *evsel,
>> struct cpu_map *cpus,
>>                         empty_cpu_map = cpu_map__dummy_new();
>>                         if (empty_cpu_map == NULL)
>>                                 return -ENOMEM;
>> +               } else {
>> +                       cpu_map__get(empty_cpu_map);
>>                 }
>>
>>                 cpus = empty_cpu_map;
>> +               cpu_map__put(evsel->cpus);
>> +               evsel->cpus = cpus;
>>         }
>>
>>         if (threads == NULL) {
>
> Unfortunately, I believe that might break the logic added in commit:
>
>   9f21b815be863218 ("perf evlist: Only open events on CPUs an evsel permits")
>
> ... since the evsel->cpus would now not represent the PMUs CPUs.
>
> As I'd mentioned in my prior reply, I think in order to use the cpu_maps
> consistently we need to do a bigger rework of the way cpu_maps are used,
> in order to separate the PMU CPUs from the requested event CPUs, etc.
> while taking all of these into account.
>
> Could you please give my diff a go?

i tried your diff, and testing looks ok.
below is the cleanly merged diff on top of latest commit
f832460 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc



>
> Thanks,
> Mark.

thanks
Ganapat

Comments

Mark Rutland April 27, 2017, 3:52 p.m. UTC | #1
On Thu, Apr 27, 2017 at 09:16:41PM +0530, Ganapatrao Kulkarni wrote:
> > Could you please give my diff a go?
> 
> i tried your diff, and testing looks ok.

Can I take that as a Tested-by when I post this as a proper patch?

> below is the cleanly merged diff on top of latest commit
> f832460 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc

Thanks for the rebase.

Mark.

> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 13b5499..4be2980 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -346,6 +346,28 @@ static void read_counters(void)
>   }
>  }
> 
> +/*
> + * Close all evnt FDs we open in __run_perf_stat() and
> + * create_perf_stat_counter(), taking care to match the number of
> threads and CPUs.
> + *
> + * Note that perf_evlist__close(evsel_list) is not equivalent, as it doesn't
> + * take the target into account.
> + */
> +static void close_counters(void)
> +{
> +       bool per_cpu = target__has_cpu(&target);
> +       struct perf_evsel *evsel;
> +
> +       evlist__for_each_entry(evsel_list, evsel) {
> +               if (per_cpu)
> +                       perf_evsel__close_per_cpu(evsel,
> +                                                 perf_evsel__cpus(evsel));
> +               else
> +                       perf_evsel__close_per_thread(evsel,
> +                                                    evsel_list->threads);
> +       }
> +}
> +
>  static void process_interval(void)
>  {
>   struct timespec ts, rs;
> @@ -686,7 +708,7 @@ static int __run_perf_stat(int argc, const char **argv)
>   * group leaders.
>   */
>   read_counters();
> - perf_evlist__close(evsel_list);
> + close_counters();
> 
>   return WEXITSTATUS(status);
>  }
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index ac59710..ecd9778 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1691,6 +1691,20 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel,
>   return perf_evsel__open(evsel, NULL, threads);
>  }
> 
> +void perf_evsel__close_per_cpu(struct perf_evsel *evsel,
> +                              struct cpu_map *cpus)
> + {
> +       int ncpus = cpus ? cpus->nr : 1;
> +       perf_evsel__close(evsel, ncpus, 1);
> + }
> +
> +void perf_evsel__close_per_thread(struct perf_evsel *evsel,
> +                                 struct thread_map *threads)
> + {
> +       int nthreads = threads ? threads->nr : 1;
> +       perf_evsel__close(evsel, 1, nthreads);
> + }
> +
>  static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel,
>         const union perf_event *event,
>         struct perf_sample *sample)
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 06ef6f2..6779bd2 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -250,6 +250,10 @@ int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
>       struct cpu_map *cpus);
>  int perf_evsel__open_per_thread(struct perf_evsel *evsel,
>   struct thread_map *threads);
> +void perf_evsel__close_per_cpu(struct perf_evsel *evsel,
> +                              struct cpu_map *cpus);
> +void perf_evsel__close_per_thread(struct perf_evsel *evsel,
> +                                 struct thread_map *threads);
>  int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
>       struct thread_map *threads);
>  void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads);
Ganapatrao Kulkarni April 27, 2017, 5:24 p.m. UTC | #2
On Thu, Apr 27, 2017 at 9:22 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> On Thu, Apr 27, 2017 at 09:16:41PM +0530, Ganapatrao Kulkarni wrote:
>> > Could you please give my diff a go?
>>
>> i tried your diff, and testing looks ok.
>
> Can I take that as a Tested-by when I post this as a proper patch?

sure.

>
>> below is the cleanly merged diff on top of latest commit
>> f832460 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
>
> Thanks for the rebase.
>
> Mark.
>
>> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
>> index 13b5499..4be2980 100644
>> --- a/tools/perf/builtin-stat.c
>> +++ b/tools/perf/builtin-stat.c
>> @@ -346,6 +346,28 @@ static void read_counters(void)
>>   }
>>  }
>>
>> +/*
>> + * Close all evnt FDs we open in __run_perf_stat() and
>> + * create_perf_stat_counter(), taking care to match the number of
>> threads and CPUs.
>> + *
>> + * Note that perf_evlist__close(evsel_list) is not equivalent, as it doesn't
>> + * take the target into account.
>> + */
>> +static void close_counters(void)
>> +{
>> +       bool per_cpu = target__has_cpu(&target);
>> +       struct perf_evsel *evsel;
>> +
>> +       evlist__for_each_entry(evsel_list, evsel) {
>> +               if (per_cpu)
>> +                       perf_evsel__close_per_cpu(evsel,
>> +                                                 perf_evsel__cpus(evsel));
>> +               else
>> +                       perf_evsel__close_per_thread(evsel,
>> +                                                    evsel_list->threads);
>> +       }
>> +}
>> +
>>  static void process_interval(void)
>>  {
>>   struct timespec ts, rs;
>> @@ -686,7 +708,7 @@ static int __run_perf_stat(int argc, const char **argv)
>>   * group leaders.
>>   */
>>   read_counters();
>> - perf_evlist__close(evsel_list);
>> + close_counters();
>>
>>   return WEXITSTATUS(status);
>>  }
>> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
>> index ac59710..ecd9778 100644
>> --- a/tools/perf/util/evsel.c
>> +++ b/tools/perf/util/evsel.c
>> @@ -1691,6 +1691,20 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel,
>>   return perf_evsel__open(evsel, NULL, threads);
>>  }
>>
>> +void perf_evsel__close_per_cpu(struct perf_evsel *evsel,
>> +                              struct cpu_map *cpus)
>> + {
>> +       int ncpus = cpus ? cpus->nr : 1;
>> +       perf_evsel__close(evsel, ncpus, 1);
>> + }
>> +
>> +void perf_evsel__close_per_thread(struct perf_evsel *evsel,
>> +                                 struct thread_map *threads)
>> + {
>> +       int nthreads = threads ? threads->nr : 1;
>> +       perf_evsel__close(evsel, 1, nthreads);
>> + }
>> +
>>  static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel,
>>         const union perf_event *event,
>>         struct perf_sample *sample)
>> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
>> index 06ef6f2..6779bd2 100644
>> --- a/tools/perf/util/evsel.h
>> +++ b/tools/perf/util/evsel.h
>> @@ -250,6 +250,10 @@ int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
>>       struct cpu_map *cpus);
>>  int perf_evsel__open_per_thread(struct perf_evsel *evsel,
>>   struct thread_map *threads);
>> +void perf_evsel__close_per_cpu(struct perf_evsel *evsel,
>> +                              struct cpu_map *cpus);
>> +void perf_evsel__close_per_thread(struct perf_evsel *evsel,
>> +                                 struct thread_map *threads);
>>  int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
>>       struct thread_map *threads);
>>  void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads);
diff mbox

Patch

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 13b5499..4be2980 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -346,6 +346,28 @@  static void read_counters(void)
  }
 }

+/*
+ * Close all evnt FDs we open in __run_perf_stat() and
+ * create_perf_stat_counter(), taking care to match the number of
threads and CPUs.
+ *
+ * Note that perf_evlist__close(evsel_list) is not equivalent, as it doesn't
+ * take the target into account.
+ */
+static void close_counters(void)
+{
+       bool per_cpu = target__has_cpu(&target);
+       struct perf_evsel *evsel;
+
+       evlist__for_each_entry(evsel_list, evsel) {
+               if (per_cpu)
+                       perf_evsel__close_per_cpu(evsel,
+                                                 perf_evsel__cpus(evsel));
+               else
+                       perf_evsel__close_per_thread(evsel,
+                                                    evsel_list->threads);
+       }
+}
+
 static void process_interval(void)
 {
  struct timespec ts, rs;
@@ -686,7 +708,7 @@  static int __run_perf_stat(int argc, const char **argv)
  * group leaders.
  */
  read_counters();
- perf_evlist__close(evsel_list);
+ close_counters();

  return WEXITSTATUS(status);
 }
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ac59710..ecd9778 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1691,6 +1691,20 @@  int perf_evsel__open_per_thread(struct perf_evsel *evsel,
  return perf_evsel__open(evsel, NULL, threads);
 }

+void perf_evsel__close_per_cpu(struct perf_evsel *evsel,
+                              struct cpu_map *cpus)
+ {
+       int ncpus = cpus ? cpus->nr : 1;
+       perf_evsel__close(evsel, ncpus, 1);
+ }
+
+void perf_evsel__close_per_thread(struct perf_evsel *evsel,
+                                 struct thread_map *threads)
+ {
+       int nthreads = threads ? threads->nr : 1;
+       perf_evsel__close(evsel, 1, nthreads);
+ }
+
 static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel,
        const union perf_event *event,
        struct perf_sample *sample)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 06ef6f2..6779bd2 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -250,6 +250,10 @@  int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
      struct cpu_map *cpus);
 int perf_evsel__open_per_thread(struct perf_evsel *evsel,
  struct thread_map *threads);
+void perf_evsel__close_per_cpu(struct perf_evsel *evsel,
+                              struct cpu_map *cpus);
+void perf_evsel__close_per_thread(struct perf_evsel *evsel,
+                                 struct thread_map *threads);
 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
      struct thread_map *threads);
 void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads);