[v1,2/6] perf trace: Ignore thread hashing in summary

Message ID 20240214063708.972376-3-irogers@google.com (mailing list archive)
State Superseded
Series Thread memory improvements and fixes

Checks

Context: netdev/tree_selection
Check: success
Description: Not a local patch

Commit Message

Ian Rogers Feb. 14, 2024, 6:37 a.m. UTC
Commit 91e467bc568f ("perf machine: Use hashtable for machine
threads") made the iteration of thread tids unordered. The perf trace
--summary output sorts and prints each hash bucket, rather than all
threads globally. Change this behavior by turning all threads into a
list, sorting the list by number of trace events and then by tid, and
finally printing the list. This also means the rbtree in threads is no
longer accessed outside of machine.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/builtin-trace.c  | 41 +++++++++++++++++++++----------------
 tools/perf/util/rb_resort.h |  5 -----
 2 files changed, 23 insertions(+), 23 deletions(-)

Comments

Arnaldo Carvalho de Melo Feb. 14, 2024, 5:25 p.m. UTC | #1
On Tue, Feb 13, 2024 at 10:37:04PM -0800, Ian Rogers wrote:
> Commit 91e467bc568f ("perf machine: Use hashtable for machine
> threads") made the iteration of thread tids unordered. The perf trace
> --summary output sorts and prints each hash bucket, rather than all
> threads globally. Change this behavior by turn all threads into a
> list, sort the list by number of trace events then by tids, finally
> print the list. This also allows the rbtree in threads to be not
> accessed outside of machine.

Can you please provide a refresh of the output that is changed by your patch?

- Arnaldo
 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/builtin-trace.c  | 41 +++++++++++++++++++++----------------
>  tools/perf/util/rb_resort.h |  5 -----
>  2 files changed, 23 insertions(+), 23 deletions(-)
> 
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 109b8e64fe69..90eaff8c0f6e 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -74,6 +74,7 @@
>  #include <linux/err.h>
>  #include <linux/filter.h>
>  #include <linux/kernel.h>
> +#include <linux/list_sort.h>
>  #include <linux/random.h>
>  #include <linux/stringify.h>
>  #include <linux/time64.h>
> @@ -4312,34 +4313,38 @@ static unsigned long thread__nr_events(struct thread_trace *ttrace)
>  	return ttrace ? ttrace->nr_events : 0;
>  }
>  
> -DEFINE_RESORT_RB(threads,
> -		(thread__nr_events(thread__priv(a->thread)) <
> -		 thread__nr_events(thread__priv(b->thread))),
> -	struct thread *thread;
> -)
> +static int trace_nr_events_cmp(void *priv __maybe_unused,
> +			       const struct list_head *la,
> +			       const struct list_head *lb)
>  {
> -	entry->thread = rb_entry(nd, struct thread_rb_node, rb_node)->thread;
> +	struct thread_list *a = list_entry(la, struct thread_list, list);
> +	struct thread_list *b = list_entry(lb, struct thread_list, list);
> +	unsigned long a_nr_events = thread__nr_events(thread__priv(a->thread));
> +	unsigned long b_nr_events = thread__nr_events(thread__priv(b->thread));
> +
> +	if (a_nr_events != b_nr_events)
> +		return a_nr_events < b_nr_events ? -1 : 1;
> +
> +	/* Identical number of threads, place smaller tids first. */
> +	return thread__tid(a->thread) < thread__tid(b->thread)
> +		? -1
> +		: (thread__tid(a->thread) > thread__tid(b->thread) ? 1 : 0);
>  }
>  
>  static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
>  {
>  	size_t printed = trace__fprintf_threads_header(fp);
> -	struct rb_node *nd;
> -	int i;
> -
> -	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
> -		DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
> +	LIST_HEAD(threads);
>  
> -		if (threads == NULL) {
> -			fprintf(fp, "%s", "Error sorting output by nr_events!\n");
> -			return 0;
> -		}
> +	if (machine__thread_list(trace->host, &threads) == 0) {
> +		struct thread_list *pos;
>  
> -		resort_rb__for_each_entry(nd, threads)
> -			printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
> +		list_sort(NULL, &threads, trace_nr_events_cmp);
>  
> -		resort_rb__delete(threads);
> +		list_for_each_entry(pos, &threads, list)
> +			printed += trace__fprintf_thread(fp, pos->thread, trace);
>  	}
> +	thread_list__delete(&threads);
>  	return printed;
>  }
>  
> diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
> index 376e86cb4c3c..d927a0d25052 100644
> --- a/tools/perf/util/rb_resort.h
> +++ b/tools/perf/util/rb_resort.h
> @@ -143,9 +143,4 @@ struct __name##_sorted *__name = __name##_sorted__new
>  	DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root,		\
>  				  __ilist->rblist.nr_entries)
>  
> -/* For 'struct machine->threads' */
> -#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket)    \
> - DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries.rb_root, \
> -			   __machine->threads[hash_bucket].nr)
> -
>  #endif /* _PERF_RESORT_RB_H_ */
> -- 
> 2.43.0.687.g38aa6559b0-goog
Ian Rogers Feb. 14, 2024, 6:27 p.m. UTC | #2
On Wed, Feb 14, 2024 at 9:25 AM Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
>
> On Tue, Feb 13, 2024 at 10:37:04PM -0800, Ian Rogers wrote:
> > Commit 91e467bc568f ("perf machine: Use hashtable for machine
> > threads") made the iteration of thread tids unordered. The perf trace
> > --summary output sorts and prints each hash bucket, rather than all
> > threads globally. Change this behavior by turn all threads into a
> > list, sort the list by number of trace events then by tids, finally
> > print the list. This also allows the rbtree in threads to be not
> > accessed outside of machine.
>
> Can you please provide a refresh of the output that is changed by your patch?

Hmm.. looks like perf trace record has broken and doesn't produce
output in newer perfs. It works on 6.5 and so a bisect is necessary.

Without record it is hard to be repeatable but here is a before:
```
$ perf trace -a --summary sleep 1 2>&1 |grep events
Summary of events:
process1 (976898), 10 events, 0.1%
process2 (79368), 2 events, 0.0%
process3 (3759625), 11 events, 0.1%
...
```
Note how the event counts 10, 2, 11 aren't sorted.

After:
```
$ perf trace -a --summary sleep 1 2>&1 |grep events
Summary of events:
process1 (3190), 2 events, 0.0%
process1 (3191), 2 events, 0.0%
process2 (3681), 2 events, 0.0%
process3 (4034), 2 events, 0.0%
process3 (4035), 2 events, 0.0%
...
```
I've anonymized the process names.

Thanks,
Ian

> - Arnaldo
Ian Rogers Feb. 14, 2024, 9:15 p.m. UTC | #3
On Wed, Feb 14, 2024 at 10:27 AM Ian Rogers <irogers@google.com> wrote:
>
> On Wed, Feb 14, 2024 at 9:25 AM Arnaldo Carvalho de Melo
> <acme@kernel.org> wrote:
> >
> > On Tue, Feb 13, 2024 at 10:37:04PM -0800, Ian Rogers wrote:
> > > Commit 91e467bc568f ("perf machine: Use hashtable for machine
> > > threads") made the iteration of thread tids unordered. The perf trace
> > > --summary output sorts and prints each hash bucket, rather than all
> > > threads globally. Change this behavior by turn all threads into a
> > > list, sort the list by number of trace events then by tids, finally
> > > print the list. This also allows the rbtree in threads to be not
> > > accessed outside of machine.
> >
> > Can you please provide a refresh of the output that is changed by your patch?
>
> Hmm.. looks like perf trace record has broken and doesn't produce
> output in newer perfs. It works on 6.5 and so a bisect is necessary.

Bisect result:
```
9925495d96efc14d885ba66c5696f664fe0e663c is the first bad commit
commit 9925495d96efc14d885ba66c5696f664fe0e663c
Author: Ian Rogers <irogers@google.com>
Date:   Thu Sep 14 14:19:45 2023 -0700

   perf build: Default BUILD_BPF_SKEL, warn/disable for missing deps
...
https://lore.kernel.org/r/20230914211948.814999-3-irogers@google.com
```

Now to do the bisect with BUILD_BPF_SKEL=1 on each make.

Thanks,
Ian
Ian Rogers Feb. 14, 2024, 9:36 p.m. UTC | #4
On Wed, Feb 14, 2024 at 1:15 PM Ian Rogers <irogers@google.com> wrote:
>
> On Wed, Feb 14, 2024 at 10:27 AM Ian Rogers <irogers@google.com> wrote:
> >
> > On Wed, Feb 14, 2024 at 9:25 AM Arnaldo Carvalho de Melo
> > <acme@kernel.org> wrote:
> > >
> > > On Tue, Feb 13, 2024 at 10:37:04PM -0800, Ian Rogers wrote:
> > > > Commit 91e467bc568f ("perf machine: Use hashtable for machine
> > > > threads") made the iteration of thread tids unordered. The perf trace
> > > > --summary output sorts and prints each hash bucket, rather than all
> > > > threads globally. Change this behavior by turn all threads into a
> > > > list, sort the list by number of trace events then by tids, finally
> > > > print the list. This also allows the rbtree in threads to be not
> > > > accessed outside of machine.
> > >
> > > Can you please provide a refresh of the output that is changed by your patch?
> >
> > Hmm.. looks like perf trace record has broken and doesn't produce
> > output in newer perfs. It works on 6.5 and so a bisect is necessary.
>
> Bisect result:
> ```
> 9925495d96efc14d885ba66c5696f664fe0e663c is the first bad commit
> commit 9925495d96efc14d885ba66c5696f664fe0e663c
> Author: Ian Rogers <irogers@google.com>
> Date:   Thu Sep 14 14:19:45 2023 -0700
>
>    perf build: Default BUILD_BPF_SKEL, warn/disable for missing deps
> ...
> https://lore.kernel.org/r/20230914211948.814999-3-irogers@google.com
> ```
>
> Now to do the bisect with BUILD_BPF_SKEL=1 on each make.

This looks better (how could I be at fault :-) ):
```
1836480429d173c01664a633b61e525b13d41a2a is the first bad commit
commit 1836480429d173c01664a633b61e525b13d41a2a
Author: Arnaldo Carvalho de Melo <acme@redhat.com>
Date:   Wed Aug 16 13:53:26 2023 -0300

   perf bpf_skel augmented_raw_syscalls: Cap the socklen parameter
using &= sizeof(saddr)
...
   Cc: Adrian Hunter <adrian.hunter@intel.com>
   Cc: Ian Rogers <irogers@google.com>
   Cc: Jiri Olsa <jolsa@kernel.org>
   Cc: Namhyung Kim <namhyung@kernel.org>
   Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
```
No LKML link.

Thanks,
Ian
Ian Rogers Feb. 14, 2024, 9:42 p.m. UTC | #5
On Wed, Feb 14, 2024 at 1:36 PM Ian Rogers <irogers@google.com> wrote:
>
> On Wed, Feb 14, 2024 at 1:15 PM Ian Rogers <irogers@google.com> wrote:
> >
> > On Wed, Feb 14, 2024 at 10:27 AM Ian Rogers <irogers@google.com> wrote:
> > >
> > > On Wed, Feb 14, 2024 at 9:25 AM Arnaldo Carvalho de Melo
> > > <acme@kernel.org> wrote:
> > > >
> > > > On Tue, Feb 13, 2024 at 10:37:04PM -0800, Ian Rogers wrote:
> > > > > Commit 91e467bc568f ("perf machine: Use hashtable for machine
> > > > > threads") made the iteration of thread tids unordered. The perf trace
> > > > > --summary output sorts and prints each hash bucket, rather than all
> > > > > threads globally. Change this behavior by turn all threads into a
> > > > > list, sort the list by number of trace events then by tids, finally
> > > > > print the list. This also allows the rbtree in threads to be not
> > > > > accessed outside of machine.
> > > >
> > > > Can you please provide a refresh of the output that is changed by your patch?
> > >
> > > Hmm.. looks like perf trace record has broken and doesn't produce
> > > output in newer perfs. It works on 6.5 and so a bisect is necessary.
> >
> > Bisect result:
> > ```
> > 9925495d96efc14d885ba66c5696f664fe0e663c is the first bad commit
> > commit 9925495d96efc14d885ba66c5696f664fe0e663c
> > Author: Ian Rogers <irogers@google.com>
> > Date:   Thu Sep 14 14:19:45 2023 -0700
> >
> >    perf build: Default BUILD_BPF_SKEL, warn/disable for missing deps
> > ...
> > https://lore.kernel.org/r/20230914211948.814999-3-irogers@google.com
> > ```
> >
> > Now to do the bisect with BUILD_BPF_SKEL=1 on each make.
>
> This looks better (how could I be at fault :-) ):
> ```
> 1836480429d173c01664a633b61e525b13d41a2a is the first bad commit
> commit 1836480429d173c01664a633b61e525b13d41a2a
> Author: Arnaldo Carvalho de Melo <acme@redhat.com>
> Date:   Wed Aug 16 13:53:26 2023 -0300
>
>    perf bpf_skel augmented_raw_syscalls: Cap the socklen parameter
> using &= sizeof(saddr)
> ...
>    Cc: Adrian Hunter <adrian.hunter@intel.com>
>    Cc: Ian Rogers <irogers@google.com>
>    Cc: Jiri Olsa <jolsa@kernel.org>
>    Cc: Namhyung Kim <namhyung@kernel.org>
>    Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
> ```
> No LKML link.

Hmm.. basically that change made the BPF program pass the verifier, so
the problem has been long-standing in the BPF code. Maybe perf trace
record never worked with BPF.

Thanks,
Ian
Arnaldo Carvalho de Melo Feb. 16, 2024, 2:57 p.m. UTC | #6
On Wed, Feb 14, 2024 at 01:36:46PM -0800, Ian Rogers wrote:
> On Wed, Feb 14, 2024 at 1:15 PM Ian Rogers <irogers@google.com> wrote:
> > On Wed, Feb 14, 2024 at 10:27 AM Ian Rogers <irogers@google.com> wrote:
> > > On Wed, Feb 14, 2024 at 9:25 AM Arnaldo Carvalho de Melo
> > > <acme@kernel.org> wrote:
> > > > On Tue, Feb 13, 2024 at 10:37:04PM -0800, Ian Rogers wrote:
> > > > > Commit 91e467bc568f ("perf machine: Use hashtable for machine
> > > > > threads") made the iteration of thread tids unordered. The perf trace
> > > > > --summary output sorts and prints each hash bucket, rather than all
> > > > > threads globally. Change this behavior by turn all threads into a
> > > > > list, sort the list by number of trace events then by tids, finally
> > > > > print the list. This also allows the rbtree in threads to be not
> > > > > accessed outside of machine.

> > > > Can you please provide a refresh of the output that is changed by your patch?
> > >
> > > Hmm.. looks like perf trace record has broken and doesn't produce
> > > output in newer perfs. It works on 6.5 and so a bisect is necessary.
> >
> > Bisect result:
> > ```
> > 9925495d96efc14d885ba66c5696f664fe0e663c is the first bad commit
> > commit 9925495d96efc14d885ba66c5696f664fe0e663c
> > Author: Ian Rogers <irogers@google.com>
> > Date:   Thu Sep 14 14:19:45 2023 -0700
> >
> >    perf build: Default BUILD_BPF_SKEL, warn/disable for missing deps
> > ...
> > https://lore.kernel.org/r/20230914211948.814999-3-irogers@google.com
> > ```
> >
> > Now to do the bisect with BUILD_BPF_SKEL=1 on each make.
> 
> This looks better (how could I be at fault :-) ):
> ```
> 1836480429d173c01664a633b61e525b13d41a2a is the first bad commit
> commit 1836480429d173c01664a633b61e525b13d41a2a
> Author: Arnaldo Carvalho de Melo <acme@redhat.com>
> Date:   Wed Aug 16 13:53:26 2023 -0300
> 
>    perf bpf_skel augmented_raw_syscalls: Cap the socklen parameter
> using &= sizeof(saddr)
> ...
>    Cc: Adrian Hunter <adrian.hunter@intel.com>
>    Cc: Ian Rogers <irogers@google.com>
>    Cc: Jiri Olsa <jolsa@kernel.org>
>    Cc: Namhyung Kim <namhyung@kernel.org>
>    Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
> ```
> No LKML link.

So simple... ;-\

I've reproduced your steps and got to the same cset while testing on a
recent distro kernel (6.6.13-200.fc39.x86_64), scratching my head now
and trying to figure this out.

Wonder if the problem would appear when running on an older kernel.

Will try and add a perf test shell entry with a simple:

root@number:~# perf trace record sleep 0.001 && perf script | head | wc -l
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.034 MB perf.data ]
0
root@number:~#

Has to be 10 :-)

Thanks,

- Arnaldo
Namhyung Kim Feb. 27, 2024, 6:55 a.m. UTC | #7
On Tue, Feb 13, 2024 at 10:37 PM Ian Rogers <irogers@google.com> wrote:
>
> Commit 91e467bc568f ("perf machine: Use hashtable for machine
> threads") made the iteration of thread tids unordered. The perf trace
> --summary output sorts and prints each hash bucket, rather than all
> threads globally. Change this behavior by turn all threads into a
> list, sort the list by number of trace events then by tids, finally
> print the list. This also allows the rbtree in threads to be not
> accessed outside of machine.
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/builtin-trace.c  | 41 +++++++++++++++++++++----------------
>  tools/perf/util/rb_resort.h |  5 -----
>  2 files changed, 23 insertions(+), 23 deletions(-)
>
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 109b8e64fe69..90eaff8c0f6e 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -74,6 +74,7 @@
>  #include <linux/err.h>
>  #include <linux/filter.h>
>  #include <linux/kernel.h>
> +#include <linux/list_sort.h>
>  #include <linux/random.h>
>  #include <linux/stringify.h>
>  #include <linux/time64.h>
> @@ -4312,34 +4313,38 @@ static unsigned long thread__nr_events(struct thread_trace *ttrace)
>         return ttrace ? ttrace->nr_events : 0;
>  }
>
> -DEFINE_RESORT_RB(threads,
> -               (thread__nr_events(thread__priv(a->thread)) <
> -                thread__nr_events(thread__priv(b->thread))),
> -       struct thread *thread;
> -)
> +static int trace_nr_events_cmp(void *priv __maybe_unused,
> +                              const struct list_head *la,
> +                              const struct list_head *lb)
>  {
> -       entry->thread = rb_entry(nd, struct thread_rb_node, rb_node)->thread;
> +       struct thread_list *a = list_entry(la, struct thread_list, list);
> +       struct thread_list *b = list_entry(lb, struct thread_list, list);
> +       unsigned long a_nr_events = thread__nr_events(thread__priv(a->thread));
> +       unsigned long b_nr_events = thread__nr_events(thread__priv(b->thread));
> +
> +       if (a_nr_events != b_nr_events)
> +               return a_nr_events < b_nr_events ? -1 : 1;
> +
> +       /* Identical number of threads, place smaller tids first. */
> +       return thread__tid(a->thread) < thread__tid(b->thread)
> +               ? -1
> +               : (thread__tid(a->thread) > thread__tid(b->thread) ? 1 : 0);

I'm not sure whether two different threads in the hash table can ever
have the same tid.  If not, the last case can be simplified.
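
To make that concrete, here is a minimal sketch of the simplified
comparator, assuming tids really are unique within the machine (so the
equal-tid case can never be reached) and relying on the same
perf-internal helpers (struct thread_list, thread__priv(),
thread__nr_events(), thread__tid()) that the patch already uses:

```c
/* Sketch only: assumes no two threads in the table share a tid. */
static int trace_nr_events_cmp(void *priv __maybe_unused,
			       const struct list_head *la,
			       const struct list_head *lb)
{
	struct thread_list *a = list_entry(la, struct thread_list, list);
	struct thread_list *b = list_entry(lb, struct thread_list, list);
	unsigned long a_nr_events = thread__nr_events(thread__priv(a->thread));
	unsigned long b_nr_events = thread__nr_events(thread__priv(b->thread));

	if (a_nr_events != b_nr_events)
		return a_nr_events < b_nr_events ? -1 : 1;

	/* Equal event counts: smaller tid first; tids assumed unique. */
	return thread__tid(a->thread) < thread__tid(b->thread) ? -1 : 1;
}
```

If duplicate tids can occur, the three-way tie-break in the patch stays
necessary.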


>  }
>
>  static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
>  {
>         size_t printed = trace__fprintf_threads_header(fp);
> -       struct rb_node *nd;
> -       int i;
> -
> -       for (i = 0; i < THREADS__TABLE_SIZE; i++) {
> -               DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
> +       LIST_HEAD(threads);
>
> -               if (threads == NULL) {
> -                       fprintf(fp, "%s", "Error sorting output by nr_events!\n");
> -                       return 0;
> -               }
> +       if (machine__thread_list(trace->host, &threads) == 0) {
> +               struct thread_list *pos;
>
> -               resort_rb__for_each_entry(nd, threads)
> -                       printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
> +               list_sort(NULL, &threads, trace_nr_events_cmp);

Same concern; it'd be nice if we could use an array instead.

Thanks,
Namhyung
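
For illustration, a rough sketch of the array-based alternative being
suggested: gather the threads into a flat array and qsort() it, instead
of sorting a linked list. The collection step and the
print_sorted_threads() wrapper are hypothetical and hand-waved here;
the thread__*() accessors and trace__fprintf_thread() are the ones used
in the patch.

```c
#include <stdlib.h>

/* qsort() comparator over an array of struct thread pointers. */
static int thread_ptr_cmp(const void *pa, const void *pb)
{
	struct thread *a = *(struct thread * const *)pa;
	struct thread *b = *(struct thread * const *)pb;
	unsigned long a_nr = thread__nr_events(thread__priv(a));
	unsigned long b_nr = thread__nr_events(thread__priv(b));

	if (a_nr != b_nr)
		return a_nr < b_nr ? -1 : 1;
	return thread__tid(a) < thread__tid(b) ? -1 :
	       (thread__tid(a) > thread__tid(b) ? 1 : 0);
}

static size_t print_sorted_threads(struct trace *trace, FILE *fp)
{
	struct thread **arr = NULL;
	size_t printed = 0, nr = 0;

	/* ... fill arr/nr from trace->host's threads (helper not shown) ... */
	qsort(arr, nr, sizeof(*arr), thread_ptr_cmp);
	for (size_t i = 0; i < nr; i++)
		printed += trace__fprintf_thread(fp, arr[i], trace);
	free(arr);
	return printed;
}
```

Whether this beats list_sort() probably depends on how cheaply the
thread count can be obtained up front to size the array.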


>
> -               resort_rb__delete(threads);
> +               list_for_each_entry(pos, &threads, list)
> +                       printed += trace__fprintf_thread(fp, pos->thread, trace);
>         }
> +       thread_list__delete(&threads);
>         return printed;
>  }
>
> diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
> index 376e86cb4c3c..d927a0d25052 100644
> --- a/tools/perf/util/rb_resort.h
> +++ b/tools/perf/util/rb_resort.h
> @@ -143,9 +143,4 @@ struct __name##_sorted *__name = __name##_sorted__new
>         DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root,             \
>                                   __ilist->rblist.nr_entries)
>
> -/* For 'struct machine->threads' */
> -#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket)    \
> - DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries.rb_root, \
> -                          __machine->threads[hash_bucket].nr)
> -
>  #endif /* _PERF_RESORT_RB_H_ */
> --
> 2.43.0.687.g38aa6559b0-goog
>

Patch

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 109b8e64fe69..90eaff8c0f6e 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -74,6 +74,7 @@ 
 #include <linux/err.h>
 #include <linux/filter.h>
 #include <linux/kernel.h>
+#include <linux/list_sort.h>
 #include <linux/random.h>
 #include <linux/stringify.h>
 #include <linux/time64.h>
@@ -4312,34 +4313,38 @@  static unsigned long thread__nr_events(struct thread_trace *ttrace)
 	return ttrace ? ttrace->nr_events : 0;
 }
 
-DEFINE_RESORT_RB(threads,
-		(thread__nr_events(thread__priv(a->thread)) <
-		 thread__nr_events(thread__priv(b->thread))),
-	struct thread *thread;
-)
+static int trace_nr_events_cmp(void *priv __maybe_unused,
+			       const struct list_head *la,
+			       const struct list_head *lb)
 {
-	entry->thread = rb_entry(nd, struct thread_rb_node, rb_node)->thread;
+	struct thread_list *a = list_entry(la, struct thread_list, list);
+	struct thread_list *b = list_entry(lb, struct thread_list, list);
+	unsigned long a_nr_events = thread__nr_events(thread__priv(a->thread));
+	unsigned long b_nr_events = thread__nr_events(thread__priv(b->thread));
+
+	if (a_nr_events != b_nr_events)
+		return a_nr_events < b_nr_events ? -1 : 1;
+
+	/* Identical number of events, place smaller tids first. */
+	return thread__tid(a->thread) < thread__tid(b->thread)
+		? -1
+		: (thread__tid(a->thread) > thread__tid(b->thread) ? 1 : 0);
 }
 
 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
 {
 	size_t printed = trace__fprintf_threads_header(fp);
-	struct rb_node *nd;
-	int i;
-
-	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
-		DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
+	LIST_HEAD(threads);
 
-		if (threads == NULL) {
-			fprintf(fp, "%s", "Error sorting output by nr_events!\n");
-			return 0;
-		}
+	if (machine__thread_list(trace->host, &threads) == 0) {
+		struct thread_list *pos;
 
-		resort_rb__for_each_entry(nd, threads)
-			printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
+		list_sort(NULL, &threads, trace_nr_events_cmp);
 
-		resort_rb__delete(threads);
+		list_for_each_entry(pos, &threads, list)
+			printed += trace__fprintf_thread(fp, pos->thread, trace);
 	}
+	thread_list__delete(&threads);
 	return printed;
 }
 
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
index 376e86cb4c3c..d927a0d25052 100644
--- a/tools/perf/util/rb_resort.h
+++ b/tools/perf/util/rb_resort.h
@@ -143,9 +143,4 @@  struct __name##_sorted *__name = __name##_sorted__new
 	DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root,		\
 				  __ilist->rblist.nr_entries)
 
-/* For 'struct machine->threads' */
-#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket)    \
- DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries.rb_root, \
-			   __machine->threads[hash_bucket].nr)
-
 #endif /* _PERF_RESORT_RB_H_ */