diff mbox series

[i-g-t] intel-gpu-top: Support for client stats

Message ID 20210121181005.762333-1-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series [i-g-t] intel-gpu-top: Support for client stats | expand

Commit Message

Tvrtko Ursulin Jan. 21, 2021, 6:10 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Add support for the per-client engine busyness stats which i915 exports in
sysfs and produce output like the following:

===============================================================================
intel-gpu-top: Intel Skylake (Gen9) @ /dev/dri/card0 -  951/ 950 MHz;
    0% RC6; 14.60/23.62 W;     1482 irqs/s

      IMC reads:     1287 MiB/s
     IMC writes:      115 MiB/s

         ENGINES     BUSY                                       MI_SEMA MI_WAIT
       Render/3D   95.48% |█████████████████████████████████  |      5%      0%
         Blitter   15.98% |█████                              |     10%      0%
           Video   35.40% |████████████                       |     26%      0%
    VideoEnhance    0.00% |                                   |      0%      0%

   PID              NAME   Render/3D     Blitter       Video    VideoEnhance
  1053          gem_wsim |███        ||           ||███        ||           |
  1054          gem_wsim |███████    ||█          ||           ||           |
===============================================================================

Apart from the existing physical engine utilization it now also shows
utilization per client and per engine class.

v2:
 * Version to match removal of global enable_stats toggle.
 * Plus various fixes.

v3:
 * Support brief backward jumps in client stats.

v4:
 * Support device selection.

v5:
 * Rebase for class aggregation.
 * Optimise sysfs reads a tiny bit by openat(2) and caching client root.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tools/intel_gpu_top.c | 557 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 545 insertions(+), 12 deletions(-)

Comments

Chris Wilson Jan. 21, 2021, 9:34 p.m. UTC | #1
Quoting Tvrtko Ursulin (2021-01-21 18:10:05)
> +static struct clients *init_clients(const char *drm_card)
> +{
> +       struct clients *clients = malloc(sizeof(*clients));
> +       const char *slash;
> +       ssize_t ret;
> +
> +       memset(clients, 0, sizeof(*clients));
> +
> +       if (drm_card) {
> +               slash = rindex(drm_card, '/');
> +               assert(slash);
> +       } else {
> +               slash = "card0";
> +       }
> +
> +       ret = snprintf(clients->sysfs_root, sizeof(clients->sysfs_root),
> +                      "/sys/class/drm/%s/clients/", slash);
> +       assert(ret > 0 && ret < sizeof(clients->sysfs_root));

Afaict, igt_device_card_match_pci et al are not guaranteed to fill in
dev->drm_card and may leave it blank instead of finding the
corresponding /dev/dri/cardN.

> +
> +       return clients;
> +}
> +
> +static struct client *
> +find_client(struct clients *clients, enum client_status status, unsigned int id)
> +{
> +       struct client *c;
> +       int tmp;
> +
> +       for_each_client(clients, c, tmp) {
> +               if ((status == FREE && c->status == FREE) ||
> +                   (status == c->status && c->id == id))
> +                       return c;

if (status != c->status)
	continue;

if (status == FREE || c->id == id)
	return c;

> +       }
> +
> +       return NULL;
> +}
> +
> +static void update_client(struct client *c, unsigned int pid, char *name)
> +{
> +       uint64_t val[c->clients->num_classes];
> +       unsigned int i;
> +
> +       if (c->pid != pid)
> +               c->pid = pid;
> +
> +       if (strcmp(c->name, name))
> +               strncpy(c->name, name, sizeof(c->name) - 1);

That answers my question about what happens to clients that change
their name after new contexts are created.

You don't trust that (pid, name) are tied together? Right, either may
change during the read of the pair.

> +
> +       for (i = 0; i < c->clients->num_classes; i++)
> +               val[i] = read_client_busy(c, c->clients->class[i].class);
> +
> +       c->total = 0;
> +
> +       for (i = 0; i < c->clients->num_classes; i++) {
> +               if (val[i] < c->last[i])
> +                       continue; /* It will catch up soon. */
> +
> +               c->val[i] = val[i] - c->last[i];
> +               c->total += c->val[i];
> +               c->last[i] = val[i];
> +       }
> +
> +       c->samples++;
> +       c->status = ALIVE;
> +}
> +
> +static void
> +add_client(struct clients *clients, unsigned int id, unsigned int pid,
> +          char *name, int sysfs_root)
> +{
> +       struct client *c;
> +
> +       if (find_client(clients, ALIVE, id))
> +               return;
> +
> +       c = find_client(clients, FREE, 0);
> +       if (!c) {
> +               unsigned int idx = clients->num_clients;
> +
> +               clients->num_clients += (clients->num_clients + 2) / 2;
> +               clients->client = realloc(clients->client,
> +                                         clients->num_clients * sizeof(*c));
> +               assert(clients->client);
> +
> +               c = &clients->client[idx];
> +               memset(c, 0, (clients->num_clients - idx) * sizeof(*c));

Hence must be FREE==zero. And for_each_client() walks an [] not list.

> +       }
> +
> +       c->sysfs_root = sysfs_root;
> +       c->id = id;
> +       c->clients = clients;
> +       c->val = calloc(clients->num_classes, sizeof(c->val));
> +       c->last = calloc(clients->num_classes, sizeof(c->last));
> +       assert(c->val && c->last);
> +
> +       update_client(c, pid, name);
> +}
> +
> +static void free_client(struct client *c)
> +{
> +       if (c->sysfs_root >= 0)
> +               close(c->sysfs_root);
> +       free(c->val);
> +       free(c->last);
> +       memset(c, 0, sizeof(*c));

Worth compacting the array?

I expect the array will trend to become quite sparse rather than
auto-defragment.


> +}
> +
> +static int
> +read_client_sysfs(char *buf, int bufsize, const char *sysfs_root,
> +                 unsigned int id, const char *field, int *client_root)
> +{
> +       ssize_t ret;
> +
> +       if (*client_root < 0) {
> +               char namebuf[256];
> +
> +               ret = snprintf(namebuf, sizeof(namebuf), "%s/%u",
> +                              sysfs_root, id);
> +               assert(ret > 0 && ret < sizeof(namebuf));
> +               if (ret <= 0 || ret == sizeof(namebuf))
> +                       return -1;
> +
> +               *client_root = open(namebuf, O_RDONLY | O_DIRECTORY);
> +       }
> +
> +       if (*client_root < 0)
> +               return -1;
> +
> +       return __read_client_field(*client_root, field, buf, bufsize);
> +}
> +
> +static void scan_clients(struct clients *clients)
> +{
> +       struct dirent *dent;
> +       struct client *c;
> +       unsigned int id;
> +       int tmp;
> +       DIR *d;
> +
> +       if (!clients)
> +               return;
> +
> +       for_each_client(clients, c, tmp) {
> +               if (c->status == ALIVE)
> +                       c->status = PROBE;
> +       }
> +
> +       d = opendir(clients->sysfs_root);
> +       if (!d)
> +               return;
> +
> +       while ((dent = readdir(d)) != NULL) {
> +               char name[256], pid[256];
> +               int ret, root = -1, *pr;
> +
> +               if (dent->d_type != DT_DIR)
> +                       continue;
> +               if (!isdigit(dent->d_name[0]))
> +                       continue;
> +
> +               id = atoi(dent->d_name);
> +
> +               c = find_client(clients, PROBE, id);
> +
> +               if (c)
> +                       pr = &c->sysfs_root;
> +               else
> +                       pr = &root;
> +
> +               ret = read_client_sysfs(name, sizeof(name), clients->sysfs_root,
> +                                       id, "name", pr);
> +               ret |= read_client_sysfs(pid, sizeof(pid), clients->sysfs_root,
> +                                       id, "pid", pr);
> +               if (!ret) {
> +                       if (!c)
> +                               add_client(clients, id, atoi(pid), name, root);
> +                       else
> +                               update_client(c, atoi(pid), name);
> +               } else if (c) {
> +                       c->status = PROBE; /* Will be deleted below. */
> +               }
> +       }
> +
> +       closedir(d);
> +
> +       for_each_client(clients, c, tmp) {
> +               if (c->status == PROBE)
> +                       free_client(c);
> +       }
> +}
> +
> +static int cmp(const void *_a, const void *_b)
> +{
> +       const struct client *a = _a;
> +       const struct client *b = _b;
> +       long tot_a = a->total;
> +       long tot_b = b->total;
> +
> +       tot_a *= a->status == ALIVE && a->samples > 1;
> +       tot_b *= b->status == ALIVE && b->samples > 1;
> +
> +       tot_b -= tot_a;
> +
> +       if (!tot_b)
> +               return (int)b->id - a->id;

If both active and identical runtimes, sort in ascending id, which is the
client/<id> (chronological order of creation)

return (int)(b->id - a->id);

> +
> +       while (tot_b > INT_MAX || tot_b < INT_MIN)
> +               tot_b /= 2;

This is just
	if (tot_b > 0)
		return 1;
	if (tot_b < 0)
		return -1;
	return 0;

Right? Sort in order of runtimes, if active. If inactive, age of
creation.

> +
> +       return tot_b;
> +}
> +
>  static const char *bars[] = { " ", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█" };
>  
> +static void n_spaces(const unsigned int n)
> +{
> +       unsigned int i;
> +
> +       for (i = 0; i < n; i++)
> +               putchar(' ');
> +}
> +
>  static void
>  print_percentage_bar(double percent, int max_len)
>  {
> -       int bar_len = percent * (8 * (max_len - 2)) / 100.0;
> -       int i;
> +       int bar_len, i, len = max_len - 2;
> +       const int w = 8;
> +
> +       assert(max_len > 0);
> +
> +       bar_len = percent * len / 100.0;
> +       if (bar_len > len)
> +               bar_len = len;
> +       bar_len *= w;
>  
>         putchar('|');
>  
> -       for (i = bar_len; i >= 8; i -= 8)
> -               printf("%s", bars[8]);
> +       for (i = bar_len; i >= w; i -= w)
> +               printf("%s", bars[w]);
>         if (i)
>                 printf("%s", bars[i]);
>  
> -       for (i = 0; i < (max_len - 2 - (bar_len + 7) / 8); i++)
> -               putchar(' ');
> +       len -= (bar_len + (w - 1)) / w;
> +       n_spaces(len);
>  
>         putchar('|');
>  }
> @@ -744,6 +1077,18 @@ json_close_struct(void)
>                 fflush(stdout);
>  }
>  
> +static void
> +__json_add_member(const char *key, const char *val)
> +{
> +       assert(json_indent_level < ARRAY_SIZE(json_indent));
> +
> +       fprintf(out, "%s%s\"%s\": \"%s\"",
> +               json_struct_members ? ",\n" : "",
> +               json_indent[json_indent_level], key, val);
> +
> +       json_struct_members++;
> +}
> +
>  static unsigned int
>  json_add_member(const struct cnt_group *parent, struct cnt_item *item,
>                 unsigned int headers)
> @@ -1046,8 +1391,6 @@ print_header(const struct igt_device_card *card,
>                 memmove(&groups[0], &groups[1],
>                         sizeof(groups) - sizeof(groups[0]));
>  
> -       pops->open_struct(NULL);
> -
>         *consumed = print_groups(groups);
>  
>         if (output_mode == INTERACTIVE) {
> @@ -1204,7 +1547,7 @@ print_engine(struct engines *engines, unsigned int i, double t,
>                               engine->display_name, engine_items[0].buf);
>  
>                 val = pmu_calc(&engine->busy.val, 1e9, t, 100);
> -               print_percentage_bar(val, max_w - len);
> +               print_percentage_bar(val, max_w > len ? max_w - len : 0);
>  
>                 printf("%s\n", buf);
>  
> @@ -1219,7 +1562,6 @@ print_engines_footer(struct engines *engines, double t,
>                      int lines, int con_w, int con_h)
>  {
>         pops->close_struct();
> -       pops->close_struct();
>  
>         if (output_mode == INTERACTIVE) {
>                 if (lines++ < con_h)
> @@ -1243,6 +1585,9 @@ static void init_engine_classes(struct engines *engines)
>         unsigned int i, num;
>         int max = -1;
>  
> +       if (engines->num_classes)
> +               return;
> +
>         for (i = 0; i < engines->num_engines; i++) {
>                 struct engine *engine = engine_ptr(engines, i);
>  
> @@ -1404,6 +1749,148 @@ print_engines(struct engines *engines, double t, int lines, int w, int h)
>         return lines;
>  }
>  
> +static int
> +print_clients_header(struct clients *clients, int lines,
> +                    int con_w, int con_h, int *class_w)
> +{
> +       if (output_mode == INTERACTIVE) {
> +               const char *pidname = "   PID              NAME ";
> +               unsigned int num_active = 0;
> +               int len = strlen(pidname);
> +
> +               if (lines++ >= con_h)
> +                       return lines;
> +
> +               printf("\033[7m");
> +               printf("%s", pidname);
> +
> +               if (lines++ >= con_h || len >= con_w)
> +                       return lines;
> +
> +               if (clients->num_classes) {
> +                       unsigned int i;
> +                       int width;
> +
> +                       for (i = 0; i < clients->num_classes; i++) {
> +                               if (clients->class[i].num_engines)
> +                                       num_active++;
> +                       }
> +
> +                       *class_w = width = (con_w - len) / num_active;
> +
> +                       for (i = 0; i < clients->num_classes; i++) {
> +                               const char *name = clients->class[i].name;
> +                               int name_len = strlen(name);
> +                               int pad = (width - name_len) / 2;
> +                               int spaces = width - pad - name_len;
> +
> +                               if (!clients->class[i].num_engines)
> +                                       continue; /* Assert in the ideal world. */
> +
> +                               if (pad < 0 || spaces < 0)
> +                                       continue;
> +
> +                               n_spaces(pad);
> +                               printf("%s", name);

Should we sanitize the names and remove ansi control codes or only show
printable char?

> +                               n_spaces(spaces);
> +                               len += pad + name_len + spaces;
> +                       }
> +               }
> +
> +               n_spaces(con_w - len);
> +               printf("\033[0m\n");
> +       } else {
> +               if (clients->num_classes)
> +                       pops->open_struct("clients");
> +       }
> +
> +       return lines;
> +}
> +
> +static int
> +print_client(struct client *c, struct engines *engines, double t, int lines,
> +            int con_w, int con_h, unsigned int period_us, int *class_w)
> +{
> +       struct clients *clients = c->clients;
> +       unsigned int i;
> +
> +       if (output_mode == INTERACTIVE) {
> +               printf("%6u %17s ", c->pid, c->name);
> +
> +               for (i = 0; i < clients->num_classes; i++) {
> +                       double pct;
> +
> +                       if (!clients->class[i].num_engines)
> +                               continue; /* Assert in the ideal world. */
> +
> +                       pct = (double)c->val[i] / period_us / 1e3 * 100 /
> +                             clients->class[i].num_engines;
> +
> +                       /*
> +                        * Guard against possible time-drift between sampling
> +                        * client data and time we obtained our time-delta from
> +                        * PMU.
> +                        */
> +                       if (pct > 100.0)
> +                               pct = 100.0;
> +
> +                       print_percentage_bar(pct, *class_w);
> +
> +                       lines++;
> +               }
> +
> +               putchar('\n');
> +       } else if (output_mode == JSON) {
> +               char buf[64];
> +
> +               snprintf(buf, sizeof(buf), "%u", c->id);
> +               pops->open_struct(buf);
> +
> +               __json_add_member("name", c->name);
> +
> +               snprintf(buf, sizeof(buf), "%u", c->pid);
> +               __json_add_member("pid", buf);
> +
> +               pops->open_struct("engine-classes");
> +
> +               for (i = 0; i < clients->num_classes; i++) {
> +                       double pct;
> +
> +                       snprintf(buf, sizeof(buf), "%s",
> +                                clients->class[i].name);
> +                       pops->open_struct(buf);
> +
> +                       pct = (double)c->val[i] / period_us / 1e3 * 100;
> +                       snprintf(buf, sizeof(buf), "%f", pct);
> +                       __json_add_member("busy", buf);
> +
> +                       __json_add_member("unit", "%");
> +
> +                       pops->close_struct();
> +               }
> +
> +               pops->close_struct();
> +               pops->close_struct();
> +       }
> +
> +       return lines;
> +}
> +
> +static int
> +print_clients_footer(struct clients *clients, double t,
> +                    int lines, int con_w, int con_h)
> +{
> +       if (output_mode == INTERACTIVE) {
> +               if (lines++ < con_h)
> +                       printf("\n");
> +       } else {
> +               if (clients->num_classes)
> +                       pops->close_struct();
> +       }
> +
> +       return lines;
> +}
> +
>  static bool stop_top;
>  
>  static void sigint_handler(int  sig)
> @@ -1492,6 +1979,7 @@ static void process_stdin(unsigned int timeout_us)
>  int main(int argc, char **argv)
>  {
>         unsigned int period_us = DEFAULT_PERIOD_MS * 1000;
> +       struct clients *clients = NULL;
>         int con_w = -1, con_h = -1;
>         char *output_path = NULL;
>         struct engines *engines;
> @@ -1625,13 +2113,20 @@ int main(int argc, char **argv)
>  
>         ret = EXIT_SUCCESS;
>  
> +       clients = init_clients(card.pci_slot_name[0] ? card.card : NULL);
> +       init_engine_classes(engines);
> +       clients->num_classes = engines->num_classes;
> +       clients->class = engines->class;
> +
>         pmu_sample(engines);
> +       scan_clients(clients);
>         codename = igt_device_get_pretty_name(&card, false);
>  
>         while (!stop_top) {
>                 bool consumed = false;
> -               int lines = 0;
> +               int j, lines = 0;
>                 struct winsize ws;
> +               struct client *c;
>                 double t;
>  
>                 /* Update terminal size. */
> @@ -1650,10 +2145,18 @@ int main(int argc, char **argv)
>                 pmu_sample(engines);
>                 t = (double)(engines->ts.cur - engines->ts.prev) / 1e9;
>  
> +               scan_clients(clients);
> +               if (clients) {
> +                       qsort(clients->client, clients->num_clients,
> +                             sizeof(*clients->client), cmp);
> +               }
> +
>                 if (stop_top)
>                         break;
>  
>                 while (!consumed) {
> +                       pops->open_struct(NULL);
> +
>                         lines = print_header(&card, codename, engines,
>                                              t, lines, con_w, con_h,
>                                              &consumed);
> @@ -1661,6 +2164,36 @@ int main(int argc, char **argv)
>                         lines = print_imc(engines, t, lines, con_w, con_h);
>  
>                         lines = print_engines(engines, t, lines, con_w, con_h);
> +
> +                       if (clients) {
> +                               int class_w;
> +
> +                               lines = print_clients_header(clients, lines,
> +                                                            con_w, con_h,
> +                                                            &class_w);
> +
> +                               for_each_client(clients, c, j) {
> +                                       if (lines++ > con_h)
> +                                               break;
> +
> +                                       assert(c->status != PROBE);
> +                                       if (c->status != ALIVE)
> +                                               break;
> +
> +                                       if (c->samples < 2)
> +                                               continue;
> +
> +                                       lines = print_client(c, engines, t,
> +                                                            lines, con_w,
> +                                                            con_h, period_us,
> +                                                            &class_w);
> +                               }
> +
> +                               lines = print_clients_footer(clients, t, lines,
> +                                                            con_w, con_h);
> +                       }
> +
> +                       pops->close_struct();
>                 }
>  
>                 if (stop_top)
> -- 
> 2.27.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Tvrtko Ursulin Jan. 22, 2021, 11:36 a.m. UTC | #2
On 21/01/2021 21:34, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2021-01-21 18:10:05)
>> +static struct clients *init_clients(const char *drm_card)
>> +{
>> +       struct clients *clients = malloc(sizeof(*clients));
>> +       const char *slash;
>> +       ssize_t ret;
>> +
>> +       memset(clients, 0, sizeof(*clients));
>> +
>> +       if (drm_card) {
>> +               slash = rindex(drm_card, '/');
>> +               assert(slash);
>> +       } else {
>> +               slash = "card0";
>> +       }
>> +
>> +       ret = snprintf(clients->sysfs_root, sizeof(clients->sysfs_root),
>> +                      "/sys/class/drm/%s/clients/", slash);
>> +       assert(ret > 0 && ret < sizeof(clients->sysfs_root));
> 
> Afaict, igt_device_card_match_pci et al are not guaranteed to fill in
> dev->drm_card and may leave it blank instead of finding the
> corresponding /dev/dri/cardN.

I did the rest but this is giving me trouble - can't see locally that it 
doesn't populate it. Do you know how to trigger that?

Regards,

Tvrtko
Chris Wilson Jan. 22, 2021, 11:41 a.m. UTC | #3
Quoting Tvrtko Ursulin (2021-01-22 11:36:48)
> 
> On 21/01/2021 21:34, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2021-01-21 18:10:05)
> >> +static struct clients *init_clients(const char *drm_card)
> >> +{
> >> +       struct clients *clients = malloc(sizeof(*clients));
> >> +       const char *slash;
> >> +       ssize_t ret;
> >> +
> >> +       memset(clients, 0, sizeof(*clients));
> >> +
> >> +       if (drm_card) {
> >> +               slash = rindex(drm_card, '/');
> >> +               assert(slash);
> >> +       } else {
> >> +               slash = "card0";
> >> +       }
> >> +
> >> +       ret = snprintf(clients->sysfs_root, sizeof(clients->sysfs_root),
> >> +                      "/sys/class/drm/%s/clients/", slash);
> >> +       assert(ret > 0 && ret < sizeof(clients->sysfs_root));
> > 
> > Afaict, igt_device_card_match_pci et al are not guaranteed to fill in
> > dev->drm_card and may leave it blank instead of finding the
> > corresponding /dev/dri/cardN.
> 
> I did the rest but this is giving me trouble - can't see locally that it 
> doesn't populate it. Do you know how to trigger that?

Maybe it doesn't, I was just looking at the udev population of the card
struct and surmised that if I could tell it to open a render device,
dev->drm_card will not be found.

This is all speculation on my part, I've just been burnt enough times
trying to set engine properties on the render sysfs :(
-Chris
diff mbox series

Patch

diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c
index 72ad7cbe9a8c..abd7d3cfc8d4 100644
--- a/tools/intel_gpu_top.c
+++ b/tools/intel_gpu_top.c
@@ -1,5 +1,5 @@ 
 /*
- * Copyright © 2007-2019 Intel Corporation
+ * Copyright © 2007-2021 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -625,23 +625,356 @@  static void pmu_sample(struct engines *engines)
 	}
 }
 
+/* Lifecycle state of one slot in the clients->client[] array. */
+enum client_status {
+	FREE = 0, /* Unused slot; must be zero since slots are cleared with memset(). */
+	ALIVE, /* Set by update_client() when the client was sampled. */
+	PROBE /* Set at the start of a scan; slots still PROBE afterwards are freed. */
+};
+
+struct clients;
+
+/* Per-client state; one element of the clients->client[] array. */
+struct client {
+	struct clients *clients; /* Owning container, set by add_client(). */
+
+	enum client_status status;
+	int sysfs_root; /* Directory fd of this client's sysfs directory. */
+	unsigned int id; /* Numeric name of the client's sysfs directory. */
+	unsigned int pid; /* Parsed from the client's "pid" sysfs file. */
+	char name[128]; /* Copied from the client's "name" sysfs file. */
+	unsigned int samples; /* Number of update_client() calls so far. */
+	unsigned long total; /* Sum of the per-class deltas added by the latest update. */
+	struct engines *engines; /* NOTE(review): not referenced by the code visible here. */
+	unsigned long *val; /* Per-class busy delta between the last two samples. */
+	uint64_t *last; /* Per-class raw busy counter from the latest accepted sample. */
+};
+
+/* Container tracking every client discovered below one sysfs clients directory. */
+struct clients {
+	unsigned int num_clients; /* Number of slots in client[], in any status. */
+
+	unsigned int num_classes; /* Number of entries in class[]. */
+	struct engine_class *class; /* Engine classes; supplied by the caller. */
+
+	char sysfs_root[64]; /* Path built by init_clients(). */
+
+	struct client *client; /* Slot array, grown with realloc() in add_client(). */
+};
+
+/*
+ * Walk every slot of clients->client[], regardless of its status.
+ *
+ * @clients: container to iterate over
+ * @c: struct client * cursor, advanced one slot per iteration
+ * @tmp: integer scratch variable used as the remaining-slots countdown
+ *
+ * Every argument is parenthesised at each use so that arbitrary expressions
+ * can be passed safely.
+ */
+#define for_each_client(clients, c, tmp) \
+	for ((tmp) = (clients)->num_clients, (c) = (clients)->client; \
+	     (tmp) > 0; (tmp)--, (c)++)
+
+/*
+ * Allocate the clients container and build the sysfs directory path
+ * "/sys/class/drm/<card>/clients/" for a DRM device node.
+ *
+ * @drm_card: path of the DRM card node (for example "/dev/dri/card0"); NULL
+ *            or an empty string selects the default "card0".
+ *
+ * Returns the zero-initialised container. Allocation failure and path
+ * overflow are treated as fatal through assert(), matching the allocation
+ * handling in add_client().
+ */
+static struct clients *init_clients(const char *drm_card)
+{
+	struct clients *clients = calloc(1, sizeof(*clients));
+	const char *card = "card0";
+	int ret;
+
+	assert(clients);
+
+	/*
+	 * Review feedback suggests device matching may leave the card path
+	 * blank, so fall back to card0 instead of asserting on it.
+	 */
+	if (drm_card && drm_card[0]) {
+		const char *slash = strrchr(drm_card, '/');
+
+		/* Keep only the last path component, without the slash. */
+		if (!slash)
+			card = drm_card;
+		else if (slash[1])
+			card = slash + 1;
+	}
+
+	ret = snprintf(clients->sysfs_root, sizeof(clients->sysfs_root),
+		       "/sys/class/drm/%s/clients/", card);
+	assert(ret > 0 && (size_t)ret < sizeof(clients->sysfs_root));
+
+	return clients;
+}
+
+/*
+ * Read up to bufsize - 1 bytes from @fd into @buf and NUL-terminate them.
+ * If more than one byte was read and the last one is a newline, the newline
+ * is replaced by the terminator.
+ *
+ * Returns 0 on success. Returns -1 when read() fails (errno as left by
+ * read()) or when it returns no data (errno set to ENOMSG).
+ */
+static int __read_to_buf(int fd, char *buf, unsigned int bufsize)
+{
+	ssize_t len = read(fd, buf, bufsize - 1);
+	int saved_errno = errno;
+
+	if (len < 0) {
+		errno = saved_errno;
+		return -1;
+	}
+
+	if (len == 0) {
+		errno = ENOMSG;
+		return -1;
+	}
+
+	/* Drop one trailing newline, unless it is the only byte read. */
+	if (len > 1 && buf[len - 1] == '\n')
+		len--;
+
+	buf[len] = '\0';
+
+	return 0;
+}
+
+/*
+ * Read the file @field, resolved relative to the directory fd @root, into
+ * @buf using __read_to_buf().
+ *
+ * Returns 0 on success, or -1 if the file cannot be opened or read.
+ */
+static int
+__read_client_field(int root, const char *field, char *buf, unsigned int bufsize)
+{
+	int field_fd = openat(root, field, O_RDONLY);
+	int err = -1;
+
+	if (field_fd >= 0) {
+		err = __read_to_buf(field_fd, buf, bufsize);
+		close(field_fd);
+	}
+
+	return err;
+}
+
+/*
+ * Read the busy counter of engine class @class for @client from the
+ * "busy/<class>" file below the client's sysfs directory.
+ *
+ * Returns the parsed counter, or 0 when the client has no open sysfs
+ * directory or the file cannot be read.
+ */
+static uint64_t
+read_client_busy(const struct client *client, unsigned int class)
+{
+	char path[32], buf[256], *b;
+	int ret;
+
+	assert(client->sysfs_root >= 0);
+	if (client->sysfs_root < 0)
+		return 0;
+
+	ret = snprintf(path, sizeof(path), "busy/%u", class);
+	assert(ret > 0 && (size_t)ret < sizeof(path));
+	/* snprintf() returns the would-be length, so truncation is >= size. */
+	if (ret <= 0 || (size_t)ret >= sizeof(path))
+		return 0;
+
+	/*
+	 * A client can exit between the directory scan and this read, so a
+	 * failure here is a normal runtime condition and must not abort.
+	 */
+	if (__read_client_field(client->sysfs_root, path, buf, sizeof(buf)))
+		return 0;
+
+	/*
+	 * Handle both single integer and key=value formats by skipping
+	 * leading non-digits.
+	 */
+	b = buf;
+	while (*b && !isdigit((unsigned char)*b))
+		b++;
+
+	return strtoull(b, NULL, 10);
+}
+
+/*
+ * Return the first slot whose status equals @status. For any status other
+ * than FREE the slot's id must also equal @id; for FREE the id is ignored.
+ * Returns NULL when no slot matches.
+ */
+static struct client *
+find_client(struct clients *clients, enum client_status status, unsigned int id)
+{
+	struct client *slot;
+	int left;
+
+	for_each_client(clients, slot, left) {
+		if (slot->status != status)
+			continue;
+
+		if (status == FREE || slot->id == id)
+			return slot;
+	}
+
+	return NULL;
+}
+
+/*
+ * Refresh @c with a new pid and name and take a fresh busyness sample for
+ * every engine class, then mark the client ALIVE.
+ *
+ * For each class whose counter did not go backwards, val[] receives the
+ * delta since the previous sample, the delta is added to total and last[]
+ * is updated. Classes whose counter went backwards are left untouched.
+ */
+static void update_client(struct client *c, unsigned int pid, char *name)
+{
+	const unsigned int num_classes = c->clients->num_classes;
+	uint64_t now[c->clients->num_classes];
+	unsigned int i;
+
+	c->pid = pid;
+
+	if (strcmp(c->name, name) != 0)
+		strncpy(c->name, name, sizeof(c->name) - 1);
+
+	/* Read all classes back to back before doing any accounting. */
+	for (i = 0; i < num_classes; i++)
+		now[i] = read_client_busy(c, c->clients->class[i].class);
+
+	c->total = 0;
+
+	for (i = 0; i < num_classes; i++) {
+		/* A backward jump is skipped; it will catch up soon. */
+		if (now[i] >= c->last[i]) {
+			c->val[i] = now[i] - c->last[i];
+			c->total += c->val[i];
+			c->last[i] = now[i];
+		}
+	}
+
+	c->samples++;
+	c->status = ALIVE;
+}
+
+/*
+ * Start tracking a newly discovered client and take its first sample.
+ *
+ * @clients: container to add to
+ * @id: numeric client id (name of its sysfs directory)
+ * @pid: pid read from sysfs
+ * @name: client name read from sysfs
+ * @sysfs_root: open directory fd for the client; ownership passes to this
+ *              function, which either stores it in the slot or closes it
+ *
+ * Nothing is added if an ALIVE client with the same id already exists.
+ * Allocation failures are fatal through assert().
+ */
+static void
+add_client(struct clients *clients, unsigned int id, unsigned int pid,
+	   char *name, int sysfs_root)
+{
+	struct client *c;
+
+	if (find_client(clients, ALIVE, id)) {
+		/* The fd is not stored anywhere, so close it to avoid a leak. */
+		if (sysfs_root >= 0)
+			close(sysfs_root);
+		return;
+	}
+
+	c = find_client(clients, FREE, 0);
+	if (!c) {
+		unsigned int idx = clients->num_clients;
+		unsigned int num = idx + (idx + 2) / 2;
+		struct client *array;
+
+		/* Keep the old pointer intact until realloc() has succeeded. */
+		array = realloc(clients->client, num * sizeof(*array));
+		assert(array);
+
+		clients->client = array;
+		clients->num_clients = num;
+
+		/* Zeroing the new tail marks every added slot as FREE. */
+		c = &array[idx];
+		memset(c, 0, (num - idx) * sizeof(*c));
+	}
+
+	c->sysfs_root = sysfs_root;
+	c->id = id;
+	c->clients = clients;
+	/* Size by element type; val and last have different element types. */
+	c->val = calloc(clients->num_classes, sizeof(*c->val));
+	c->last = calloc(clients->num_classes, sizeof(*c->last));
+	assert(c->val && c->last);
+
+	update_client(c, pid, name);
+}
+
+/*
+ * Release everything owned by slot @c (sysfs directory fd and the two
+ * per-class arrays) and zero the slot, which returns it to the FREE state.
+ */
+static void free_client(struct client *c)
+{
+	const int dir_fd = c->sysfs_root;
+
+	free(c->last);
+	free(c->val);
+
+	if (dir_fd >= 0)
+		close(dir_fd);
+
+	memset(c, 0, sizeof(*c));
+}
+
+/*
+ * Read one sysfs field of client @id into @buf.
+ *
+ * @buf, @bufsize: destination buffer and its size
+ * @sysfs_root: path of the directory containing the per-client directories
+ * @id: client id, used as the directory name
+ * @field: file name inside the client directory
+ * @client_root: in/out cached directory fd; when negative the client
+ *               directory is opened and the fd stored here for reuse
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+static int
+read_client_sysfs(char *buf, int bufsize, const char *sysfs_root,
+		  unsigned int id, const char *field, int *client_root)
+{
+	if (*client_root < 0) {
+		char namebuf[256];
+		int len;
+
+		len = snprintf(namebuf, sizeof(namebuf), "%s/%u",
+			       sysfs_root, id);
+		assert(len > 0 && (size_t)len < sizeof(namebuf));
+		/*
+		 * snprintf() returns the length it wanted to write, so any
+		 * value >= the buffer size means the path was truncated.
+		 */
+		if (len <= 0 || (size_t)len >= sizeof(namebuf))
+			return -1;
+
+		*client_root = open(namebuf, O_RDONLY | O_DIRECTORY);
+	}
+
+	if (*client_root < 0)
+		return -1;
+
+	return __read_client_field(*client_root, field, buf, bufsize);
+}
+
+/*
+ * Rescan the sysfs clients directory: refresh clients that are still
+ * present, add newly appeared ones and free those that have gone away.
+ *
+ * All ALIVE clients are first demoted to PROBE; update_client() promotes
+ * the ones found again back to ALIVE, and whatever is still PROBE at the
+ * end is freed. A NULL @clients is a no-op.
+ */
+static void scan_clients(struct clients *clients)
+{
+	struct dirent *dent;
+	struct client *c;
+	int tmp;
+	DIR *d;
+
+	if (!clients)
+		return;
+
+	for_each_client(clients, c, tmp) {
+		if (c->status == ALIVE)
+			c->status = PROBE;
+	}
+
+	/*
+	 * If the directory cannot be opened no client can be confirmed, so
+	 * still fall through to the reaping loop rather than leaving slots
+	 * stuck in the transient PROBE state.
+	 */
+	d = opendir(clients->sysfs_root);
+	if (!d)
+		goto reap;
+
+	while ((dent = readdir(d)) != NULL) {
+		char name[256], pid[256];
+		int ret, root = -1, *pr;
+		unsigned int id;
+
+		if (dent->d_type != DT_DIR)
+			continue;
+		if (!isdigit((unsigned char)dent->d_name[0]))
+			continue;
+
+		id = strtoul(dent->d_name, NULL, 10);
+
+		c = find_client(clients, PROBE, id);
+
+		/* Known clients reuse their cached fd, new ones get a fresh one. */
+		if (c)
+			pr = &c->sysfs_root;
+		else
+			pr = &root;
+
+		ret = read_client_sysfs(name, sizeof(name), clients->sysfs_root,
+					id, "name", pr);
+		ret |= read_client_sysfs(pid, sizeof(pid), clients->sysfs_root,
+					id, "pid", pr);
+		if (!ret) {
+			if (!c)
+				add_client(clients, id,
+					   strtoul(pid, NULL, 10), name, root);
+			else
+				update_client(c, strtoul(pid, NULL, 10), name);
+		} else if (c) {
+			c->status = PROBE; /* Will be deleted below. */
+		} else if (root >= 0) {
+			/* The fd was opened for a client that is not added. */
+			close(root);
+		}
+	}
+
+	closedir(d);
+
+reap:
+	for_each_client(clients, c, tmp) {
+		if (c->status == PROBE)
+			free_client(c);
+	}
+}
+
+/*
+ * qsort() comparator for struct client slots.
+ *
+ * Slots are ordered by descending total, where the total counts as zero
+ * unless the slot is ALIVE and has more than one sample. Ties are broken by
+ * descending id.
+ *
+ * Explicit comparisons are used instead of subtraction so that the sign of
+ * the result cannot be corrupted by unsigned wrap-around or by truncation
+ * to int.
+ */
+static int cmp(const void *_a, const void *_b)
+{
+	const struct client *a = _a;
+	const struct client *b = _b;
+	unsigned long tot_a = 0, tot_b = 0;
+
+	if (a->status == ALIVE && a->samples > 1)
+		tot_a = a->total;
+	if (b->status == ALIVE && b->samples > 1)
+		tot_b = b->total;
+
+	if (tot_a != tot_b)
+		return tot_b > tot_a ? 1 : -1;
+
+	if (a->id != b->id)
+		return b->id > a->id ? 1 : -1;
+
+	return 0;
+}
+
 static const char *bars[] = { " ", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█" };
 
+/* Write @n space characters to stdout. */
+static void n_spaces(const unsigned int n)
+{
+	unsigned int left;
+
+	for (left = n; left > 0; left--)
+		putchar(' ');
+}
+
 static void
 print_percentage_bar(double percent, int max_len)
 {
-	int bar_len = percent * (8 * (max_len - 2)) / 100.0;
-	int i;
+	/*
+	 * Print '|', a bar proportional to @percent, padding spaces and a
+	 * closing '|'. @max_len is the total width including both '|', so
+	 * max_len - 2 cells are available for the bar itself.
+	 */
+	int bar_len, i, len = max_len - 2;
+	const int w = 8; /* Sub-steps per cell; bars[] has w + 1 glyphs. */
+
+	/* Callers may pass a width with no room for any interior cell. */
+	if (len < 0)
+		len = 0;
+
+	/* Work in eighths of a cell so that partial blocks are preserved. */
+	bar_len = percent * (w * len) / 100.0;
+	if (bar_len > w * len)
+		bar_len = w * len;
+	if (bar_len < 0)
+		bar_len = 0;
 
 	putchar('|');
 
-	for (i = bar_len; i >= 8; i -= 8)
-		printf("%s", bars[8]);
+	for (i = bar_len; i >= w; i -= w)
+		printf("%s", bars[w]);
 	if (i)
 		printf("%s", bars[i]);
 
-	for (i = 0; i < (max_len - 2 - (bar_len + 7) / 8); i++)
-		putchar(' ');
+	/* Pad with the cells not used by full or partial blocks. */
+	len -= (bar_len + (w - 1)) / w;
+	n_spaces(len);
 
 	putchar('|');
 }
@@ -744,6 +1077,18 @@  json_close_struct(void)
 		fflush(stdout);
 }
 
+/*
+ * Write @s to the output stream as the contents of a JSON string: quotes
+ * and backslashes are backslash-escaped and control characters are emitted
+ * as \u00XX sequences. The surrounding quotes are left to the caller.
+ */
+static void json_put_escaped(const char *s)
+{
+	const unsigned char *p;
+
+	for (p = (const unsigned char *)s; *p; p++) {
+		if (*p == '"' || *p == '\\')
+			fprintf(out, "\\%c", *p);
+		else if (*p < 0x20)
+			fprintf(out, "\\u%04x", *p);
+		else
+			fputc(*p, out);
+	}
+}
+
+/*
+ * Append a "key": "value" string member to the JSON structure currently
+ * being written, preceded by a separator if the structure already has
+ * members, and bump the member count.
+ *
+ * @key is written verbatim. @val is escaped, since it may carry text which
+ * did not originate in this program (for example a client name).
+ */
+static void
+__json_add_member(const char *key, const char *val)
+{
+	assert(json_indent_level < ARRAY_SIZE(json_indent));
+
+	fprintf(out, "%s%s\"%s\": \"",
+		json_struct_members ? ",\n" : "",
+		json_indent[json_indent_level], key);
+	json_put_escaped(val);
+	fputc('"', out);
+
+	json_struct_members++;
+}
+
 static unsigned int
 json_add_member(const struct cnt_group *parent, struct cnt_item *item,
 		unsigned int headers)
@@ -1046,8 +1391,6 @@  print_header(const struct igt_device_card *card,
 		memmove(&groups[0], &groups[1],
 			sizeof(groups) - sizeof(groups[0]));
 
-	pops->open_struct(NULL);
-
 	*consumed = print_groups(groups);
 
 	if (output_mode == INTERACTIVE) {
@@ -1204,7 +1547,7 @@  print_engine(struct engines *engines, unsigned int i, double t,
 			      engine->display_name, engine_items[0].buf);
 
 		val = pmu_calc(&engine->busy.val, 1e9, t, 100);
-		print_percentage_bar(val, max_w - len);
+		print_percentage_bar(val, max_w > len ? max_w - len : 0);
 
 		printf("%s\n", buf);
 
@@ -1219,7 +1562,6 @@  print_engines_footer(struct engines *engines, double t,
 		     int lines, int con_w, int con_h)
 {
 	pops->close_struct();
-	pops->close_struct();
 
 	if (output_mode == INTERACTIVE) {
 		if (lines++ < con_h)
@@ -1243,6 +1585,9 @@  static void init_engine_classes(struct engines *engines)
 	unsigned int i, num;
 	int max = -1;
 
+	if (engines->num_classes)
+		return;
+
 	for (i = 0; i < engines->num_engines; i++) {
 		struct engine *engine = engine_ptr(engines, i);
 
@@ -1404,6 +1749,148 @@  print_engines(struct engines *engines, double t, int lines, int w, int h)
 	return lines;
 }
 
+/*
+ * Print the heading of the per-client section.
+ *
+ * In INTERACTIVE mode this emits one reverse-video row made of the PID/NAME
+ * caption followed by the name of every engine class which has engines,
+ * each centred in an equally sized column, and stores that column width in
+ * @class_w. In the other output modes it opens the "clients" structure,
+ * provided any engine classes are known.
+ *
+ * @class_w is always written; it is 0 when no columns were laid out.
+ *
+ * Returns the updated @lines counter.
+ */
+static int
+print_clients_header(struct clients *clients, int lines,
+		     int con_w, int con_h, int *class_w)
+{
+	/* Never hand an indeterminate width back to the caller. */
+	*class_w = 0;
+
+	if (output_mode == INTERACTIVE) {
+		const char *pidname = "   PID              NAME ";
+		unsigned int num_active = 0;
+		int len = strlen(pidname);
+		unsigned int i;
+
+		if (lines++ >= con_h)
+			return lines;
+
+		printf("\033[7m");
+		printf("%s", pidname);
+
+		/*
+		 * Out of rows or out of columns: stop here, but switch the
+		 * reverse video attribute back off before leaving.
+		 *
+		 * NOTE(review): lines is advanced twice for this single row;
+		 * kept as is, confirm whether that is intended.
+		 */
+		if (lines++ >= con_h) {
+			printf("\033[0m");
+			return lines;
+		}
+		if (len >= con_w) {
+			printf("\033[0m\n");
+			return lines;
+		}
+
+		for (i = 0; i < clients->num_classes; i++) {
+			if (clients->class[i].num_engines)
+				num_active++;
+		}
+
+		/* Without any populated class there is nothing to divide. */
+		if (num_active) {
+			int width;
+
+			*class_w = width = (con_w - len) / num_active;
+
+			for (i = 0; i < clients->num_classes; i++) {
+				const char *name = clients->class[i].name;
+				int name_len = strlen(name);
+				int pad = (width - name_len) / 2;
+				int spaces = width - pad - name_len;
+
+				if (!clients->class[i].num_engines)
+					continue; /* Assert in the ideal world. */
+
+				/* Name does not fit into the column. */
+				if (pad < 0 || spaces < 0)
+					continue;
+
+				n_spaces(pad);
+				printf("%s", name);
+				n_spaces(spaces);
+				len += pad + name_len + spaces;
+			}
+		}
+
+		/* Extend the reverse-video row to the console width. */
+		n_spaces(con_w - len);
+		printf("\033[0m\n");
+	} else {
+		if (clients->num_classes)
+			pops->open_struct("clients");
+	}
+
+	return lines;
+}
+
+/*
+ * Print one client.
+ *
+ * In INTERACTIVE mode a single row is written: pid and name followed by one
+ * busyness bar, @class_w columns wide, for every engine class which has
+ * engines. The percentage is c->val[] relative to @period_us, divided by
+ * the number of engines in the class and capped at 100.
+ *
+ * In JSON mode a structure named after the client id is written, holding
+ * the name, the pid and an "engine-classes" structure with a "busy"/"unit"
+ * pair per class; here the value is not divided by the engine count.
+ *
+ * @engines, @t, @con_w and @con_h are not used by this function.
+ *
+ * Returns the updated @lines counter.
+ */
+static int
+print_client(struct client *c, struct engines *engines, double t, int lines,
+	     int con_w, int con_h, unsigned int period_us, int *class_w)
+{
+	struct clients *clients = c->clients;
+	unsigned int i;
+
+	if (output_mode == INTERACTIVE) {
+		printf("%6u %17s ", c->pid, c->name);
+
+		for (i = 0; i < clients->num_classes; i++) {
+			double pct;
+
+			if (!clients->class[i].num_engines)
+				continue; /* Assert in the ideal world. */
+
+			pct = (double)c->val[i] / period_us / 1e3 * 100 /
+			      clients->class[i].num_engines;
+
+			/*
+			 * Guard against possible time-drift between sampling
+			 * client data and time we obtained our time-delta from
+			 * PMU.
+			 */
+			if (pct > 100.0)
+				pct = 100.0;
+
+			/*
+			 * A bar needs at least its two delimiters; on a
+			 * console too narrow for that draw nothing rather
+			 * than ask for a bar of impossible width.
+			 */
+			if (*class_w >= 2)
+				print_percentage_bar(pct, *class_w);
+
+			/*
+			 * NOTE(review): counts one line per class although
+			 * the whole client is a single row; kept as is.
+			 */
+			lines++;
+		}
+
+		putchar('\n');
+	} else if (output_mode == JSON) {
+		char buf[64];
+
+		snprintf(buf, sizeof(buf), "%u", c->id);
+		pops->open_struct(buf);
+
+		__json_add_member("name", c->name);
+
+		snprintf(buf, sizeof(buf), "%u", c->pid);
+		__json_add_member("pid", buf);
+
+		pops->open_struct("engine-classes");
+
+		for (i = 0; i < clients->num_classes; i++) {
+			double pct;
+
+			snprintf(buf, sizeof(buf), "%s",
+				 clients->class[i].name);
+			pops->open_struct(buf);
+
+			pct = (double)c->val[i] / period_us / 1e3 * 100;
+			snprintf(buf, sizeof(buf), "%f", pct);
+			__json_add_member("busy", buf);
+
+			__json_add_member("unit", "%");
+
+			pops->close_struct();
+		}
+
+		/* Close "engine-classes", then the client itself. */
+		pops->close_struct();
+		pops->close_struct();
+	}
+
+	return lines;
+}
+
+/*
+ * Finish the per-client section.
+ *
+ * Outside INTERACTIVE mode this closes the currently open structure when
+ * engine classes are known and leaves @lines untouched. In INTERACTIVE mode
+ * it writes an empty line if the console still has room and counts one
+ * line either way.
+ *
+ * @t and @con_w are not used. Returns the updated @lines counter.
+ */
+static int
+print_clients_footer(struct clients *clients, double t,
+		     int lines, int con_w, int con_h)
+{
+	if (output_mode != INTERACTIVE) {
+		if (clients->num_classes)
+			pops->close_struct();
+
+		return lines;
+	}
+
+	if (lines < con_h)
+		putchar('\n');
+
+	return lines + 1;
+}
+
 static bool stop_top;
 
 static void sigint_handler(int  sig)
@@ -1492,6 +1979,7 @@  static void process_stdin(unsigned int timeout_us)
 int main(int argc, char **argv)
 {
 	unsigned int period_us = DEFAULT_PERIOD_MS * 1000;
+	struct clients *clients = NULL;
 	int con_w = -1, con_h = -1;
 	char *output_path = NULL;
 	struct engines *engines;
@@ -1625,13 +2113,20 @@  int main(int argc, char **argv)
 
 	ret = EXIT_SUCCESS;
 
+	clients = init_clients(card.pci_slot_name[0] ? card.card : NULL);
+	init_engine_classes(engines);
+	clients->num_classes = engines->num_classes;
+	clients->class = engines->class;
+
 	pmu_sample(engines);
+	scan_clients(clients);
 	codename = igt_device_get_pretty_name(&card, false);
 
 	while (!stop_top) {
 		bool consumed = false;
-		int lines = 0;
+		int j, lines = 0;
 		struct winsize ws;
+		struct client *c;
 		double t;
 
 		/* Update terminal size. */
@@ -1650,10 +2145,18 @@  int main(int argc, char **argv)
 		pmu_sample(engines);
 		t = (double)(engines->ts.cur - engines->ts.prev) / 1e9;
 
+		scan_clients(clients);
+		if (clients) {
+			qsort(clients->client, clients->num_clients,
+			      sizeof(*clients->client), cmp);
+		}
+
 		if (stop_top)
 			break;
 
 		while (!consumed) {
+			pops->open_struct(NULL);
+
 			lines = print_header(&card, codename, engines,
 					     t, lines, con_w, con_h,
 					     &consumed);
@@ -1661,6 +2164,36 @@  int main(int argc, char **argv)
 			lines = print_imc(engines, t, lines, con_w, con_h);
 
 			lines = print_engines(engines, t, lines, con_w, con_h);
+
+			if (clients) {
+				int class_w;
+
+				lines = print_clients_header(clients, lines,
+							     con_w, con_h,
+							     &class_w);
+
+				for_each_client(clients, c, j) {
+					if (lines++ > con_h)
+						break;
+
+					assert(c->status != PROBE);
+					if (c->status != ALIVE)
+						break;
+
+					if (c->samples < 2)
+						continue;
+
+					lines = print_client(c, engines, t,
+							     lines, con_w,
+							     con_h, period_us,
+							     &class_w);
+				}
+
+				lines = print_clients_footer(clients, t, lines,
+							     con_w, con_h);
+			}
+
+			pops->close_struct();
 		}
 
 		if (stop_top)