[RFC,6/8] drm/i915: Expose per-engine client busyness
diff mbox series

Message ID 20200110133049.2705-7-tvrtko.ursulin@linux.intel.com
State New
Headers show
Series
  • Per client engine busyness
Related show

Commit Message

Tvrtko Ursulin Jan. 10, 2020, 1:30 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Expose per-client and per-engine busyness under the previously added sysfs
client root.

There is one new file per engine class, located under the 'busy' directory.
Each file contains a monotonically increasing, nanosecond-resolution total of
the time that client's jobs have spent executing on the GPU.

This enables userspace to create a top-like tool for GPU utilization:

==========================================================================
intel-gpu-top -  935/ 935 MHz;    0% RC6; 14.73 Watts;     1097 irqs/s

      IMC reads:     1401 MiB/s
     IMC writes:        4 MiB/s

          ENGINE      BUSY                                 MI_SEMA MI_WAIT
     Render/3D/0   63.73% |███████████████████           |      3%      0%
       Blitter/0    9.53% |██▊                           |      6%      0%
         Video/0   39.32% |███████████▊                  |     16%      0%
         Video/1   15.62% |████▋                         |      0%      0%
  VideoEnhance/0    0.00% |                              |      0%      0%

  PID            NAME     RCS          BCS          VCS         VECS
 4084        gem_wsim |█████▌     ||█          ||           ||           |
 4086        gem_wsim |█▌         ||           ||███        ||           |
==========================================================================

v2: Use intel_context_engine_get_busy_time.
v3: New directory structure.
v4: Rebase.
v5: sysfs_attr_init.
v6: Small tidy in i915_gem_add_client.
v7: Rebase to be engine class based.
v8:
 * Always enable stats.
 * Walk all client contexts.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drm_client.c | 127 ++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_drm_client.h |  13 +++
 2 files changed, 139 insertions(+), 1 deletion(-)

Comments

Chris Wilson Jan. 10, 2020, 1:58 p.m. UTC | #1
Quoting Tvrtko Ursulin (2020-01-10 13:30:47)
> +static ssize_t
> +show_client_busy(struct device *kdev, struct device_attribute *attr, char *buf)
> +{
> +       struct i915_engine_busy_attribute *i915_attr =
> +               container_of(attr, typeof(*i915_attr), attr);
> +       struct list_head *list = &i915_attr->client->ctx_list;
> +       unsigned int engine_class = i915_attr->engine_class;
> +       struct i915_gem_context *ctx;
> +       u64 total = 0;
> +
> +       if (i915_attr->no_busy_stats)
> +               return -ENODEV;
> +
> +       rcu_read_lock();
> +       list_for_each_entry_rcu(ctx, list, client_link)
> +               total += sw_busy_add(ctx, engine_class);
> +       rcu_read_unlock();
> +
> +       return snprintf(buf, PAGE_SIZE, "%llu\n", total);
> +}
> +
> +static const char *uabi_class_names[] = {
> +       [I915_ENGINE_CLASS_RENDER] = "0",
> +       [I915_ENGINE_CLASS_COPY] = "1",
> +       [I915_ENGINE_CLASS_VIDEO] = "2",
> +       [I915_ENGINE_CLASS_VIDEO_ENHANCE] = "3",
> +};

Hmm. /sys/class/drm/card0/clients/0/busy/0

Ok. I was worried this was 0/0 and so very bland and liable to clash
later.

> +
>  int
>  __i915_drm_client_register(struct i915_drm_client *client,
>                            struct task_struct *task)
>  {
>         struct i915_drm_clients *clients = client->clients;
> +       struct drm_i915_private *i915 =
> +               container_of(clients, typeof(*i915), clients);
> +       struct intel_engine_cs *engine;
>         struct device_attribute *attr;
> -       int ret = -ENOMEM;
> +       int i, ret = -ENOMEM;
>         char idstr[32];
>  
>         if (!clients->root)
> @@ -77,10 +130,71 @@ __i915_drm_client_register(struct i915_drm_client *client,
>         if (ret)
>                 goto err_attr;
>  
> +       if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
> +               client->busy_root =
> +                       kobject_create_and_add("busy", client->root);
> +               if (!client->busy_root)
> +                       goto err_attr;
> +
> +               for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++) {
> +                       struct i915_engine_busy_attribute *i915_attr =
> +                               &client->attr.busy[i];


if (!intel_engine_lookup_user(i915, i, 0))
	continue;

i.e. skip if we don't have any engines of that class in the system.

> +
> +                       i915_attr->client = client;
> +                       i915_attr->engine_class = i;
> +
> +                       attr = &i915_attr->attr;
> +
> +                       sysfs_attr_init(&attr->attr);
> +
> +                       attr->attr.name = uabi_class_names[i];
> +                       attr->attr.mode = 0444;
> +                       attr->show = show_client_busy;
> +
> +                       ret = sysfs_create_file(client->busy_root,
> +                                               (struct attribute *)attr);
> +                       if (ret)
> +                               goto err_busy;
> +               }
> +
> +               /* Enable busy stats on all engines. */
> +               i = 0;
> +               for_each_uabi_engine(engine, i915) {
> +                       ret = intel_enable_engine_stats(engine);

Hmm. We gave it a global bit in 

	i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED.

That'll avoid having to do the individual checking and rollback.
-Chris
Tvrtko Ursulin Jan. 10, 2020, 2:09 p.m. UTC | #2
On 10/01/2020 13:58, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-01-10 13:30:47)
>> +static ssize_t
>> +show_client_busy(struct device *kdev, struct device_attribute *attr, char *buf)
>> +{
>> +       struct i915_engine_busy_attribute *i915_attr =
>> +               container_of(attr, typeof(*i915_attr), attr);
>> +       struct list_head *list = &i915_attr->client->ctx_list;
>> +       unsigned int engine_class = i915_attr->engine_class;
>> +       struct i915_gem_context *ctx;
>> +       u64 total = 0;
>> +
>> +       if (i915_attr->no_busy_stats)
>> +               return -ENODEV;
>> +
>> +       rcu_read_lock();
>> +       list_for_each_entry_rcu(ctx, list, client_link)
>> +               total += sw_busy_add(ctx, engine_class);
>> +       rcu_read_unlock();
>> +
>> +       return snprintf(buf, PAGE_SIZE, "%llu\n", total);
>> +}
>> +
>> +static const char *uabi_class_names[] = {
>> +       [I915_ENGINE_CLASS_RENDER] = "0",
>> +       [I915_ENGINE_CLASS_COPY] = "1",
>> +       [I915_ENGINE_CLASS_VIDEO] = "2",
>> +       [I915_ENGINE_CLASS_VIDEO_ENHANCE] = "3",
>> +};
> 
> Hmm. /sys/class/drm/card0/clients/0/busy/0
> 
> Ok. I was worried this was 0/0 and so very bland and liable to clash
> later.
> 
>> +
>>   int
>>   __i915_drm_client_register(struct i915_drm_client *client,
>>                             struct task_struct *task)
>>   {
>>          struct i915_drm_clients *clients = client->clients;
>> +       struct drm_i915_private *i915 =
>> +               container_of(clients, typeof(*i915), clients);
>> +       struct intel_engine_cs *engine;
>>          struct device_attribute *attr;
>> -       int ret = -ENOMEM;
>> +       int i, ret = -ENOMEM;
>>          char idstr[32];
>>   
>>          if (!clients->root)
>> @@ -77,10 +130,71 @@ __i915_drm_client_register(struct i915_drm_client *client,
>>          if (ret)
>>                  goto err_attr;
>>   
>> +       if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
>> +               client->busy_root =
>> +                       kobject_create_and_add("busy", client->root);
>> +               if (!client->busy_root)
>> +                       goto err_attr;
>> +
>> +               for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++) {
>> +                       struct i915_engine_busy_attribute *i915_attr =
>> +                               &client->attr.busy[i];
> 
> 
> if (!intel_engine_lookup_user(i915, i, 0))
> 	continue;
> 
> i.e. skip if we don't have any engines of that class in the system.

Yes, thanks.

>> +
>> +                       i915_attr->client = client;
>> +                       i915_attr->engine_class = i;
>> +
>> +                       attr = &i915_attr->attr;
>> +
>> +                       sysfs_attr_init(&attr->attr);
>> +
>> +                       attr->attr.name = uabi_class_names[i];
>> +                       attr->attr.mode = 0444;
>> +                       attr->show = show_client_busy;
>> +
>> +                       ret = sysfs_create_file(client->busy_root,
>> +                                               (struct attribute *)attr);
>> +                       if (ret)
>> +                               goto err_busy;
>> +               }
>> +
>> +               /* Enable busy stats on all engines. */
>> +               i = 0;
>> +               for_each_uabi_engine(engine, i915) {
>> +                       ret = intel_enable_engine_stats(engine);
> 
> Hmm. We gave it a global bit in
> 
> 	i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED.
> 
> That'll avoid having to do the individual checking and rollback.

I could add a top level check as a short circuit, but I prefer to check 
return code from intel_enable_engine_stats since it returns one.

Also if new GuC will have I915_SCHEDULER_CAP_ENABLED it will still fail 
to enable engine stats and then fallback to pphwsp has to happen.

Regards,

Tvrtko
Chris Wilson Jan. 10, 2020, 2:12 p.m. UTC | #3
Quoting Tvrtko Ursulin (2020-01-10 14:09:09)
> 
> On 10/01/2020 13:58, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2020-01-10 13:30:47)
> >> +static ssize_t
> >> +show_client_busy(struct device *kdev, struct device_attribute *attr, char *buf)
> >> +{
> >> +       struct i915_engine_busy_attribute *i915_attr =
> >> +               container_of(attr, typeof(*i915_attr), attr);
> >> +       struct list_head *list = &i915_attr->client->ctx_list;
> >> +       unsigned int engine_class = i915_attr->engine_class;
> >> +       struct i915_gem_context *ctx;
> >> +       u64 total = 0;
> >> +
> >> +       if (i915_attr->no_busy_stats)
> >> +               return -ENODEV;
> >> +
> >> +       rcu_read_lock();
> >> +       list_for_each_entry_rcu(ctx, list, client_link)
> >> +               total += sw_busy_add(ctx, engine_class);
> >> +       rcu_read_unlock();
> >> +
> >> +       return snprintf(buf, PAGE_SIZE, "%llu\n", total);
> >> +}
> >> +
> >> +static const char *uabi_class_names[] = {
> >> +       [I915_ENGINE_CLASS_RENDER] = "0",
> >> +       [I915_ENGINE_CLASS_COPY] = "1",
> >> +       [I915_ENGINE_CLASS_VIDEO] = "2",
> >> +       [I915_ENGINE_CLASS_VIDEO_ENHANCE] = "3",
> >> +};
> > 
> > Hmm. /sys/class/drm/card0/clients/0/busy/0
> > 
> > Ok. I was worried this was 0/0 and so very bland and liable to clash
> > later.
> > 
> >> +
> >>   int
> >>   __i915_drm_client_register(struct i915_drm_client *client,
> >>                             struct task_struct *task)
> >>   {
> >>          struct i915_drm_clients *clients = client->clients;
> >> +       struct drm_i915_private *i915 =
> >> +               container_of(clients, typeof(*i915), clients);
> >> +       struct intel_engine_cs *engine;
> >>          struct device_attribute *attr;
> >> -       int ret = -ENOMEM;
> >> +       int i, ret = -ENOMEM;
> >>          char idstr[32];
> >>   
> >>          if (!clients->root)
> >> @@ -77,10 +130,71 @@ __i915_drm_client_register(struct i915_drm_client *client,
> >>          if (ret)
> >>                  goto err_attr;
> >>   
> >> +       if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
> >> +               client->busy_root =
> >> +                       kobject_create_and_add("busy", client->root);
> >> +               if (!client->busy_root)
> >> +                       goto err_attr;
> >> +
> >> +               for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++) {
> >> +                       struct i915_engine_busy_attribute *i915_attr =
> >> +                               &client->attr.busy[i];
> > 
> > 
> > if (!intel_engine_lookup_user(i915, i, 0))
> >       continue;
> > 
> > i.e. skip if we don't have any engines of that class in the system.
> 
> Yes, thanks.
> 
> >> +
> >> +                       i915_attr->client = client;
> >> +                       i915_attr->engine_class = i;
> >> +
> >> +                       attr = &i915_attr->attr;
> >> +
> >> +                       sysfs_attr_init(&attr->attr);
> >> +
> >> +                       attr->attr.name = uabi_class_names[i];
> >> +                       attr->attr.mode = 0444;
> >> +                       attr->show = show_client_busy;
> >> +
> >> +                       ret = sysfs_create_file(client->busy_root,
> >> +                                               (struct attribute *)attr);
> >> +                       if (ret)
> >> +                               goto err_busy;
> >> +               }
> >> +
> >> +               /* Enable busy stats on all engines. */
> >> +               i = 0;
> >> +               for_each_uabi_engine(engine, i915) {
> >> +                       ret = intel_enable_engine_stats(engine);
> > 
> > Hmm. We gave it a global bit in
> > 
> >       i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED.
> > 
> > That'll avoid having to do the individual checking and rollback.
> 
> I could add a top level check as a short circuit, but I prefer to check 
> return code from intel_enable_engine_stats since it returns one.

My suggestion was to remove the return code and make it bug out, as we
[can] check before use in i915_pmu.c as well.

> Also if new GuC will have I915_SCHEDULER_CAP_ENABLED it will still fail 
> to enable engine stats and then fallback to pphwsp has to happen.

Brainfart, CAP_SUPPORTS_STATS. 
-Chris

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 195777b95891..55b2f86cc4c1 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -8,7 +8,11 @@ 
 #include <linux/slab.h>
 #include <linux/types.h>
 
+#include <uapi/drm/i915_drm.h>
+
 #include "i915_drm_client.h"
+#include "gem/i915_gem_context.h"
+#include "i915_drv.h"
 #include "i915_gem.h"
 #include "i915_utils.h"
 
@@ -36,13 +40,62 @@  show_client_pid(struct device *kdev, struct device_attribute *attr, char *buf)
 			client->closed ? ">" : "");
 }
 
+static u64
+sw_busy_add(struct i915_gem_context *ctx, unsigned int engine_class)
+{
+	struct i915_gem_engines *engines = rcu_dereference(ctx->engines);
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
+	u64 total = 0;
+
+	for_each_gem_engine(ce, engines, it) {
+		if (ce->engine->uabi_class != engine_class)
+			continue;
+
+		total += ktime_to_ns(intel_context_get_busy_time(ce));
+	}
+
+	return total;
+}
+
+static ssize_t
+show_client_busy(struct device *kdev, struct device_attribute *attr, char *buf)
+{
+	struct i915_engine_busy_attribute *i915_attr =
+		container_of(attr, typeof(*i915_attr), attr);
+	struct list_head *list = &i915_attr->client->ctx_list;
+	unsigned int engine_class = i915_attr->engine_class;
+	struct i915_gem_context *ctx;
+	u64 total = 0;
+
+	if (i915_attr->no_busy_stats)
+		return -ENODEV;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ctx, list, client_link)
+		total += sw_busy_add(ctx, engine_class);
+	rcu_read_unlock();
+
+	return snprintf(buf, PAGE_SIZE, "%llu\n", total);
+}
+
+static const char *uabi_class_names[] = {
+	[I915_ENGINE_CLASS_RENDER] = "0",
+	[I915_ENGINE_CLASS_COPY] = "1",
+	[I915_ENGINE_CLASS_VIDEO] = "2",
+	[I915_ENGINE_CLASS_VIDEO_ENHANCE] = "3",
+};
+
 int
 __i915_drm_client_register(struct i915_drm_client *client,
 			   struct task_struct *task)
 {
 	struct i915_drm_clients *clients = client->clients;
+	struct drm_i915_private *i915 =
+		container_of(clients, typeof(*i915), clients);
+	struct intel_engine_cs *engine;
 	struct device_attribute *attr;
-	int ret = -ENOMEM;
+	int i, ret = -ENOMEM;
 	char idstr[32];
 
 	if (!clients->root)
@@ -77,10 +130,71 @@  __i915_drm_client_register(struct i915_drm_client *client,
 	if (ret)
 		goto err_attr;
 
+       if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
+		client->busy_root =
+			kobject_create_and_add("busy", client->root);
+		if (!client->busy_root)
+			goto err_attr;
+
+		for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++) {
+			struct i915_engine_busy_attribute *i915_attr =
+				&client->attr.busy[i];
+
+			i915_attr->client = client;
+			i915_attr->engine_class = i;
+
+			attr = &i915_attr->attr;
+
+			sysfs_attr_init(&attr->attr);
+
+			attr->attr.name = uabi_class_names[i];
+			attr->attr.mode = 0444;
+			attr->show = show_client_busy;
+
+			ret = sysfs_create_file(client->busy_root,
+						(struct attribute *)attr);
+			if (ret)
+				goto err_busy;
+		}
+
+		/* Enable busy stats on all engines. */
+		i = 0;
+		for_each_uabi_engine(engine, i915) {
+			ret = intel_enable_engine_stats(engine);
+			if (ret) {
+				int j, k;
+
+				/* Unwind if not available. */
+				j = 0;
+				for_each_uabi_engine(engine, i915) {
+					if (j++ == i)
+						break;
+
+					intel_disable_engine_stats(engine);
+				}
+
+				for (k = 0;
+				     k < ARRAY_SIZE(uabi_class_names);
+				     k++) {
+					GEM_WARN_ON(client->attr.busy[k].no_busy_stats);
+					client->attr.busy[k].no_busy_stats = true;
+				}
+
+				dev_notice_once(i915->drm.dev,
+						"Engine busy stats not available! (%d)",
+						ret);
+				break;
+			}
+			i++;
+		}
+       }
+
 	client->pid = get_task_pid(task, PIDTYPE_PID);
 
 	return 0;
 
+err_busy:
+	kobject_put(client->busy_root);
 err_attr:
 	kobject_put(client->root);
 err_client:
@@ -91,9 +205,20 @@  __i915_drm_client_register(struct i915_drm_client *client,
 
 void __i915_drm_client_unregister(struct i915_drm_client *client)
 {
+	struct i915_drm_clients *clients = client->clients;
+	struct drm_i915_private *i915 =
+		container_of(clients, typeof(*i915), clients);
+	struct intel_engine_cs *engine;
+
 	if (!client->name)
 		return; /* fbdev client or error during drm open */
 
+	if (client->busy_root && !client->attr.busy[0].no_busy_stats) {
+		for_each_uabi_engine(engine, i915)
+			intel_disable_engine_stats(engine);
+	}
+
+	kobject_put(fetch_and_zero(&client->busy_root));
 	kobject_put(fetch_and_zero(&client->root));
 	put_pid(fetch_and_zero(&client->pid));
 	kfree(fetch_and_zero(&client->name));
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 16d8db075a7d..4b4b9ea0abdf 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -17,11 +17,22 @@ 
 #include <linux/spinlock.h>
 #include <linux/xarray.h>
 
+#include "gt/intel_engine_types.h"
+
 struct i915_drm_clients {
 	struct xarray xarray;
 	struct kobject *root;
 };
 
+struct i915_drm_client;
+
+struct i915_engine_busy_attribute {
+	struct device_attribute attr;
+	struct i915_drm_client *client;
+	unsigned int engine_class;
+	bool no_busy_stats;
+};
+
 struct i915_drm_client {
 	struct kref kref;
 
@@ -38,9 +49,11 @@  struct i915_drm_client {
 	struct i915_drm_clients *clients;
 
 	struct kobject *root;
+	struct kobject *busy_root;
 	struct {
 		struct device_attribute pid;
 		struct device_attribute name;
+		struct i915_engine_busy_attribute busy[MAX_ENGINE_CLASS];
 	} attr;
 };