[08/10] drm/i915: Expose per-engine client busyness
diff mbox series

Message ID 20200318121138.909-9-tvrtko.ursulin@linux.intel.com
State New
Headers show
Series
  • Per client engine busyness
Related show

Commit Message

Tvrtko Ursulin March 18, 2020, 12:11 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Expose per-client and per-engine busyness under the previously added sysfs
client root.

The new files are one per-engine instance and located under the 'busy'
directory. Each contains a monotonically increasing nano-second resolution
times each client's jobs were executing on the GPU.

This enables userspace to create a top-like tool for GPU utilization:

==========================================================================
intel-gpu-top -  935/ 935 MHz;    0% RC6; 14.73 Watts;     1097 irqs/s

      IMC reads:     1401 MiB/s
     IMC writes:        4 MiB/s

          ENGINE      BUSY                                 MI_SEMA MI_WAIT
     Render/3D/0   63.73% |███████████████████           |      3%      0%
       Blitter/0    9.53% |██▊                           |      6%      0%
         Video/0   39.32% |███████████▊                  |     16%      0%
         Video/1   15.62% |████▋                         |      0%      0%
  VideoEnhance/0    0.00% |                              |      0%      0%

  PID            NAME     RCS          BCS          VCS         VECS
 4084        gem_wsim |█████▌     ||█          ||           ||           |
 4086        gem_wsim |█▌         ||           ||███        ||           |
==========================================================================

v2: Use intel_context_engine_get_busy_time.
v3: New directory structure.
v4: Rebase.
v5: sysfs_attr_init.
v6: Small tidy in i915_gem_add_client.
v7: Rebase to be engine class based.
v8:
 * Always enable stats.
 * Walk all client contexts.
v9:
 * Skip unsupported engine classes. (Chris)
 * Use scheduler caps. (Chris)
v10:
 * Use pphwsp runtime only.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drm_client.c | 110 ++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_drm_client.h |  11 +++
 2 files changed, 120 insertions(+), 1 deletion(-)

Comments

Chris Wilson March 18, 2020, 2 p.m. UTC | #1
Quoting Tvrtko Ursulin (2020-03-18 12:11:37)
> +static u64
> +pphwsp_busy_add(struct i915_gem_context *ctx, unsigned int class)
> +{
> +       struct i915_gem_engines *engines = rcu_dereference(ctx->engines);
> +       struct i915_gem_engines_iter it;
> +       struct intel_context *ce;
> +       u64 total = 0;
> +
> +       for_each_gem_engine(ce, engines, it) {
> +               if (ce->engine->uabi_class == class)
> +                       total += ce->runtime.total;
> +       }
> +
> +       return total;
> +}
> +
> +static ssize_t
> +show_client_busy(struct device *kdev, struct device_attribute *attr, char *buf)
> +{
> +       struct i915_engine_busy_attribute *i915_attr =
> +               container_of(attr, typeof(*i915_attr), attr);
> +       unsigned int class = i915_attr->engine_class;
> +       struct i915_drm_client *client = i915_attr->client;
> +       u64 total = atomic64_read(&client->past_runtime[class]);
> +       struct list_head *list = &client->ctx_list;
> +       struct i915_gem_context *ctx;
> +
> +       rcu_read_lock();
> +       list_for_each_entry_rcu(ctx, list, client_link) {
> +               total += atomic64_read(&ctx->past_runtime[class]);
> +               total += pphwsp_busy_add(ctx, class);

Hmm. I would like to have some GEM context agnosticism here. At the
moment, all I have to offer is

struct client_runtime {
	struct list_head client_link;
	atomic64_t past_runtime;
	u64 (*cur_runtime)(struct client_runtime *);
};
-Chris
Tvrtko Ursulin March 18, 2020, 2:15 p.m. UTC | #2
On 18/03/2020 14:00, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-03-18 12:11:37)
>> +static u64
>> +pphwsp_busy_add(struct i915_gem_context *ctx, unsigned int class)
>> +{
>> +       struct i915_gem_engines *engines = rcu_dereference(ctx->engines);
>> +       struct i915_gem_engines_iter it;
>> +       struct intel_context *ce;
>> +       u64 total = 0;
>> +
>> +       for_each_gem_engine(ce, engines, it) {
>> +               if (ce->engine->uabi_class == class)
>> +                       total += ce->runtime.total;
>> +       }
>> +
>> +       return total;
>> +}
>> +
>> +static ssize_t
>> +show_client_busy(struct device *kdev, struct device_attribute *attr, char *buf)
>> +{
>> +       struct i915_engine_busy_attribute *i915_attr =
>> +               container_of(attr, typeof(*i915_attr), attr);
>> +       unsigned int class = i915_attr->engine_class;
>> +       struct i915_drm_client *client = i915_attr->client;
>> +       u64 total = atomic64_read(&client->past_runtime[class]);
>> +       struct list_head *list = &client->ctx_list;
>> +       struct i915_gem_context *ctx;
>> +
>> +       rcu_read_lock();
>> +       list_for_each_entry_rcu(ctx, list, client_link) {
>> +               total += atomic64_read(&ctx->past_runtime[class]);
>> +               total += pphwsp_busy_add(ctx, class);
> 
> Hmm. I would like to have some GEM context agnosticism here. At the
> moment, all I have to offer is
> 
> struct client_runtime {
> 	struct list_head client_link;
> 	atomic64_t past_runtime;
> 	u64 (*cur_runtime)(struct client_runtime *);
> };

What exactly do you mean here? Who keeps a list and of what and what 
does the vfunc do?

Regards,

Tvrtko
Chris Wilson March 18, 2020, 2:34 p.m. UTC | #3
Quoting Tvrtko Ursulin (2020-03-18 14:15:31)
> 
> On 18/03/2020 14:00, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2020-03-18 12:11:37)
> >> +static u64
> >> +pphwsp_busy_add(struct i915_gem_context *ctx, unsigned int class)
> >> +{
> >> +       struct i915_gem_engines *engines = rcu_dereference(ctx->engines);
> >> +       struct i915_gem_engines_iter it;
> >> +       struct intel_context *ce;
> >> +       u64 total = 0;
> >> +
> >> +       for_each_gem_engine(ce, engines, it) {
> >> +               if (ce->engine->uabi_class == class)
> >> +                       total += ce->runtime.total;
> >> +       }
> >> +
> >> +       return total;
> >> +}
> >> +
> >> +static ssize_t
> >> +show_client_busy(struct device *kdev, struct device_attribute *attr, char *buf)
> >> +{
> >> +       struct i915_engine_busy_attribute *i915_attr =
> >> +               container_of(attr, typeof(*i915_attr), attr);
> >> +       unsigned int class = i915_attr->engine_class;
> >> +       struct i915_drm_client *client = i915_attr->client;
> >> +       u64 total = atomic64_read(&client->past_runtime[class]);
> >> +       struct list_head *list = &client->ctx_list;
> >> +       struct i915_gem_context *ctx;
> >> +
> >> +       rcu_read_lock();
> >> +       list_for_each_entry_rcu(ctx, list, client_link) {
> >> +               total += atomic64_read(&ctx->past_runtime[class]);
> >> +               total += pphwsp_busy_add(ctx, class);
> > 
> > Hmm. I would like to have some GEM context agnosticism here. At the
> > moment, all I have to offer is
> > 
> > struct client_runtime {
> >       struct list_head client_link;
> >       atomic64_t past_runtime;
> >       u64 (*cur_runtime)(struct client_runtime *);
> > };
> 
> What exactly do you mean here? Who keeps a list and of what and what 
> does the vfunc do?

This is what you've added to GEM context. Here in i915/i915_drm_client.c
we shouldn't have to know about i915/gem/i915_gem_context internals. So
the GEM context registers its client_runtime (by coupling that into the
list).
-Chris
Tvrtko Ursulin April 15, 2020, 10:19 a.m. UTC | #4
On 18/03/2020 14:34, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-03-18 14:15:31)
>>
>> On 18/03/2020 14:00, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2020-03-18 12:11:37)
>>>> +static u64
>>>> +pphwsp_busy_add(struct i915_gem_context *ctx, unsigned int class)
>>>> +{
>>>> +       struct i915_gem_engines *engines = rcu_dereference(ctx->engines);
>>>> +       struct i915_gem_engines_iter it;
>>>> +       struct intel_context *ce;
>>>> +       u64 total = 0;
>>>> +
>>>> +       for_each_gem_engine(ce, engines, it) {
>>>> +               if (ce->engine->uabi_class == class)
>>>> +                       total += ce->runtime.total;
>>>> +       }
>>>> +
>>>> +       return total;
>>>> +}
>>>> +
>>>> +static ssize_t
>>>> +show_client_busy(struct device *kdev, struct device_attribute *attr, char *buf)
>>>> +{
>>>> +       struct i915_engine_busy_attribute *i915_attr =
>>>> +               container_of(attr, typeof(*i915_attr), attr);
>>>> +       unsigned int class = i915_attr->engine_class;
>>>> +       struct i915_drm_client *client = i915_attr->client;
>>>> +       u64 total = atomic64_read(&client->past_runtime[class]);
>>>> +       struct list_head *list = &client->ctx_list;
>>>> +       struct i915_gem_context *ctx;
>>>> +
>>>> +       rcu_read_lock();
>>>> +       list_for_each_entry_rcu(ctx, list, client_link) {
>>>> +               total += atomic64_read(&ctx->past_runtime[class]);
>>>> +               total += pphwsp_busy_add(ctx, class);
>>>
>>> Hmm. I would like to have some GEM context agnosticism here. At the
>>> moment, all I have to offer is
>>>
>>> struct client_runtime {
>>>        struct list_head client_link;
>>>        atomic64_t past_runtime;
>>>        u64 (*cur_runtime)(struct client_runtime *);
>>> };
>>
>> What exactly do you mean here? Who keeps a list and of what and what
>> does the vfunc do?
> 
> This is what you've added to GEM context. Here in i915/i915_drm_client.c
> we shouldn't have to know about i915/gem/i915_gem_context internals. So
> the GEM context registers its client_runtime (by coupling that into the
> list).

I've ignored this for now as it was feeling a bit too much for too 
little benefit and just decided to send out another simple rebase, given 
that there seems to be some urgency around this work now.

Regards,

Tvrtko

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index c88d9ff448e0..0a2d933fe83c 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -9,8 +9,13 @@ 
 
 #include <drm/drm_print.h>
 
+#include <uapi/drm/i915_drm.h>
+
 #include "i915_drv.h"
 #include "i915_drm_client.h"
+#include "gem/i915_gem_context.h"
+#include "gt/intel_engine_user.h"
+#include "i915_drv.h"
 #include "i915_gem.h"
 #include "i915_utils.h"
 
@@ -52,6 +57,104 @@  show_client_pid(struct device *kdev, struct device_attribute *attr, char *buf)
 	return ret;
 }
 
+static u64
+pphwsp_busy_add(struct i915_gem_context *ctx, unsigned int class)
+{
+	struct i915_gem_engines *engines = rcu_dereference(ctx->engines);
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
+	u64 total = 0;
+
+	for_each_gem_engine(ce, engines, it) {
+		if (ce->engine->uabi_class == class)
+			total += ce->runtime.total;
+	}
+
+	return total;
+}
+
+static ssize_t
+show_client_busy(struct device *kdev, struct device_attribute *attr, char *buf)
+{
+	struct i915_engine_busy_attribute *i915_attr =
+		container_of(attr, typeof(*i915_attr), attr);
+	unsigned int class = i915_attr->engine_class;
+	struct i915_drm_client *client = i915_attr->client;
+	u64 total = atomic64_read(&client->past_runtime[class]);
+	struct list_head *list = &client->ctx_list;
+	struct i915_gem_context *ctx;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ctx, list, client_link) {
+		total += atomic64_read(&ctx->past_runtime[class]);
+		total += pphwsp_busy_add(ctx, class);
+	}
+	rcu_read_unlock();
+
+	total *= RUNTIME_INFO(i915_attr->i915)->cs_timestamp_period_ns;
+
+	return snprintf(buf, PAGE_SIZE, "%llu\n", total);
+}
+
+static const char * const uabi_class_names[] = {
+	[I915_ENGINE_CLASS_RENDER] = "0",
+	[I915_ENGINE_CLASS_COPY] = "1",
+	[I915_ENGINE_CLASS_VIDEO] = "2",
+	[I915_ENGINE_CLASS_VIDEO_ENHANCE] = "3",
+};
+
+static int
+__client_register_sysfs_busy(struct i915_drm_client *client)
+{
+	struct i915_drm_clients *clients = client->clients;
+	struct drm_i915_private *i915 =
+		container_of(clients, typeof(*i915), clients);
+	unsigned int i;
+	int ret = 0;
+
+	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
+		return 0;
+
+	client->busy_root = kobject_create_and_add("busy", client->root);
+	if (!client->busy_root)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++) {
+		struct i915_engine_busy_attribute *i915_attr =
+			&client->attr.busy[i];
+		struct device_attribute *attr = &i915_attr->attr;
+
+		if (!intel_engine_lookup_user(i915, i, 0))
+			continue;
+
+		i915_attr->client = client;
+		i915_attr->i915 = i915;
+		i915_attr->engine_class = i;
+
+		sysfs_attr_init(&attr->attr);
+
+		attr->attr.name = uabi_class_names[i];
+		attr->attr.mode = 0444;
+		attr->show = show_client_busy;
+
+		ret = sysfs_create_file(client->busy_root,
+					(struct attribute *)attr);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	kobject_put(client->busy_root);
+	return ret;
+}
+
+static void __client_unregister_sysfs_busy(struct i915_drm_client *client)
+{
+	kobject_put(fetch_and_zero(&client->busy_root));
+}
+
 static int
 __client_register_sysfs(struct i915_drm_client *client)
 {
@@ -88,9 +191,12 @@  __client_register_sysfs(struct i915_drm_client *client)
 
 		ret = sysfs_create_file(client->root, (struct attribute *)attr);
 		if (ret)
-			break;
+			goto out;
 	}
 
+	ret = __client_register_sysfs_busy(client);
+
+out:
 	if (ret)
 		kobject_put(client->root);
 
@@ -99,6 +205,8 @@  __client_register_sysfs(struct i915_drm_client *client)
 
 static void __client_unregister_sysfs(struct i915_drm_client *client)
 {
+	__client_unregister_sysfs_busy(client);
+
 	kobject_put(fetch_and_zero(&client->root));
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 0be27aa9bbda..98c5fc24fcb4 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -26,6 +26,15 @@  struct i915_drm_clients {
 	struct kobject *root;
 };
 
+struct i915_drm_client;
+
+struct i915_engine_busy_attribute {
+	struct device_attribute attr;
+	struct drm_i915_private *i915;
+	struct i915_drm_client *client;
+	unsigned int engine_class;
+};
+
 struct i915_drm_client {
 	struct kref kref;
 
@@ -44,9 +53,11 @@  struct i915_drm_client {
 	struct i915_drm_clients *clients;
 
 	struct kobject *root;
+	struct kobject *busy_root;
 	struct {
 		struct device_attribute pid;
 		struct device_attribute name;
+		struct i915_engine_busy_attribute busy[MAX_ENGINE_CLASS + 1];
 	} attr;
 
 	/**