[2/5] drm/i915: Expose list of clients in sysfs
diff mbox series

Message ID 20191216120704.958-3-tvrtko.ursulin@linux.intel.com
State New
Headers show
Series
  • Per client engine busyness
Related show

Commit Message

Tvrtko Ursulin Dec. 16, 2019, 12:07 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Expose a list of clients with open file handles in sysfs.

This will be a basis for a top-like utility showing per-client and per-
engine GPU load.

Currently we only expose each client's pid and name under opaque numbered
directories in /sys/class/drm/card0/clients/.

For instance:

/sys/class/drm/card0/clients/3/name: Xorg
/sys/class/drm/card0/clients/3/pid: 5664

v2:
 Chris Wilson:
 * Enclose new members into dedicated structs.
 * Protect against failed sysfs registration.

v3:
 * sysfs_attr_init.

v4:
 * Fix for internal clients.

v5:
 * Use cyclic ida for client id. (Chris)
 * Do not leak pid reference. (Chris)
 * Tidy code with some locals.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h   |  21 +++++
 drivers/gpu/drm/i915/i915_gem.c   | 148 ++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_sysfs.c |   8 ++
 3 files changed, 167 insertions(+), 10 deletions(-)

Comments

Chris Wilson Dec. 16, 2019, 12:51 p.m. UTC | #1
Quoting Tvrtko Ursulin (2019-12-16 12:07:01)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Expose a list of clients with open file handles in sysfs.
> 
> This will be a basis for a top-like utility showing per-client and per-
> engine GPU load.
> 
> Currently we only expose each client's pid and name under opaque numbered
> directories in /sys/class/drm/card0/clients/.
> 
> For instance:
> 
> /sys/class/drm/card0/clients/3/name: Xorg
> /sys/class/drm/card0/clients/3/pid: 5664
> 
> v2:
>  Chris Wilson:
>  * Enclose new members into dedicated structs.
>  * Protect against failed sysfs registration.
> 
> v3:
>  * sysfs_attr_init.
> 
> v4:
>  * Fix for internal clients.
> 
> v5:
>  * Use cyclic ida for client id. (Chris)

I think we are now in the age of xa_alloc_cyclic(). At least the
immediate benefit is that we don't have to worry about the ida locking.
-Chris
Chris Wilson Dec. 16, 2019, 12:53 p.m. UTC | #2
Quoting Tvrtko Ursulin (2019-12-16 12:07:01)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0781b6326b8c..9fcbcb6d6f76 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -224,6 +224,20 @@ struct drm_i915_file_private {
>         /** ban_score: Accumulated score of all ctx bans and fast hangs. */
>         atomic_t ban_score;
>         unsigned long hang_timestamp;
> +
> +       struct i915_drm_client {
> +               unsigned int id;
> +
> +               struct pid *pid;
> +               char *name;

Hmm. Should we scrap i915_gem_context.pid and just use the client.pid?

> +
> +               struct kobject *root;
> +
> +               struct {
> +                       struct device_attribute pid;
> +                       struct device_attribute name;
> +               } attr;
> +       } client;
>  };
Chris Wilson Dec. 16, 2019, 12:55 p.m. UTC | #3
Quoting Tvrtko Ursulin (2019-12-16 12:07:01)
> +static void i915_gem_remove_client(struct drm_i915_file_private *file_priv)
> +{
> +       struct i915_drm_clients *clients = &file_priv->dev_priv->clients;
> +       struct i915_drm_client *client = &file_priv->client;
> +
> +       if (!client->name)
> +               return; /* intel_fbdev_init registers a client before sysfs */
> +
> +       sysfs_remove_file(client->root, (struct attribute *)&client->attr.pid);
> +       sysfs_remove_file(client->root, (struct attribute *)&client->attr.name);
> +       kobject_put(client->root);

Do we need to remove individual files if we unplug the root?
sysfs_remove_dir(client->root) ?
-Chris
Tvrtko Ursulin Dec. 16, 2019, 1:13 p.m. UTC | #4
On 16/12/2019 12:53, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-12-16 12:07:01)
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 0781b6326b8c..9fcbcb6d6f76 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -224,6 +224,20 @@ struct drm_i915_file_private {
>>          /** ban_score: Accumulated score of all ctx bans and fast hangs. */
>>          atomic_t ban_score;
>>          unsigned long hang_timestamp;
>> +
>> +       struct i915_drm_client {
>> +               unsigned int id;
>> +
>> +               struct pid *pid;
>> +               char *name;
> 
> Hmm. Should we scrap i915_gem_context.pid and just use the client.pid?

I guess so, did not realize we already keep a reference.

Regards,

Tvrtko

>> +
>> +               struct kobject *root;
>> +
>> +               struct {
>> +                       struct device_attribute pid;
>> +                       struct device_attribute name;
>> +               } attr;
>> +       } client;
>>   };
>
Tvrtko Ursulin Dec. 16, 2019, 1:16 p.m. UTC | #5
On 16/12/2019 12:55, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-12-16 12:07:01)
>> +static void i915_gem_remove_client(struct drm_i915_file_private *file_priv)
>> +{
>> +       struct i915_drm_clients *clients = &file_priv->dev_priv->clients;
>> +       struct i915_drm_client *client = &file_priv->client;
>> +
>> +       if (!client->name)
>> +               return; /* intel_fbdev_init registers a client before sysfs */
>> +
>> +       sysfs_remove_file(client->root, (struct attribute *)&client->attr.pid);
>> +       sysfs_remove_file(client->root, (struct attribute *)&client->attr.name);
>> +       kobject_put(client->root);
> 
> Do we need to remove individual files if we unplug the root?
> sysfs_remove_dir(client->root) ?

Kerneldoc indeed suggests this should work. Will try.

Regards,

Tvrtko
Chris Wilson Dec. 16, 2019, 1:17 p.m. UTC | #6
Quoting Tvrtko Ursulin (2019-12-16 12:07:01)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Expose a list of clients with open file handles in sysfs.
> 
> This will be a basis for a top-like utility showing per-client and per-
> engine GPU load.
> 
> Currently we only expose each client's pid and name under opaque numbered
> directories in /sys/class/drm/card0/clients/.
> 
> For instance:
> 
> /sys/class/drm/card0/clients/3/name: Xorg
> /sys/class/drm/card0/clients/3/pid: 5664

Should we even bother having the name here? And just have a link to pid
instead? Contemplating even pidfd for ultramodern.
-Chris
Tvrtko Ursulin Dec. 16, 2019, 1:28 p.m. UTC | #7
On 16/12/2019 13:17, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-12-16 12:07:01)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Expose a list of clients with open file handles in sysfs.
>>
>> This will be a basis for a top-like utility showing per-client and per-
>> engine GPU load.
>>
>> Currently we only expose each client's pid and name under opaque numbered
>> directories in /sys/class/drm/card0/clients/.
>>
>> For instance:
>>
>> /sys/class/drm/card0/clients/3/name: Xorg
>> /sys/class/drm/card0/clients/3/pid: 5664
> 
> Should we even bother having the name here? And just have a link to pid
> instead? Contemplating even pidfd for ultramodern.

I haven't looked at what symlink creation facilities sysfs would allow. 
But even then, I don't see how we could link to proc from sysfs.

I had a quick read on pidfd and don't see how it fits. What did you have 
in mind?

Regards,

Tvrtko
Chris Wilson Dec. 16, 2019, 1:41 p.m. UTC | #8
Quoting Tvrtko Ursulin (2019-12-16 13:28:18)
> 
> On 16/12/2019 13:17, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-12-16 12:07:01)
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >> Expose a list of clients with open file handles in sysfs.
> >>
> >> This will be a basis for a top-like utility showing per-client and per-
> >> engine GPU load.
> >>
> >> Currently we only expose each client's pid and name under opaque numbered
> >> directories in /sys/class/drm/card0/clients/.
> >>
> >> For instance:
> >>
> >> /sys/class/drm/card0/clients/3/name: Xorg
> >> /sys/class/drm/card0/clients/3/pid: 5664
> > 
> > Should we even bother having the name here? And just have a link to pid
> > instead? Contemplating even pidfd for ultramodern.
> 
> I haven't looked at what symlink creation facilities sysfs would allow. 
> But even then, I don't see how we could link to proc from sysfs.
> 
> I had a quick read on pidfd and don't see how it fits. What did you have 
> in mind?

Just thinking if we should do something like 
	pidfd = open(/.../clients/3/pid);

Ok, looking at pidfd_fops, it doesn't provide anything useful like being
able to safely acquire the pid->comm. Shame.
-Chris
Tvrtko Ursulin Dec. 16, 2019, 6:34 p.m. UTC | #9
On 16/12/2019 12:51, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-12-16 12:07:01)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Expose a list of clients with open file handles in sysfs.
>>
>> This will be a basis for a top-like utility showing per-client and per-
>> engine GPU load.
>>
>> Currently we only expose each client's pid and name under opaque numbered
>> directories in /sys/class/drm/card0/clients/.
>>
>> For instance:
>>
>> /sys/class/drm/card0/clients/3/name: Xorg
>> /sys/class/drm/card0/clients/3/pid: 5664
>>
>> v2:
>>   Chris Wilson:
>>   * Enclose new members into dedicated structs.
>>   * Protect against failed sysfs registration.
>>
>> v3:
>>   * sysfs_attr_init.
>>
>> v4:
>>   * Fix for internal clients.
>>
>> v5:
>>   * Use cyclic ida for client id. (Chris)
> 
> I think we are now in the age of xa_alloc_cyclic(). At least the
> immediate benefit is that we don't have to worry about the ida locking.

Also spin locks and GFP_KERNEL in the current patch do not mix well. Use 
with caution until I send the updated version out.

Regards,

Tvrtko
Tvrtko Ursulin Dec. 17, 2019, 5:21 p.m. UTC | #10
On 16/12/2019 12:53, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-12-16 12:07:01)
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 0781b6326b8c..9fcbcb6d6f76 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -224,6 +224,20 @@ struct drm_i915_file_private {
>>          /** ban_score: Accumulated score of all ctx bans and fast hangs. */
>>          atomic_t ban_score;
>>          unsigned long hang_timestamp;
>> +
>> +       struct i915_drm_client {
>> +               unsigned int id;
>> +
>> +               struct pid *pid;
>> +               char *name;
> 
> Hmm. Should we scrap i915_gem_context.pid and just use the client.pid?

Or maybe leave it as is so I don't have to worry about ctx vs client 
lifetime. In other words places where we access ctx->pid and the client 
is maybe long gone. I don't want to ref count clients, or maybe I do.. 
hmm.. keeping GPU load of a client which exited and left work running 
visible?

Regards,

Tvrtko

>> +
>> +               struct kobject *root;
>> +
>> +               struct {
>> +                       struct device_attribute pid;
>> +                       struct device_attribute name;
>> +               } attr;
>> +       } client;
>>   };
>
Chris Wilson Dec. 17, 2019, 5:26 p.m. UTC | #11
Quoting Tvrtko Ursulin (2019-12-17 17:21:28)
> 
> On 16/12/2019 12:53, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-12-16 12:07:01)
> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >> index 0781b6326b8c..9fcbcb6d6f76 100644
> >> --- a/drivers/gpu/drm/i915/i915_drv.h
> >> +++ b/drivers/gpu/drm/i915/i915_drv.h
> >> @@ -224,6 +224,20 @@ struct drm_i915_file_private {
> >>          /** ban_score: Accumulated score of all ctx bans and fast hangs. */
> >>          atomic_t ban_score;
> >>          unsigned long hang_timestamp;
> >> +
> >> +       struct i915_drm_client {
> >> +               unsigned int id;
> >> +
> >> +               struct pid *pid;
> >> +               char *name;
> > 
> > Hmm. Should we scrap i915_gem_context.pid and just use the client.pid?
> 
> Or maybe leave as it so I don't have to worry about ctx vs client 
> lifetime. In other words places where we access ctx->pid and the client 
> is maybe long gone. I don't want to ref count clients, or maybe I do.. 
> hmm.. keeping GPU load of a client which exited and left work running 
> visible?

Yeah. If we don't and all the GPU time is being hogged by zombies, users
of the interface will not be impressed they can't identify those. Next
up, kill(client_id, SIGKILL).
-Chris

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0781b6326b8c..9fcbcb6d6f76 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -224,6 +224,20 @@  struct drm_i915_file_private {
 	/** ban_score: Accumulated score of all ctx bans and fast hangs. */
 	atomic_t ban_score;
 	unsigned long hang_timestamp;
+
+	struct i915_drm_client {
+		unsigned int id; /* id from clients.idr, names the sysfs dir */
+
+		struct pid *pid; /* reference on the opening task's pid */
+		char *name; /* kstrdup() copy of the task's comm */
+
+		struct kobject *root; /* this client's sysfs directory */
+
+		struct { /* read-only sysfs files created under root */
+			struct device_attribute pid;
+			struct device_attribute name;
+		} attr;
+	} client;
 };
 
 /* Interface history:
@@ -1278,6 +1292,13 @@  struct drm_i915_private {
 
 	struct i915_pmu pmu;
 
+	struct i915_drm_clients {
+		spinlock_t idr_lock; /* held around every access to idr */
+		struct idr idr; /* client id -> struct drm_i915_file_private */
+
+		struct kobject *root; /* the "clients" sysfs directory */
+	} clients;
+
 	struct i915_hdcp_comp_master *hdcp_master;
 	bool hdcp_comp_added;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5eeef1ef7448..a65cd7e1ce7b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1457,11 +1457,14 @@  static void i915_gem_init__mm(struct drm_i915_private *i915)
 	i915_gem_init__objects(i915);
 }
 
-void i915_gem_init_early(struct drm_i915_private *dev_priv)
+void i915_gem_init_early(struct drm_i915_private *i915)
 {
-	i915_gem_init__mm(dev_priv);
+	i915_gem_init__mm(i915);
 
-	spin_lock_init(&dev_priv->fb_tracking.lock);
+	spin_lock_init(&i915->fb_tracking.lock);
+
+	spin_lock_init(&i915->clients.idr_lock); /* taken around clients.idr */
+	idr_init(&i915->clients.idr);
 }
 
 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
@@ -1518,6 +1521,106 @@  int i915_gem_freeze_late(struct drm_i915_private *i915)
 	return 0;
 }
 
+/* sysfs show callback: print the name recorded for this client. */
+static ssize_t
+show_client_name(struct device *kdev, struct device_attribute *attr, char *buf)
+{
+	const struct i915_drm_client *client =
+		container_of(attr, struct i915_drm_client, attr.name);
+
+	return snprintf(buf, PAGE_SIZE, "%s", client->name);
+}
+
+/* sysfs show callback: print the numeric pid recorded for this client. */
+static ssize_t
+show_client_pid(struct device *kdev, struct device_attribute *attr, char *buf)
+{
+	const struct i915_drm_client *client =
+		container_of(attr, struct i915_drm_client, attr.pid);
+
+	return snprintf(buf, PAGE_SIZE, "%u", pid_nr(client->pid));
+}
+
+/*
+ * Expose @task as client @serial in sysfs: a directory under the clients root
+ * holding "name" and "pid". Returns 0, or a negative error code on failure.
+ */
+static int
+i915_gem_add_client(struct drm_i915_private *i915,
+		    struct drm_i915_file_private *file_priv,
+		    struct task_struct *task, unsigned int serial)
+{
+	struct i915_drm_client *client = &file_priv->client;
+	struct device_attribute *attr;
+	int ret = -ENOMEM;
+	char id[32];
+
+	if (!i915->clients.root)
+		return 0; /* intel_fbdev_init registers a client before sysfs */
+
+	client->name = kstrdup(task->comm, GFP_KERNEL);
+	if (!client->name)
+		return ret;
+
+	client->id = serial; /* set all data before the files become visible */
+	client->pid = get_task_pid(task, PIDTYPE_PID);
+
+	snprintf(id, sizeof(id), "%u", serial);
+	client->root = kobject_create_and_add(id, i915->clients.root);
+	if (!client->root)
+		goto err_client;
+
+	attr = &client->attr.name;
+	sysfs_attr_init(&attr->attr);
+	attr->attr.name = "name";
+	attr->attr.mode = 0444;
+	attr->show = show_client_name;
+	ret = sysfs_create_file(client->root, &attr->attr);
+	if (ret)
+		goto err_attr_name;
+
+	attr = &client->attr.pid;
+	sysfs_attr_init(&attr->attr);
+	attr->attr.name = "pid";
+	attr->attr.mode = 0444;
+	attr->show = show_client_pid;
+	ret = sysfs_create_file(client->root, &attr->attr);
+	if (ret)
+		goto err_attr_pid;
+
+	return 0;
+
+err_attr_pid:
+	sysfs_remove_file(client->root, &client->attr.name.attr);
+err_attr_name:
+	kobject_put(client->root);
+err_client:
+	put_pid(client->pid);
+	kfree(client->name);
+	return ret;
+}
+
+/* Undo i915_gem_add_client(): drop the sysfs entries, id, pid ref and name. */
+static void i915_gem_remove_client(struct drm_i915_file_private *file_priv)
+{
+	struct i915_drm_client *client = &file_priv->client;
+	struct i915_drm_clients *clients = &file_priv->dev_priv->clients;
+
+	if (!client->name)
+		return; /* intel_fbdev_init registers a client before sysfs */
+
+	sysfs_remove_file(client->root, &client->attr.pid.attr);
+	sysfs_remove_file(client->root, &client->attr.name.attr);
+	kobject_put(client->root);
+
+	spin_lock(&clients->idr_lock);
+	idr_remove(&clients->idr, client->id);
+	spin_unlock(&clients->idr_lock);
+
+	put_pid(client->pid);
+	kfree(client->name);
+}
+
 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
@@ -1531,33 +1634,68 @@  void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
 		request->file_priv = NULL;
 	spin_unlock(&file_priv->mm.lock);
+
+	i915_gem_remove_client(file_priv);
 }
 
+/*
+ * Allocate and initialise the per-file private data, register it as a client
+ * and call i915_gem_context_open(). Returns 0 or a negative error code.
+ */
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
 {
+	struct i915_drm_clients *clients = &i915->clients;
 	struct drm_i915_file_private *file_priv;
 	int ret;
+	int id;
 
 	DRM_DEBUG("\n");
 
 	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
 	if (!file_priv)
 		return -ENOMEM;
 
 	file->driver_priv = file_priv;
 	file_priv->dev_priv = i915;
 	file_priv->file = file;
 
 	spin_lock_init(&file_priv->mm.lock);
 	INIT_LIST_HEAD(&file_priv->mm.request_list);
 
 	file_priv->bsd_engine = -1;
 	file_priv->hang_timestamp = jiffies;
 
+	/* Preload so that the allocation under the spinlock cannot sleep. */
+	idr_preload(GFP_KERNEL);
+	spin_lock(&clients->idr_lock);
+	id = idr_alloc_cyclic(&clients->idr, file_priv, 0, -1, GFP_NOWAIT);
+	spin_unlock(&clients->idr_lock);
+	idr_preload_end();
+	if (id < 0) {
+		ret = id;
+		goto err_free;
+	}
+
+	ret = i915_gem_add_client(i915, file_priv, current, id);
+	if (ret)
+		goto err_client;
+
 	ret = i915_gem_context_open(i915, file);
 	if (ret)
-		kfree(file_priv);
+		goto err_context;
+
+	return 0;
+
+err_context:
+	i915_gem_remove_client(file_priv);
+err_client:
+	/* A no-op if i915_gem_remove_client() already dropped the id. */
+	spin_lock(&clients->idr_lock);
+	idr_remove(&clients->idr, id);
+	spin_unlock(&clients->idr_lock);
+err_free:
+	kfree(file_priv);
 
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index ad2b1b833d7b..3ab50e29fddf 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -559,6 +559,11 @@  void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 	struct device *kdev = dev_priv->drm.primary->kdev;
 	int ret;
 
+	dev_priv->clients.root =
+		kobject_create_and_add("clients", &kdev->kobj);
+	if (!dev_priv->clients.root)
+		DRM_ERROR("Per-client sysfs setup failed\n");
+
 #ifdef CONFIG_PM
 	if (HAS_RC6(dev_priv)) {
 		ret = sysfs_merge_group(&kdev->kobj,
@@ -619,4 +624,7 @@  void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
 	sysfs_unmerge_group(&kdev->kobj, &rc6_attr_group);
 	sysfs_unmerge_group(&kdev->kobj, &rc6p_attr_group);
 #endif
+
+	if (dev_priv->clients.root)
+		kobject_put(dev_priv->clients.root);
 }