diff mbox

[RFC,3/3] drm/i915: Export engine busy stats in debugfs

Message ID 20170509140936.19060-4-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tvrtko Ursulin May 9, 2017, 2:09 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Export the stats added in the previous patch in debugfs.

The number of active clients reading this data is tracked, and the
static key is only enabled whilst there is at least one.

Userspace is intended to keep the file descriptor open, seeking
to the beginning of the file periodically, and re-reading the
stats.

This is because the underlying implementation is costly on every
first open/last close transition, and also, because the stats
exported mostly make sense when they are considered relative to
the previous sample.

The file lists the number of nanoseconds each engine has been active
since tracking started.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 120 ++++++++++++++++++++++++++++++++++++
 1 file changed, 120 insertions(+)

Comments

Rogozhkin, Dmitry V May 9, 2017, 6:17 p.m. UTC | #1
On 5/9/2017 7:09 AM, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Export the stats added in the previous patch in debugfs.
>
> Number of active clients reading this data is tracked and the
> static key is only enabled whilst there are some.
>
> Userspace is intended to keep the file descriptor open, seeking
> to the beginning of the file periodically, and re-reading the
> stats.
>
> This is because the underlying implementation is costly on every
> first open/last close transition, and also, because the stats
> exported mostly make sense when they are considered relative to
> the previous sample.
>
> File lists nanoseconds each engine was active since the tracking
> has started.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c | 120 ++++++++++++++++++++++++++++++++++++
>   1 file changed, 120 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 1003511f28cc..db588ef858cb 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -4752,6 +4752,120 @@ static const struct file_operations i915_hpd_storm_ctl_fops = {
>   	.write = i915_hpd_storm_ctl_write
>   };
>   
> +DECLARE_STATIC_KEY_FALSE(i915_engine_stats_key);
> +static DEFINE_MUTEX(i915_engine_stats_mutex);
> +static int i915_engine_stats_ref;
> +
> +struct i915_engine_stats_buf {
> +	unsigned int len;
> +	size_t available;
> +	char buf[0];
> +};
> +
> +static int i915_engine_stats_open(struct inode *inode, struct file *file)
> +{
> +	const unsigned int engine_name_len =
> +		sizeof(((struct intel_engine_cs *)0)->name);
> +	struct i915_engine_stats_buf *buf;
> +	const unsigned int buf_size =
> +		(engine_name_len + 2 + 19 + 1) * I915_NUM_ENGINES + 1 +
> +		sizeof(*buf);
> +	int ret;
> +
> +	buf = kzalloc(buf_size, GFP_KERNEL);
> +	if (!buf)
> +		return -ENOMEM;
> +
> +	buf->len = buf_size;
> +	file->private_data = buf;
> +
> +	ret = mutex_lock_interruptible(&i915_engine_stats_mutex);
> +	if (ret)
> +		return ret;
> +
> +	if (i915_engine_stats_ref++ == 0) {
> +		struct drm_i915_private *dev_priv = file->f_inode->i_private;
> +		struct intel_engine_cs *engine;
> +		enum intel_engine_id id;
> +
> +		for_each_engine(engine, dev_priv, id) {
> +			memset(&engine->stats, 0, sizeof(engine->stats));
> +			spin_lock_init(&engine->stats.lock);
> +		}
> +
> +		static_branch_enable(&i915_engine_stats_key);
> +	}
> +
> +	mutex_unlock(&i915_engine_stats_mutex);
> +
> +	return 0;
> +}
> +
> +static int i915_engine_stats_release(struct inode *inode, struct file *file)
> +{
> +	mutex_lock(&i915_engine_stats_mutex);
> +	if (--i915_engine_stats_ref == 0)
> +		static_branch_disable(&i915_engine_stats_key);
> +	mutex_unlock(&i915_engine_stats_mutex);
> +
> +	kfree(file->private_data);
> +
> +	return 0;
> +}
> +
> +static ssize_t i915_engine_stats_read(struct file *file, char __user *ubuf,
> +				      size_t count, loff_t *pos)
> +{
> +	struct i915_engine_stats_buf *buf =
> +		(struct i915_engine_stats_buf *)file->private_data;
> +
> +	if (*pos == 0) {
> +		struct drm_i915_private *dev_priv = file->f_inode->i_private;
> +		char *ptr = &buf->buf[0];
> +		int left = buf->len;
> +		struct intel_engine_cs *engine;
> +		enum intel_engine_id id;
> +
> +		buf->available = 0;
> +
> +		for_each_engine(engine, dev_priv, id) {
> +			u64 total;
> +			int len;
> +
> +			spin_lock_irq(&engine->stats.lock);
> +			total = engine->stats.total;
> +			/*
> +			 * If the engine is executing something at the moment
> +			 * add it to the total.
> +			 */
> +			if (engine->stats.ref)
> +				total += ktime_get_real_ns() -
> +					 engine->stats.start;
> +			spin_unlock_irq(&engine->stats.lock);
> +
> +			len = snprintf(ptr, left, "%s: %llu\n",
> +				       engine->name, total);
If I understood it correctly, the file format is:
   render ring: 12345
   bsd ring: 12345
   ...
where the numbers are busy clocks (ns) since system boot time. Is that
right? What if we want to expose some other statistics later, not only
busy clocks? For example, the i915 engine queue depths are the next
thing of interest. Maybe later we will find something else interesting.
So, do we want this file to contain all kinds of statistics in the
future, and hence give it a somewhat different format, or will it have
only busy clocks, in which case maybe it needs a different file name?
> +			buf->available += len;
> +			left -= len;
> +			ptr += len;
> +
> +			if (len == 0)
> +				return -EFBIG;
> +		}
> +	}
> +
> +	return simple_read_from_buffer(ubuf, count, pos, &buf->buf[0],
> +				       buf->available);
> +}
> +
> +static const struct file_operations i915_engine_stats_fops = {
> +	.owner = THIS_MODULE,
> +	.open = i915_engine_stats_open,
> +	.release = i915_engine_stats_release,
> +	.read = i915_engine_stats_read,
> +	.llseek = default_llseek,
> +};
> +
>   static const struct drm_info_list i915_debugfs_list[] = {
>   	{"i915_capabilities", i915_capabilities, 0},
>   	{"i915_gem_objects", i915_gem_object_info, 0},
> @@ -4839,6 +4953,12 @@ int i915_debugfs_register(struct drm_i915_private *dev_priv)
>   	struct dentry *ent;
>   	int ret, i;
>   
> +	ent = debugfs_create_file("i915_engine_stats", S_IRUGO,
> +				  minor->debugfs_root, to_i915(minor->dev),
> +				  &i915_engine_stats_fops);
> +	if (!ent)
> +		return -ENOMEM;
> +
>   	ent = debugfs_create_file("i915_forcewake_user", S_IRUSR,
>   				  minor->debugfs_root, to_i915(minor->dev),
>   				  &i915_forcewake_fops);
Tvrtko Ursulin May 10, 2017, 8:30 a.m. UTC | #2
On 09/05/2017 19:17, Dmitry Rogozhkin wrote:
> On 5/9/2017 7:09 AM, Tvrtko Ursulin wrote:

[snip]

>> +static ssize_t i915_engine_stats_read(struct file *file, char __user
>> *ubuf,
>> +                      size_t count, loff_t *pos)
>> +{
>> +    struct i915_engine_stats_buf *buf =
>> +        (struct i915_engine_stats_buf *)file->private_data;
>> +
>> +    if (*pos == 0) {
>> +        struct drm_i915_private *dev_priv = file->f_inode->i_private;
>> +        char *ptr = &buf->buf[0];
>> +        int left = buf->len;
>> +        struct intel_engine_cs *engine;
>> +        enum intel_engine_id id;
>> +
>> +        buf->available = 0;
>> +
>> +        for_each_engine(engine, dev_priv, id) {
>> +            u64 total;
>> +            int len;
>> +
>> +            spin_lock_irq(&engine->stats.lock);
>> +            total = engine->stats.total;
>> +            /*
>> +             * If the engine is executing something at the moment
>> +             * add it to the total.
>> +             */
>> +            if (engine->stats.ref)
>> +                total += ktime_get_real_ns() -
>> +                     engine->stats.start;
>> +            spin_unlock_irq(&engine->stats.lock);
>> +
>> +            len = snprintf(ptr, left, "%s: %llu\n",
>> +                       engine->name, total);
> If I caught it right, file format is:
>   render ring: 12345
>   bsd ring: 12345
>   ...

Yes, almost; it is just that the engine names have since been changed to
the likes of rcs0, vcs0, vcs1, vecs0 and bcs0.

> where numbers are busy clocks (ns) from the system boot time. Is that

Nanoseconds, but counted since the last time tracking got enabled rather
than since boot time.

This is because the most important thing in this version, from the point
of view of overhead in the interrupt tasklet, is that the tracking is not
done unless somebody is listening (has the file open).

As I wrote in the cover letter and the 2nd patch, when nobody has the
file open the only things present in the interrupt tasklets are three
no-op instructions. They only get patched to jumps (to the sections
actually collecting the stats) for as long as someone has the file open.

> right? What if we will want to expose some other statistics information
> later, not only busy clocks? For example, engines i915 queues depths is
> a next interest. Maybe later we will find something else interesting.
> So, do we want to consider this file to contain all kind of statistics
> in the future, and hence it should be of somewhat different format, or
> it will have only busy clocks, and maybe we need other file name then?

It can be either of the two, or some third option. It sounds like it is
too early to discuss that level of detail. At this point this was only an
RFC to gather some opinions on the overall idea.

Regards,

Tvrtko
Rogozhkin, Dmitry V May 10, 2017, 3:57 p.m. UTC | #3
On 5/10/2017 1:30 AM, Tvrtko Ursulin wrote:
>
> On 09/05/2017 19:17, Dmitry Rogozhkin wrote:
>> On 5/9/2017 7:09 AM, Tvrtko Ursulin wrote:
>
> [snip]
>
>>> +static ssize_t i915_engine_stats_read(struct file *file, char __user
>>> *ubuf,
>>> +                      size_t count, loff_t *pos)
>>> +{
>>> +    struct i915_engine_stats_buf *buf =
>>> +        (struct i915_engine_stats_buf *)file->private_data;
>>> +
>>> +    if (*pos == 0) {
>>> +        struct drm_i915_private *dev_priv = file->f_inode->i_private;
>>> +        char *ptr = &buf->buf[0];
>>> +        int left = buf->len;
>>> +        struct intel_engine_cs *engine;
>>> +        enum intel_engine_id id;
>>> +
>>> +        buf->available = 0;
>>> +
>>> +        for_each_engine(engine, dev_priv, id) {
>>> +            u64 total;
>>> +            int len;
>>> +
>>> +            spin_lock_irq(&engine->stats.lock);
>>> +            total = engine->stats.total;
>>> +            /*
>>> +             * If the engine is executing something at the moment
>>> +             * add it to the total.
>>> +             */
>>> +            if (engine->stats.ref)
>>> +                total += ktime_get_real_ns() -
>>> +                     engine->stats.start;
>>> +            spin_unlock_irq(&engine->stats.lock);
>>> +
>>> +            len = snprintf(ptr, left, "%s: %llu\n",
>>> +                       engine->name, total);
>> If I caught it right, file format is:
>>   render ring: 12345
>>   bsd ring: 12345
>>   ...
>
> Yes almost, just that the engine names have been changed to likes of 
> rcs0, vcs0, vcs1, vecs0 and bcs0 in the meantime.
>
>> where numbers are busy clocks (ns) from the system boot time. Is that
>
> Nanoseconds, but not since boot time but since the last time tracking 
> got enabled.

From my perspective that's bad: clocks counted from boot time are a more
natural metric. With them you would be able to know definitively what you
did with the GPU during boot. For certain customers, such as those on
Android and other embedded devices, this is critically important. Just
recently we worked on boot time optimization in one of our projects. Thus,
I would recommend having this metric permanently available.

Now, if we still fall back to clocks counted from some moment in time, I
do not like the "last time tracking got enabled" approach. Did you not
account for multiple consumers of the metric? Why? What if there are
several independent clients requesting access to the metric in parallel?
If you track from the moment when the last client requested access, then
you will damage the data for the clients that already have access.

>
> Because the most important thing in this version, from the point of 
> view of overhead in interrupt tasklet, is that the tracking is not 
> done unless somebody is listening (has the file open).
>
> As I wrote in the cover letter and the 2nd patch, when nobody has the 
> file open the only thing which exists in the interrupt tasklets are 
> three no-nop instructions. They only get patched to jumps (to sections 
> actually collecting the stats) for as long as someone has the file open.
>
>> right? What if we will want to expose some other statistics information
>> later, not only busy clocks? For example, engines i915 queues depths is
>> a next interest. Maybe later we will find something else interesting.
>> So, do we want to consider this file to contain all kind of statistics
>> in the future, and hence it should be of somewhat different format, or
>> it will have only busy clocks, and maybe we need other file name then?
>
> It can be either of the two, or some third option. It sounds like it 
> is too early to discuss those level of detail. At this point it was an 
> RFC only to gather some opinions on the overall idea.
Yep, agree. Just something to remember going forward...
>
> Regards,
>
> Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 1003511f28cc..db588ef858cb 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4752,6 +4752,120 @@  static const struct file_operations i915_hpd_storm_ctl_fops = {
 	.write = i915_hpd_storm_ctl_write
 };
 
+DECLARE_STATIC_KEY_FALSE(i915_engine_stats_key);
+static DEFINE_MUTEX(i915_engine_stats_mutex);
+static int i915_engine_stats_ref;
+
+struct i915_engine_stats_buf {
+	unsigned int len;
+	size_t available;
+	char buf[0];
+};
+
+static int i915_engine_stats_open(struct inode *inode, struct file *file)
+{
+	const unsigned int engine_name_len =
+		sizeof(((struct intel_engine_cs *)0)->name);
+	struct i915_engine_stats_buf *buf;
+	const unsigned int buf_size =
+		(engine_name_len + 2 + 19 + 1) * I915_NUM_ENGINES + 1 +
+		sizeof(*buf);
+	int ret;
+
+	buf = kzalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->len = buf_size;
+	file->private_data = buf;
+
+	ret = mutex_lock_interruptible(&i915_engine_stats_mutex);
+	if (ret)
+		return ret;
+
+	if (i915_engine_stats_ref++ == 0) {
+		struct drm_i915_private *dev_priv = file->f_inode->i_private;
+		struct intel_engine_cs *engine;
+		enum intel_engine_id id;
+
+		for_each_engine(engine, dev_priv, id) {
+			memset(&engine->stats, 0, sizeof(engine->stats));
+			spin_lock_init(&engine->stats.lock);
+		}
+
+		static_branch_enable(&i915_engine_stats_key);
+	}
+
+	mutex_unlock(&i915_engine_stats_mutex);
+
+	return 0;
+}
+
+static int i915_engine_stats_release(struct inode *inode, struct file *file)
+{
+	mutex_lock(&i915_engine_stats_mutex);
+	if (--i915_engine_stats_ref == 0)
+		static_branch_disable(&i915_engine_stats_key);
+	mutex_unlock(&i915_engine_stats_mutex);
+
+	kfree(file->private_data);
+
+	return 0;
+}
+
+static ssize_t i915_engine_stats_read(struct file *file, char __user *ubuf,
+				      size_t count, loff_t *pos)
+{
+	struct i915_engine_stats_buf *buf =
+		(struct i915_engine_stats_buf *)file->private_data;
+
+	if (*pos == 0) {
+		struct drm_i915_private *dev_priv = file->f_inode->i_private;
+		char *ptr = &buf->buf[0];
+		int left = buf->len;
+		struct intel_engine_cs *engine;
+		enum intel_engine_id id;
+
+		buf->available = 0;
+
+		for_each_engine(engine, dev_priv, id) {
+			u64 total;
+			int len;
+
+			spin_lock_irq(&engine->stats.lock);
+			total = engine->stats.total;
+			/*
+			 * If the engine is executing something at the moment
+			 * add it to the total.
+			 */
+			if (engine->stats.ref)
+				total += ktime_get_real_ns() -
+					 engine->stats.start;
+			spin_unlock_irq(&engine->stats.lock);
+
+			len = snprintf(ptr, left, "%s: %llu\n",
+				       engine->name, total);
+			buf->available += len;
+			left -= len;
+			ptr += len;
+
+			if (len == 0)
+				return -EFBIG;
+		}
+	}
+
+	return simple_read_from_buffer(ubuf, count, pos, &buf->buf[0],
+				       buf->available);
+}
+
+static const struct file_operations i915_engine_stats_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_engine_stats_open,
+	.release = i915_engine_stats_release,
+	.read = i915_engine_stats_read,
+	.llseek = default_llseek,
+};
+
 static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_capabilities", i915_capabilities, 0},
 	{"i915_gem_objects", i915_gem_object_info, 0},
@@ -4839,6 +4953,12 @@  int i915_debugfs_register(struct drm_i915_private *dev_priv)
 	struct dentry *ent;
 	int ret, i;
 
+	ent = debugfs_create_file("i915_engine_stats", S_IRUGO,
+				  minor->debugfs_root, to_i915(minor->dev),
+				  &i915_engine_stats_fops);
+	if (!ent)
+		return -ENOMEM;
+
 	ent = debugfs_create_file("i915_forcewake_user", S_IRUSR,
 				  minor->debugfs_root, to_i915(minor->dev),
 				  &i915_forcewake_fops);