Message ID | 20170509140936.19060-4-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 5/9/2017 7:09 AM, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Export the stats added in the previous patch in debugfs. > > Number of active clients reading this data is tracked and the > static key is only enabled whilst there are some. > > Userspace is intended to keep the file descriptor open, seeking > to the beginning of the file periodically, and re-reading the > stats. > > This is because the underlying implementation is costly on every > first open/last close transition, and also, because the stats > exported mostly make sense when they are considered relative to > the previous sample. > > File lists nanoseconds each engine was active since the tracking > has started. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 120 ++++++++++++++++++++++++++++++++++++ > 1 file changed, 120 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 1003511f28cc..db588ef858cb 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -4752,6 +4752,120 @@ static const struct file_operations i915_hpd_storm_ctl_fops = { > .write = i915_hpd_storm_ctl_write > }; > > +DECLARE_STATIC_KEY_FALSE(i915_engine_stats_key); > +static DEFINE_MUTEX(i915_engine_stats_mutex); > +static int i915_engine_stats_ref; > + > +struct i915_engine_stats_buf { > + unsigned int len; > + size_t available; > + char buf[0]; > +}; > + > +static int i915_engine_stats_open(struct inode *inode, struct file *file) > +{ > + const unsigned int engine_name_len = > + sizeof(((struct intel_engine_cs *)0)->name); > + struct i915_engine_stats_buf *buf; > + const unsigned int buf_size = > + (engine_name_len + 2 + 19 + 1) * I915_NUM_ENGINES + 1 + > + sizeof(*buf); > + int ret; > + > + buf = kzalloc(buf_size, GFP_KERNEL); > + if (!buf) > + return -ENOMEM; > + > + buf->len = buf_size; > + file->private_data = buf; > + > + ret = 
mutex_lock_interruptible(&i915_engine_stats_mutex); > + if (ret) > + return ret; > + > + if (i915_engine_stats_ref++ == 0) { > + struct drm_i915_private *dev_priv = file->f_inode->i_private; > + struct intel_engine_cs *engine; > + enum intel_engine_id id; > + > + for_each_engine(engine, dev_priv, id) { > + memset(&engine->stats, 0, sizeof(engine->stats)); > + spin_lock_init(&engine->stats.lock); > + } > + > + static_branch_enable(&i915_engine_stats_key); > + } > + > + mutex_unlock(&i915_engine_stats_mutex); > + > + return 0; > +} > + > +static int i915_engine_stats_release(struct inode *inode, struct file *file) > +{ > + mutex_lock(&i915_engine_stats_mutex); > + if (--i915_engine_stats_ref == 0) > + static_branch_disable(&i915_engine_stats_key); > + mutex_unlock(&i915_engine_stats_mutex); > + > + kfree(file->private_data); > + > + return 0; > +} > + > +static ssize_t i915_engine_stats_read(struct file *file, char __user *ubuf, > + size_t count, loff_t *pos) > +{ > + struct i915_engine_stats_buf *buf = > + (struct i915_engine_stats_buf *)file->private_data; > + > + if (*pos == 0) { > + struct drm_i915_private *dev_priv = file->f_inode->i_private; > + char *ptr = &buf->buf[0]; > + int left = buf->len; > + struct intel_engine_cs *engine; > + enum intel_engine_id id; > + > + buf->available = 0; > + > + for_each_engine(engine, dev_priv, id) { > + u64 total; > + int len; > + > + spin_lock_irq(&engine->stats.lock); > + total = engine->stats.total; > + /* > + * If the engine is executing something at the moment > + * add it to the total. > + */ > + if (engine->stats.ref) > + total += ktime_get_real_ns() - > + engine->stats.start; > + spin_unlock_irq(&engine->stats.lock); > + > + len = snprintf(ptr, left, "%s: %llu\n", > + engine->name, total); If I caught it right, file format is: render ring: 12345 bsd ring: 12345 ... where numbers are busy clocks (ns) from the system boot time. Is that right? 
What if we want to expose other statistics later, not only busy clocks? For example, the depth of the i915 engine queues is the next thing of interest. Maybe later we will find something else interesting. So, do we want this file to contain all kinds of statistics in the future, in which case it should have a somewhat different format, or will it contain only busy clocks, in which case maybe it needs a different file name? > + buf->available += len; > + left -= len; > + ptr += len; > + > + if (len == 0) > + return -EFBIG; > + } > + } > + > + return simple_read_from_buffer(ubuf, count, pos, &buf->buf[0], > + buf->available); > +} > + > +static const struct file_operations i915_engine_stats_fops = { > + .owner = THIS_MODULE, > + .open = i915_engine_stats_open, > + .release = i915_engine_stats_release, > + .read = i915_engine_stats_read, > + .llseek = default_llseek, > +}; > + > static const struct drm_info_list i915_debugfs_list[] = { > {"i915_capabilities", i915_capabilities, 0}, > {"i915_gem_objects", i915_gem_object_info, 0}, > @@ -4839,6 +4953,12 @@ int i915_debugfs_register(struct drm_i915_private *dev_priv) > struct dentry *ent; > int ret, i; > > + ent = debugfs_create_file("i915_engine_stats", S_IRUGO, > + minor->debugfs_root, to_i915(minor->dev), > + &i915_engine_stats_fops); > + if (!ent) > + return -ENOMEM; > + > ent = debugfs_create_file("i915_forcewake_user", S_IRUSR, > minor->debugfs_root, to_i915(minor->dev), > &i915_forcewake_fops);
On 09/05/2017 19:17, Dmitry Rogozhkin wrote: > On 5/9/2017 7:09 AM, Tvrtko Ursulin wrote: [snip] >> +static ssize_t i915_engine_stats_read(struct file *file, char __user >> *ubuf, >> + size_t count, loff_t *pos) >> +{ >> + struct i915_engine_stats_buf *buf = >> + (struct i915_engine_stats_buf *)file->private_data; >> + >> + if (*pos == 0) { >> + struct drm_i915_private *dev_priv = file->f_inode->i_private; >> + char *ptr = &buf->buf[0]; >> + int left = buf->len; >> + struct intel_engine_cs *engine; >> + enum intel_engine_id id; >> + >> + buf->available = 0; >> + >> + for_each_engine(engine, dev_priv, id) { >> + u64 total; >> + int len; >> + >> + spin_lock_irq(&engine->stats.lock); >> + total = engine->stats.total; >> + /* >> + * If the engine is executing something at the moment >> + * add it to the total. >> + */ >> + if (engine->stats.ref) >> + total += ktime_get_real_ns() - >> + engine->stats.start; >> + spin_unlock_irq(&engine->stats.lock); >> + >> + len = snprintf(ptr, left, "%s: %llu\n", >> + engine->name, total); > If I caught it right, file format is: > render ring: 12345 > bsd ring: 12345 > ... Yes, almost; it is just that the engine names have since been changed to the likes of rcs0, vcs0, vcs1, vecs0 and bcs0. > where numbers are busy clocks (ns) from the system boot time. Is that Nanoseconds, though not since boot time but since the last time tracking got enabled. That is because the most important thing in this version, from the point of view of overhead in the interrupt tasklet, is that the tracking is not done unless somebody is listening (has the file open). As I wrote in the cover letter and the 2nd patch, when nobody has the file open the only things which exist in the interrupt tasklets are three no-op instructions. They only get patched to jumps (to sections actually collecting the stats) for as long as someone has the file open. > right? What if we will want to expose some other statistics information > later, not only busy clocks? 
For example, engines i915 queues depths is > a next interest. Maybe later we will find something else interesting. > So, do we want to consider this file to contain all kind of statistics > in the future, and hence it should be of somewhat different format, or > it will have only busy clocks, and maybe we need other file name then? It can be either of the two, or some third option. It sounds like it is too early to discuss that level of detail. At this point this is only an RFC, meant to gather some opinions on the overall idea. Regards, Tvrtko
On 5/10/2017 1:30 AM, Tvrtko Ursulin wrote: > > On 09/05/2017 19:17, Dmitry Rogozhkin wrote: >> On 5/9/2017 7:09 AM, Tvrtko Ursulin wrote: > > [snip] > >>> +static ssize_t i915_engine_stats_read(struct file *file, char __user >>> *ubuf, >>> + size_t count, loff_t *pos) >>> +{ >>> + struct i915_engine_stats_buf *buf = >>> + (struct i915_engine_stats_buf *)file->private_data; >>> + >>> + if (*pos == 0) { >>> + struct drm_i915_private *dev_priv = file->f_inode->i_private; >>> + char *ptr = &buf->buf[0]; >>> + int left = buf->len; >>> + struct intel_engine_cs *engine; >>> + enum intel_engine_id id; >>> + >>> + buf->available = 0; >>> + >>> + for_each_engine(engine, dev_priv, id) { >>> + u64 total; >>> + int len; >>> + >>> + spin_lock_irq(&engine->stats.lock); >>> + total = engine->stats.total; >>> + /* >>> + * If the engine is executing something at the moment >>> + * add it to the total. >>> + */ >>> + if (engine->stats.ref) >>> + total += ktime_get_real_ns() - >>> + engine->stats.start; >>> + spin_unlock_irq(&engine->stats.lock); >>> + >>> + len = snprintf(ptr, left, "%s: %llu\n", >>> + engine->name, total); >> If I caught it right, file format is: >> render ring: 12345 >> bsd ring: 12345 >> ... > > Yes almost, just that the engine names have been changed to likes of > rcs0, vcs0, vcs1, vecs0 and bcs0 in the meantime. > >> where numbers are busy clocks (ns) from the system boot time. Is that > > Nanoseconds, but not since boot time but since the last time tracking > got enabled. From my perspective that's bad: clocks counted from boot time are a more natural metric. With it you would know definitively what the GPU was doing during boot. For certain customers, such as those on Android and other embedded devices, this is critically important. Just recently we worked on boot time optimization in one of our projects. Thus, I would recommend making this metric permanently available. 
Now, if we still fall back to counting clocks from some arbitrary moment in time, I do not like the "last time tracking got enabled" approach. Did you not account for several consumers of the metric? Why? What if there are several independent clients requesting access to the metric in parallel? If you track from the moment when the last client requested access, then you will corrupt the data for the clients that already have access. > > Because the most important thing in this version, from the point of > view of overhead in interrupt tasklet, is that the tracking is not > done unless somebody is listening (has the file open). > > As I wrote in the cover letter and the 2nd patch, when nobody has the > file open the only thing which exists in the interrupt tasklets are > three no-nop instructions. They only get patched to jumps (to sections > actually collecting the stats) for as long as someone has the file open. > >> right? What if we will want to expose some other statistics information >> later, not only busy clocks? For example, engines i915 queues depths is >> a next interest. Maybe later we will find something else interesting. >> So, do we want to consider this file to contain all kind of statistics >> in the future, and hence it should be of somewhat different format, or >> it will have only busy clocks, and maybe we need other file name then? > > It can be either of the two, or some third option. It sounds like it > is too early to discuss those level of detail. At this point it was an > RFC only to gather some opinions on the overall idea. Yep, agree. Just something to remember going forward... > > Regards, > > Tvrtko
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1003511f28cc..db588ef858cb 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4752,6 +4752,120 @@ static const struct file_operations i915_hpd_storm_ctl_fops = { .write = i915_hpd_storm_ctl_write }; +DECLARE_STATIC_KEY_FALSE(i915_engine_stats_key); +static DEFINE_MUTEX(i915_engine_stats_mutex); +static int i915_engine_stats_ref; + +struct i915_engine_stats_buf { + unsigned int len; + size_t available; + char buf[0]; +}; + +static int i915_engine_stats_open(struct inode *inode, struct file *file) +{ + const unsigned int engine_name_len = + sizeof(((struct intel_engine_cs *)0)->name); + struct i915_engine_stats_buf *buf; + const unsigned int buf_size = + (engine_name_len + 2 + 19 + 1) * I915_NUM_ENGINES + 1 + + sizeof(*buf); + int ret; + + buf = kzalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf->len = buf_size; + file->private_data = buf; + + ret = mutex_lock_interruptible(&i915_engine_stats_mutex); + if (ret) + return ret; + + if (i915_engine_stats_ref++ == 0) { + struct drm_i915_private *dev_priv = file->f_inode->i_private; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) { + memset(&engine->stats, 0, sizeof(engine->stats)); + spin_lock_init(&engine->stats.lock); + } + + static_branch_enable(&i915_engine_stats_key); + } + + mutex_unlock(&i915_engine_stats_mutex); + + return 0; +} + +static int i915_engine_stats_release(struct inode *inode, struct file *file) +{ + mutex_lock(&i915_engine_stats_mutex); + if (--i915_engine_stats_ref == 0) + static_branch_disable(&i915_engine_stats_key); + mutex_unlock(&i915_engine_stats_mutex); + + kfree(file->private_data); + + return 0; +} + +static ssize_t i915_engine_stats_read(struct file *file, char __user *ubuf, + size_t count, loff_t *pos) +{ + struct i915_engine_stats_buf *buf = + (struct i915_engine_stats_buf 
*)file->private_data; + + if (*pos == 0) { + struct drm_i915_private *dev_priv = file->f_inode->i_private; + char *ptr = &buf->buf[0]; + int left = buf->len; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + buf->available = 0; + + for_each_engine(engine, dev_priv, id) { + u64 total; + int len; + + spin_lock_irq(&engine->stats.lock); + total = engine->stats.total; + /* + * If the engine is executing something at the moment + * add it to the total. + */ + if (engine->stats.ref) + total += ktime_get_real_ns() - + engine->stats.start; + spin_unlock_irq(&engine->stats.lock); + + len = snprintf(ptr, left, "%s: %llu\n", + engine->name, total); + buf->available += len; + left -= len; + ptr += len; + + if (len == 0) + return -EFBIG; + } + } + + return simple_read_from_buffer(ubuf, count, pos, &buf->buf[0], + buf->available); +} + +static const struct file_operations i915_engine_stats_fops = { + .owner = THIS_MODULE, + .open = i915_engine_stats_open, + .release = i915_engine_stats_release, + .read = i915_engine_stats_read, + .llseek = default_llseek, +}; + static const struct drm_info_list i915_debugfs_list[] = { {"i915_capabilities", i915_capabilities, 0}, {"i915_gem_objects", i915_gem_object_info, 0}, @@ -4839,6 +4953,12 @@ int i915_debugfs_register(struct drm_i915_private *dev_priv) struct dentry *ent; int ret, i; + ent = debugfs_create_file("i915_engine_stats", S_IRUGO, + minor->debugfs_root, to_i915(minor->dev), + &i915_engine_stats_fops); + if (!ent) + return -ENOMEM; + ent = debugfs_create_file("i915_forcewake_user", S_IRUSR, minor->debugfs_root, to_i915(minor->dev), &i915_forcewake_fops);