[18/40] drm/i915: Report the number of closed vma held by each context in debugfs

Message ID 20180919195544.1511-18-chris@chris-wilson.co.uk (mailing list archive)
State: New, archived
Series: [01/40] drm: Use default dma_fence hooks where possible for null syncobj

Commit Message

Chris Wilson Sept. 19, 2018, 7:55 p.m. UTC
Include the total size of closed vma when reporting the per_ctx_stats of
debugfs/i915_gem_objects.

Whilst adjusting the context tracking, note that we can simply use our
list of contexts in i915->contexts rather than circumlocute via
dev->filelist and the per-file context idr.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 113 +++++++++++-----------------
 1 file changed, 42 insertions(+), 71 deletions(-)

Comments

Tvrtko Ursulin Sept. 24, 2018, 11:57 a.m. UTC | #1
On 19/09/2018 20:55, Chris Wilson wrote:
> Include the total size of closed vma when reporting the per_ctx_stats of
> debugfs/i915_gem_objects.

Why do we need/want this?

> Whilst adjusting the context tracking, note that we can simply use our
> list of contexts in i915->contexts rather than circumlocute via
> dev->filelist and the per-file context idr.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c | 113 +++++++++++-----------------
>   1 file changed, 42 insertions(+), 71 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 2ac75bc10afa..6b5cc30f3e09 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -302,6 +302,7 @@ struct file_stats {
>   	u64 total, unbound;
>   	u64 global, shared;
>   	u64 active, inactive;
> +	u64 closed;
>   };
>   
>   static int per_file_stats(int id, void *ptr, void *data)
> @@ -336,6 +337,9 @@ static int per_file_stats(int id, void *ptr, void *data)
>   			stats->active += vma->node.size;
>   		else
>   			stats->inactive += vma->node.size;
> +
> +		if (i915_vma_is_closed(vma))
> +			stats->closed += vma->node.size;

We can have closed and active?

>   	}
>   
>   	return 0;
> @@ -343,7 +347,7 @@ static int per_file_stats(int id, void *ptr, void *data)
>   
>   #define print_file_stats(m, name, stats) do { \
>   	if (stats.count) \
> -		seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound)\n", \
> +		seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound, %llu closed)\n", \
>   			   name, \
>   			   stats.count, \
>   			   stats.total, \
> @@ -351,7 +355,8 @@ static int per_file_stats(int id, void *ptr, void *data)
>   			   stats.inactive, \
>   			   stats.global, \
>   			   stats.shared, \
> -			   stats.unbound); \
> +			   stats.unbound, \
> +			   stats.closed); \
>   } while (0)
>   
>   static void print_batch_pool_stats(struct seq_file *m,
> @@ -377,44 +382,44 @@ static void print_batch_pool_stats(struct seq_file *m,
>   	print_file_stats(m, "[k]batch pool", stats);
>   }
>   
> -static int per_file_ctx_stats(int idx, void *ptr, void *data)
> +static void print_context_stats(struct seq_file *m,
> +				struct drm_i915_private *i915)
>   {
> -	struct i915_gem_context *ctx = ptr;
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	for_each_engine(engine, ctx->i915, id) {
> -		struct intel_context *ce = to_intel_context(ctx, engine);
> +	struct file_stats kstats = {};
> +	struct i915_gem_context *ctx;
>   
> -		if (ce->state)
> -			per_file_stats(0, ce->state->obj, data);
> -		if (ce->ring)
> -			per_file_stats(0, ce->ring->vma->obj, data);
> -	}
> +	list_for_each_entry(ctx, &i915->contexts.list, link) {
> +		struct file_stats stats = { .file_priv = ctx->file_priv };
> +		struct intel_engine_cs *engine;
> +		enum intel_engine_id id;
>   
> -	return 0;
> -}
> +		for_each_engine(engine, i915, id) {
> +			struct intel_context *ce = to_intel_context(ctx, engine);
>   
> -static void print_context_stats(struct seq_file *m,
> -				struct drm_i915_private *dev_priv)
> -{
> -	struct drm_device *dev = &dev_priv->drm;
> -	struct file_stats stats;
> -	struct drm_file *file;
> +			if (ce->state)
> +				per_file_stats(0, ce->state->obj, &kstats);
> +			if (ce->ring)
> +				per_file_stats(0, ce->ring->vma->obj, &kstats);
> +		}
>   
> -	memset(&stats, 0, sizeof(stats));
> +		if (!IS_ERR_OR_NULL(stats.file_priv)) {
> +			struct drm_file *file = stats.file_priv->file;
> +			struct task_struct *task;
>   
> -	mutex_lock(&dev->struct_mutex);
> -	if (dev_priv->kernel_context)
> -		per_file_ctx_stats(0, dev_priv->kernel_context, &stats);
> +			spin_lock(&file->table_lock);
> +			idr_for_each(&file->object_idr, per_file_stats, &stats);

Headache-inducing diff... however, doesn't this over-account objects on
account of walking the same file from multiple contexts?

> +			spin_unlock(&file->table_lock);
>   
> -	list_for_each_entry(file, &dev->filelist, lhead) {
> -		struct drm_i915_file_private *fpriv = file->driver_priv;
> -		idr_for_each(&fpriv->context_idr, per_file_ctx_stats, &stats);
> +			rcu_read_lock();
> +			task = pid_task(ctx->pid ?: file->pid, PIDTYPE_PID);
> +			print_file_stats(m,
> +					 task ? task->comm : "<unknown>",
> +					 stats);

And this as well looks like it'll end up duplicated.

> +			rcu_read_unlock();
> +		}
>   	}
> -	mutex_unlock(&dev->struct_mutex);
>   
> -	print_file_stats(m, "[k]contexts", stats);
> +	print_file_stats(m, "[k]contexts", kstats);
>   }
>   
>   static int i915_gem_object_info(struct seq_file *m, void *data)
> @@ -426,14 +431,9 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
>   	u64 size, mapped_size, purgeable_size, dpy_size, huge_size;
>   	struct drm_i915_gem_object *obj;
>   	unsigned int page_sizes = 0;
> -	struct drm_file *file;
>   	char buf[80];
>   	int ret;
>   
> -	ret = mutex_lock_interruptible(&dev->struct_mutex);
> -	if (ret)
> -		return ret;
> -
>   	seq_printf(m, "%u objects, %llu bytes\n",
>   		   dev_priv->mm.object_count,
>   		   dev_priv->mm.object_memory);

Noticed we technically need mm.object_stat_lock here for atomic readout, 
but I guess we don't care much.

> @@ -514,43 +514,14 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
>   					buf, sizeof(buf)));
>   
>   	seq_putc(m, '\n');
> -	print_batch_pool_stats(m, dev_priv);
> -	mutex_unlock(&dev->struct_mutex);
> -
> -	mutex_lock(&dev->filelist_mutex);
> -	print_context_stats(m, dev_priv);
> -	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
> -		struct file_stats stats;
> -		struct drm_i915_file_private *file_priv = file->driver_priv;
> -		struct i915_request *request;
> -		struct task_struct *task;
> -
> -		mutex_lock(&dev->struct_mutex);
>   
> -		memset(&stats, 0, sizeof(stats));
> -		stats.file_priv = file->driver_priv;
> -		spin_lock(&file->table_lock);
> -		idr_for_each(&file->object_idr, per_file_stats, &stats);
> -		spin_unlock(&file->table_lock);
> -		/*
> -		 * Although we have a valid reference on file->pid, that does
> -		 * not guarantee that the task_struct who called get_pid() is
> -		 * still alive (e.g. get_pid(current) => fork() => exit()).
> -		 * Therefore, we need to protect this ->comm access using RCU.
> -		 */
> -		request = list_first_entry_or_null(&file_priv->mm.request_list,
> -						   struct i915_request,
> -						   client_link);
> -		rcu_read_lock();
> -		task = pid_task(request && request->gem_context->pid ?
> -				request->gem_context->pid : file->pid,
> -				PIDTYPE_PID);
> -		print_file_stats(m, task ? task->comm : "<unknown>", stats);
> -		rcu_read_unlock();
> +	ret = mutex_lock_interruptible(&dev->struct_mutex);
> +	if (ret)
> +		return ret;
>   
> -		mutex_unlock(&dev->struct_mutex);
> -	}
> -	mutex_unlock(&dev->filelist_mutex);
> +	print_batch_pool_stats(m, dev_priv);
> +	print_context_stats(m, dev_priv);
> +	mutex_unlock(&dev->struct_mutex);
>   
>   	return 0;
>   }
> 

Regards,

Tvrtko
Chris Wilson Sept. 25, 2018, 12:20 p.m. UTC | #2
Quoting Tvrtko Ursulin (2018-09-24 12:57:31)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > Include the total size of closed vma when reporting the per_ctx_stats of
> > debugfs/i915_gem_objects.
> 
> Why do we need/want this?

Removing extra locks, reducing struct_mutex coverage, and because it
would have shown a transient leak that was fixed a while back. That is
the raison d'être of this debugfs file.
 
> > Whilst adjusting the context tracking, note that we can simply use our
> > list of contexts in i915->contexts rather than circumlocute via
> > dev->filelist and the per-file context idr.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_debugfs.c | 113 +++++++++++-----------------
> >   1 file changed, 42 insertions(+), 71 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 2ac75bc10afa..6b5cc30f3e09 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -302,6 +302,7 @@ struct file_stats {
> >       u64 total, unbound;
> >       u64 global, shared;
> >       u64 active, inactive;
> > +     u64 closed;
> >   };
> >   
> >   static int per_file_stats(int id, void *ptr, void *data)
> > @@ -336,6 +337,9 @@ static int per_file_stats(int id, void *ptr, void *data)
> >                       stats->active += vma->node.size;
> >               else
> >                       stats->inactive += vma->node.size;
> > +
> > +             if (i915_vma_is_closed(vma))
> > +                     stats->closed += vma->node.size;
> 
> We can have closed and active?

Yes.
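
A vma can be closed (its userspace handle released) while still active,
i.e. busy on the GPU until its last request retires; the two states are
tracked independently. An annotated sketch of the hunk above (comments
added for clarity; not the verbatim source):

	if (i915_vma_is_active(vma))
		stats->active += vma->node.size;	/* still busy on the GPU */
	else
		stats->inactive += vma->node.size;

	/* Independent of active/inactive: the handle may be gone already. */
	if (i915_vma_is_closed(vma))
		stats->closed += vma->node.size;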

> >       }
> >   
> >       return 0;
> > @@ -343,7 +347,7 @@ static int per_file_stats(int id, void *ptr, void *data)
> >   
> >   #define print_file_stats(m, name, stats) do { \
> >       if (stats.count) \
> > -             seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound)\n", \
> > +             seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound, %llu closed)\n", \
> >                          name, \
> >                          stats.count, \
> >                          stats.total, \
> > @@ -351,7 +355,8 @@ static int per_file_stats(int id, void *ptr, void *data)
> >                          stats.inactive, \
> >                          stats.global, \
> >                          stats.shared, \
> > -                        stats.unbound); \
> > +                        stats.unbound, \
> > +                        stats.closed); \
> >   } while (0)
> >   
> >   static void print_batch_pool_stats(struct seq_file *m,
> > @@ -377,44 +382,44 @@ static void print_batch_pool_stats(struct seq_file *m,
> >       print_file_stats(m, "[k]batch pool", stats);
> >   }
> >   
> > -static int per_file_ctx_stats(int idx, void *ptr, void *data)
> > +static void print_context_stats(struct seq_file *m,
> > +                             struct drm_i915_private *i915)
> >   {
> > -     struct i915_gem_context *ctx = ptr;
> > -     struct intel_engine_cs *engine;
> > -     enum intel_engine_id id;
> > -
> > -     for_each_engine(engine, ctx->i915, id) {
> > -             struct intel_context *ce = to_intel_context(ctx, engine);
> > +     struct file_stats kstats = {};
> > +     struct i915_gem_context *ctx;
> >   
> > -             if (ce->state)
> > -                     per_file_stats(0, ce->state->obj, data);
> > -             if (ce->ring)
> > -                     per_file_stats(0, ce->ring->vma->obj, data);
> > -     }
> > +     list_for_each_entry(ctx, &i915->contexts.list, link) {
> > +             struct file_stats stats = { .file_priv = ctx->file_priv };
> > +             struct intel_engine_cs *engine;
> > +             enum intel_engine_id id;
> >   
> > -     return 0;
> > -}
> > +             for_each_engine(engine, i915, id) {
> > +                     struct intel_context *ce = to_intel_context(ctx, engine);
> >   
> > -static void print_context_stats(struct seq_file *m,
> > -                             struct drm_i915_private *dev_priv)
> > -{
> > -     struct drm_device *dev = &dev_priv->drm;
> > -     struct file_stats stats;
> > -     struct drm_file *file;
> > +                     if (ce->state)
> > +                             per_file_stats(0, ce->state->obj, &kstats);
> > +                     if (ce->ring)
> > +                             per_file_stats(0, ce->ring->vma->obj, &kstats);
> > +             }
> >   
> > -     memset(&stats, 0, sizeof(stats));
> > +             if (!IS_ERR_OR_NULL(stats.file_priv)) {
> > +                     struct drm_file *file = stats.file_priv->file;
> > +                     struct task_struct *task;
> >   
> > -     mutex_lock(&dev->struct_mutex);
> > -     if (dev_priv->kernel_context)
> > -             per_file_ctx_stats(0, dev_priv->kernel_context, &stats);
> > +                     spin_lock(&file->table_lock);
> > +                     idr_for_each(&file->object_idr, per_file_stats, &stats);
> 
> Headache-inducing diff... however, doesn't this over-account objects on
> account of walking the same file from multiple contexts?

Sure, but it doesn't matter, since we are using this as an indication
of how many objects are usable from each context; a client with two
contexts on one fd simply has its object_idr walked twice, once per
context. We could go beyond the original code and try to narrow it down
to only the objects in use by a single context, but that looks fiddly;
all I want is a rough usage figure to spot a leak, so I'm not too
concerned.

> 
> > +                     spin_unlock(&file->table_lock);
> >   
> > -     list_for_each_entry(file, &dev->filelist, lhead) {
> > -             struct drm_i915_file_private *fpriv = file->driver_priv;
> > -             idr_for_each(&fpriv->context_idr, per_file_ctx_stats, &stats);
> > +                     rcu_read_lock();
> > +                     task = pid_task(ctx->pid ?: file->pid, PIDTYPE_PID);
> > +                     print_file_stats(m,
> > +                                      task ? task->comm : "<unknown>",
> > +                                      stats);
> 
> And this as well looks like it'll end up duplicated.

Sure, it should be since it's reporting each context.
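
With the format string above, a client that owns two contexts would
show up along these lines (numbers invented purely for illustration):

	glxgears: 3 objects, 12288 bytes (4096 active, 8192 inactive, 0 global, 0 shared, 0 unbound, 0 closed)
	glxgears: 3 objects, 12288 bytes (4096 active, 8192 inactive, 0 global, 0 shared, 0 unbound, 0 closed)

i.e. one line per context rather than one per open file.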
> 
> > +                     rcu_read_unlock();
> > +             }
> >       }
> > -     mutex_unlock(&dev->struct_mutex);
> >   
> > -     print_file_stats(m, "[k]contexts", stats);
> > +     print_file_stats(m, "[k]contexts", kstats);
> >   }
> >   
> >   static int i915_gem_object_info(struct seq_file *m, void *data)
> > @@ -426,14 +431,9 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
> >       u64 size, mapped_size, purgeable_size, dpy_size, huge_size;
> >       struct drm_i915_gem_object *obj;
> >       unsigned int page_sizes = 0;
> > -     struct drm_file *file;
> >       char buf[80];
> >       int ret;
> >   
> > -     ret = mutex_lock_interruptible(&dev->struct_mutex);
> > -     if (ret)
> > -             return ret;
> > -
> >       seq_printf(m, "%u objects, %llu bytes\n",
> >                  dev_priv->mm.object_count,
> >                  dev_priv->mm.object_memory);
> 
> Noticed we technically need mm.object_stat_lock here for atomic readout, 
> but I guess we don't care much.

Correct.
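
For reference, those counters are updated as a pair under
mm.object_stat_lock elsewhere in the driver; from memory the helper
looks roughly like this (a sketch, not the verbatim source):

	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);

so an unlocked reader can observe a count/memory pair from two
different instants, which is tolerable for a debugfs summary.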
-Chris

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2ac75bc10afa..6b5cc30f3e09 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -302,6 +302,7 @@ struct file_stats {
 	u64 total, unbound;
 	u64 global, shared;
 	u64 active, inactive;
+	u64 closed;
 };
 
 static int per_file_stats(int id, void *ptr, void *data)
@@ -336,6 +337,9 @@ static int per_file_stats(int id, void *ptr, void *data)
 			stats->active += vma->node.size;
 		else
 			stats->inactive += vma->node.size;
+
+		if (i915_vma_is_closed(vma))
+			stats->closed += vma->node.size;
 	}
 
 	return 0;
@@ -343,7 +347,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 
 #define print_file_stats(m, name, stats) do { \
 	if (stats.count) \
-		seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound)\n", \
+		seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound, %llu closed)\n", \
 			   name, \
 			   stats.count, \
 			   stats.total, \
@@ -351,7 +355,8 @@ static int per_file_stats(int id, void *ptr, void *data)
 			   stats.inactive, \
 			   stats.global, \
 			   stats.shared, \
-			   stats.unbound); \
+			   stats.unbound, \
+			   stats.closed); \
 } while (0)
 
 static void print_batch_pool_stats(struct seq_file *m,
@@ -377,44 +382,44 @@ static void print_batch_pool_stats(struct seq_file *m,
 	print_file_stats(m, "[k]batch pool", stats);
 }
 
-static int per_file_ctx_stats(int idx, void *ptr, void *data)
+static void print_context_stats(struct seq_file *m,
+				struct drm_i915_private *i915)
 {
-	struct i915_gem_context *ctx = ptr;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	for_each_engine(engine, ctx->i915, id) {
-		struct intel_context *ce = to_intel_context(ctx, engine);
+	struct file_stats kstats = {};
+	struct i915_gem_context *ctx;
 
-		if (ce->state)
-			per_file_stats(0, ce->state->obj, data);
-		if (ce->ring)
-			per_file_stats(0, ce->ring->vma->obj, data);
-	}
+	list_for_each_entry(ctx, &i915->contexts.list, link) {
+		struct file_stats stats = { .file_priv = ctx->file_priv };
+		struct intel_engine_cs *engine;
+		enum intel_engine_id id;
 
-	return 0;
-}
+		for_each_engine(engine, i915, id) {
+			struct intel_context *ce = to_intel_context(ctx, engine);
 
-static void print_context_stats(struct seq_file *m,
-				struct drm_i915_private *dev_priv)
-{
-	struct drm_device *dev = &dev_priv->drm;
-	struct file_stats stats;
-	struct drm_file *file;
+			if (ce->state)
+				per_file_stats(0, ce->state->obj, &kstats);
+			if (ce->ring)
+				per_file_stats(0, ce->ring->vma->obj, &kstats);
+		}
 
-	memset(&stats, 0, sizeof(stats));
+		if (!IS_ERR_OR_NULL(stats.file_priv)) {
+			struct drm_file *file = stats.file_priv->file;
+			struct task_struct *task;
 
-	mutex_lock(&dev->struct_mutex);
-	if (dev_priv->kernel_context)
-		per_file_ctx_stats(0, dev_priv->kernel_context, &stats);
+			spin_lock(&file->table_lock);
+			idr_for_each(&file->object_idr, per_file_stats, &stats);
+			spin_unlock(&file->table_lock);
 
-	list_for_each_entry(file, &dev->filelist, lhead) {
-		struct drm_i915_file_private *fpriv = file->driver_priv;
-		idr_for_each(&fpriv->context_idr, per_file_ctx_stats, &stats);
+			rcu_read_lock();
+			task = pid_task(ctx->pid ?: file->pid, PIDTYPE_PID);
+			print_file_stats(m,
+					 task ? task->comm : "<unknown>",
+					 stats);
+			rcu_read_unlock();
+		}
 	}
-	mutex_unlock(&dev->struct_mutex);
 
-	print_file_stats(m, "[k]contexts", stats);
+	print_file_stats(m, "[k]contexts", kstats);
 }
 
 static int i915_gem_object_info(struct seq_file *m, void *data)
@@ -426,14 +431,9 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 	u64 size, mapped_size, purgeable_size, dpy_size, huge_size;
 	struct drm_i915_gem_object *obj;
 	unsigned int page_sizes = 0;
-	struct drm_file *file;
 	char buf[80];
 	int ret;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
-
 	seq_printf(m, "%u objects, %llu bytes\n",
 		   dev_priv->mm.object_count,
 		   dev_priv->mm.object_memory);
@@ -514,43 +514,14 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 					buf, sizeof(buf)));
 
 	seq_putc(m, '\n');
-	print_batch_pool_stats(m, dev_priv);
-	mutex_unlock(&dev->struct_mutex);
-
-	mutex_lock(&dev->filelist_mutex);
-	print_context_stats(m, dev_priv);
-	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
-		struct file_stats stats;
-		struct drm_i915_file_private *file_priv = file->driver_priv;
-		struct i915_request *request;
-		struct task_struct *task;
-
-		mutex_lock(&dev->struct_mutex);
 
-		memset(&stats, 0, sizeof(stats));
-		stats.file_priv = file->driver_priv;
-		spin_lock(&file->table_lock);
-		idr_for_each(&file->object_idr, per_file_stats, &stats);
-		spin_unlock(&file->table_lock);
-		/*
-		 * Although we have a valid reference on file->pid, that does
-		 * not guarantee that the task_struct who called get_pid() is
-		 * still alive (e.g. get_pid(current) => fork() => exit()).
-		 * Therefore, we need to protect this ->comm access using RCU.
-		 */
-		request = list_first_entry_or_null(&file_priv->mm.request_list,
-						   struct i915_request,
-						   client_link);
-		rcu_read_lock();
-		task = pid_task(request && request->gem_context->pid ?
-				request->gem_context->pid : file->pid,
-				PIDTYPE_PID);
-		print_file_stats(m, task ? task->comm : "<unknown>", stats);
-		rcu_read_unlock();
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
 
-		mutex_unlock(&dev->struct_mutex);
-	}
-	mutex_unlock(&dev->filelist_mutex);
+	print_batch_pool_stats(m, dev_priv);
+	print_context_stats(m, dev_priv);
+	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
 }