diff mbox series

[08/28] drm/i915/gt: Show all active timelines for debugging

Message ID 20201117113103.21480-8-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/28] drm/i915/selftests: Improve granularity for mocs reset checks | expand

Commit Message

Chris Wilson Nov. 17, 2020, 11:30 a.m. UTC
Include the active timelines for debugfs/i915_engine_info, so that we
can see which timelines have unready requests in flight that are not
otherwise shown.

Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_timeline.c | 79 ++++++++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_timeline.h |  8 +++
 drivers/gpu/drm/i915/i915_debugfs.c      | 18 +++---
 3 files changed, 97 insertions(+), 8 deletions(-)

Comments

Tvrtko Ursulin Nov. 17, 2020, 12:59 p.m. UTC | #1
On 17/11/2020 11:30, Chris Wilson wrote:
> Include the active timelines for debugfs/i915_engine_info, so that we
> can see which have unready requests inflight which are not shown
> otherwise.
> 
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_timeline.c | 79 ++++++++++++++++++++++++
>   drivers/gpu/drm/i915/gt/intel_timeline.h |  8 +++
>   drivers/gpu/drm/i915/i915_debugfs.c      | 18 +++---
>   3 files changed, 97 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index 7ea94d201fe6..2b4ed4b2b67c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -617,6 +617,85 @@ void intel_gt_fini_timelines(struct intel_gt *gt)
>   	GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
>   }
>   
> +void intel_gt_show_timelines(struct intel_gt *gt,
> +			     struct drm_printer *m,
> +			     void (*show_request)(struct drm_printer *m,
> +						  const struct i915_request *rq,
> +						  const char *prefix))
> +{
> +	struct intel_gt_timelines *timelines = &gt->timelines;
> +	struct intel_timeline *tl, *tn;
> +	LIST_HEAD(free);
> +
> +	spin_lock(&timelines->lock);
> +	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
> +		unsigned long count, ready, inflight;
> +		struct i915_request *rq, *rn;
> +		struct dma_fence *fence;
> +
> +		if (!mutex_trylock(&tl->mutex))
> +			continue;

I suggest printing a marker like "Timeline %llx: busy" or similar to
avoid confusion.

> +
> +		intel_timeline_get(tl);
> +		GEM_BUG_ON(!atomic_read(&tl->active_count));
> +		atomic_inc(&tl->active_count); /* pin the list element */
> +		spin_unlock(&timelines->lock);
> +
> +		count = 0;
> +		ready = 0;
> +		inflight = 0;
> +		list_for_each_entry_safe(rq, rn, &tl->requests, link) {
> +			if (i915_request_completed(rq))
> +				continue;
> +
> +			count++;
> +			if (i915_request_is_ready(rq))
> +				ready++;
> +			if (i915_request_is_active(rq))
> +				inflight++;
> +		}
> +
> +		drm_printf(m, "Timeline %llx: { ", tl->fence_context);
> +		drm_printf(m, "count %lu, ready: %lu, inflight: %lu",
> +			   count, ready, inflight);
> +		drm_printf(m, ", seqno: { current: %d, last: %d }",
> +			   *tl->hwsp_seqno, tl->seqno);
> +		fence = i915_active_fence_get(&tl->last_request);
> +		if (fence) {
> +			drm_printf(m, ", engine: %s",
> +				   to_request(fence)->engine->name);
> +			dma_fence_put(fence);
> +		}
> +		drm_printf(m, " }\n");
> +
> +		if (show_request) {
> +			list_for_each_entry_safe(rq, rn, &tl->requests, link)
> +				show_request(m, rq,
> +					     i915_request_is_active(rq) ? "  E" :
> +					     i915_request_is_ready(rq) ? "  Q" :
> +					     "  U");

Can we get some consistency between the category counts and flags?

s/count/queued/ -> Q
ready -> R (also matches with term runnable)
active -> E ? hmmm E is consistent with the engine info dump.

Not ideal but perhaps every bit of more consistency is good.

> +		}
> +
> +		mutex_unlock(&tl->mutex);
> +		spin_lock(&timelines->lock);
> +
> +		/* Resume list iteration after reacquiring spinlock */
> +		list_safe_reset_next(tl, tn, link);
> +		if (atomic_dec_and_test(&tl->active_count))
> +			list_del(&tl->link);
> +
> +		/* Defer the final release to after the spinlock */
> +		if (refcount_dec_and_test(&tl->kref.refcount)) {
> +			GEM_BUG_ON(atomic_read(&tl->active_count));
> +			list_add(&tl->link, &free);
> +		}
> +	}
> +	spin_unlock(&timelines->lock);
> +
> +	list_for_each_entry_safe(tl, tn, &free, link)
> +		__intel_timeline_free(&tl->kref);
> +}
> +
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>   #include "gt/selftests/mock_timeline.c"
>   #include "gt/selftest_timeline.c"
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
> index 9882cd911d8e..9b88f220be2b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.h
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
> @@ -31,6 +31,8 @@
>   #include "i915_syncmap.h"
>   #include "intel_timeline_types.h"
>   
> +struct drm_printer;
> +
>   struct intel_timeline *
>   __intel_timeline_create(struct intel_gt *gt,
>   			struct i915_vma *global_hwsp,
> @@ -106,4 +108,10 @@ int intel_timeline_read_hwsp(struct i915_request *from,
>   void intel_gt_init_timelines(struct intel_gt *gt);
>   void intel_gt_fini_timelines(struct intel_gt *gt);
>   
> +void intel_gt_show_timelines(struct intel_gt *gt,
> +			     struct drm_printer *m,
> +			     void (*show_request)(struct drm_printer *m,
> +						  const struct i915_request *rq,
> +						  const char *prefix));
> +
>   #endif
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 337293c7bb7d..498c82dcc7e9 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1306,26 +1306,28 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
>   
>   static int i915_engine_info(struct seq_file *m, void *unused)
>   {
> -	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> +	struct drm_i915_private *i915 = node_to_i915(m->private);
>   	struct intel_engine_cs *engine;
>   	intel_wakeref_t wakeref;
>   	struct drm_printer p;
>   
> -	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
> +	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
>   
>   	seq_printf(m, "GT awake? %s [%d]\n",
> -		   yesno(dev_priv->gt.awake),
> -		   atomic_read(&dev_priv->gt.wakeref.count));
> +		   yesno(i915->gt.awake),
> +		   atomic_read(&i915->gt.wakeref.count));
>   	seq_printf(m, "GT busy: %llu ms\n",
> -		   ktime_to_ms(intel_gt_get_busy_time(&dev_priv->gt)));
> +		   ktime_to_ms(intel_gt_get_busy_time(&i915->gt)));
>   	seq_printf(m, "CS timestamp frequency: %u Hz\n",
> -		   RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_hz);
> +		   RUNTIME_INFO(i915)->cs_timestamp_frequency_hz);
>   
>   	p = drm_seq_file_printer(m);
> -	for_each_uabi_engine(engine, dev_priv)
> +	for_each_uabi_engine(engine, i915)
>   		intel_engine_dump(engine, &p, "%s\n", engine->name);
>   
> -	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
> +	intel_gt_show_timelines(&i915->gt, &p, NULL);
> +
> +	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
>   
>   	return 0;
>   }
> 

Regards,

Tvrtko
Chris Wilson Nov. 17, 2020, 1:25 p.m. UTC | #2
Quoting Tvrtko Ursulin (2020-11-17 12:59:44)
> 
> On 17/11/2020 11:30, Chris Wilson wrote:
> > +             if (show_request) {
> > +                     list_for_each_entry_safe(rq, rn, &tl->requests, link)
> > +                             show_request(m, rq,
> > +                                          i915_request_is_active(rq) ? "  E" :
> > +                                          i915_request_is_ready(rq) ? "  Q" :
> > +                                          "  U");
> 
> Can we get some consistency between the category counts and flags.
> 
> s/count/queued/ -> Q

Hmm, if you are sure. Q would then not match with the engine info.

Still favouring count over queued; I think count indicates more clearly
that it is the superset, but queued may imply it excludes ready and
definitely sounds like it should not include inflight.

> ready -> R (also matches with term runnable)
> active -> E ? hmmm E is consistent with the engine info dump.
> 
> Not ideal but perhaps every bit of more consistency is good.

Not sold yet, but not happy with the current flags either.

If we go with 'R' for ready, we should also update engine info.
-Chris
Tvrtko Ursulin Nov. 18, 2020, 3:51 p.m. UTC | #3
On 17/11/2020 13:25, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-11-17 12:59:44)
>>
>> On 17/11/2020 11:30, Chris Wilson wrote:
>>> +             if (show_request) {
>>> +                     list_for_each_entry_safe(rq, rn, &tl->requests, link)
>>> +                             show_request(m, rq,
>>> +                                          i915_request_is_active(rq) ? "  E" :
>>> +                                          i915_request_is_ready(rq) ? "  Q" :
>>> +                                          "  U");
>>
>> Can we get some consistency between the category counts and flags.
>>
>> s/count/queued/ -> Q
> 
> Hmm, if you are sure. Q would then not match with the engine info.

Sure? Not really. What do we have there? You mean "!/*/+/-" flags? Or 
"E/Q/V" from intel_execlists_show_requests? Right, 'Q' there means 
runnable and it doesn't show queued at all. Yes, why not change everything.

> Still favouring count over queued; I think count indicates more clearly
> that it is the superset, but queued may imply it excludes ready and
> definitely sounds like it should not include inflight.

I am okay with that.

>> ready -> R (also matches with term runnable)
>> active -> E ? hmmm E is consistent with the engine info dump.
>>
>> Not ideal but perhaps every bit of more consistency is good.
> 
> Not sold yet, but not happy with the current flags either.
> 
> If we go with 'R' for ready, we should also update engine info.

Okay we seem to have plenty of options.

U or Q - queued/unready
R or Q - ready/queued (to backend) (Rv/Qv for virtual?)
E or R, or I - executing/running/in-flight

Q -> R -> E
U -> R -> E
U -> Q -> E/R/I
U -> R -> E/I

I don't know.. either one as long as all places use the same.

Regards,

Tvrtko
Chris Wilson Nov. 19, 2020, 10:47 a.m. UTC | #4
Quoting Tvrtko Ursulin (2020-11-18 15:51:41)
> 
> On 17/11/2020 13:25, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2020-11-17 12:59:44)
> >>
> >> On 17/11/2020 11:30, Chris Wilson wrote:
> >>> +             if (show_request) {
> >>> +                     list_for_each_entry_safe(rq, rn, &tl->requests, link)
> >>> +                             show_request(m, rq,
> >>> +                                          i915_request_is_active(rq) ? "  E" :
> >>> +                                          i915_request_is_ready(rq) ? "  Q" :
> >>> +                                          "  U");
> >>
> >> Can we get some consistency between the category counts and flags.
> >>
> >> s/count/queued/ -> Q
> > 
> > Hmm, if you are sure. Q would then not match with the engine info.
> 
> Sure? Not really. What do we have there? You mean "!/*/+/-" flags? Or 
> "E/Q/V" from intel_execlists_show_requests? Right, 'Q' there means 
> runnable and it doesn't show queued at all. Yes, why not change everything.
> 
> > Still favouring count over queued; I think count indicates more clearly
> > that it is the superset, but queued may imply it excludes ready and
> > definitely sounds like it should not include inflight.
> 
> I am okay with that.
> 
> >> ready -> R (also matches with term runnable)
> >> active -> E ? hmmm E is consistent with the engine info dump.
> >>
> >> Not ideal but perhaps every bit of more consistency is good.
> > 
> > Not sold yet, but not happy with the current flags either.
> > 
> > If we go with 'R' for ready, we should also update engine info.
> 
> Okay we seem to have plenty of options.
> 
> U or Q - queued/unready
> R or Q - ready/queued (to backend) (Rv/Qv for virtual?)
> E or R, or I - executing/running/in-flight
> 
> Q -> R -> E
> U -> R -> E
> U -> Q -> E/R/I
> U -> R -> E/I
> 
> I don't know.. either one as long as all places use the same.

URE.

Unready -> ready -> executing.

Unready -> ready is a good pairing, and executing over inflight to avoid
being confused for an infection.

And unready opens up a plethora of jokes by just dropping the 'y'.
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 7ea94d201fe6..2b4ed4b2b67c 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -617,6 +617,85 @@  void intel_gt_fini_timelines(struct intel_gt *gt)
 	GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
 }
 
+void intel_gt_show_timelines(struct intel_gt *gt,
+			     struct drm_printer *m,
+			     void (*show_request)(struct drm_printer *m,
+						  const struct i915_request *rq,
+						  const char *prefix))
+{
+	struct intel_gt_timelines *timelines = &gt->timelines;
+	struct intel_timeline *tl, *tn;
+	LIST_HEAD(free);
+
+	spin_lock(&timelines->lock);
+	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+		unsigned long count, ready, inflight;
+		struct i915_request *rq, *rn;
+		struct dma_fence *fence;
+
+		if (!mutex_trylock(&tl->mutex))
+			continue;
+
+		intel_timeline_get(tl);
+		GEM_BUG_ON(!atomic_read(&tl->active_count));
+		atomic_inc(&tl->active_count); /* pin the list element */
+		spin_unlock(&timelines->lock);
+
+		count = 0;
+		ready = 0;
+		inflight = 0;
+		list_for_each_entry_safe(rq, rn, &tl->requests, link) {
+			if (i915_request_completed(rq))
+				continue;
+
+			count++;
+			if (i915_request_is_ready(rq))
+				ready++;
+			if (i915_request_is_active(rq))
+				inflight++;
+		}
+
+		drm_printf(m, "Timeline %llx: { ", tl->fence_context);
+		drm_printf(m, "count %lu, ready: %lu, inflight: %lu",
+			   count, ready, inflight);
+		drm_printf(m, ", seqno: { current: %d, last: %d }",
+			   *tl->hwsp_seqno, tl->seqno);
+		fence = i915_active_fence_get(&tl->last_request);
+		if (fence) {
+			drm_printf(m, ", engine: %s",
+				   to_request(fence)->engine->name);
+			dma_fence_put(fence);
+		}
+		drm_printf(m, " }\n");
+
+		if (show_request) {
+			list_for_each_entry_safe(rq, rn, &tl->requests, link)
+				show_request(m, rq,
+					     i915_request_is_active(rq) ? "  E" :
+					     i915_request_is_ready(rq) ? "  Q" :
+					     "  U");
+		}
+
+		mutex_unlock(&tl->mutex);
+		spin_lock(&timelines->lock);
+
+		/* Resume list iteration after reacquiring spinlock */
+		list_safe_reset_next(tl, tn, link);
+		if (atomic_dec_and_test(&tl->active_count))
+			list_del(&tl->link);
+
+		/* Defer the final release to after the spinlock */
+		if (refcount_dec_and_test(&tl->kref.refcount)) {
+			GEM_BUG_ON(atomic_read(&tl->active_count));
+			list_add(&tl->link, &free);
+		}
+	}
+	spin_unlock(&timelines->lock);
+
+	list_for_each_entry_safe(tl, tn, &free, link)
+		__intel_timeline_free(&tl->kref);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "gt/selftests/mock_timeline.c"
 #include "gt/selftest_timeline.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index 9882cd911d8e..9b88f220be2b 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -31,6 +31,8 @@ 
 #include "i915_syncmap.h"
 #include "intel_timeline_types.h"
 
+struct drm_printer;
+
 struct intel_timeline *
 __intel_timeline_create(struct intel_gt *gt,
 			struct i915_vma *global_hwsp,
@@ -106,4 +108,10 @@  int intel_timeline_read_hwsp(struct i915_request *from,
 void intel_gt_init_timelines(struct intel_gt *gt);
 void intel_gt_fini_timelines(struct intel_gt *gt);
 
+void intel_gt_show_timelines(struct intel_gt *gt,
+			     struct drm_printer *m,
+			     void (*show_request)(struct drm_printer *m,
+						  const struct i915_request *rq,
+						  const char *prefix));
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 337293c7bb7d..498c82dcc7e9 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1306,26 +1306,28 @@  static int i915_runtime_pm_status(struct seq_file *m, void *unused)
 
 static int i915_engine_info(struct seq_file *m, void *unused)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	struct drm_i915_private *i915 = node_to_i915(m->private);
 	struct intel_engine_cs *engine;
 	intel_wakeref_t wakeref;
 	struct drm_printer p;
 
-	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	seq_printf(m, "GT awake? %s [%d]\n",
-		   yesno(dev_priv->gt.awake),
-		   atomic_read(&dev_priv->gt.wakeref.count));
+		   yesno(i915->gt.awake),
+		   atomic_read(&i915->gt.wakeref.count));
 	seq_printf(m, "GT busy: %llu ms\n",
-		   ktime_to_ms(intel_gt_get_busy_time(&dev_priv->gt)));
+		   ktime_to_ms(intel_gt_get_busy_time(&i915->gt)));
 	seq_printf(m, "CS timestamp frequency: %u Hz\n",
-		   RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_hz);
+		   RUNTIME_INFO(i915)->cs_timestamp_frequency_hz);
 
 	p = drm_seq_file_printer(m);
-	for_each_uabi_engine(engine, dev_priv)
+	for_each_uabi_engine(engine, i915)
 		intel_engine_dump(engine, &p, "%s\n", engine->name);
 
-	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
+	intel_gt_show_timelines(&i915->gt, &p, NULL);
+
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 
 	return 0;
 }