diff mbox

[2/2] drm/i915/error: capture errored context based on request context-id

Message ID 1470931741-20353-3-git-send-email-arun.siluvery@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

arun.siluvery@linux.intel.com Aug. 11, 2016, 4:09 p.m. UTC
From: Dave Gordon <david.s.gordon@intel.com>

Context capture hasn't worked for a while now, since the introduction of
execlists because the function that records active context is using CCID
register but this register contents are not valid in Execlist mode; this
patch makes it work again by using a different way of identifying the
context of interest in execlist mode.

For: VIZ-2021
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |  2 ++
 drivers/gpu/drm/i915/i915_gpu_error.c | 46 +++++++++++++++++++++++++----------
 2 files changed, 35 insertions(+), 13 deletions(-)

Comments

Chris Wilson Aug. 11, 2016, 4:43 p.m. UTC | #1
On Thu, Aug 11, 2016 at 05:09:01PM +0100, Arun Siluvery wrote:
> From: Dave Gordon <david.s.gordon@intel.com>
> 
> Context capture hasn't worked for a while now, since the introduction of
> execlists because the function that records active context is using CCID
> register but this register contents are not valid in Execlist mode; this
> patch makes it work again by using a different way of identifying the
> context of interest in execlist mode.

Bzzt. The contexts are stored in the request and the iteration here is
still unsafe.
-Chris
Dave Gordon Aug. 18, 2016, 4:56 p.m. UTC | #2
On 11/08/16 17:43, Chris Wilson wrote:
> On Thu, Aug 11, 2016 at 05:09:01PM +0100, Arun Siluvery wrote:
>> From: Dave Gordon <david.s.gordon@intel.com>
>>
>> Context capture hasn't worked for a while now, since the introduction of
>> execlists because the function that records active context is using CCID
>> register but this register contents are not valid in Execlist mode; this
>> patch makes it work again by using a different way of identifying the
>> context of interest in execlist mode.
>
> Bzzt. The contexts are stored in the request and the iteration here is
> still unsafe.
> -Chris

This patch didn't add any iteration, so any unsafe iteration was already 
there.

All this did was change the matching criterion for which context is the 
one interesting one, by matching on the relevant parts of the "active 
context descriptor" rather than only the (obsolete) CCID register.

.Dave.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7971c76..94b9314 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -546,6 +546,8 @@  struct drm_i915_error_state {
 		u32 rc_psmi; /* sleep state */
 		u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
 
+		u64 ctx_desc;
+
 		struct drm_i915_error_object {
 			int page_count;
 			u64 gtt_offset;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 3209f6a..fa365ee 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -386,7 +386,8 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
 	err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
 	err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
-	err_printf(m, "CCID: 0x%08x\n", error->ccid);
+	if (!i915.enable_execlists)
+		err_printf(m, "CCID: 0x%08x\n", error->ccid);
 	err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings);
 
 	for (i = 0; i < dev_priv->num_fence_regs; i++)
@@ -526,9 +527,10 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 		}
 
 		if ((obj = ee->ctx)) {
-			err_printf(m, "%s --- HW Context = 0x%08x\n",
+			err_printf(m, "%s --- HW Context = 0x%08x, %d pages\n",
 				   dev_priv->engine[i].name,
-				   lower_32_bits(obj->gtt_offset));
+				   lower_32_bits(obj->gtt_offset),
+				   obj->page_count);
 			print_error_obj(m, obj);
 		}
 	}
@@ -1069,16 +1071,29 @@  static void i915_gem_record_active_context(struct intel_engine_cs *engine,
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct drm_i915_gem_object *obj;
+	bool elsp_mode = i915.enable_execlists;
 
 	/* Currently render ring is the only HW context user */
-	if (engine->id != RCS || !error->ccid)
+	if (engine->id != RCS)
+		return;
+
+	/* contents of CCID are not valid in execlist mode of scheduling */
+	if (!elsp_mode && !error->ccid)
 		return;
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+		u64 base;
+
 		if (!i915_gem_obj_ggtt_bound(obj))
 			continue;
 
-		if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) {
+		base = i915_gem_obj_ggtt_offset(obj);
+
+		if (elsp_mode)
+			base += LRC_PPHWSP_PN * PAGE_SIZE;
+
+		if (((error->ccid & PAGE_MASK) == base) ||
+		    (((base ^ ee->ctx_desc) & 0x00000000FFFFF000ULL) == 0)) {
 			ee->ctx = i915_error_ggtt_object_create(dev_priv, obj);
 			break;
 		}
@@ -1090,6 +1105,7 @@  static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 {
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	struct drm_i915_gem_request *request;
+	struct intel_engine_cs *engine;
 	int i, count;
 
 	if (dev_priv->semaphore_obj) {
@@ -1098,17 +1114,12 @@  static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 						      dev_priv->semaphore_obj);
 	}
 
-	for (i = 0; i < I915_NUM_ENGINES; i++) {
-		struct intel_engine_cs *engine = &dev_priv->engine[i];
+	for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines) {
 		struct drm_i915_error_engine *ee = &error->engine[i];
+		u64 ctx_desc;
 
 		ee->pid = -1;
-		ee->engine_id = -1;
-
-		if (!intel_engine_initialized(engine))
-			continue;
-
-		ee->engine_id = i;
+		ee->engine_id = engine->id;
 
 		error_record_engine_registers(error, engine, ee);
 		error_record_engine_waiters(engine, ee);
@@ -1150,7 +1161,16 @@  static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 			error->simulated |=
 				request->ctx->flags & CONTEXT_NO_ERROR_CAPTURE;
 
+			ctx_desc = 0;
+			if (i915.enable_execlists) {
+				struct i915_gem_context *ctx;
+
+				ctx = request ? request->ctx : dev_priv->kernel_context;
+				ctx_desc = intel_lr_context_descriptor(ctx, engine);
+			}
+
 			ring = request->ring;
+			ee->ctx_desc = ctx_desc;
 			ee->cpu_ring_head = ring->head;
 			ee->cpu_ring_tail = ring->tail;
 			ee->ringbuffer =