diff mbox

[2/5] drm/i915/error: capture ringbuffer pointed to by START

Message ID 1454007684-16777-3-git-send-email-arun.siluvery@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

arun.siluvery@linux.intel.com Jan. 28, 2016, 7:01 p.m. UTC
From: Dave Gordon <david.s.gordon@intel.com>

For: VIZ-2021
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c | 36 +++++++++++++++++++++++++----------
 2 files changed, 27 insertions(+), 11 deletions(-)

Comments

Chris Wilson Jan. 29, 2016, 11:47 a.m. UTC | #1
On Thu, Jan 28, 2016 at 07:01:21PM +0000, Arun Siluvery wrote:
> From: Dave Gordon <david.s.gordon@intel.com>
> 
> For: VIZ-2021
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>

What information does this actually provide over and above the hw is not
executing the ring we expect? How have you used this, how do you plan
to?

As it stands adding more fragile loops is just increasing the potential
for an OOPS in that code, even more so as we can eliminate the current
dangerous list iteration for extracting the current ctx object.
-Chris
arun.siluvery@linux.intel.com Feb. 1, 2016, 9:30 p.m. UTC | #2
On 29/01/2016 11:47, Chris Wilson wrote:
> On Thu, Jan 28, 2016 at 07:01:21PM +0000, Arun Siluvery wrote:
>> From: Dave Gordon <david.s.gordon@intel.com>
>>
>> For: VIZ-2021
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>
> What information does this actually provide over and above the hw is not
> executing the ring we expect? How have you used this, how do you plan
> to?
Most of the times this matches with the ringbuffer we dump but when 
there is inconsistency it helps to know what hw is actually executing as 
opposed to what we expect, otherwise the head, tail values we capture 
and the instructions at those offsets don't make sense. Without this we 
don't have any idea what the HW was doing and what caused hang.

regards
Arun

>
> As it stands adding more fragile loops is just increasing the potential
> for an OOPS in that code, even more so as we can eliminate the current
> dangerous list iteration for extracting the current ctx object.
> -Chris
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8b30242..8b510fb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -565,7 +565,7 @@  struct drm_i915_error_state {
 			int page_count;
 			u64 gtt_offset;
 			u32 *pages[0];
-		} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
+		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
 
 		struct drm_i915_error_request {
 			long jiffies;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index bf53c2b..5c8ec63 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -475,13 +475,20 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			}
 		}
 
-		if ((obj = error->ring[i].ringbuffer)) {
+		if ((obj = error->ring[i].req_ringbuffer)) {
 			err_printf(m, "%s --- ringbuffer = 0x%08x\n",
 				   dev_priv->ring[i].name,
 				   lower_32_bits(obj->gtt_offset));
 			print_error_obj(m, obj);
 		}
 
+		if ((obj = error->ring[i].hw_ringbuffer)) {
+			err_printf(m, "%s --- HW ringbuffer = 0x%08x\n",
+				   dev_priv->ring[i].name,
+				   lower_32_bits(obj->gtt_offset));
+			print_error_obj(m, obj);
+		}
+
 		if ((obj = error->ring[i].hws_page)) {
 			u64 hws_offset = obj->gtt_offset;
 			u32 *hws_page = &obj->pages[0][0];
@@ -592,7 +599,8 @@  static void i915_error_state_free(struct kref *error_ref)
 	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
 		i915_error_object_free(error->ring[i].batchbuffer);
 		i915_error_object_free(error->ring[i].wa_batchbuffer);
-		i915_error_object_free(error->ring[i].ringbuffer);
+		i915_error_object_free(error->ring[i].req_ringbuffer);
+		i915_error_object_free(error->ring[i].hw_ringbuffer);
 		i915_error_object_free(error->ring[i].hws_page);
 		i915_error_object_free(error->ring[i].ctx);
 		kfree(error->ring[i].requests);
@@ -1004,19 +1012,27 @@  static void i915_gem_record_active_context(struct intel_engine_cs *ring,
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_object *obj;
-
-	/* Currently render ring is the only HW context user */
-	if (ring->id != RCS || !error->ccid)
-		return;
+	u64 base;
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
 		if (!i915_gem_obj_ggtt_bound(obj))
 			continue;
 
-		if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) {
-			ering->ctx = i915_error_ggtt_object_create(dev_priv, obj);
-			break;
+		base = i915_gem_obj_ggtt_offset(obj);
+
+		if (base == ering->start) {
+			ering->hw_ringbuffer = i915_error_ggtt_object_create(dev_priv, obj);
+			continue;
 		}
+
+		if (!error->ccid)
+			continue;
+
+		if (i915.enable_execlists)
+			base += LRC_PPHWSP_PN * PAGE_SIZE;
+
+		if (base == (error->ccid & PAGE_MASK))
+			ering->ctx = i915_error_ggtt_object_create(dev_priv, obj);
 	}
 }
 
@@ -1091,7 +1107,7 @@  static void i915_gem_record_rings(struct drm_device *dev,
 		error->ring[i].cpu_ring_head = rbuf->head;
 		error->ring[i].cpu_ring_tail = rbuf->tail;
 
-		error->ring[i].ringbuffer =
+		error->ring[i].req_ringbuffer =
 			i915_error_ggtt_object_create(dev_priv, rbuf->obj);
 
 		error->ring[i].hws_page =