[2/2] drm/i915: Record the object matching actual head to error state
diff mbox

Message ID 1430750652-7931-2-git-send-email-mika.kuoppala@intel.com
State New
Headers show

Commit Message

Mika Kuoppala May 4, 2015, 2:44 p.m. UTC
If we have a chained batch, the request only contains
the batch buffer that branched the execution into the chained
batch. We can try to find the object for the actual head and,
if it is different from the object in the request, record
and print its state as well.

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |  3 ++-
 drivers/gpu/drm/i915/i915_gpu_error.c | 49 ++++++++++++++++++++++++++++++++---
 2 files changed, 47 insertions(+), 5 deletions(-)

Comments

Chris Wilson May 4, 2015, 3:04 p.m. UTC | #1
On Mon, May 04, 2015 at 05:44:12PM +0300, Mika Kuoppala wrote:
> +		obj = find_obj_for_addr(dev, error->ring[i].acthd);
> +		if (request && request->batch_obj == obj)
> +			obj = NULL;
> +
> +		if (obj) {
> +			if (vm && !i915_gem_obj_bound(obj, vm))
> +				vm = NULL;
> +
> +			error->ring[i].active_batchbuffer =
> +				i915_error_object_create(dev_priv, obj, vm);
> +		}

I was ok up to here. But after seeing this, I really want to defer this
until after i915_error_object_create() is fixed to just take the vma.

http://cgit.freedesktop.org/~ickle/linux-2.6/diff/drivers/gpu/drm/i915/i915_gpu_error.c?h=nightly&id=cc584ccfc109e5b13813548eb0ef1fd8f5751d16
which is about patch 150/200 that Daniel promised to review ;).
-Chris
Chris Wilson May 4, 2015, 3:13 p.m. UTC | #2
On Mon, May 04, 2015 at 05:44:12PM +0300, Mika Kuoppala wrote:
> If we have chained batch, the request only contains
> the batch buffer that branched the execution into chained
> batch. We can try to find object for actual head and
> if it is different than the object from request, record
> and print its state also.

I was also going to say if we want to add even more large objects (and
chasing a random pointer can mean we see very, very large objects), we
want to compress the output:

http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=435e640e6396d6fa51fc09e035f2f4703a86a8bd
-Chris
Shuang He May 4, 2015, 10:15 p.m. UTC | #3
Tested-By: Intel Graphics QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 6313
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
PNV                                  276/276              276/276
ILK                                  302/302              302/302
SNB                                  316/316              316/316
IVB                                  342/342              342/342
BYT                                  286/286              286/286
BDW                                  321/321              321/321
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
Note: You need to pay more attention to lines starting with '*'

Patch
diff mbox

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 136d42a..d23ca8e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -480,7 +480,8 @@  struct drm_i915_error_state {
 			int page_count;
 			u32 gtt_offset;
 			u32 *pages[0];
-		} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
+		} *ringbuffer, *batchbuffer, *wa_batchbuffer,
+			*active_batchbuffer, *ctx, *hws_page;
 
 		struct drm_i915_error_request {
 			long jiffies;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a3e330d..81705b9 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -433,6 +433,13 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			print_error_obj(m, obj);
 		}
 
+		obj = error->ring[i].active_batchbuffer;
+		if (obj) {
+			err_printf(m, "%s (active) --- gtt_offset = 0x%08x\n",
+				   dev_priv->ring[i].name, obj->gtt_offset);
+			print_error_obj(m, obj);
+		}
+
 		if (error->ring[i].num_requests) {
 			err_printf(m, "%s --- %d requests\n",
 				   dev_priv->ring[i].name,
@@ -555,6 +562,7 @@  static void i915_error_state_free(struct kref *error_ref)
 	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
 		i915_error_object_free(error->ring[i].batchbuffer);
 		i915_error_object_free(error->ring[i].wa_batchbuffer);
+		i915_error_object_free(error->ring[i].active_batchbuffer);
 		i915_error_object_free(error->ring[i].ringbuffer);
 		i915_error_object_free(error->ring[i].hws_page);
 		i915_error_object_free(error->ring[i].ctx);
@@ -596,13 +604,13 @@  i915_error_object_create(struct drm_i915_private *dev_priv,
 	if (dst == NULL)
 		return NULL;
 
-	if (i915_gem_obj_bound(src, vm))
+	if (vm && i915_gem_obj_bound(src, vm))
 		dst->gtt_offset = i915_gem_obj_offset(src, vm);
 	else
 		dst->gtt_offset = -1;
 
 	reloc_offset = dst->gtt_offset;
-	if (i915_is_ggtt(vm))
+	if (vm && i915_is_ggtt(vm))
 		vma = i915_gem_obj_to_ggtt(src);
 	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
 		   vma && (vma->bound & GLOBAL_BIND) &&
@@ -967,6 +975,27 @@  static void i915_gem_record_active_context(struct intel_engine_cs *ring,
 	}
 }
 
+static struct drm_i915_gem_object *find_obj_for_addr(struct drm_device *dev,
+						     const u64 va_addr)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+
+	/* We are looking for chained batches, so the first bound object
+	 * with a vma covering va_addr is good enough; NULL if none does.
+	 */
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+		list_for_each_entry(vma, &obj->vma_list, vma_link) {
+			if (va_addr >= vma->node.start &&
+			    va_addr < vma->node.start + vma->node.size)
+				return obj;
+		}
+	}
+
+	return NULL;
+}
+
 static void i915_gem_record_rings(struct drm_device *dev,
 				  struct drm_i915_error_state *error)
 {
@@ -977,6 +1006,8 @@  static void i915_gem_record_rings(struct drm_device *dev,
 	for (i = 0; i < I915_NUM_RINGS; i++) {
 		struct intel_engine_cs *ring = &dev_priv->ring[i];
 		struct intel_ringbuffer *rbuf;
+		struct i915_address_space *vm = NULL;
+		struct drm_i915_gem_object *obj;
 
 		error->ring[i].pid = -1;
 
@@ -989,8 +1020,6 @@  static void i915_gem_record_rings(struct drm_device *dev,
 
 		request = i915_gem_find_active_request(ring);
 		if (request) {
-			struct i915_address_space *vm;
-
 			vm = request->ctx && request->ctx->ppgtt ?
 				&request->ctx->ppgtt->base :
 				&dev_priv->gtt.base;
@@ -1022,6 +1051,18 @@  static void i915_gem_record_rings(struct drm_device *dev,
 			}
 		}
 
+		obj = find_obj_for_addr(dev, error->ring[i].acthd);
+		if (request && request->batch_obj == obj)
+			obj = NULL;
+
+		if (obj) {
+			if (vm && !i915_gem_obj_bound(obj, vm))
+				vm = NULL;
+
+			error->ring[i].active_batchbuffer =
+				i915_error_object_create(dev_priv, obj, vm);
+		}
+
 		if (i915.enable_execlists) {
 			/* TODO: This is only a small fix to keep basic error
 			 * capture working, but we need to add more information