diff mbox

[2/2] drm/i915: Add the last written reg to error state

Message ID 1424410412-24910-2-git-send-email-benjamin.widawsky@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky Feb. 20, 2015, 5:33 a.m. UTC
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h       |  1 +
 drivers/gpu/drm/i915/i915_gpu_error.c | 11 +++++++++++
 2 files changed, 12 insertions(+)

Comments

Chris Wilson Feb. 20, 2015, 12:04 p.m. UTC | #1
At first glance, this is interesting. On reflection I think I still only
care about the register values at the time of the error. Otherwise, I
would rather have the last few registers written - and only those of the
interesting set as defined by the error state - their values and when.
We would also then require the timestamp of batch execution to marry with
the register updates if we suspect them of causing GPU hangs.
-Chris
Shuang He Feb. 20, 2015, 1:29 p.m. UTC | #2
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 5799
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
PNV                 -2              277/277              275/277
ILK                                  313/313              313/313
SNB                 -1              309/309              308/309
IVB                                  382/382              382/382
BYT                                  296/296              296/296
HSW                 -1              425/425              424/425
BDW                 -1              318/318              317/318
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
 PNV  igt_gem_userptr_blits_coherency-sync      NO_RESULT(1)CRASH(5)NRUN(1)PASS(6)      CRASH(2)
 PNV  igt_gem_userptr_blits_coherency-unsync      CRASH(3)NRUN(1)PASS(4)      CRASH(2)
*SNB  igt_kms_plane_plane-panning-top-left-pipe-B-plane-2      PASS(2)      TIMEOUT(1)PASS(1)
*HSW  igt_gem_storedw_batches_loop_secure-dispatch      PASS(2)      DMESG_WARN(1)PASS(1)
*BDW  igt_gem_gtt_hog      PASS(17)      DMESG_WARN(1)PASS(1)
Note: Pay particular attention to the lines that start with '*'.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6fa22db..49bc296 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -323,6 +323,7 @@  struct drm_i915_error_state {
 	struct timeval time;
 
 	char error_msg[128];
+	u32 last_written[NR_CPUS];
 	u32 reset_count;
 	u32 suspend_count;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 2c87a79..f7737c5 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -356,6 +356,9 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 				   error->ring[i].pid);
 		}
 	}
+	for_each_possible_cpu(i)
+		err_printf(m, "Last written register (cpu=%d): 0x%08x\n",
+			   i, error->last_written[i]);
 	err_printf(m, "Reset count: %u\n", error->reset_count);
 	err_printf(m, "Suspend count: %u\n", error->suspend_count);
 	err_printf(m, "PCI ID: 0x%04x\n", dev->pdev->device);
@@ -1253,6 +1256,13 @@  static void i915_error_capture_msg(struct drm_device *dev,
 		  wedged ? "reset" : "continue");
 }
 
+static void i915_capture_cpu_state(struct drm_i915_error_state *error)
+{
+	int cpu;
+	for_each_possible_cpu(cpu)
+		error->last_written[cpu] = per_cpu(i915_last_written, cpu);
+}
+
 static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
 				   struct drm_i915_error_state *error)
 {
@@ -1286,6 +1296,7 @@  void i915_capture_error_state(struct drm_device *dev, bool wedged,
 
 	kref_init(&error->ref);
 
+	i915_capture_cpu_state(error);
 	i915_capture_gen_state(dev_priv, error);
 	i915_capture_reg_state(dev_priv, error);
 	i915_gem_capture_buffers(dev_priv, error);