diff mbox

drm/i915: Print error state times relative to capture

Message ID 20180416134052.30586-1-mika.kuoppala@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mika Kuoppala April 16, 2018, 1:40 p.m. UTC
Using plain jiffies in error state output makes the output
time differences relative to the current system time. This
is wrong as it makes output time differences dependent
of when the error state is printed rather than when it is
captured.

Store capture jiffies into error state and use it
when outputting the state to fix time differences output.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 20 +++++++++++++-------
 drivers/gpu/drm/i915/i915_gpu_error.h |  1 +
 2 files changed, 14 insertions(+), 7 deletions(-)

Comments

Chris Wilson April 16, 2018, 1:52 p.m. UTC | #1
Quoting Mika Kuoppala (2018-04-16 14:40:52)
> Using plain jiffies in error state output makes the output
> time differences relative to the current system time. This
> is wrong as it makes output time differences dependent
> of when the error state is printed rather than when it is
> captured.
> 
> Store capture jiffies into error state and use it
> when outputting the state to fix time differences output.

Good point. It was always confusing to see "several hours ago", when what
we wanted to know was how long before the hanging request. Hmm, do we want
to use that as our reference point rather than the capture timestamp? Do 
we even need to see the jiffies? (Yes, we probably do want the raw value
to check, but I think it's secondary.)

> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Please do consider using the (earliest) guilty timestamp as time 0 and
print before/after. If no guilty then use capture?
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index effaf982b19b..fc7e76f21111 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -404,7 +404,8 @@  static const char *bannable(const struct drm_i915_error_context *ctx)
 
 static void error_print_request(struct drm_i915_error_state_buf *m,
 				const char *prefix,
-				const struct drm_i915_error_request *erq)
+				const struct drm_i915_error_request *erq,
+				const unsigned long timestamp)
 {
 	if (!erq->seqno)
 		return;
@@ -412,7 +413,7 @@  static void error_print_request(struct drm_i915_error_state_buf *m,
 	err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n",
 		   prefix, erq->pid, erq->ban_score,
 		   erq->context, erq->seqno, erq->priority,
-		   jiffies_to_msecs(jiffies - erq->jiffies),
+		   jiffies_to_msecs(timestamp - erq->jiffies),
 		   erq->head, erq->tail);
 }
 
@@ -427,7 +428,8 @@  static void error_print_context(struct drm_i915_error_state_buf *m,
 }
 
 static void error_print_engine(struct drm_i915_error_state_buf *m,
-			       const struct drm_i915_error_engine *ee)
+			       const struct drm_i915_error_engine *ee,
+			       const unsigned long timestamp)
 {
 	int n;
 
@@ -499,12 +501,12 @@  static void error_print_engine(struct drm_i915_error_state_buf *m,
 		   hangcheck_action_to_str(ee->hangcheck_action));
 	err_printf(m, "  hangcheck action timestamp: %lu, %u ms ago\n",
 		   ee->hangcheck_timestamp,
-		   jiffies_to_msecs(jiffies - ee->hangcheck_timestamp));
+		   jiffies_to_msecs(timestamp - ee->hangcheck_timestamp));
 	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
 
 	for (n = 0; n < ee->num_ports; n++) {
 		err_printf(m, "  ELSP[%d]:", n);
-		error_print_request(m, " ", &ee->execlist[n]);
+		error_print_request(m, " ", &ee->execlist[n], timestamp);
 	}
 
 	error_print_context(m, "  Active context: ", &ee->context);
@@ -650,6 +652,7 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	ts = ktime_to_timespec64(error->uptime);
 	err_printf(m, "Uptime: %lld s %ld us\n",
 		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
+	err_printf(m, "Jiffies: %lu (now %lu)\n", error->timestamp, jiffies);
 
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		if (error->engine[i].hangcheck_stalled &&
@@ -710,7 +713,8 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		if (error->engine[i].engine_id != -1)
-			error_print_engine(m, &error->engine[i]);
+			error_print_engine(m, &error->engine[i],
+					   error->timestamp);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
@@ -769,7 +773,8 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 				   dev_priv->engine[i]->name,
 				   ee->num_requests);
 			for (j = 0; j < ee->num_requests; j++)
-				error_print_request(m, " ", &ee->requests[j]);
+				error_print_request(m, " ", &ee->requests[j],
+						    error->timestamp);
 		}
 
 		if (IS_ERR(ee->waiters)) {
@@ -1743,6 +1748,7 @@  static int capture(void *data)
 	error->boottime = ktime_get_boottime();
 	error->uptime = ktime_sub(ktime_get(),
 				  error->i915->gt.last_init_time);
+	error->timestamp = jiffies;
 
 	capture_params(error);
 	capture_gen_state(error);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index c05b6034d718..1f36ad526bc5 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -30,6 +30,7 @@  struct i915_gpu_state {
 	ktime_t time;
 	ktime_t boottime;
 	ktime_t uptime;
+	unsigned long timestamp; /* jiffies */
 
 	struct drm_i915_private *i915;