diff mbox

[4/5] drm/i915: Add basic execlist info to error state

Message ID 1452018609-10142-5-git-send-email-benjamin.widawsky@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky Jan. 5, 2016, 6:30 p.m. UTC
Sample output:
...
  waiting: yes
  ring->head: 0x00000000
  ring->tail: 0x00000c50
  ring->next_context_status_buffer: 0x5
  CSB Pointer: 0x00000405
    Context 0 Status: 0x0000000000000001
    Context 1 Status: 0x0000009d00000018
    Context 2 Status: 0x0000000000000001
    Context 3 Status: 0x0000000100000018
    Context 4 Status: 0x0000000000000001
    Context 5 Status: 0x0000009d00000018
  hangcheck: hung [40]
bsd command stream:
  START: 0x00039000
  HEAD:  0x00000018
...

Signed-off-by: Ben Widawsky <benjamin.widawsky@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |  7 ++++++-
 drivers/gpu/drm/i915/i915_gpu_error.c | 23 +++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c      | 10 +++++-----
 drivers/gpu/drm/i915/intel_lrc.h      |  4 ++++
 4 files changed, 38 insertions(+), 6 deletions(-)

Comments

Michel Thierry Jan. 6, 2016, 3:10 p.m. UTC | #1
On 1/5/2016 6:30 PM, Ben Widawsky wrote:
> Sample output:
> ...
>    waiting: yes
>    ring->head: 0x00000000
>    ring->tail: 0x00000c50
>    ring->next_context_status_buffer: 0x5
>    CSB Pointer: 0x00000405
>      Context 0 Status: 0x0000000000000001
>      Context 1 Status: 0x0000009d00000018
>      Context 2 Status: 0x0000000000000001
>      Context 3 Status: 0x0000000100000018
>      Context 4 Status: 0x0000000000000001
>      Context 5 Status: 0x0000009d00000018

There's another patch floating around that does more or less the same, and also
decodes the CSB events
(http://patchwork.freedesktop.org/patch/msgid/1448278932-31551-8-git-send-email-John.C.Harrison@Intel.com).

It's too much to ask to combine them, but at least adding a blank space
between the two halves of the context status (upper/lower_32_bits) will make it more readable.

>    hangcheck: hung [40]
> bsd command stream:
>    START: 0x00039000
>    HEAD:  0x00000018
> ...
>
> Signed-off-by: Ben Widawsky <benjamin.widawsky@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h       |  7 ++++++-
>   drivers/gpu/drm/i915/i915_gpu_error.c | 23 +++++++++++++++++++++++
>   drivers/gpu/drm/i915/intel_lrc.c      | 10 +++++-----
>   drivers/gpu/drm/i915/intel_lrc.h      |  4 ++++
>   4 files changed, 38 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c6dd4db..c79e869 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -526,6 +526,7 @@ struct drm_i915_error_state {
>                  u32 cpu_ring_tail;
>
>                  u32 semaphore_seqno[I915_NUM_RINGS - 1];
> +               u32 semaphore_mboxes[I915_NUM_RINGS - 1];
>
>                  /* Register state */
>                  u32 start;
> @@ -545,7 +546,11 @@ struct drm_i915_error_state {
>                  u32 fault_reg;
>                  u64 faddr;
>                  u32 rc_psmi; /* sleep state */
> -               u32 semaphore_mboxes[I915_NUM_RINGS - 1];
> +
> +               /* execlist state */
> +               u32 csb_ptr;
> +               u8 next_context_status_buffer;
> +               u64 context_status[GEN8_CSB_ENTRIES];
>
>                  struct drm_i915_error_object {
>                          int page_count;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 06ca408..20a5daa 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -301,6 +301,18 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
>          err_printf(m, "  waiting: %s\n", yesno(ring->waiting));
>          err_printf(m, "  ring->head: 0x%08x\n", ring->cpu_ring_head);
>          err_printf(m, "  ring->tail: 0x%08x\n", ring->cpu_ring_tail);
> +
> +       if (i915.enable_execlists) {
> +               int j;
> +               err_printf(m, "  ring->next_context_status_buffer: 0x%d\n",
> +                             ring->next_context_status_buffer);
> +               err_printf(m, "  CSB Pointer: 0x%08x\n", ring->csb_ptr);
> +               for (j = 0; j < GEN8_CSB_ENTRIES; j++) {
> +                       err_printf(m, "    Context %d Status: 0x%016llx\n",
> +                                          j, ring->context_status[j]);
> +               }
> +       }
> +
>          err_printf(m, "  hangcheck: %s [%d]\n",
>                     hangcheck_action_to_str(ring->hangcheck_action),
>                     ring->hangcheck_score);
> @@ -1042,6 +1054,8 @@ static void i915_gem_record_rings(struct drm_device *dev,
>                  }
>
>                  if (i915.enable_execlists) {
> +                       int j;
> +
>                          /* TODO: This is only a small fix to keep basic error
>                           * capture working, but we need to add more information
>                           * for it to be useful (e.g. dump the context being
> @@ -1051,6 +1065,15 @@ static void i915_gem_record_rings(struct drm_device *dev,
>                                  rbuf = request->ctx->engine[ring->id].ringbuf;
>                          else
>                                  rbuf = ring->default_context->engine[ring->id].ringbuf;
> +
> +                       error->ring[i].csb_ptr = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
> +                       error->ring[i].next_context_status_buffer = ring->next_context_status_buffer;
> +                       for (j = 0; j < GEN8_CSB_ENTRIES; j++) {
> +                               u32 status, id;
> +                               intel_lrc_get_context_status(ring, j, &status, &id);
> +                               error->ring[i].context_status[j] = ((__u64)id<<32)|(__u64)status;
> +                       }
> +
>                  } else
>                          rbuf = ring->buffer;
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 23839ff..a118146 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -496,9 +496,9 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
>          return false;
>   }
>
> -static void get_context_status(struct intel_engine_cs *ring,
> -                              u8 read_pointer,
> -                              u32 *status, u32 *context_id)
> +void intel_lrc_get_context_status(struct intel_engine_cs *ring,
> +                                 u8 read_pointer,
> +                                 u32 *status, u32 *context_id)
>   {
>          struct drm_i915_private *dev_priv = ring->dev->dev_private;
>
> @@ -537,8 +537,8 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
>
>          while (read_pointer < write_pointer) {
>
> -               get_context_status(ring, ++read_pointer % GEN8_CSB_ENTRIES,
> -                                  &status, &status_id);
> +               intel_lrc_get_context_status(ring, ++read_pointer % GEN8_CSB_ENTRIES,
> +                                            &status, &status_id);
>
>                  if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
>                          continue;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index de41ad6..82c87f9 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -118,4 +118,8 @@ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
>   void intel_lrc_irq_handler(struct intel_engine_cs *ring);
>   void intel_execlists_retire_requests(struct intel_engine_cs *ring);
>
> +void intel_lrc_get_context_status(struct intel_engine_cs *ring,
> +                                 u8 read_pointer,
> +                                 u32 *status, u32 *context_id);
> +
>   #endif /* _INTEL_LRC_H_ */
> --
> 2.6.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c6dd4db..c79e869 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -526,6 +526,7 @@  struct drm_i915_error_state {
 		u32 cpu_ring_tail;
 
 		u32 semaphore_seqno[I915_NUM_RINGS - 1];
+		u32 semaphore_mboxes[I915_NUM_RINGS - 1];
 
 		/* Register state */
 		u32 start;
@@ -545,7 +546,11 @@  struct drm_i915_error_state {
 		u32 fault_reg;
 		u64 faddr;
 		u32 rc_psmi; /* sleep state */
-		u32 semaphore_mboxes[I915_NUM_RINGS - 1];
+
+		/* execlist state */
+		u32 csb_ptr;
+		u8 next_context_status_buffer;
+		u64 context_status[GEN8_CSB_ENTRIES];
 
 		struct drm_i915_error_object {
 			int page_count;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 06ca408..20a5daa 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -301,6 +301,18 @@  static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 	err_printf(m, "  waiting: %s\n", yesno(ring->waiting));
 	err_printf(m, "  ring->head: 0x%08x\n", ring->cpu_ring_head);
 	err_printf(m, "  ring->tail: 0x%08x\n", ring->cpu_ring_tail);
+
+	if (i915.enable_execlists) {
+		int j;
+		err_printf(m, "  ring->next_context_status_buffer: 0x%d\n",
+			      ring->next_context_status_buffer);
+		err_printf(m, "  CSB Pointer: 0x%08x\n", ring->csb_ptr);
+		for (j = 0; j < GEN8_CSB_ENTRIES; j++) {
+			err_printf(m, "    Context %d Status: 0x%016llx\n",
+				           j, ring->context_status[j]);
+		}
+	}
+
 	err_printf(m, "  hangcheck: %s [%d]\n",
 		   hangcheck_action_to_str(ring->hangcheck_action),
 		   ring->hangcheck_score);
@@ -1042,6 +1054,8 @@  static void i915_gem_record_rings(struct drm_device *dev,
 		}
 
 		if (i915.enable_execlists) {
+			int j;
+
 			/* TODO: This is only a small fix to keep basic error
 			 * capture working, but we need to add more information
 			 * for it to be useful (e.g. dump the context being
@@ -1051,6 +1065,15 @@  static void i915_gem_record_rings(struct drm_device *dev,
 				rbuf = request->ctx->engine[ring->id].ringbuf;
 			else
 				rbuf = ring->default_context->engine[ring->id].ringbuf;
+
+			error->ring[i].csb_ptr = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
+			error->ring[i].next_context_status_buffer = ring->next_context_status_buffer;
+			for (j = 0; j < GEN8_CSB_ENTRIES; j++) {
+				u32 status, id;
+				intel_lrc_get_context_status(ring, j, &status, &id);
+				error->ring[i].context_status[j] = ((__u64)id<<32)|(__u64)status;
+			}
+
 		} else
 			rbuf = ring->buffer;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 23839ff..a118146 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -496,9 +496,9 @@  static bool execlists_check_remove_request(struct intel_engine_cs *ring,
 	return false;
 }
 
-static void get_context_status(struct intel_engine_cs *ring,
-			       u8 read_pointer,
-			       u32 *status, u32 *context_id)
+void intel_lrc_get_context_status(struct intel_engine_cs *ring,
+				  u8 read_pointer,
+				  u32 *status, u32 *context_id)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 
@@ -537,8 +537,8 @@  void intel_lrc_irq_handler(struct intel_engine_cs *ring)
 
 	while (read_pointer < write_pointer) {
 
-		get_context_status(ring, ++read_pointer % GEN8_CSB_ENTRIES,
-				   &status, &status_id);
+		intel_lrc_get_context_status(ring, ++read_pointer % GEN8_CSB_ENTRIES,
+					     &status, &status_id);
 
 		if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
 			continue;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index de41ad6..82c87f9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -118,4 +118,8 @@  u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
 void intel_lrc_irq_handler(struct intel_engine_cs *ring);
 void intel_execlists_retire_requests(struct intel_engine_cs *ring);
 
+void intel_lrc_get_context_status(struct intel_engine_cs *ring,
+				  u8 read_pointer,
+				  u32 *status, u32 *context_id);
+
 #endif /* _INTEL_LRC_H_ */