diff mbox series

[11/14] drm/i915: Drop the shadow ring state from the error capture

Message ID 20200109085839.873553-11-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/14] drm/i915/gt: Push context state allocation earlier | expand

Commit Message

Chris Wilson Jan. 9, 2020, 8:58 a.m. UTC
The shadow ring regs (ring->head, ring->tail) are meaningless in the
post-mortem dump as they do not related to anything on HW. Remove them
from the coredump.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 5 -----
 drivers/gpu/drm/i915/i915_gpu_error.h | 4 ----
 2 files changed, 9 deletions(-)

Comments

Mika Kuoppala Jan. 9, 2020, 9:04 a.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> The shadow ring regs (ring->head, ring->tail) are meaningless in the
> post-mortem dump as they do not related to anything on HW. Remove them
> from the coredump.

We have been dumping these just to check that our bookkeepping matches?
-Mika

>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gpu_error.c | 5 -----
>  drivers/gpu/drm/i915/i915_gpu_error.h | 4 ----
>  2 files changed, 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index b45c128d0a17..a35aad439281 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -553,8 +553,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
>  				   ee->vm_info.pp_dir_base);
>  		}
>  	}
> -	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
> -	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
>  	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
>  
>  	for (n = 0; n < ee->num_ports; n++) {
> @@ -1428,9 +1426,6 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
>  	if (HAS_BROKEN_CS_TLB(rq->i915))
>  		vma = capture_vma(vma, ee->engine->gt->scratch, "WA batch", gfp);
>  
> -	ee->cpu_ring_head = rq->ring->head;
> -	ee->cpu_ring_tail = rq->ring->tail;
> -
>  	ee->rq_head = rq->head;
>  	ee->rq_post = rq->postfix;
>  	ee->rq_tail = rq->tail;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index 0df9d8c32056..8f4579d64d8c 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -66,10 +66,6 @@ struct intel_engine_coredump {
>  	/* position of active request inside the ring */
>  	u32 rq_head, rq_post, rq_tail;
>  
> -	/* our own tracking of ring head and tail */
> -	u32 cpu_ring_head;
> -	u32 cpu_ring_tail;
> -
>  	/* Register state */
>  	u32 ccid;
>  	u32 start;
> -- 
> 2.25.0.rc1
Chris Wilson Jan. 9, 2020, 9:14 a.m. UTC | #2
Quoting Mika Kuoppala (2020-01-09 09:04:31)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > The shadow ring regs (ring->head, ring->tail) are meaningless in the
> > post-mortem dump as they do not related to anything on HW. Remove them
> > from the coredump.
> 
> We have been dumping these just to check that our bookkeepping matches?

Kind off, but they never really match since the ring->head is very lazy,
and ring->tail is wherever the user got up to. We have the relevant
information from the request where we expect to be in the ring for the
error, and the HW tells us where it was executing.
-Chris
Mika Kuoppala Jan. 10, 2020, 10:41 a.m. UTC | #3
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2020-01-09 09:04:31)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > The shadow ring regs (ring->head, ring->tail) are meaningless in the
>> > post-mortem dump as they do not related to anything on HW. Remove them
>> > from the coredump.
>> 
>> We have been dumping these just to check that our bookkeepping matches?
>
> Kind off, but they never really match since the ring->head is very lazy,
> and ring->tail is wherever the user got up to. We have the relevant
> information from the request where we expect to be in the ring for the
> error, and the HW tells us where it was executing.
> -Chris

Ok, based on that and that  don't remember a single case where,
from external reports, these would have provided anything of value.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index b45c128d0a17..a35aad439281 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -553,8 +553,6 @@  static void error_print_engine(struct drm_i915_error_state_buf *m,
 				   ee->vm_info.pp_dir_base);
 		}
 	}
-	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
-	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
 	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
 
 	for (n = 0; n < ee->num_ports; n++) {
@@ -1428,9 +1426,6 @@  intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
 	if (HAS_BROKEN_CS_TLB(rq->i915))
 		vma = capture_vma(vma, ee->engine->gt->scratch, "WA batch", gfp);
 
-	ee->cpu_ring_head = rq->ring->head;
-	ee->cpu_ring_tail = rq->ring->tail;
-
 	ee->rq_head = rq->head;
 	ee->rq_post = rq->postfix;
 	ee->rq_tail = rq->tail;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 0df9d8c32056..8f4579d64d8c 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -66,10 +66,6 @@  struct intel_engine_coredump {
 	/* position of active request inside the ring */
 	u32 rq_head, rq_post, rq_tail;
 
-	/* our own tracking of ring head and tail */
-	u32 cpu_ring_head;
-	u32 cpu_ring_tail;
-
 	/* Register state */
 	u32 ccid;
 	u32 start;