diff mbox

[4/5] drm/i915: Capture PPGTT info on error capture

Message ID 1390616265-4329-4-git-send-email-benjamin.widawsky@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky Jan. 25, 2014, 2:17 a.m. UTC
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h       |  7 ++++++
 drivers/gpu/drm/i915/i915_gpu_error.c | 41 +++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

Comments

Chris Wilson Jan. 26, 2014, 11:42 a.m. UTC | #1
On Fri, Jan 24, 2014 at 06:17:44PM -0800, Ben Widawsky wrote:
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_drv.h       |  7 ++++++
>  drivers/gpu/drm/i915/i915_gpu_error.c | 41 +++++++++++++++++++++++++++++++++++
>  2 files changed, 48 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 6f68515..5105fd4 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -359,6 +359,13 @@ struct drm_i915_error_state {
>  		s32 ring:4;
>  		u32 cache_level:3;
>  	} **active_bo, **pinned_bo;
> +	struct drm_i915_vm_info {
> +		u32 gfx_mode;
> +		union {
> +			u64 pdp[4];
> +			u32 pp_dir_base;
> +		};
> +	} vm_info[I915_NUM_RINGS];

Note for future janitorial work: let's coalesce all the per-ring
information into the ring error struct.

>  	u32 *active_bo_count, *pinned_bo_count;

For instance, I thought active_bo was already being tracked per-ring.
(Pinned bo is global since that exists more or less to make sure that
our registers are pointing into pinned objects.)

Do we also want to capture?
  GAC_ECO_BITS /* gen6,7 */
  GAM_ECOCHK /* gen6,7 */
  GAB_CTL /* gen6 */
  GFX_MODE /* gen6 */

The rest looks good.
-Chris
Ben Widawsky Jan. 26, 2014, 7:06 p.m. UTC | #2
On Sun, Jan 26, 2014 at 11:42:22AM +0000, Chris Wilson wrote:
> On Fri, Jan 24, 2014 at 06:17:44PM -0800, Ben Widawsky wrote:
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h       |  7 ++++++
> >  drivers/gpu/drm/i915/i915_gpu_error.c | 41 +++++++++++++++++++++++++++++++++++
> >  2 files changed, 48 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 6f68515..5105fd4 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -359,6 +359,13 @@ struct drm_i915_error_state {
> >  		s32 ring:4;
> >  		u32 cache_level:3;
> >  	} **active_bo, **pinned_bo;
> > +	struct drm_i915_vm_info {
> > +		u32 gfx_mode;
> > +		union {
> > +			u64 pdp[4];
> > +			u32 pp_dir_base;
> > +		};
> > +	} vm_info[I915_NUM_RINGS];
> 
> Note for future janitorial work: let's coalesce all the per-ring
> information into the ring error struct.
> 
> >  	u32 *active_bo_count, *pinned_bo_count;
> 
> For instance, I thought active_bo was already being tracked per-ring.
> (Pinned bo is global since that exists more or less to make sure that
> our registers are pointing into pinned objects.)
> 
> Do we also want to capture?
>   GAC_ECO_BITS /* gen6,7 */
>   GAM_ECOCHK /* gen6,7 */
>   GAB_CTL /* gen6 */
>   GFX_MODE /* gen6 */
> 
> The rest looks good.
> -Chris
> 

I agree. I was pretty unhappy too with how we've done things, but as
this information is immediately useful, I'd really like to not postpone.
Does "The rest looks good." mean reviewed-by?
Chris Wilson Jan. 26, 2014, 7:51 p.m. UTC | #3
On Sun, Jan 26, 2014 at 11:06:40AM -0800, Ben Widawsky wrote:
> On Sun, Jan 26, 2014 at 11:42:22AM +0000, Chris Wilson wrote:
> > On Fri, Jan 24, 2014 at 06:17:44PM -0800, Ben Widawsky wrote:
> > > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > > ---
> > >  drivers/gpu/drm/i915/i915_drv.h       |  7 ++++++
> > >  drivers/gpu/drm/i915/i915_gpu_error.c | 41 +++++++++++++++++++++++++++++++++++
> > >  2 files changed, 48 insertions(+)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > index 6f68515..5105fd4 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -359,6 +359,13 @@ struct drm_i915_error_state {
> > >  		s32 ring:4;
> > >  		u32 cache_level:3;
> > >  	} **active_bo, **pinned_bo;
> > > +	struct drm_i915_vm_info {
> > > +		u32 gfx_mode;
> > > +		union {
> > > +			u64 pdp[4];
> > > +			u32 pp_dir_base;
> > > +		};
> > > +	} vm_info[I915_NUM_RINGS];
> > 
> > Note for future janitorial work: let's coalesce all the per-ring
> > information into the ring error struct.
> > 
> > >  	u32 *active_bo_count, *pinned_bo_count;
> > 
> > For instance, I thought active_bo was already being tracked per-ring.
> > (Pinned bo is global since that exists more or less to make sure that
> > our registers are pointing into pinned objects.)
> > 
> > Do we also want to capture?
> >   GAC_ECO_BITS /* gen6,7 */
> >   GAM_ECOCHK /* gen6,7 */
> >   GAB_CTL /* gen6 */
> >   GFX_MODE /* gen6 */
> > 
> > The rest looks good.
> > -Chris
> > 
> 
> I agree. I was pretty unhappy too with how we've done things, but as
> this information is immediately useful, I'd really like to not postpone.
> Does "The rest looks good." mean reviewed-by?

Yes. If you spend the 2 minutes to add reviewed-by to the changelog, you
can also add the extra registers. ;-)
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6f68515..5105fd4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -359,6 +359,13 @@  struct drm_i915_error_state {
 		s32 ring:4;
 		u32 cache_level:3;
 	} **active_bo, **pinned_bo;
+	struct drm_i915_vm_info {
+		u32 gfx_mode;
+		union {
+			u64 pdp[4];
+			u32 pp_dir_base;
+		};
+	} vm_info[I915_NUM_RINGS];
 	u32 *active_bo_count, *pinned_bo_count;
 	u32 vm_count;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f598829..76bb010 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -276,6 +276,22 @@  static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 	err_printf(m, "  hangcheck: %s [%d]\n",
 		   hangcheck_action_to_str(error->hangcheck_action[ring]),
 		   error->hangcheck_score[ring]);
+
+	if (USES_PPGTT(dev)) {
+		err_printf(m, "  GFX_MODE: 0x%08x\n",
+			   error->vm_info[ring].gfx_mode);
+
+		if (INTEL_INFO(dev)->gen >= 8) {
+			int i;
+			for (i = 0; i < 4; i++)
+				err_printf(m, "  PDP%d: 0x%016llx\n", i,
+					   error->vm_info[ring].pdp[i]);
+		} else {
+			err_printf(m, "  PP_DIR_BASE: 0x%08x\n",
+				   error->vm_info[ring].pp_dir_base);
+		}
+	}
+
 }
 
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
@@ -799,6 +815,31 @@  static void i915_record_ring_state(struct drm_device *dev,
 
 	error->hangcheck_score[ring->id] = ring->hangcheck.score;
 	error->hangcheck_action[ring->id] = ring->hangcheck.action;
+
+	if (USES_PPGTT(dev)) {
+		int i;
+
+		error->vm_info[ring->id].gfx_mode =
+			I915_READ(RING_MODE_GEN7(ring));
+
+		switch (INTEL_INFO(dev)->gen) {
+		case 8:
+			for (i = 0; i < 4; i++) {
+				error->vm_info[ring->id].pdp[i] =
+					I915_READ(GEN8_RING_PDP_UDW(ring, i));
+				error->vm_info[ring->id].pdp[i] <<= 32;
+				error->vm_info[ring->id].pdp[i] |=
+					I915_READ(GEN8_RING_PDP_LDW(ring, i));
+			}
+			break;
+		case 7:
+			error->vm_info[ring->id].pp_dir_base = RING_PP_DIR_BASE(ring);
+			break;
+		case 6:
+			error->vm_info[ring->id].pp_dir_base = RING_PP_DIR_BASE_READ(ring);
+			break;
+		}
+	}
 }