[6/8] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op
diff mbox

Message ID 1350583639-773-6-git-send-email-jbarnes@virtuousgeek.org
State New, archived
Headers show

Commit Message

Jesse Barnes Oct. 18, 2012, 6:07 p.m. UTC
So store into the scratch space of the HWS to make sure the invalidate
occurs.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_reg.h         |    6 ++++--
 drivers/gpu/drm/i915/intel_ringbuffer.c |   22 ++++++++++++++++++----
 drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
 3 files changed, 23 insertions(+), 6 deletions(-)

Comments

Chris Wilson Oct. 23, 2012, 11:22 a.m. UTC | #1
On Thu, 18 Oct 2012 13:07:17 -0500, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> So store into the scratch space of the HWS to make sure the invalidate
> occurs.

Whoops, instant hang. Probably doesn't agree with being called FLUSH_SW
and not FLUSH_DW! ;-)

> +	/*
> +	 * Bspec vol 1c.5 - video engine command streamer:
> +	 * "If ENABLED, all TLBs will be invalidated once the flush
> +	 * operation is complete. This bit is only valid when the
> +	 * Post-Sync Operation field is a value of 1h or 3h."
> +	 */
>  	if (invalidate & I915_GEM_GPU_DOMAINS)
> -		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
> +		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
> +			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
>  	intel_ring_emit(ring, cmd);
> -	intel_ring_emit(ring, 0);
> +	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_INDEX << 3);

And here is where the error lies. Perhaps this would be clearer if you
do:

#define MI_FLUSH_DW_USE_PPGTT 0
#define MI_FLUSH_DW_USE_GTT (1<<2)

#define I915_GEM_HWS_SCRATCH_INDEX 0x30
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)

Then:
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);

Hangs begone!
-Chris
Jesse Barnes Oct. 23, 2012, 2:28 p.m. UTC | #2
On Tue, 23 Oct 2012 12:22:16 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> On Thu, 18 Oct 2012 13:07:17 -0500, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > So store into the scratch space of the HWS to make sure the invalidate
> > occurs.
> 
> Whoops, instant hang. Probably doesn't agree with being called FLUSH_SW
> and not FLUSH_DW! ;-)
> 
> > +	/*
> > +	 * Bspec vol 1c.5 - video engine command streamer:
> > +	 * "If ENABLED, all TLBs will be invalidated once the flush
> > +	 * operation is complete. This bit is only valid when the
> > +	 * Post-Sync Operation field is a value of 1h or 3h."
> > +	 */
> >  	if (invalidate & I915_GEM_GPU_DOMAINS)
> > -		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
> > +		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
> > +			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
> >  	intel_ring_emit(ring, cmd);
> > -	intel_ring_emit(ring, 0);
> > +	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_INDEX << 3);
> 
> And here is where the error lies. Perhaps this would be clearer if you
> do:
> 
> #define MI_FLUSH_DW_USE_PPGTT 0
> #define MI_FLUSH_DW_USE_GTT (1<<2)
> 
> #define I915_GEM_HWS_SCRATCH_INDEX 0x30
> #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
> 
> Then:
> 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
> 
> Hangs begone!

Ah cool, was hoping it was something simple.  Damn PPGTT vs GTT always
gets us.

I'll respin with the change.

Patch
diff mbox

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 34067b5..c6f63a4 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -241,8 +241,10 @@ 
  */
 #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*x-1)
 #define MI_FLUSH_DW		MI_INSTR(0x26, 1) /* for GEN6 */
-#define   MI_INVALIDATE_TLB	(1<<18)
-#define   MI_INVALIDATE_BSD	(1<<7)
+#define   MI_FLUSH_DW_STORE_INDEX	(1<<21)
+#define   MI_INVALIDATE_TLB		(1<<18)
+#define   MI_FLUSH_DW_OP_STOREDW	(1<<14)
+#define   MI_INVALIDATE_BSD		(1<<7)
 #define MI_BATCH_BUFFER		MI_INSTR(0x30, 1)
 #define   MI_BATCH_NON_SECURE		(1)
 /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6c6f95a..e7daa90 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1396,10 +1396,17 @@  static int gen6_ring_flush(struct intel_ring_buffer *ring,
 		return ret;
 
 	cmd = MI_FLUSH_DW;
+	/*
+	 * Bspec vol 1c.5 - video engine command streamer:
+	 * "If ENABLED, all TLBs will be invalidated once the flush
+	 * operation is complete. This bit is only valid when the
+	 * Post-Sync Operation field is a value of 1h or 3h."
+	 */
 	if (invalidate & I915_GEM_GPU_DOMAINS)
-		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
+		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
+			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
 	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_INDEX << 3);
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
@@ -1461,10 +1468,17 @@  static int blt_ring_flush(struct intel_ring_buffer *ring,
 		return ret;
 
 	cmd = MI_FLUSH_DW;
+	/*
+	 * Bspec vol 1c.3 - blitter engine command streamer:
+	 * "If ENABLED, all TLBs will be invalidated once the flush
+	 * operation is complete. This bit is only valid when the
+	 * Post-Sync Operation field is a value of 1h or 3h."
+	 */
 	if (invalidate & I915_GEM_DOMAIN_RENDER)
-		cmd |= MI_INVALIDATE_TLB;
+		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+			MI_FLUSH_DW_OP_STOREDW;
 	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_INDEX << 3);
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 3745d1d..d089520 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -183,6 +183,7 @@  intel_read_status_page(struct intel_ring_buffer *ring,
  * The area from dword 0x20 to 0x3ff is available for driver usage.
  */
 #define I915_GEM_HWS_INDEX		0x20
+#define I915_GEM_HWS_SCRATCH_INDEX	0x28
 
 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring);