
drm/i915: Only apply the SNB pipe control w/a to gen6

Message ID 1342803748-25695-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson July 20, 2012, 5:02 p.m. UTC
The requirement for the sync flush to be emitted prior to the render
cache flush is only true for SandyBridge. On IvyBridge and friends we
can just emit the flushes with an inline CS stall.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c |   33 +++++++++++++++++++------------
 1 file changed, 20 insertions(+), 13 deletions(-)
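
For reference, the SNB workaround being split out here is the "post-sync
non-zero" flush that the Sandy Bridge PRM requires before a render cache
flush. A rough sketch of the helper this patch calls,
intel_emit_post_sync_nonzero_flush() (paraphrased for illustration; the
exact emission in the tree may differ), looks like this:

/* Sketch of the gen6 w/a helper, paraphrased from the i915 driver of
 * this era; not part of this patch.
 */
static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* First stall the command streamer at the scoreboard. */
	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0); /* low dword */
	intel_ring_emit(ring, 0); /* high dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	/* Then emit the non-zero post-sync op: a qword write to the
	 * ring's scratch page, satisfying the PRM requirement before
	 * the actual render cache flush is emitted.
	 */
	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

On gen6 the new gen6_render_ring_flush__wa() wrapper runs this before the
common flush; on gen7+ the PIPE_CONTROL_CS_STALL flag set in the common
path is sufficient on its own.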

Comments

Ben Widawsky July 20, 2012, 9 p.m. UTC | #1
On Fri, 20 Jul 2012 18:02:28 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> The requirement for the sync flush to be emitted prior to the render
> cache flush is only true for SandyBridge. On IvyBridge and friends we
> can just emit the flushes with an inline CS stall.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Tested-by: Ben Widawsky <ben@bwidawsk.net> [tested on IVB]
> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.c |   33 +++++++++++++++++++------------
>  1 file changed, 20 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index b35a89a..42ad7ad 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -214,15 +214,8 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
>                           u32 invalidate_domains, u32 flush_domains)
>  {
>  	u32 flags = 0;
> -	struct pipe_control *pc = ring->private;
> -	u32 scratch_addr = pc->gtt_offset + 128;
>  	int ret;
>  
> -	/* Force SNB workarounds for PIPE_CONTROL flushes */
> -	ret = intel_emit_post_sync_nonzero_flush(ring);
> -	if (ret)
> -		return ret;
> -
>  	/* Just flush everything.  Experiments have shown that reducing the
>  	 * number of bits based on the write domains has little performance
>  	 * impact.
> @@ -242,21 +235,33 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
>  	if (flush_domains)
>  		flags |= PIPE_CONTROL_CS_STALL;
>  
> -	ret = intel_ring_begin(ring, 6);
> +	ret = intel_ring_begin(ring, 4);
>  	if (ret)
>  		return ret;
>  
> -	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
> +	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
>  	intel_ring_emit(ring, flags);
> -	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
> -	intel_ring_emit(ring, 0); /* lower dword */
> -	intel_ring_emit(ring, 0); /* uppwer dword */
> -	intel_ring_emit(ring, MI_NOOP);
> +	intel_ring_emit(ring, 0);
> +	intel_ring_emit(ring, 0);
>  	intel_ring_advance(ring);
>  
>  	return 0;
>  }
>  
> +static int
> +gen6_render_ring_flush__wa(struct intel_ring_buffer *ring,
> +			   u32 invalidate_domains, u32 flush_domains)
> +{
> +	int ret;
> +
> +	/* Force SNB workarounds for PIPE_CONTROL flushes */
> +	ret = intel_emit_post_sync_nonzero_flush(ring);
> +	if (ret)
> +		return ret;
> +
> +	return gen6_render_ring_flush(ring, invalidate_domains, flush_domains);
> +}
> +
>  static void ring_write_tail(struct intel_ring_buffer *ring,
>  			    u32 value)
>  {
> @@ -1374,6 +1379,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  	if (INTEL_INFO(dev)->gen >= 6) {
>  		ring->add_request = gen6_add_request;
>  		ring->flush = gen6_render_ring_flush;
> +		if (INTEL_INFO(dev)->gen == 6)
> +			ring->flush = gen6_render_ring_flush__wa;
>  		ring->irq_get = gen6_ring_get_irq;
>  		ring->irq_put = gen6_ring_put_irq;
>  		ring->irq_enable_mask = GT_USER_INTERRUPT;
Daniel Vetter Aug. 8, 2012, 7:35 a.m. UTC | #2
On Fri, Jul 20, 2012 at 06:02:28PM +0100, Chris Wilson wrote:
> The requirement for the sync flush to be emitted prior to the render
> cache flush is only true for SandyBridge. On IvyBridge and friends we
> can just emit the flushes with an inline CS stall.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Since I've seen Ken ditch these w/a for ivb+ in mesa, I figured that
this is ok. Some bspec reading seems to agree. Merged to dinq, thanks for
the patch.
-Daniel

Patch

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b35a89a..42ad7ad 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -214,15 +214,8 @@  gen6_render_ring_flush(struct intel_ring_buffer *ring,
                          u32 invalidate_domains, u32 flush_domains)
 {
 	u32 flags = 0;
-	struct pipe_control *pc = ring->private;
-	u32 scratch_addr = pc->gtt_offset + 128;
 	int ret;
 
-	/* Force SNB workarounds for PIPE_CONTROL flushes */
-	ret = intel_emit_post_sync_nonzero_flush(ring);
-	if (ret)
-		return ret;
-
 	/* Just flush everything.  Experiments have shown that reducing the
 	 * number of bits based on the write domains has little performance
 	 * impact.
@@ -242,21 +235,33 @@  gen6_render_ring_flush(struct intel_ring_buffer *ring,
 	if (flush_domains)
 		flags |= PIPE_CONTROL_CS_STALL;
 
-	ret = intel_ring_begin(ring, 6);
+	ret = intel_ring_begin(ring, 4);
 	if (ret)
 		return ret;
 
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
 	intel_ring_emit(ring, flags);
-	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, 0); /* lower dword */
-	intel_ring_emit(ring, 0); /* uppwer dword */
-	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, 0);
 	intel_ring_advance(ring);
 
 	return 0;
 }
 
+static int
+gen6_render_ring_flush__wa(struct intel_ring_buffer *ring,
+			   u32 invalidate_domains, u32 flush_domains)
+{
+	int ret;
+
+	/* Force SNB workarounds for PIPE_CONTROL flushes */
+	ret = intel_emit_post_sync_nonzero_flush(ring);
+	if (ret)
+		return ret;
+
+	return gen6_render_ring_flush(ring, invalidate_domains, flush_domains);
+}
+
 static void ring_write_tail(struct intel_ring_buffer *ring,
 			    u32 value)
 {
@@ -1374,6 +1379,8 @@  int intel_init_render_ring_buffer(struct drm_device *dev)
 	if (INTEL_INFO(dev)->gen >= 6) {
 		ring->add_request = gen6_add_request;
 		ring->flush = gen6_render_ring_flush;
+		if (INTEL_INFO(dev)->gen == 6)
+			ring->flush = gen6_render_ring_flush__wa;
 		ring->irq_get = gen6_ring_get_irq;
 		ring->irq_put = gen6_ring_put_irq;
 		ring->irq_enable_mask = GT_USER_INTERRUPT;