diff mbox

[v2] drm/i915: Make semaphore updates more precise

Message ID 1392150042-18837-1-git-send-email-benjamin.widawsky@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky Feb. 11, 2014, 8:20 p.m. UTC
With the ring mask we now have an easy way to know the number of rings
in the system, and therefore can accurately predict the number of dwords
to emit for semaphore signalling. This was not possible (easily)
previously.

There should be no functional impact, simply fewer instructions emitted.

While we're here, simply round the total up to a multiple of 2 instead of
the fancier rounding we did before, which rounded up per mbox, i.e. to 4.
This also allows us to drop the unnecessary MI_NOOP, so not really 4, but 3.

v2: Use 3 dwords instead of 4 (Ville)
Do the proper calculation to get the number of dwords to emit (Ville)
Conditionally set .sync_to when semaphores are enabled (Ville)

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 55 ++++++++++++++++++---------------
 1 file changed, 30 insertions(+), 25 deletions(-)

Comments

Ville Syrjälä Feb. 11, 2014, 8:53 p.m. UTC | #1
On Tue, Feb 11, 2014 at 12:20:42PM -0800, Ben Widawsky wrote:
> With the ring mask we now have an easy way to know the number of rings
> in the system, and therefore can accurately predict the number of dwords
> to emit for semaphore signalling. This was not possible (easily)
> previously.
> 
> There should be no functional impact, simply fewer instructions emitted.
> 
> While we're here, simply do the round up to 2 instead of the fancier
> rounding we did before, which rounding up per mbox, ie 4. This also
> allows us to drop the unnecessary MI_NOOP, so not really 4, 3.
> 
> v2: Use 3 dwords instead of 4 (Ville)
> Do the proper calculation to get the number of dwords to emit (Ville)
> Conditionally set .sync_to when semaphores are enabled (Ville)
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

Yeah looks OK now. Well, assuming we don't keep going when we fail to
init one or more rings, because in that case the loop would fail to emit
all the dwords it was supposed to.

IIRC the rest of the patches looked good up to 05/11. So for patches
01-05:
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 55 ++++++++++++++++++---------------
>  1 file changed, 30 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 70f7190..483684f 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -635,24 +635,19 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring)
>  static int gen6_signal(struct intel_ring_buffer *signaller,
>  		       unsigned int num_dwords)
>  {
> +#define MBOX_UPDATE_DWORDS 3
>  	struct drm_device *dev = signaller->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_ring_buffer *useless;
> -	int i, ret;
> +	int i, ret, num_rings;
>  
> -	/* NB: In order to be able to do semaphore MBOX updates for varying
> -	 * number of rings, it's easiest if we round up each individual update
> -	 * to a multiple of 2 (since ring updates must always be a multiple of
> -	 * 2) even though the actual update only requires 3 dwords.
> -	 */
> -#define MBOX_UPDATE_DWORDS 4
> -	if (i915_semaphore_is_enabled(dev))
> -		num_dwords += ((I915_NUM_RINGS-1) * MBOX_UPDATE_DWORDS);
> +	num_rings = hweight_long(INTEL_INFO(dev)->ring_mask);
> +	num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
> +#undef MBOX_UPDATE_DWORDS
>  
>  	ret = intel_ring_begin(signaller, num_dwords);
>  	if (ret)
>  		return ret;
> -#undef MBOX_UPDATE_DWORDS
>  
>  	for_each_ring(useless, dev_priv, i) {
>  		u32 mbox_reg = signaller->semaphore.signal_mbox[i];
> @@ -660,15 +655,13 @@ static int gen6_signal(struct intel_ring_buffer *signaller,
>  			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
>  			intel_ring_emit(signaller, mbox_reg);
>  			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> -			intel_ring_emit(signaller, MI_NOOP);
> -		} else {
> -			intel_ring_emit(signaller, MI_NOOP);
> -			intel_ring_emit(signaller, MI_NOOP);
> -			intel_ring_emit(signaller, MI_NOOP);
> -			intel_ring_emit(signaller, MI_NOOP);
>  		}
>  	}
>  
> +	/* If num_dwords was rounded, make sure the tail pointer is correct */
> +	if (num_rings % 2 == 0)
> +		intel_ring_emit(signaller, MI_NOOP);
> +
>  	return 0;
>  }
>  
> @@ -686,7 +679,11 @@ gen6_add_request(struct intel_ring_buffer *ring)
>  {
>  	int ret;
>  
> -	ret = ring->semaphore.signal(ring, 4);
> +	if (ring->semaphore.signal)
> +		ret = ring->semaphore.signal(ring, 4);
> +	else
> +		ret = intel_ring_begin(ring, 4);
> +
>  	if (ret)
>  		return ret;
>  
> @@ -1880,8 +1877,10 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
>  		ring->get_seqno = gen6_ring_get_seqno;
>  		ring->set_seqno = ring_set_seqno;
> -		ring->semaphore.sync_to = gen6_ring_sync;
> -		ring->semaphore.signal = gen6_signal;
> +		if (i915_semaphore_is_enabled(dev)) {
> +			ring->semaphore.sync_to = gen6_ring_sync;
> +			ring->semaphore.signal = gen6_signal;
> +		}
>  		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
>  		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_RV;
>  		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_RB;
> @@ -2057,8 +2056,10 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
>  			ring->dispatch_execbuffer =
>  				gen6_ring_dispatch_execbuffer;
>  		}
> -		ring->semaphore.sync_to = gen6_ring_sync;
> -		ring->semaphore.signal = gen6_signal;
> +		if (i915_semaphore_is_enabled(dev)) {
> +			ring->semaphore.sync_to = gen6_ring_sync;
> +			ring->semaphore.signal = gen6_signal;
> +		}
>  		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VR;
>  		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
>  		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VB;
> @@ -2115,8 +2116,10 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
>  		ring->irq_put = gen6_ring_put_irq;
>  		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
>  	}
> -	ring->semaphore.sync_to = gen6_ring_sync;
> -	ring->semaphore.signal = gen6_signal;
> +	if (i915_semaphore_is_enabled(dev)) {
> +		ring->semaphore.signal = gen6_signal;
> +		ring->semaphore.sync_to = gen6_ring_sync;
> +	}
>  	ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_BR;
>  	ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_BV;
>  	ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
> @@ -2157,8 +2160,10 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
>  		ring->irq_put = hsw_vebox_put_irq;
>  		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
>  	}
> -	ring->semaphore.sync_to = gen6_ring_sync;
> -	ring->semaphore.signal = gen6_signal;
> +	if (i915_semaphore_is_enabled(dev)) {
> +		ring->semaphore.sync_to = gen6_ring_sync;
> +		ring->semaphore.signal = gen6_signal;
> +	}
>  	ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VER;
>  	ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_VEV;
>  	ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VEB;
> -- 
> 1.8.5.4
Ben Widawsky Feb. 11, 2014, 9:50 p.m. UTC | #2
On Tue, Feb 11, 2014 at 10:53:40PM +0200, Ville Syrjälä wrote:
> On Tue, Feb 11, 2014 at 12:20:42PM -0800, Ben Widawsky wrote:
> > With the ring mask we now have an easy way to know the number of rings
> > in the system, and therefore can accurately predict the number of dwords
> > to emit for semaphore signalling. This was not possible (easily)
> > previously.
> > 
> > There should be no functional impact, simply fewer instructions emitted.
> > 
> > While we're here, simply do the round up to 2 instead of the fancier
> > rounding we did before, which rounding up per mbox, ie 4. This also
> > allows us to drop the unnecessary MI_NOOP, so not really 4, 3.
> > 
> > v2: Use 3 dwords instead of 4 (Ville)
> > Do the proper calculation to get the number of dwords to emit (Ville)
> > Conditionally set .sync_to when semaphores are enabled (Ville)
> > 
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> 
> Yeah looks OK now. Well, assuming we don't keep going when we fail to
> init one or more rings, because in that case the loop would fail to emit
> all the dwords it was supposed to.
> 

Yeah. I don't think this is ever the behavior we should aim for.
More generally, though, I feel our code chickens out too often. If
the HW is supposed to support it, I'd rather get a bug report than try
to limp along.

> IIRC the rest of the patches looked good up to 05/11. So for patches
> 01-05:
> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

Thanks.

> 
> > ---
> >  drivers/gpu/drm/i915/intel_ringbuffer.c | 55 ++++++++++++++++++---------------
> >  1 file changed, 30 insertions(+), 25 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > index 70f7190..483684f 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > @@ -635,24 +635,19 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring)
> >  static int gen6_signal(struct intel_ring_buffer *signaller,
> >  		       unsigned int num_dwords)
> >  {
> > +#define MBOX_UPDATE_DWORDS 3
> >  	struct drm_device *dev = signaller->dev;
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> >  	struct intel_ring_buffer *useless;
> > -	int i, ret;
> > +	int i, ret, num_rings;
> >  
> > -	/* NB: In order to be able to do semaphore MBOX updates for varying
> > -	 * number of rings, it's easiest if we round up each individual update
> > -	 * to a multiple of 2 (since ring updates must always be a multiple of
> > -	 * 2) even though the actual update only requires 3 dwords.
> > -	 */
> > -#define MBOX_UPDATE_DWORDS 4
> > -	if (i915_semaphore_is_enabled(dev))
> > -		num_dwords += ((I915_NUM_RINGS-1) * MBOX_UPDATE_DWORDS);
> > +	num_rings = hweight_long(INTEL_INFO(dev)->ring_mask);
> > +	num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
> > +#undef MBOX_UPDATE_DWORDS
> >  
> >  	ret = intel_ring_begin(signaller, num_dwords);
> >  	if (ret)
> >  		return ret;
> > -#undef MBOX_UPDATE_DWORDS
> >  
> >  	for_each_ring(useless, dev_priv, i) {
> >  		u32 mbox_reg = signaller->semaphore.signal_mbox[i];
> > @@ -660,15 +655,13 @@ static int gen6_signal(struct intel_ring_buffer *signaller,
> >  			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
> >  			intel_ring_emit(signaller, mbox_reg);
> >  			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> > -			intel_ring_emit(signaller, MI_NOOP);
> > -		} else {
> > -			intel_ring_emit(signaller, MI_NOOP);
> > -			intel_ring_emit(signaller, MI_NOOP);
> > -			intel_ring_emit(signaller, MI_NOOP);
> > -			intel_ring_emit(signaller, MI_NOOP);
> >  		}
> >  	}
> >  
> > +	/* If num_dwords was rounded, make sure the tail pointer is correct */
> > +	if (num_rings % 2 == 0)
> > +		intel_ring_emit(signaller, MI_NOOP);
> > +
> >  	return 0;
> >  }
> >  
> > @@ -686,7 +679,11 @@ gen6_add_request(struct intel_ring_buffer *ring)
> >  {
> >  	int ret;
> >  
> > -	ret = ring->semaphore.signal(ring, 4);
> > +	if (ring->semaphore.signal)
> > +		ret = ring->semaphore.signal(ring, 4);
> > +	else
> > +		ret = intel_ring_begin(ring, 4);
> > +
> >  	if (ret)
> >  		return ret;
> >  
> > @@ -1880,8 +1877,10 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
> >  		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
> >  		ring->get_seqno = gen6_ring_get_seqno;
> >  		ring->set_seqno = ring_set_seqno;
> > -		ring->semaphore.sync_to = gen6_ring_sync;
> > -		ring->semaphore.signal = gen6_signal;
> > +		if (i915_semaphore_is_enabled(dev)) {
> > +			ring->semaphore.sync_to = gen6_ring_sync;
> > +			ring->semaphore.signal = gen6_signal;
> > +		}
> >  		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
> >  		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_RV;
> >  		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_RB;
> > @@ -2057,8 +2056,10 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
> >  			ring->dispatch_execbuffer =
> >  				gen6_ring_dispatch_execbuffer;
> >  		}
> > -		ring->semaphore.sync_to = gen6_ring_sync;
> > -		ring->semaphore.signal = gen6_signal;
> > +		if (i915_semaphore_is_enabled(dev)) {
> > +			ring->semaphore.sync_to = gen6_ring_sync;
> > +			ring->semaphore.signal = gen6_signal;
> > +		}
> >  		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VR;
> >  		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
> >  		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VB;
> > @@ -2115,8 +2116,10 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
> >  		ring->irq_put = gen6_ring_put_irq;
> >  		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
> >  	}
> > -	ring->semaphore.sync_to = gen6_ring_sync;
> > -	ring->semaphore.signal = gen6_signal;
> > +	if (i915_semaphore_is_enabled(dev)) {
> > +		ring->semaphore.signal = gen6_signal;
> > +		ring->semaphore.sync_to = gen6_ring_sync;
> > +	}
> >  	ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_BR;
> >  	ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_BV;
> >  	ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
> > @@ -2157,8 +2160,10 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
> >  		ring->irq_put = hsw_vebox_put_irq;
> >  		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
> >  	}
> > -	ring->semaphore.sync_to = gen6_ring_sync;
> > -	ring->semaphore.signal = gen6_signal;
> > +	if (i915_semaphore_is_enabled(dev)) {
> > +		ring->semaphore.sync_to = gen6_ring_sync;
> > +		ring->semaphore.signal = gen6_signal;
> > +	}
> >  	ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VER;
> >  	ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_VEV;
> >  	ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VEB;
> > -- 
> > 1.8.5.4
> 
> -- 
> Ville Syrjälä
> Intel OTC
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 70f7190..483684f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -635,24 +635,19 @@  static void render_ring_cleanup(struct intel_ring_buffer *ring)
 static int gen6_signal(struct intel_ring_buffer *signaller,
 		       unsigned int num_dwords)
 {
+#define MBOX_UPDATE_DWORDS 3
 	struct drm_device *dev = signaller->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *useless;
-	int i, ret;
+	int i, ret, num_rings;
 
-	/* NB: In order to be able to do semaphore MBOX updates for varying
-	 * number of rings, it's easiest if we round up each individual update
-	 * to a multiple of 2 (since ring updates must always be a multiple of
-	 * 2) even though the actual update only requires 3 dwords.
-	 */
-#define MBOX_UPDATE_DWORDS 4
-	if (i915_semaphore_is_enabled(dev))
-		num_dwords += ((I915_NUM_RINGS-1) * MBOX_UPDATE_DWORDS);
+	num_rings = hweight_long(INTEL_INFO(dev)->ring_mask);
+	num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
+#undef MBOX_UPDATE_DWORDS
 
 	ret = intel_ring_begin(signaller, num_dwords);
 	if (ret)
 		return ret;
-#undef MBOX_UPDATE_DWORDS
 
 	for_each_ring(useless, dev_priv, i) {
 		u32 mbox_reg = signaller->semaphore.signal_mbox[i];
@@ -660,15 +655,13 @@  static int gen6_signal(struct intel_ring_buffer *signaller,
 			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
 			intel_ring_emit(signaller, mbox_reg);
 			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
-			intel_ring_emit(signaller, MI_NOOP);
-		} else {
-			intel_ring_emit(signaller, MI_NOOP);
-			intel_ring_emit(signaller, MI_NOOP);
-			intel_ring_emit(signaller, MI_NOOP);
-			intel_ring_emit(signaller, MI_NOOP);
 		}
 	}
 
+	/* If num_dwords was rounded, make sure the tail pointer is correct */
+	if (num_rings % 2 == 0)
+		intel_ring_emit(signaller, MI_NOOP);
+
 	return 0;
 }
 
@@ -686,7 +679,11 @@  gen6_add_request(struct intel_ring_buffer *ring)
 {
 	int ret;
 
-	ret = ring->semaphore.signal(ring, 4);
+	if (ring->semaphore.signal)
+		ret = ring->semaphore.signal(ring, 4);
+	else
+		ret = intel_ring_begin(ring, 4);
+
 	if (ret)
 		return ret;
 
@@ -1880,8 +1877,10 @@  int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
 		ring->get_seqno = gen6_ring_get_seqno;
 		ring->set_seqno = ring_set_seqno;
-		ring->semaphore.sync_to = gen6_ring_sync;
-		ring->semaphore.signal = gen6_signal;
+		if (i915_semaphore_is_enabled(dev)) {
+			ring->semaphore.sync_to = gen6_ring_sync;
+			ring->semaphore.signal = gen6_signal;
+		}
 		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
 		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_RV;
 		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_RB;
@@ -2057,8 +2056,10 @@  int intel_init_bsd_ring_buffer(struct drm_device *dev)
 			ring->dispatch_execbuffer =
 				gen6_ring_dispatch_execbuffer;
 		}
-		ring->semaphore.sync_to = gen6_ring_sync;
-		ring->semaphore.signal = gen6_signal;
+		if (i915_semaphore_is_enabled(dev)) {
+			ring->semaphore.sync_to = gen6_ring_sync;
+			ring->semaphore.signal = gen6_signal;
+		}
 		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VR;
 		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
 		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VB;
@@ -2115,8 +2116,10 @@  int intel_init_blt_ring_buffer(struct drm_device *dev)
 		ring->irq_put = gen6_ring_put_irq;
 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
 	}
-	ring->semaphore.sync_to = gen6_ring_sync;
-	ring->semaphore.signal = gen6_signal;
+	if (i915_semaphore_is_enabled(dev)) {
+		ring->semaphore.signal = gen6_signal;
+		ring->semaphore.sync_to = gen6_ring_sync;
+	}
 	ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_BR;
 	ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_BV;
 	ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
@@ -2157,8 +2160,10 @@  int intel_init_vebox_ring_buffer(struct drm_device *dev)
 		ring->irq_put = hsw_vebox_put_irq;
 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
 	}
-	ring->semaphore.sync_to = gen6_ring_sync;
-	ring->semaphore.signal = gen6_signal;
+	if (i915_semaphore_is_enabled(dev)) {
+		ring->semaphore.sync_to = gen6_ring_sync;
+		ring->semaphore.signal = gen6_signal;
+	}
 	ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VER;
 	ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_VEV;
 	ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VEB;