diff mbox

[CI,1/3] drm/i915: Avoid the branch in computing intel_ring_space()

Message ID 20170504130846.4807-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson May 4, 2017, 1:08 p.m. UTC
Exploit the power-of-two ring size to compute the space across the
wraparound using a mask rather than a if. Convert to unsigned integers
so the operation is well defined.

References: https://bugs.freedesktop.org/show_bug.cgi?id=99671
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 23 +++++++++++----------
 drivers/gpu/drm/i915/intel_ringbuffer.h | 36 ++++++++++++++++++++-------------
 2 files changed, 34 insertions(+), 25 deletions(-)

Comments

Michal Wajdeczko May 4, 2017, 2:17 p.m. UTC | #1
On Thu, May 04, 2017 at 02:08:44PM +0100, Chris Wilson wrote:
> Exploit the power-of-two ring size to compute the space across the
> wraparound using a mask rather than a if. Convert to unsigned integers
> so the operation is well defined.
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=99671
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 23 +++++++++++----------
>  drivers/gpu/drm/i915/intel_ringbuffer.h | 36 ++++++++++++++++++++-------------
>  2 files changed, 34 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 3ce1c87dec46..e7ef04cc071b 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -39,12 +39,16 @@
>   */
>  #define LEGACY_REQUEST_SIZE 200
>  
> -static int __intel_ring_space(int head, int tail, int size)
> +static unsigned int __intel_ring_space(unsigned int head,
> +				       unsigned int tail,
> +				       unsigned int size)
>  {
> -	int space = head - tail;
> -	if (space <= 0)
> -		space += size;
> -	return space - I915_RING_FREE_SPACE;
> +	/*
> +	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> +	 * same cacheline, the Head Pointer must not be greater than the Tail
> +	 * Pointer."
> +	 */
> +	return (head - tail - CACHELINE_BYTES) & (size - 1);

Btw, as you exploit power-of-two ring size here, maybe it is worth to repeat

	GEM_BUG_ON(!is_power_of_2(size));

to emphase this assumption in the code (not only in the commit message)?

-Michal
Chris Wilson May 4, 2017, 2:39 p.m. UTC | #2
On Thu, May 04, 2017 at 04:17:13PM +0200, Michal Wajdeczko wrote:
> On Thu, May 04, 2017 at 02:08:44PM +0100, Chris Wilson wrote:
> > Exploit the power-of-two ring size to compute the space across the
> > wraparound using a mask rather than a if. Convert to unsigned integers
> > so the operation is well defined.
> > 
> > References: https://bugs.freedesktop.org/show_bug.cgi?id=99671
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> > Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
> > ---
> >  drivers/gpu/drm/i915/intel_ringbuffer.c | 23 +++++++++++----------
> >  drivers/gpu/drm/i915/intel_ringbuffer.h | 36 ++++++++++++++++++++-------------
> >  2 files changed, 34 insertions(+), 25 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > index 3ce1c87dec46..e7ef04cc071b 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > @@ -39,12 +39,16 @@
> >   */
> >  #define LEGACY_REQUEST_SIZE 200
> >  
> > -static int __intel_ring_space(int head, int tail, int size)
> > +static unsigned int __intel_ring_space(unsigned int head,
> > +				       unsigned int tail,
> > +				       unsigned int size)
> >  {
> > -	int space = head - tail;
> > -	if (space <= 0)
> > -		space += size;
> > -	return space - I915_RING_FREE_SPACE;
> > +	/*
> > +	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> > +	 * same cacheline, the Head Pointer must not be greater than the Tail
> > +	 * Pointer."
> > +	 */
> > +	return (head - tail - CACHELINE_BYTES) & (size - 1);
> 
> Btw, as you exploit power-of-two ring size here, maybe it is worth to repeat
> 
> 	GEM_BUG_ON(!is_power_of_2(size));
> 
> to emphase this assumption in the code (not only in the commit message)?

I did check we had an is_power_of_2() check in intel_engine_create_ring.
Might be worth asserting here as well as there's a little disconnect
between the function and ring->size.
-Chris
Chris Wilson May 4, 2017, 2:54 p.m. UTC | #3
On Thu, May 04, 2017 at 04:17:13PM +0200, Michal Wajdeczko wrote:
> On Thu, May 04, 2017 at 02:08:44PM +0100, Chris Wilson wrote:
> > Exploit the power-of-two ring size to compute the space across the
> > wraparound using a mask rather than a if. Convert to unsigned integers
> > so the operation is well defined.
> > 
> > References: https://bugs.freedesktop.org/show_bug.cgi?id=99671
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> > Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
> > ---
> >  drivers/gpu/drm/i915/intel_ringbuffer.c | 23 +++++++++++----------
> >  drivers/gpu/drm/i915/intel_ringbuffer.h | 36 ++++++++++++++++++++-------------
> >  2 files changed, 34 insertions(+), 25 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > index 3ce1c87dec46..e7ef04cc071b 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > @@ -39,12 +39,16 @@
> >   */
> >  #define LEGACY_REQUEST_SIZE 200
> >  
> > -static int __intel_ring_space(int head, int tail, int size)
> > +static unsigned int __intel_ring_space(unsigned int head,
> > +				       unsigned int tail,
> > +				       unsigned int size)
> >  {
> > -	int space = head - tail;
> > -	if (space <= 0)
> > -		space += size;
> > -	return space - I915_RING_FREE_SPACE;
> > +	/*
> > +	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> > +	 * same cacheline, the Head Pointer must not be greater than the Tail
> > +	 * Pointer."
> > +	 */
> > +	return (head - tail - CACHELINE_BYTES) & (size - 1);
> 
> Btw, as you exploit power-of-two ring size here, maybe it is worth to repeat
> 
> 	GEM_BUG_ON(!is_power_of_2(size));
> 
> to emphase this assumption in the code (not only in the commit message)?

I've made the cardinal sin of changing it at the last moment, if I've
broken everything I'm going to blame you :)

Semi-pushed, looks like we're already back in conflict territory.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 3ce1c87dec46..e7ef04cc071b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -39,12 +39,16 @@ 
  */
 #define LEGACY_REQUEST_SIZE 200
 
-static int __intel_ring_space(int head, int tail, int size)
+static unsigned int __intel_ring_space(unsigned int head,
+				       unsigned int tail,
+				       unsigned int size)
 {
-	int space = head - tail;
-	if (space <= 0)
-		space += size;
-	return space - I915_RING_FREE_SPACE;
+	/*
+	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+	 * same cacheline, the Head Pointer must not be greater than the Tail
+	 * Pointer."
+	 */
+	return (head - tail - CACHELINE_BYTES) & (size - 1);
 }
 
 void intel_ring_update_space(struct intel_ring *ring)
@@ -1670,12 +1674,9 @@  static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	GEM_BUG_ON(!req->reserved_space);
 
 	list_for_each_entry(target, &ring->request_list, ring_link) {
-		unsigned space;
-
 		/* Would completion of this request free enough space? */
-		space = __intel_ring_space(target->postfix, ring->emit,
-					   ring->size);
-		if (space >= bytes)
+		if (bytes <= __intel_ring_space(target->postfix,
+						ring->emit, ring->size))
 			break;
 	}
 
@@ -1744,11 +1745,11 @@  u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 	}
 
 	GEM_BUG_ON(ring->emit > ring->size - bytes);
+	GEM_BUG_ON(ring->space < bytes);
 	cs = ring->vaddr + ring->emit;
 	GEM_DEBUG_EXEC(memset(cs, POISON_INUSE, bytes));
 	ring->emit += bytes;
 	ring->space -= bytes;
-	GEM_BUG_ON(ring->space < 0);
 
 	return cs;
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 600713b29d79..650ab884d6c8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -17,17 +17,6 @@ 
 #define CACHELINE_BYTES 64
 #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))
 
-/*
- * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
- * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
- * Gen4+ BSpec "vol1c Memory Interface and Command Stream" / 5.3.4.5 "Ring Buffer Use"
- *
- * "If the Ring Buffer Head Pointer and the Tail Pointer are on the same
- * cacheline, the Head Pointer must not be greater than the Tail
- * Pointer."
- */
-#define I915_RING_FREE_SPACE 64
-
 struct intel_hw_status_page {
 	struct i915_vma *vma;
 	u32 *page_addr;
@@ -145,9 +134,9 @@  struct intel_ring {
 	u32 tail;
 	u32 emit;
 
-	int space;
-	int size;
-	int effective_size;
+	u32 space;
+	u32 size;
+	u32 effective_size;
 };
 
 struct i915_gem_context;
@@ -548,6 +537,25 @@  assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
 	 */
 	GEM_BUG_ON(!IS_ALIGNED(tail, 8));
 	GEM_BUG_ON(tail >= ring->size);
+
+	/*
+	 * "Ring Buffer Use"
+	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
+	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
+	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
+	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+	 * same cacheline, the Head Pointer must not be greater than the Tail
+	 * Pointer."
+	 *
+	 * We use ring->head as the last known location of the actual RING_HEAD,
+	 * it may have advanced but in the worst case it is equally the same
+	 * as ring->head and so we should never program RING_TAIL to advance
+	 * into the same cacheline as ring->head.
+	 */
+#define cacheline(a) round_down(a, CACHELINE_BYTES)
+	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
+		   tail < ring->head);
+#undef cacheline
 }
 
 static inline unsigned int