diff mbox

[1/2] drm/i915: Dumb down the semaphore logic

Message ID 1315437162-14312-1-git-send-email-ben@bwidawsk.net (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky Sept. 7, 2011, 11:12 p.m. UTC
While I think the previous code is correct, it was hard to follow and
hard to debug. Since we already have a ring abstraction, might as well
use it to handle the semaphore updates and compares.

I don't expect this code to make semaphores better or worse, but you
never know...

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Eric Anholt <eric@anholt.net>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    3 +-
 drivers/gpu/drm/i915/i915_reg.h            |    7 +
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  176 +++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |    7 +-
 4 files changed, 145 insertions(+), 48 deletions(-)

Comments

Andrew Lutomirski Sept. 8, 2011, 12:30 a.m. UTC | #1
On Wed, Sep 7, 2011 at 4:12 PM, Ben Widawsky <ben@bwidawsk.net> wrote:
> While I think the previous code is correct, it was hard to follow and
> hard to debug. Since we already have a ring abstraction, might as well
> use it to handle the semaphore updates and compares.
>
> I don't expect this code to make semaphores better or worse, but you
> never know...
>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Eric Anholt <eric@anholt.net>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |    3 +-
>  drivers/gpu/drm/i915/i915_reg.h            |    7 +
>  drivers/gpu/drm/i915/intel_ringbuffer.c    |  176 +++++++++++++++++++++-------
>  drivers/gpu/drm/i915/intel_ringbuffer.h    |    7 +-
>  4 files changed, 145 insertions(+), 48 deletions(-)
>

Sadly, it still instantly crashes.

--Andy
Ben Widawsky Sept. 8, 2011, 1:19 a.m. UTC | #2
On Sep 7, 2011, at 5:30 PM, Andrew Lutomirski <luto@mit.edu> wrote:

> On Wed, Sep 7, 2011 at 4:12 PM, Ben Widawsky <ben@bwidawsk.net> wrote:
>> While I think the previous code is correct, it was hard to follow and
>> hard to debug. Since we already have a ring abstraction, might as well
>> use it to handle the semaphore updates and compares.
>> 
>> I don't expect this code to make semaphores better or worse, but you
>> never know...
>> 
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>> Cc: Eric Anholt <eric@anholt.net>
>> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
>> ---
>>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |    3 +-
>>  drivers/gpu/drm/i915/i915_reg.h            |    7 +
>>  drivers/gpu/drm/i915/intel_ringbuffer.c    |  176 +++++++++++++++++++++-------
>>  drivers/gpu/drm/i915/intel_ringbuffer.h    |    7 +-
>>  4 files changed, 145 insertions(+), 48 deletions(-)
>> 
> 
> Sadly, it still instantly crashes.
> 
> --Andy

Remind me again... Does ssh still work?
Andrew Lutomirski Sept. 8, 2011, 1:22 a.m. UTC | #3
On Wed, Sep 7, 2011 at 6:19 PM, Ben Widawsky <ben@bwidawsk.net> wrote:
>
>
> On Sep 7, 2011, at 5:30 PM, Andrew Lutomirski <luto@mit.edu> wrote:
>
>> On Wed, Sep 7, 2011 at 4:12 PM, Ben Widawsky <ben@bwidawsk.net> wrote:
>>> While I think the previous code is correct, it was hard to follow and
>>> hard to debug. Since we already have a ring abstraction, might as well
>>> use it to handle the semaphore updates and compares.
>>>
>>> I don't expect this code to make semaphores better or worse, but you
>>> never know...
>>>
>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>>> Cc: Eric Anholt <eric@anholt.net>
>>> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
>>> ---
>>>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |    3 +-
>>>  drivers/gpu/drm/i915/i915_reg.h            |    7 +
>>>  drivers/gpu/drm/i915/intel_ringbuffer.c    |  176 +++++++++++++++++++++-------
>>>  drivers/gpu/drm/i915/intel_ringbuffer.h    |    7 +-
>>>  4 files changed, 145 insertions(+), 48 deletions(-)
>>>
>>
>> Sadly, it still instantly crashes.
>>
>> --Andy
>
> Remind me again... Does ssh still work?

I haven't tried, but I'd be surprised if it did.  The *reset* button
(the hardware one that's attached to the motherboard) doesn't work.

--Andy
Keith Packard Sept. 8, 2011, 4:31 a.m. UTC | #4
On Wed,  7 Sep 2011 16:12:41 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:

> -update_semaphore(struct intel_ring_buffer *ring, int i, u32 seqno)
> +update_mboxes(struct intel_ring_buffer *ring,
> +	    u32 seqno,
> +	    u32 mmio_offset)

Yeah, definitely like this change; lots less magic here.

> -static int
> +/**
> + * gen6_add_request - Update the semaphore mailbox registers
> + * 
> + * @ring - ring that is adding a request
> + * @mbox1_reg - mailbox address for RCS or VCS ring
> + * @mbox2_reg - mailbox address for VCS or BCS ring
> + *
> + * Update the mailbox registers in the *other* rings with the current seqno.
> + * This acts like a signal in the canonical semaphore.
> + */
> +static u32
>  gen6_add_request(struct intel_ring_buffer *ring,
> -		 u32 *result)
> +		 u32 mbox1_reg,
> +		 u32 mbox2_reg)

I think you're losing the ability to return errors from here.

>  	u32 seqno;
>  	int ret;
> +	seqno = i915_gem_get_seqno(ring->dev);
>  
>  	ret = intel_ring_begin(ring, 10);
>  	if (ret)
>  		return ret;
>  
> -	seqno = i915_gem_get_seqno(ring->dev);


Why change the ordering of get_seqno relative to ring_begin here?

> +static int
> +gen6_blt_add_request(struct intel_ring_buffer *ring,
> +		     u32 *result)
> +{
> +	struct drm_device *dev = ring->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	*result = gen6_add_request(ring,
> +				   dev_priv->ring[RCS].mmio_base + 0x44,
> +				   dev_priv->ring[VCS].mmio_base + 0x40);
>  	return 0;

Why the magic constants? Can we have named values? And, note that this
function never returns an error value, which is definitely not a good plan.

> +	temp |= MI_SEMAPHORE_REGISTER;

temp is a constant, why is it being |='d here?

> +/* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
> +int
> +render_ring_sync_to(struct intel_ring_buffer *waiter,
> +		    struct intel_ring_buffer *signaller,
> +		    u32 seqno)
> +{
> +	WARN_ON(signaller->semaphore_register[RCS] == MI_SEMAPHORE_SYNC_INVALID);
> +	return intel_ring_sync(waiter,
> +			       signaller,
> +			       signaller->semaphore_register[RCS],

Should you just pass the index instead of the register value itself?

Otherwise, this seems like a reasonable change to me.
Daniel Vetter Sept. 8, 2011, 7:52 a.m. UTC | #5
On Wed, Sep 07, 2011 at 04:12:41PM -0700, Ben Widawsky wrote:
> While I think the previous code is correct, it was hard to follow and
> hard to debug. Since we already have a ring abstraction, might as well
> use it to handle the semaphore updates and compares.
> 
> I don't expect this code to make semaphores better or worse, but you
> never know...

I'm kinda starting to like this ;-)

While you stare at this, two things I'm pondering:
- Would it make sense to also move the !semaphores ring_sync_to case into
  the ringbuffer abstraction?
- Can we have a basic testcase for the magic values (and semaphores in
  general), please? I'm thinking of submitting a batchbuffer with a dummy
  r/w render relocation (hide it e.g. in the val field of
  MI_FLUSH_DW/PIPE_CONTROL without setting the write post-sync-op), and
  then randomly moving around the buffer to another ring. That should
  decently exercise the semaphores code in the kernel and sync hw in the
  gpu without (hopefully) anything else interfering.

-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 4934cf8..3693e83 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -784,7 +784,8 @@  i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
 	}
 
 	from->sync_seqno[idx] = seqno;
-	return intel_ring_sync(to, from, seqno - 1);
+
+	return to->sync_to(to, from, seqno - 1);
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 542453f..f0b5287 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -194,6 +194,13 @@ 
 #define  MI_SEMAPHORE_UPDATE	    (1<<21)
 #define  MI_SEMAPHORE_COMPARE	    (1<<20)
 #define  MI_SEMAPHORE_REGISTER	    (1<<18)
+#define  MI_SEMAPHORE_SYNC_RV	    (2<<16)
+#define  MI_SEMAPHORE_SYNC_RB	    (0<<16)
+#define  MI_SEMAPHORE_SYNC_VR	    (0<<16)
+#define  MI_SEMAPHORE_SYNC_VB	    (2<<16)
+#define  MI_SEMAPHORE_SYNC_BR	    (2<<16)
+#define  MI_SEMAPHORE_SYNC_BV	    (0<<16)
+#define  MI_SEMAPHORE_SYNC_INVALID  (1<<0)
 /*
  * 3D instructions used by the kernel
  */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c30626e..8f902a1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -315,79 +315,155 @@  static void render_ring_cleanup(struct intel_ring_buffer *ring)
 }
 
 static void
-update_semaphore(struct intel_ring_buffer *ring, int i, u32 seqno)
+update_mboxes(struct intel_ring_buffer *ring,
+	    u32 seqno,
+	    u32 mmio_offset)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int id;
-
-	/*
-	 * cs -> 1 = vcs, 0 = bcs
-	 * vcs -> 1 = bcs, 0 = cs,
-	 * bcs -> 1 = cs, 0 = vcs.
-	 */
-	id = ring - dev_priv->ring;
-	id += 2 - i;
-	id %= 3;
-
-	intel_ring_emit(ring,
-			MI_SEMAPHORE_MBOX |
-			MI_SEMAPHORE_REGISTER |
-			MI_SEMAPHORE_UPDATE);
+	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
+			      MI_SEMAPHORE_GLOBAL_GTT |
+			      MI_SEMAPHORE_REGISTER |
+			      MI_SEMAPHORE_UPDATE);
 	intel_ring_emit(ring, seqno);
-	intel_ring_emit(ring,
-			RING_SYNC_0(dev_priv->ring[id].mmio_base) + 4*i);
+	intel_ring_emit(ring, mmio_offset);
 }
 
-static int
+/**
+ * gen6_add_request - Update the semaphore mailbox registers
+ *
+ * @ring: ring that is adding a request
+ * @mbox1_reg: mailbox address for RCS or VCS ring
+ * @mbox2_reg: mailbox address for VCS or BCS ring
+ *
+ * Update the mailbox registers in the *other* rings with the current seqno.
+ * This acts like a signal in the canonical semaphore.
+ */
+static u32
 gen6_add_request(struct intel_ring_buffer *ring,
-		 u32 *result)
+		 u32 mbox1_reg,
+		 u32 mbox2_reg)
 {
 	u32 seqno;
 	int ret;
+	seqno = i915_gem_get_seqno(ring->dev);
 
 	ret = intel_ring_begin(ring, 10);
 	if (ret)
 		return ret;
 
-	seqno = i915_gem_get_seqno(ring->dev);
-	update_semaphore(ring, 0, seqno);
-	update_semaphore(ring, 1, seqno);
-
+	update_mboxes(ring, seqno, mbox1_reg);
+	update_mboxes(ring, seqno, mbox2_reg);
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
 	intel_ring_emit(ring, seqno);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	intel_ring_advance(ring);
 
-	*result = seqno;
+	return seqno;
+}
+
+static int
+gen6_blt_add_request(struct intel_ring_buffer *ring,
+		     u32 *result)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	*result = gen6_add_request(ring,
+				   dev_priv->ring[RCS].mmio_base + 0x44,
+				   dev_priv->ring[VCS].mmio_base + 0x40);
 	return 0;
 }
 
-int
-intel_ring_sync(struct intel_ring_buffer *ring,
-		struct intel_ring_buffer *to,
+static int
+gen6_bsd_add_request(struct intel_ring_buffer *ring,
+		     u32 *result)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	*result = gen6_add_request(ring,
+				   dev_priv->ring[RCS].mmio_base + 0x40,
+				   dev_priv->ring[BCS].mmio_base + 0x44);
+	return 0;
+}
+
+static int
+gen6_render_add_request(struct intel_ring_buffer *ring,
+		        u32 *result)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	*result = gen6_add_request(ring,
+				   dev_priv->ring[VCS].mmio_base + 0x44,
+				   dev_priv->ring[BCS].mmio_base + 0x40);
+	return 0;
+}
+
+static int
+intel_ring_sync(struct intel_ring_buffer *waiter,
+		struct intel_ring_buffer *signaller,
+		u32 semaphore_register,
 		u32 seqno)
 {
 	int ret;
+	u32 temp = MI_SEMAPHORE_MBOX |
+		   MI_SEMAPHORE_GLOBAL_GTT | /* Not needed */
+		   MI_SEMAPHORE_COMPARE;
 
-	ret = intel_ring_begin(ring, 4);
+	ret = intel_ring_begin(waiter, 4);
 	if (ret)
 		return ret;
 
-	intel_ring_emit(ring,
-			MI_SEMAPHORE_MBOX |
-			MI_SEMAPHORE_REGISTER |
-			intel_ring_sync_index(ring, to) << 17 |
-			MI_SEMAPHORE_COMPARE);
-	intel_ring_emit(ring, seqno);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	temp |= MI_SEMAPHORE_REGISTER;
+
+	intel_ring_emit(waiter, temp | semaphore_register);
+	intel_ring_emit(waiter, seqno);
+	intel_ring_emit(waiter, 0);
+	intel_ring_emit(waiter, MI_NOOP);
+	intel_ring_advance(waiter);
 
 	return 0;
 }
 
+/* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
+int
+render_ring_sync_to(struct intel_ring_buffer *waiter,
+		    struct intel_ring_buffer *signaller,
+		    u32 seqno)
+{
+	WARN_ON(signaller->semaphore_register[RCS] == MI_SEMAPHORE_SYNC_INVALID);
+	return intel_ring_sync(waiter,
+			       signaller,
+			       signaller->semaphore_register[RCS],
+			       seqno);
+}
+
+/* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
+int
+gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
+		      struct intel_ring_buffer *signaller,
+		      u32 seqno)
+{
+	WARN_ON(signaller->semaphore_register[VCS] == MI_SEMAPHORE_SYNC_INVALID);
+	return intel_ring_sync(waiter,
+			       signaller,
+			       signaller->semaphore_register[VCS],
+			       seqno);
+}
+
+/* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
+int
+gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
+		      struct intel_ring_buffer *signaller,
+		      u32 seqno)
+{
+	WARN_ON(signaller->semaphore_register[BCS] == MI_SEMAPHORE_SYNC_INVALID);
+	return intel_ring_sync(waiter,
+			       signaller,
+			       signaller->semaphore_register[BCS],
+			       seqno);
+}
+
+
+
 #define PIPE_CONTROL_FLUSH(ring__, addr__)					\
 do {									\
 	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE |		\
@@ -1027,6 +1103,10 @@  static const struct intel_ring_buffer render_ring = {
 	.irq_put		= render_ring_put_irq,
 	.dispatch_execbuffer	= render_ring_dispatch_execbuffer,
        .cleanup			= render_ring_cleanup,
+	.sync_to		= render_ring_sync_to,
+	.semaphore_register	= {MI_SEMAPHORE_SYNC_INVALID,
+				   MI_SEMAPHORE_SYNC_RV,
+				   MI_SEMAPHORE_SYNC_RB},
 };
 
 /* ring buffer for bit-stream decoder */
@@ -1149,11 +1229,15 @@  static const struct intel_ring_buffer gen6_bsd_ring = {
 	.init			= init_ring_common,
 	.write_tail		= gen6_bsd_ring_write_tail,
 	.flush			= gen6_ring_flush,
-	.add_request		= gen6_add_request,
+	.add_request		= gen6_bsd_add_request,
 	.get_seqno		= ring_get_seqno,
 	.irq_get		= gen6_bsd_ring_get_irq,
 	.irq_put		= gen6_bsd_ring_put_irq,
 	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
+	.sync_to		= gen6_bsd_ring_sync_to,
+	.semaphore_register	= {MI_SEMAPHORE_SYNC_VR,
+				   MI_SEMAPHORE_SYNC_INVALID,
+				   MI_SEMAPHORE_SYNC_VB},
 };
 
 /* Blitter support (SandyBridge+) */
@@ -1279,12 +1363,16 @@  static const struct intel_ring_buffer gen6_blt_ring = {
        .init			= blt_ring_init,
        .write_tail		= ring_write_tail,
        .flush			= blt_ring_flush,
-       .add_request		= gen6_add_request,
+       .add_request		= gen6_blt_add_request,
        .get_seqno		= ring_get_seqno,
        .irq_get			= blt_ring_get_irq,
        .irq_put			= blt_ring_put_irq,
        .dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
        .cleanup			= blt_ring_cleanup,
+       .sync_to			= gen6_blt_ring_sync_to,
+       .semaphore_register	= {MI_SEMAPHORE_SYNC_BR,
+				   MI_SEMAPHORE_SYNC_BV,
+				   MI_SEMAPHORE_SYNC_INVALID},
 };
 
 int intel_init_render_ring_buffer(struct drm_device *dev)
@@ -1294,7 +1382,7 @@  int intel_init_render_ring_buffer(struct drm_device *dev)
 
 	*ring = render_ring;
 	if (INTEL_INFO(dev)->gen >= 6) {
-		ring->add_request = gen6_add_request;
+		ring->add_request = gen6_render_add_request;
 		ring->irq_get = gen6_render_ring_get_irq;
 		ring->irq_put = gen6_render_ring_put_irq;
 	} else if (IS_GEN5(dev)) {
@@ -1317,7 +1405,7 @@  int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
 
 	*ring = render_ring;
 	if (INTEL_INFO(dev)->gen >= 6) {
-		ring->add_request = gen6_add_request;
+		ring->add_request = gen6_render_add_request;
 		ring->irq_get = gen6_render_ring_get_irq;
 		ring->irq_put = gen6_render_ring_put_irq;
 	} else if (IS_GEN5(dev)) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 39ac2b6..98052fd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -75,7 +75,11 @@  struct  intel_ring_buffer {
 	int		(*dispatch_execbuffer)(struct intel_ring_buffer *ring,
 					       u32 offset, u32 length);
 	void		(*cleanup)(struct intel_ring_buffer *ring);
+	int		(*sync_to)(struct intel_ring_buffer *ring,
+				   struct intel_ring_buffer *to,
+				   u32 seqno);
 
+	u32		semaphore_register[3];
 	/**
 	 * List of objects currently involved in rendering from the
 	 * ringbuffer.
@@ -180,9 +184,6 @@  static inline void intel_ring_emit(struct intel_ring_buffer *ring,
 void intel_ring_advance(struct intel_ring_buffer *ring);
 
 u32 intel_ring_get_seqno(struct intel_ring_buffer *ring);
-int intel_ring_sync(struct intel_ring_buffer *ring,
-		    struct intel_ring_buffer *to,
-		    u32 seqno);
 
 int intel_init_render_ring_buffer(struct drm_device *dev);
 int intel_init_bsd_ring_buffer(struct drm_device *dev);