diff mbox

drm/i915: Remove the impedance mismatch around intel_engine_enable_signaling

Message ID 20180308140732.25090-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson March 8, 2018, 2:07 p.m. UTC
There is some redundancy between dma_fence->ops->enable_signaling (via
i915_fence_enable_signaling) and our backend,
intel_engine_enable_signaling() in that both levels recheck the fence
status multiple times. If we convert intel_engine_enable_signaling() to
return the information desired by dma_fence->ops->enable_signaling, we
can reduce i915_fence_enable_signaling to a simple stub and avoid
trying to reinterpret the same information.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/i915_request.c      |  6 +-----
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 21 +++++++++++++--------
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  2 +-
 3 files changed, 15 insertions(+), 14 deletions(-)

Comments

Tvrtko Ursulin March 9, 2018, 5:24 p.m. UTC | #1
On 08/03/2018 14:07, Chris Wilson wrote:
> There is some redundancy between dma_fence->ops->enable_signaling (via
> i915_fence_enable_signaling) and our backend,
> intel_engine_enable_signaling() in that both levels recheck the fence
> status multiple times. If we convert intel_engine_enable_signaling() to
> return the information desired by dma_fence->ops->enable_signaling, we
> can reduce i915_fence_enable_signaling to a simple stub and avoid
> trying to reinterpret the same information.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Cc: Michal Winiarski <michal.winiarski@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_request.c      |  6 +-----
>   drivers/gpu/drm/i915/intel_breadcrumbs.c | 21 +++++++++++++--------
>   drivers/gpu/drm/i915/intel_ringbuffer.h  |  2 +-
>   3 files changed, 15 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index d437beac3969..2a841800d4cf 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -59,11 +59,7 @@ static bool i915_fence_signaled(struct dma_fence *fence)
>   
>   static bool i915_fence_enable_signaling(struct dma_fence *fence)
>   {
> -	if (i915_fence_signaled(fence))
> -		return false;

This was based on hws seqno check...

> -
> -	intel_engine_enable_signaling(to_request(fence), true);
> -	return !i915_fence_signaled(fence);
> +	return intel_engine_enable_signaling(to_request(fence), true);
>   }
>   
>   static signed long i915_fence_wait(struct dma_fence *fence,
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 1f79e7a47433..671a6d61e29d 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -730,10 +730,11 @@ static void insert_signal(struct intel_breadcrumbs *b,
>   	list_add(&request->signaling.link, &iter->signaling.link);
>   }
>   
> -void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
> +bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
>   {
>   	struct intel_engine_cs *engine = request->engine;
>   	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +	struct intel_wait *wait = &request->signaling.wait;
>   	u32 seqno;
>   
>   	/*
> @@ -750,12 +751,12 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
>   
>   	seqno = i915_request_global_seqno(request);
>   	if (!seqno) /* will be enabled later upon execution */
> -		return;
> +		return true;
>   
> -	GEM_BUG_ON(request->signaling.wait.seqno);
> -	request->signaling.wait.tsk = b->signaler;
> -	request->signaling.wait.request = request;
> -	request->signaling.wait.seqno = seqno;
> +	GEM_BUG_ON(wait->seqno);
> +	wait->tsk = b->signaler;
> +	wait->request = request;
> +	wait->seqno = seqno;
>   
>   	/*
>   	 * Add ourselves into the list of waiters, but registering our
> @@ -768,11 +769,15 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
>   	 */
>   	spin_lock(&b->rb_lock);
>   	insert_signal(b, request, seqno);
> -	wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
> +	wakeup &= __intel_engine_add_wait(engine, wait);
>   	spin_unlock(&b->rb_lock);
>   
> -	if (wakeup)
> +	if (wakeup) {
>   		wake_up_process(b->signaler);
> +		return !intel_wait_complete(wait);

... and would now be based on breadcrumb waiter waking up and removing 
itself from the tree.

So some potential latency where it wasn't before, well, enable-disable 
signalling cycles actually.

If I got it right.. breadcrumbs code confuses me massively these days.

Regards,

Tvrtko

> +	}
> +
> +	return true;
>   }
>   
>   void intel_engine_cancel_signaling(struct i915_request *request)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 0320c2c4cfba..aa34b2ff04bc 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -939,7 +939,7 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
>   			   struct intel_wait *wait);
>   void intel_engine_remove_wait(struct intel_engine_cs *engine,
>   			      struct intel_wait *wait);
> -void intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
> +bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
>   void intel_engine_cancel_signaling(struct i915_request *request);
>   
>   static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
>
Chris Wilson March 9, 2018, 5:33 p.m. UTC | #2
Quoting Tvrtko Ursulin (2018-03-09 17:24:33)
> 
> On 08/03/2018 14:07, Chris Wilson wrote:
> > There is some redundancy between dma_fence->ops->enable_signaling (via
> > i915_fence_enable_signaling) and our backend,
> > intel_engine_enable_signaling() in that both levels recheck the fence
> > status multiple times. If we convert intel_engine_enable_signaling() to
> > return the information desired by dma_fence->ops->enable_signaling, we
> > can reduce i915_fence_enable_signaling to a simple stub and avoid
> > trying to reinterpret the same information.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> > Cc: Michal Winiarski <michal.winiarski@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_request.c      |  6 +-----
> >   drivers/gpu/drm/i915/intel_breadcrumbs.c | 21 +++++++++++++--------
> >   drivers/gpu/drm/i915/intel_ringbuffer.h  |  2 +-
> >   3 files changed, 15 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index d437beac3969..2a841800d4cf 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -59,11 +59,7 @@ static bool i915_fence_signaled(struct dma_fence *fence)
> >   
> >   static bool i915_fence_enable_signaling(struct dma_fence *fence)
> >   {
> > -     if (i915_fence_signaled(fence))
> > -             return false;
> 
> This was based on hws seqno check...
> 
> > -
> > -     intel_engine_enable_signaling(to_request(fence), true);
> > -     return !i915_fence_signaled(fence);
> > +     return intel_engine_enable_signaling(to_request(fence), true);
> >   }

> > @@ -768,11 +769,15 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
> >        */
> >       spin_lock(&b->rb_lock);
> >       insert_signal(b, request, seqno);
> > -     wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
> > +     wakeup &= __intel_engine_add_wait(engine, wait);
> >       spin_unlock(&b->rb_lock);
> >   
> > -     if (wakeup)
> > +     if (wakeup) {
> >               wake_up_process(b->signaler);
> > +             return !intel_wait_complete(wait);
> 
> ... and would now be based on breadcrumb waiter waking up and removing 
> itself from the tree.

And don't forget the same HWS check before the waiter is inserted. So we
have the same guards as before, just inside yet another spinlock.

> So some potential latency where it wasn't before, well, enable-disable 
> signalling cycles actually.

The extra steps would be the insert_signal(). Fwiw, we could reorder the
insert_signal() but frankly this, dma_fence enable signaling of an
inflight request, is not a fast path. More commonly we will be enabling
signaling on the request as it is submitted (where wakeup is false and
we know that the HWS cannot be complete).
-Chris
Tvrtko Ursulin March 12, 2018, 11:33 a.m. UTC | #3
On 09/03/2018 17:33, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-03-09 17:24:33)
>>
>> On 08/03/2018 14:07, Chris Wilson wrote:
>>> There is some redundancy between dma_fence->ops->enable_signaling (via
>>> i915_fence_enable_signaling) and our backend,
>>> intel_engine_enable_signaling() in that both levels recheck the fence
>>> status multiple times. If we convert intel_engine_enable_signaling() to
>>> return the information desired by dma_fence->ops->enable_signaling, we
>>> can reduce i915_fence_enable_signaling to a simple stub and avoid
>>> trying to reinterpret the same information.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>>> Cc: Michal Winiarski <michal.winiarski@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_request.c      |  6 +-----
>>>    drivers/gpu/drm/i915/intel_breadcrumbs.c | 21 +++++++++++++--------
>>>    drivers/gpu/drm/i915/intel_ringbuffer.h  |  2 +-
>>>    3 files changed, 15 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>>> index d437beac3969..2a841800d4cf 100644
>>> --- a/drivers/gpu/drm/i915/i915_request.c
>>> +++ b/drivers/gpu/drm/i915/i915_request.c
>>> @@ -59,11 +59,7 @@ static bool i915_fence_signaled(struct dma_fence *fence)
>>>    
>>>    static bool i915_fence_enable_signaling(struct dma_fence *fence)
>>>    {
>>> -     if (i915_fence_signaled(fence))
>>> -             return false;
>>
>> This was based on hws seqno check...
>>
>>> -
>>> -     intel_engine_enable_signaling(to_request(fence), true);
>>> -     return !i915_fence_signaled(fence);
>>> +     return intel_engine_enable_signaling(to_request(fence), true);
>>>    }
> 
>>> @@ -768,11 +769,15 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
>>>         */
>>>        spin_lock(&b->rb_lock);
>>>        insert_signal(b, request, seqno);
>>> -     wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
>>> +     wakeup &= __intel_engine_add_wait(engine, wait);
>>>        spin_unlock(&b->rb_lock);
>>>    
>>> -     if (wakeup)
>>> +     if (wakeup) {
>>>                wake_up_process(b->signaler);
>>> +             return !intel_wait_complete(wait);
>>
>> ... and would now be based on breadcrumb waiter waking up and removing
>> itself from the tree.
> 
> And don't forget the same HWS check before the waiter is inserted. So we
> have the same guards as before, just inside yet another spinlock.

True, did not think of this. So it may proceed a bit deeper in the call 
chain but there is no actual behaviour change as I described it.

>> So some potential latency where it wasn't before, well, enable-disable
>> signalling cycles actually.
> 
> The extra steps would be the insert_signal(). Fwiw, we could reorder the
> insert_signal() but frankly this, dma_fence enable signaling of an
> inflight request, is not a fast path. More commonly we will be enabling
> signaling on the request as it is submitted (where wakeup is false and
> we know that the HWS cannot be complete).

Yes, no complaints after realizing the above.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d437beac3969..2a841800d4cf 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -59,11 +59,7 @@  static bool i915_fence_signaled(struct dma_fence *fence)
 
 static bool i915_fence_enable_signaling(struct dma_fence *fence)
 {
-	if (i915_fence_signaled(fence))
-		return false;
-
-	intel_engine_enable_signaling(to_request(fence), true);
-	return !i915_fence_signaled(fence);
+	return intel_engine_enable_signaling(to_request(fence), true);
 }
 
 static signed long i915_fence_wait(struct dma_fence *fence,
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 1f79e7a47433..671a6d61e29d 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -730,10 +730,11 @@  static void insert_signal(struct intel_breadcrumbs *b,
 	list_add(&request->signaling.link, &iter->signaling.link);
 }
 
-void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
+bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
 {
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	struct intel_wait *wait = &request->signaling.wait;
 	u32 seqno;
 
 	/*
@@ -750,12 +751,12 @@  void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
 
 	seqno = i915_request_global_seqno(request);
 	if (!seqno) /* will be enabled later upon execution */
-		return;
+		return true;
 
-	GEM_BUG_ON(request->signaling.wait.seqno);
-	request->signaling.wait.tsk = b->signaler;
-	request->signaling.wait.request = request;
-	request->signaling.wait.seqno = seqno;
+	GEM_BUG_ON(wait->seqno);
+	wait->tsk = b->signaler;
+	wait->request = request;
+	wait->seqno = seqno;
 
 	/*
 	 * Add ourselves into the list of waiters, but registering our
@@ -768,11 +769,15 @@  void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
 	 */
 	spin_lock(&b->rb_lock);
 	insert_signal(b, request, seqno);
-	wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
+	wakeup &= __intel_engine_add_wait(engine, wait);
 	spin_unlock(&b->rb_lock);
 
-	if (wakeup)
+	if (wakeup) {
 		wake_up_process(b->signaler);
+		return !intel_wait_complete(wait);
+	}
+
+	return true;
 }
 
 void intel_engine_cancel_signaling(struct i915_request *request)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0320c2c4cfba..aa34b2ff04bc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -939,7 +939,7 @@  bool intel_engine_add_wait(struct intel_engine_cs *engine,
 			   struct intel_wait *wait);
 void intel_engine_remove_wait(struct intel_engine_cs *engine,
 			      struct intel_wait *wait);
-void intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
+bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
 void intel_engine_cancel_signaling(struct i915_request *request);
 
 static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)