diff mbox

[1/3] drm/i915: Only spin whilst waiting on the current request

Message ID 1447840568-20167-2-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Nov. 18, 2015, 9:56 a.m. UTC
Limit busywaiting only to the request currently being processed by the
GPU. If the request is not currently being processed by the GPU, there
is a very low likelihood of it being completed within the 2 microsecond
spin timeout and so we will just be wasting CPU cycles.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h | 2 +-
 drivers/gpu/drm/i915/i915_gem.c | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

Comments

Daniel Vetter Nov. 18, 2015, 5:03 p.m. UTC | #1
On Wed, Nov 18, 2015 at 09:56:06AM +0000, Chris Wilson wrote:
> Limit busywaiting only to the request currently being processed by the
> GPU. If the request is not currently being processed by the GPU, there
> is a very low likelihood of it being completed within the 2 microsecond
> spin timeout and so we will just be wasting CPU cycles.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> ---
>  drivers/gpu/drm/i915/i915_drv.h | 2 +-
>  drivers/gpu/drm/i915/i915_gem.c | 8 +++++++-
>  2 files changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 8afda459a26e..16095b95d2df 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2190,7 +2190,7 @@ struct drm_i915_gem_request {
>  	struct intel_engine_cs *ring;
>  
>  	/** GEM sequence number associated with this request. */
> -	uint32_t seqno;
> +	uint32_t seqno, spin_seqno;
>  
>  	/** Position in the ringbuffer of the start of the request */
>  	u32 head;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 414150a0b8d5..af9ffa11ef44 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1193,9 +1193,14 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
>  	 * takes to sleep on a request, on the order of a microsecond.
>  	 */
>  
> -	if (i915_gem_request_get_ring(req)->irq_refcount)
> +	if (req->ring->irq_refcount)
>  		return -EBUSY;
>  
> +	/* Only spin if we know the GPU is processing this request */
> +	if (i915_seqno_passed(req->ring->get_seqno(req->ring, false),
> +			      req->spin_seqno))
> +		return -EAGAIN;
> +
>  	timeout = local_clock_us(&cpu) + 2;
>  	while (!need_resched()) {
>  		if (i915_gem_request_completed(req, true))
> @@ -2592,6 +2597,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
>  	request->batch_obj = obj;
>  
>  	request->emitted_jiffies = jiffies;
> +	request->spin_seqno = ring->last_submitted_seqno;
>  	ring->last_submitted_seqno = request->seqno;
>  	list_add_tail(&request->list, &ring->request_list);
>  
> -- 
> 2.6.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Tvrtko Ursulin Nov. 19, 2015, 10:05 a.m. UTC | #2
Hi,

On 18/11/15 09:56, Chris Wilson wrote:
> Limit busywaiting only to the request currently being processed by the
> GPU. If the request is not currently being processed by the GPU, there
> is a very low likelihood of it being completed within the 2 microsecond
> spin timeout and so we will just be wasting CPU cycles.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h | 2 +-
>   drivers/gpu/drm/i915/i915_gem.c | 8 +++++++-
>   2 files changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 8afda459a26e..16095b95d2df 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2190,7 +2190,7 @@ struct drm_i915_gem_request {
>   	struct intel_engine_cs *ring;
>
>   	/** GEM sequence number associated with this request. */
> -	uint32_t seqno;
> +	uint32_t seqno, spin_seqno;

Comment needs splitting out.

And spin_seqno is not the best name; I think previous_ring_seqno would
be better. It would immediately tell you what it is, and at the place
that uses it, it would also be clearer what the criterion for spinning
is.

>   	/** Position in the ringbuffer of the start of the request */
>   	u32 head;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 414150a0b8d5..af9ffa11ef44 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1193,9 +1193,14 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
>   	 * takes to sleep on a request, on the order of a microsecond.
>   	 */
>
> -	if (i915_gem_request_get_ring(req)->irq_refcount)
> +	if (req->ring->irq_refcount)
>   		return -EBUSY;
>
> +	/* Only spin if we know the GPU is processing this request */
> +	if (i915_seqno_passed(req->ring->get_seqno(req->ring, false),
> +			      req->spin_seqno))
> +		return -EAGAIN;
> +
>   	timeout = local_clock_us(&cpu) + 2;
>   	while (!need_resched()) {
>   		if (i915_gem_request_completed(req, true))
> @@ -2592,6 +2597,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
>   	request->batch_obj = obj;
>
>   	request->emitted_jiffies = jiffies;
> +	request->spin_seqno = ring->last_submitted_seqno;
>   	ring->last_submitted_seqno = request->seqno;
>   	list_add_tail(&request->list, &ring->request_list);

Commit message says it will spin only on the request currently being 
processed by the GPU but from here it looks like it will spin for any 
request _queued up_ before the last?

For example if we have submitted 1, 2, 3 and 4 and GPU is currently 
processing 1.

2 has spin_seqno 1.
3 has spin_seqno 2.
4 has spin_seqno 3.

ring->get_seqno is 0.

Wait on 1: seqno_passed(0, 0) = true -> wait
Wait on 2: seqno_passed(0, 1) = false -> spin
Wait on 3: seqno_passed(0, 2) = false -> spin
Wait on 4: seqno_passed(0, 3) = false -> spin

So it looks like the opposite.

Or is it too early for me? :)

Regards,

Tvrtko
Chris Wilson Nov. 19, 2015, 10:12 a.m. UTC | #3
On Thu, Nov 19, 2015 at 10:05:39AM +0000, Tvrtko Ursulin wrote:
> 
> Hi,
> 
> On 18/11/15 09:56, Chris Wilson wrote:
> >Limit busywaiting only to the request currently being processed by the
> >GPU. If the request is not currently being processed by the GPU, there
> >is a very low likelihood of it being completed within the 2 microsecond
> >spin timeout and so we will just be wasting CPU cycles.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> >  drivers/gpu/drm/i915/i915_drv.h | 2 +-
> >  drivers/gpu/drm/i915/i915_gem.c | 8 +++++++-
> >  2 files changed, 8 insertions(+), 2 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index 8afda459a26e..16095b95d2df 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -2190,7 +2190,7 @@ struct drm_i915_gem_request {
> >  	struct intel_engine_cs *ring;
> >
> >  	/** GEM sequence number associated with this request. */
> >-	uint32_t seqno;
> >+	uint32_t seqno, spin_seqno;
> 
> Comment needs splitting out.

Is it not the sequence associated with the request? The start of request
activity, end of active marker?
 
> And spin_seqno is not the best name, I think previous_ring_seqno
> would be better. So it would immediately tell you what it is, and
> then at the place which uses it it would also be clearer what is the
> criteria for spinning.

I agree, calling it spin_seqno was a mistake. I didn't like last_seqno
either. Though now I think,

u32 seqno_active, seqno_complete;

and a

i915_gem_request_active() helper to match i915_gem_request_completed().

> Commit message says it will spin only on the request currently being
> processed by the GPU but from here it looks like it will spin for
> any request _queued up_ before the last?

Nope, my mistake. Thanks,
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8afda459a26e..16095b95d2df 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2190,7 +2190,7 @@  struct drm_i915_gem_request {
 	struct intel_engine_cs *ring;
 
 	/** GEM sequence number associated with this request. */
-	uint32_t seqno;
+	uint32_t seqno, spin_seqno;
 
 	/** Position in the ringbuffer of the start of the request */
 	u32 head;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 414150a0b8d5..af9ffa11ef44 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1193,9 +1193,14 @@  static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
 	 * takes to sleep on a request, on the order of a microsecond.
 	 */
 
-	if (i915_gem_request_get_ring(req)->irq_refcount)
+	if (req->ring->irq_refcount)
 		return -EBUSY;
 
+	/* Only spin if we know the GPU is processing this request */
+	if (i915_seqno_passed(req->ring->get_seqno(req->ring, false),
+			      req->spin_seqno))
+		return -EAGAIN;
+
 	timeout = local_clock_us(&cpu) + 2;
 	while (!need_resched()) {
 		if (i915_gem_request_completed(req, true))
@@ -2592,6 +2597,7 @@  void __i915_add_request(struct drm_i915_gem_request *request,
 	request->batch_obj = obj;
 
 	request->emitted_jiffies = jiffies;
+	request->spin_seqno = ring->last_submitted_seqno;
 	ring->last_submitted_seqno = request->seqno;
 	list_add_tail(&request->list, &ring->request_list);