diff mbox

[03/32] drm/i915: Only spin whilst waiting on the current request

Message ID 1449833608-22125-4-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Dec. 11, 2015, 11:32 a.m. UTC
Limit busywaiting only to the request currently being processed by the
GPU. If the request is not currently being processed by the GPU, there
is a very low likelihood of it being completed within the 2 microsecond
spin timeout and so we will just be wasting CPU cycles.

v2: Check for logical inversion when rebasing - we were incorrectly
checking for this request being active, and instead busywaiting for
when the GPU was not yet processing the request of interest.

v3: Try another colour for the seqno names.
v4: Another colour for the function names.

v5: Remove the forced coherency when checking for the active request. On
reflection and plenty of recent experimentation, the issue is not a
cache coherency problem - but an irq/seqno ordering problem (timing issue).
Here, we do not need the w/a to force ordering of the read with an
interrupt.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Eero Tamminen <eero.t.tamminen@intel.com>
Cc: "Rantala, Valtteri" <valtteri.rantala@intel.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/i915/i915_drv.h | 27 +++++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem.c |  8 +++++++-
 2 files changed, 26 insertions(+), 9 deletions(-)

Comments

Daniel Vetter Dec. 18, 2015, 4:12 p.m. UTC | #1
On Fri, Dec 11, 2015 at 11:32:59AM +0000, Chris Wilson wrote:
> Limit busywaiting only to the request currently being processed by the
> GPU. If the request is not currently being processed by the GPU, there
> is a very low likelihood of it being completed within the 2 microsecond
> spin timeout and so we will just be wasting CPU cycles.
> 
> v2: Check for logical inversion when rebasing - we were incorrectly
> checking for this request being active, and instead busywaiting for
> when the GPU was not yet processing the request of interest.
> 
> v3: Try another colour for the seqno names.
> v4: Another colour for the function names.
> 
> v5: Remove the forced coherency when checking for the active request. On
> reflection and plenty of recent experimentation, the issue is not a
> cache coherency problem - but an irq/seqno ordering problem (timing issue).
> Here, we do not need the w/a to force ordering of the read with an
> interrupt.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin@intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Eero Tamminen <eero.t.tamminen@intel.com>
> Cc: "Rantala, Valtteri" <valtteri.rantala@intel.com>
> Cc: stable@vger.kernel.org

Merged these 3 patches, thanks.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_drv.h | 27 +++++++++++++++++++--------
>  drivers/gpu/drm/i915/i915_gem.c |  8 +++++++-
>  2 files changed, 26 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5edd39352e97..8c4303b664d9 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2182,8 +2182,17 @@ struct drm_i915_gem_request {
>  	struct drm_i915_private *i915;
>  	struct intel_engine_cs *ring;
>  
> -	/** GEM sequence number associated with this request. */
> -	uint32_t seqno;
> +	 /** GEM sequence number associated with the previous request,
> +	  * when the HWS breadcrumb is equal to this the GPU is processing
> +	  * this request.
> +	  */
> +	u32 previous_seqno;
> +
> +	 /** GEM sequence number associated with this request,
> +	  * when the HWS breadcrumb is equal or greater than this the GPU
> +	  * has finished processing this request.
> +	  */
> +	u32 seqno;
>  
>  	/** Position in the ringbuffer of the start of the request */
>  	u32 head;
> @@ -2958,15 +2967,17 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
>  	return (int32_t)(seq1 - seq2) >= 0;
>  }
>  
> +static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
> +					   bool lazy_coherency)
> +{
> +	u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
> +	return i915_seqno_passed(seqno, req->previous_seqno);
> +}
> +
>  static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
>  					      bool lazy_coherency)
>  {
> -	u32 seqno;
> -
> -	BUG_ON(req == NULL);
> -
> -	seqno = req->ring->get_seqno(req->ring, lazy_coherency);
> -
> +	u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
>  	return i915_seqno_passed(seqno, req->seqno);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 46a84c447d8f..29d98ddbbc80 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1193,9 +1193,13 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
>  	 * takes to sleep on a request, on the order of a microsecond.
>  	 */
>  
> -	if (i915_gem_request_get_ring(req)->irq_refcount)
> +	if (req->ring->irq_refcount)
>  		return -EBUSY;
>  
> +	/* Only spin if we know the GPU is processing this request */
> +	if (!i915_gem_request_started(req, true))
> +		return -EAGAIN;
> +
>  	timeout = local_clock_us(&cpu) + 5;
>  	while (!need_resched()) {
>  		if (i915_gem_request_completed(req, true))
> @@ -1209,6 +1213,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
>  
>  		cpu_relax_lowlatency();
>  	}
> +
>  	if (i915_gem_request_completed(req, false))
>  		return 0;
>  
> @@ -2600,6 +2605,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
>  	request->batch_obj = obj;
>  
>  	request->emitted_jiffies = jiffies;
> +	request->previous_seqno = ring->last_submitted_seqno;
>  	ring->last_submitted_seqno = request->seqno;
>  	list_add_tail(&request->list, &ring->request_list);
>  
> -- 
> 2.6.3
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5edd39352e97..8c4303b664d9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2182,8 +2182,17 @@  struct drm_i915_gem_request {
 	struct drm_i915_private *i915;
 	struct intel_engine_cs *ring;
 
-	/** GEM sequence number associated with this request. */
-	uint32_t seqno;
+	 /** GEM sequence number associated with the previous request,
+	  * when the HWS breadcrumb is equal to this the GPU is processing
+	  * this request.
+	  */
+	u32 previous_seqno;
+
+	 /** GEM sequence number associated with this request,
+	  * when the HWS breadcrumb is equal or greater than this the GPU
+	  * has finished processing this request.
+	  */
+	u32 seqno;
 
 	/** Position in the ringbuffer of the start of the request */
 	u32 head;
@@ -2958,15 +2967,17 @@  i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 	return (int32_t)(seq1 - seq2) >= 0;
 }
 
+static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
+					   bool lazy_coherency)
+{
+	u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
+	return i915_seqno_passed(seqno, req->previous_seqno);
+}
+
 static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
 					      bool lazy_coherency)
 {
-	u32 seqno;
-
-	BUG_ON(req == NULL);
-
-	seqno = req->ring->get_seqno(req->ring, lazy_coherency);
-
+	u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
 	return i915_seqno_passed(seqno, req->seqno);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 46a84c447d8f..29d98ddbbc80 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1193,9 +1193,13 @@  static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
 	 * takes to sleep on a request, on the order of a microsecond.
 	 */
 
-	if (i915_gem_request_get_ring(req)->irq_refcount)
+	if (req->ring->irq_refcount)
 		return -EBUSY;
 
+	/* Only spin if we know the GPU is processing this request */
+	if (!i915_gem_request_started(req, true))
+		return -EAGAIN;
+
 	timeout = local_clock_us(&cpu) + 5;
 	while (!need_resched()) {
 		if (i915_gem_request_completed(req, true))
@@ -1209,6 +1213,7 @@  static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
 
 		cpu_relax_lowlatency();
 	}
+
 	if (i915_gem_request_completed(req, false))
 		return 0;
 
@@ -2600,6 +2605,7 @@  void __i915_add_request(struct drm_i915_gem_request *request,
 	request->batch_obj = obj;
 
 	request->emitted_jiffies = jiffies;
+	request->previous_seqno = ring->last_submitted_seqno;
 	ring->last_submitted_seqno = request->seqno;
 	list_add_tail(&request->list, &ring->request_list);