diff mbox

[v5,23/35] drm/i915: Defer seqno allocation until actual hardware submission time

Message ID 1455805644-6450-24-git-send-email-John.C.Harrison@Intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

John Harrison Feb. 18, 2016, 2:27 p.m. UTC
From: John Harrison <John.C.Harrison@Intel.com>

The seqno value is now only used for the final test for completion of
a request. It is no longer used to track the request through the
software stack. Thus it is no longer necessary to allocate the seqno
immediately with the request. Instead, it can be done lazily and left
until the request is actually sent to the hardware. This is particularly
advantageous with a GPU scheduler as the requests can then be
re-ordered between their creation and their hardware submission
without having out of order seqnos.

v2: i915_add_request() can't fail!

Combine with 'drm/i915: Assign seqno at start of exec_final()'
Various bits of code during the execbuf code path need a seqno value
to be assigned to the request. This change makes this assignment
explicit at the start of submission_final() rather than relying on an
auto-generated seqno to have happened already. This is in preparation
for a future patch which changes seqno values to be assigned lazily
(during add_request).

v3: Updated to use locally cached request pointer.

v4: Changed some white space and comment formatting to keep the style
checker happy.

For: VIZ-1587
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  1 +
 drivers/gpu/drm/i915/i915_gem.c            | 23 ++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 14 ++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c           | 14 ++++++++++++++
 4 files changed, 51 insertions(+), 1 deletion(-)

Comments

Joonas Lahtinen March 7, 2016, 12:16 p.m. UTC | #1
On to, 2016-02-18 at 14:27 +0000, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
> 
> The seqno value is now only used for the final test for completion of
> a request. It is no longer used to track the request through the
> software stack. Thus it is no longer necessary to allocate the seqno
> immediately with the request. Instead, it can be done lazily and left
> until the request is actually sent to the hardware. This is particular
> advantageous with a GPU scheduler as the requests can then be
> re-ordered between their creation and their hardware submission
> without having out of order seqnos.
> 
> v2: i915_add_request() can't fail!
> 
> Combine with 'drm/i915: Assign seqno at start of exec_final()'
> Various bits of code during the execbuf code path need a seqno value
> to be assigned to the request. This change makes this assignment
> explicit at the start of submission_final() rather than relying on an
> auto-generated seqno to have happened already. This is in preparation
> for a future patch which changes seqno values to be assigned lazily
> (during add_request).
> 
> v3: Updated to use locally cached request pointer.
> 
> v4: Changed some white space and comment formatting to keep the style
> checker happy.
> 
> For: VIZ-1587
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  1 +
>  drivers/gpu/drm/i915/i915_gem.c            | 23 ++++++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 14 ++++++++++++++
>  drivers/gpu/drm/i915/intel_lrc.c           | 14 ++++++++++++++
>  4 files changed, 51 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5eeeced..071a27b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2257,6 +2257,7 @@ struct drm_i915_gem_request {
>  	  * has finished processing this request.
>  	  */
>  	u32 seqno;
> +	u32 reserved_seqno;
>  
>  	/* Unique identifier which can be used for trace points & debug */
>  	uint32_t uniq;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a249e52..a2c136d 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2616,6 +2616,11 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
>  
>  	/* reserve 0 for non-seqno */
>  	if (dev_priv->next_seqno == 0) {
> +		/*
> +		 * Why is the full re-initialisation required? Is it only for
> +		 * hardware semaphores? If so, could skip it in the case where
> +		 * semaphores are disabled?
> +		 */
>  		int ret = i915_gem_init_seqno(dev, 0);
>  		if (ret)
>  			return ret;
> @@ -2673,6 +2678,12 @@ void __i915_add_request(struct drm_i915_gem_request *request,
>  		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
>  	}
>  
> +	/* Make the request's seqno 'live': */
> +	if (!request->seqno) {
> +		request->seqno = request->reserved_seqno;
> +		WARN_ON(request->seqno != dev_priv->last_seqno);
> +	}
> +
>  	/* Record the position of the start of the request so that
>  	 * should we detect the updated seqno part-way through the
>  	 * GPU processing the request, we never over-estimate the
> @@ -2930,6 +2941,9 @@ void i915_gem_request_notify(struct intel_engine_cs *ring, bool fence_locked)
>  
>  	list_for_each_entry_safe(req, req_next, &ring->fence_signal_list, signal_link) {
>  		if (!req->cancelled) {
> +			/* How can this happen? */
> +			WARN_ON(req->seqno == 0);
> +
>  			if (!i915_seqno_passed(seqno, req->seqno))
>  				break;
>  		}
> @@ -3079,7 +3093,14 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
>  	if (req == NULL)
>  		return -ENOMEM;
>  
> -	ret = i915_gem_get_seqno(ring->dev, &req->seqno);
> +	/*
> +	 * Assign an identifier to track this request through the hardware
> +	 * but don't make it live yet. It could change in the future if this
> +	 * request gets overtaken. However, it still needs to be allocated
> +	 * in advance because the point of submission must not fail and seqno
> +	 * allocation can fail.
> +	 */
> +	ret = i915_gem_get_seqno(ring->dev, &req->reserved_seqno);
>  	if (ret)
>  		goto err;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index f45f4dc..b9ad0fd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1295,6 +1295,20 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
>  	/* The mutex must be acquired before calling this function */
>  	WARN_ON(!mutex_is_locked(&params->dev->struct_mutex));
>  
> +	/* Make sure the request's seqno is the latest and greatest: */
> +	if (req->reserved_seqno != dev_priv->last_seqno) {
> +		ret = i915_gem_get_seqno(ring->dev, &req->reserved_seqno);
> +		if (ret)
> +			return ret;
> +	}

Would this situation be caused by reordering? Kinda strange to
pregenerate and then re-generate if some request got prioritized over
us. I'd assume that to happen constantly with compositor running.

What exactly stops from generating the seqno always at submission time,
if we're able to re-generate it anyway?

Regards, Joonas

> +	/*
> +	 * And make it live because some of the execbuff submission code
> +	 * requires the seqno to be available up front.
> +	 */
> +	WARN_ON(req->seqno);
> +	req->seqno = req->reserved_seqno;
> +	WARN_ON(req->seqno != dev_priv->last_seqno);
> +
>  	ret = intel_ring_reserve_space(req);
>  	if (ret)
>  		goto error;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index e056875..9c7a79a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -978,6 +978,20 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params)
>  	/* The mutex must be acquired before calling this function */
>  	WARN_ON(!mutex_is_locked(&params->dev->struct_mutex));
>  
> +	/* Make sure the request's seqno is the latest and greatest: */
> +	if (req->reserved_seqno != dev_priv->last_seqno) {
> +		ret = i915_gem_get_seqno(ring->dev, &req->reserved_seqno);
> +		if (ret)
> +			return ret;
> +	}
> +	/*
> +	 * And make it live because some of the execbuff submission code
> +	 * requires the seqno to be available up front.
> +	 */
> +	WARN_ON(req->seqno);
> +	req->seqno = req->reserved_seqno;
> +	WARN_ON(req->seqno != dev_priv->last_seqno);
> +
>  	ret = intel_logical_ring_reserve_space(req);
>  	if (ret)
>  		goto err;
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5eeeced..071a27b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2257,6 +2257,7 @@  struct drm_i915_gem_request {
 	  * has finished processing this request.
 	  */
 	u32 seqno;
+	u32 reserved_seqno;
 
 	/* Unique identifier which can be used for trace points & debug */
 	uint32_t uniq;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a249e52..a2c136d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2616,6 +2616,11 @@  i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 
 	/* reserve 0 for non-seqno */
 	if (dev_priv->next_seqno == 0) {
+		/*
+		 * Why is the full re-initialisation required? Is it only for
+		 * hardware semaphores? If so, could skip it in the case where
+		 * semaphores are disabled?
+		 */
 		int ret = i915_gem_init_seqno(dev, 0);
 		if (ret)
 			return ret;
@@ -2673,6 +2678,12 @@  void __i915_add_request(struct drm_i915_gem_request *request,
 		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
 	}
 
+	/* Make the request's seqno 'live': */
+	if (!request->seqno) {
+		request->seqno = request->reserved_seqno;
+		WARN_ON(request->seqno != dev_priv->last_seqno);
+	}
+
 	/* Record the position of the start of the request so that
 	 * should we detect the updated seqno part-way through the
 	 * GPU processing the request, we never over-estimate the
@@ -2930,6 +2941,9 @@  void i915_gem_request_notify(struct intel_engine_cs *ring, bool fence_locked)
 
 	list_for_each_entry_safe(req, req_next, &ring->fence_signal_list, signal_link) {
 		if (!req->cancelled) {
+			/* How can this happen? */
+			WARN_ON(req->seqno == 0);
+
 			if (!i915_seqno_passed(seqno, req->seqno))
 				break;
 		}
@@ -3079,7 +3093,14 @@  int i915_gem_request_alloc(struct intel_engine_cs *ring,
 	if (req == NULL)
 		return -ENOMEM;
 
-	ret = i915_gem_get_seqno(ring->dev, &req->seqno);
+	/*
+	 * Assign an identifier to track this request through the hardware
+	 * but don't make it live yet. It could change in the future if this
+	 * request gets overtaken. However, it still needs to be allocated
+	 * in advance because the point of submission must not fail and seqno
+	 * allocation can fail.
+	 */
+	ret = i915_gem_get_seqno(ring->dev, &req->reserved_seqno);
 	if (ret)
 		goto err;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index f45f4dc..b9ad0fd 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1295,6 +1295,20 @@  int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
 	/* The mutex must be acquired before calling this function */
 	WARN_ON(!mutex_is_locked(&params->dev->struct_mutex));
 
+	/* Make sure the request's seqno is the latest and greatest: */
+	if (req->reserved_seqno != dev_priv->last_seqno) {
+		ret = i915_gem_get_seqno(ring->dev, &req->reserved_seqno);
+		if (ret)
+			return ret;
+	}
+	/*
+	 * And make it live because some of the execbuff submission code
+	 * requires the seqno to be available up front.
+	 */
+	WARN_ON(req->seqno);
+	req->seqno = req->reserved_seqno;
+	WARN_ON(req->seqno != dev_priv->last_seqno);
+
 	ret = intel_ring_reserve_space(req);
 	if (ret)
 		goto error;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e056875..9c7a79a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -978,6 +978,20 @@  int intel_execlists_submission_final(struct i915_execbuffer_params *params)
 	/* The mutex must be acquired before calling this function */
 	WARN_ON(!mutex_is_locked(&params->dev->struct_mutex));
 
+	/* Make sure the request's seqno is the latest and greatest: */
+	if (req->reserved_seqno != dev_priv->last_seqno) {
+		ret = i915_gem_get_seqno(ring->dev, &req->reserved_seqno);
+		if (ret)
+			return ret;
+	}
+	/*
+	 * And make it live because some of the execbuff submission code
+	 * requires the seqno to be available up front.
+	 */
+	WARN_ON(req->seqno);
+	req->seqno = req->reserved_seqno;
+	WARN_ON(req->seqno != dev_priv->last_seqno);
+
 	ret = intel_logical_ring_reserve_space(req);
 	if (ret)
 		goto err;