[v6,14/25] drm/i915: Manually unwind after a failed request allocation

Message ID 1461701180-895-15-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson April 26, 2016, 8:06 p.m. UTC
In the next patches, we want to move the work out of freeing the request
and into its retirement (so that we can free the request without
requiring the struct_mutex). This means that we cannot rely on
unreferencing the request to completely tear down the request any more
and so we need to manually unwind the failed allocation. In doing so, we
reorder the allocation in order to make the unwind simple (and ensure
that we don't try to unwind a partial request that may have modified
global state) and so we end up pushing the initial preallocation down
into the engine request initialisation functions where we have the
requisite control over the state of the request.

Moving the initial preallocation into the engine is less than ideal: it
moves logic to handle a specific problem with request handling out of
the common code. On the other hand, it does allow those backends
significantly more flexibility in performing their allocations.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c         | 28 +++++++++-------------------
 drivers/gpu/drm/i915/intel_lrc.c        | 16 ++++++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.c |  2 +-
 3 files changed, 24 insertions(+), 22 deletions(-)
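
For reference, a condensed sketch of __i915_gem_request_alloc() as it reads
with this patch applied, reconstructed from the diff below (dev_priv and the
early setup steps are abridged; only the unwind ordering is the point):

static int __i915_gem_request_alloc(struct intel_engine_cs *engine,
				    struct intel_context *ctx,
				    struct drm_i915_gem_request **req_out)
{
	struct drm_i915_gem_request *req;
	int ret;

	req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	/* ... seqno assignment etc. elided; failures there take "goto err" ... */

	req->ctx = ctx;
	i915_gem_context_reference(req->ctx);

	/*
	 * Reserve space in the ring buffer for all the commands required
	 * to eventually emit this request.
	 */
	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;

	/*
	 * The fallible engine-specific step now runs last, so a failure
	 * here has not modified global state and only needs to unwind
	 * the context reference and the request allocation itself.
	 */
	if (i915.enable_execlists)
		ret = intel_logical_ring_alloc_request_extras(req);
	else
		ret = intel_ring_alloc_request_extras(req);
	if (ret)
		goto err_ctx;

	*req_out = req;
	return 0;

err_ctx:
	i915_gem_context_unreference(ctx);
err:
	kmem_cache_free(dev_priv->requests, req);
	return ret;
}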

Comments

Joonas Lahtinen April 27, 2016, 10:48 a.m. UTC | #1
On Tue, 2016-04-26 at 21:06 +0100, Chris Wilson wrote:
> In the next patches, we want to move the work out of freeing the request
> and into its retirement (so that we can free the request without
> requiring the struct_mutex). This means that we cannot rely on
> unreferencing the request to completely tear down the request any more
> and so we need to manually unwind the failed allocation. In doing so, we
> reorder the allocation in order to make the unwind simple (and ensure
> that we don't try to unwind a partial request that may have modified
> global state) and so we end up pushing the initial preallocation down
> into the engine request initialisation functions where we have the
> requisite control over the state of the request.
> 
> Moving the initial preallocation into the engine is less than ideal: it
> moves logic to handle a specific problem with request handling out of
> the common code. On the other hand, it does allow those backends
> significantly more flexibility in performing their allocations.
> 

Adding John as CC,

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c         | 28 +++++++++-------------------
>  drivers/gpu/drm/i915/intel_lrc.c        | 16 ++++++++++++++--
>  drivers/gpu/drm/i915/intel_ringbuffer.c |  2 +-
>  3 files changed, 24 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0e27484bd28a..d7ff5e79182f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2766,15 +2766,6 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
>  	req->ctx  = ctx;
>  	i915_gem_context_reference(req->ctx);
>  
> -	if (i915.enable_execlists)
> -		ret = intel_logical_ring_alloc_request_extras(req);
> -	else
> -		ret = intel_ring_alloc_request_extras(req);
> -	if (ret) {
> -		i915_gem_context_unreference(req->ctx);
> -		goto err;
> -	}
> -
>  	/*
>  	 * Reserve space in the ring buffer for all the commands required to
>  	 * eventually emit this request. This is to guarantee that the
> @@ -2783,20 +2774,19 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
>  	 * away, e.g. because a GPU scheduler has deferred it.
>  	 */
>  	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
> -	ret = intel_ring_begin(req, 0);
> -	if (ret) {
> -		/*
> -		 * At this point, the request is fully allocated even if not
> -		 * fully prepared. Thus it can be cleaned up using the proper
> -		 * free code.
> -		 */
> -		i915_gem_request_unreference(req);
> -		return ret;
> -	}
> +
> +	if (i915.enable_execlists)
> +		ret = intel_logical_ring_alloc_request_extras(req);
> +	else
> +		ret = intel_ring_alloc_request_extras(req);
> +	if (ret)
> +		goto err_ctx;
>  
>  	*req_out = req;
>  	return 0;
>  
> +err_ctx:
> +	i915_gem_context_unreference(ctx);
>  err:
>  	kmem_cache_free(dev_priv->requests, req);
>  	return ret;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 910044cf143e..01517dd7069b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -698,7 +698,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
>  
>  int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
>  {
> -	int ret = 0;
> +	int ret;
>  
>  	request->ringbuf = request->ctx->engine[request->engine->id].ringbuf;
>  
> @@ -715,9 +715,21 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
>  			return ret;
>  	}
>  
> -	if (request->ctx != request->i915->kernel_context)
> +	if (request->ctx != request->i915->kernel_context) {
>  		ret = intel_lr_context_pin(request->ctx, request->engine);
> +		if (ret)
> +			return ret;
> +	}
>  
> +	ret = intel_ring_begin(request, 0);
> +	if (ret)
> +		goto err_unpin;
> +
> +	return 0;
> +
> +err_unpin:
> +	if (request->ctx != request->i915->kernel_context)
> +		intel_lr_context_unpin(request->ctx, request->engine);
>  	return ret;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index ba5946b9fa06..1193372f74fd 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2333,7 +2333,7 @@ int intel_engine_idle(struct intel_engine_cs *engine)
>  int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
>  {
>  	request->ringbuf = request->engine->buffer;
> -	return 0;
> +	return intel_ring_begin(request, 0);
>  }

The names of these two _extras functions with the added functionality
do not make sense.

Regards, Joonas

>  
>  static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
Chris Wilson April 27, 2016, 10:59 a.m. UTC | #2
On Wed, Apr 27, 2016 at 01:48:01PM +0300, Joonas Lahtinen wrote:
> The names of these two _extras functions with the added functionality
> do not make sense.

They never did. The original intent here was to call engine->pin_request
that would acquire the context and prepare the ringbuf for writing into it.

Eventually they will do so again.
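
Roughly something like this — the pin_request hook itself is hypothetical
(nothing upstream yet), and the body just mirrors
intel_logical_ring_alloc_request_extras() from this patch: pin the context,
prepare the ring for writing, unwind on failure:

static int execlists_pin_request(struct drm_i915_gem_request *req)
{
	int ret;

	/* The kernel context is pinned for the device's lifetime. */
	if (req->ctx != req->i915->kernel_context) {
		ret = intel_lr_context_pin(req->ctx, req->engine);
		if (ret)
			return ret;
	}

	/* Prepare the ringbuf for writing into it. */
	ret = intel_ring_begin(req, 0);
	if (ret)
		goto err_unpin;

	return 0;

err_unpin:
	if (req->ctx != req->i915->kernel_context)
		intel_lr_context_unpin(req->ctx, req->engine);
	return ret;
}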
-Chris
Tvrtko Ursulin April 27, 2016, 12:51 p.m. UTC | #3
On 26/04/16 21:06, Chris Wilson wrote:
> In the next patches, we want to move the work out of freeing the request
> and into its retirement (so that we can free the request without
> requiring the struct_mutex). This means that we cannot rely on
> unreferencing the request to completely tear down the request any more
> and so we need to manually unwind the failed allocation. In doing so, we
> reorder the allocation in order to make the unwind simple (and ensure
> that we don't try to unwind a partial request that may have modified
> global state) and so we end up pushing the initial preallocation down
> into the engine request initialisation functions where we have the
> requisite control over the state of the request.
>
> Moving the initial preallocation into the engine is less than ideal: it
> moves logic to handle a specific problem with request handling out of
> the common code. On the other hand, it does allow those backends
> significantly more flexibility in performing their allocations.

Could we add a _free_request_extras which would only be allowed to be called
from _alloc_request? That would avoid polluting the engine with common
code, I think.
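
Something like this, perhaps — the helper is hypothetical and would just
mirror the unwind half of the execlists extras function:

/* Hypothetical unwind counterpart to intel_logical_ring_alloc_request_extras(),
 * only ever called from the failure path of __i915_gem_request_alloc(), so
 * the unwind stays paired with the allocation in common code. */
static void intel_logical_ring_free_request_extras(struct drm_i915_gem_request *req)
{
	if (req->ctx != req->i915->kernel_context)
		intel_lr_context_unpin(req->ctx, req->engine);
}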

Regards,

Tvrtko

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c         | 28 +++++++++-------------------
>   drivers/gpu/drm/i915/intel_lrc.c        | 16 ++++++++++++++--
>   drivers/gpu/drm/i915/intel_ringbuffer.c |  2 +-
>   3 files changed, 24 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0e27484bd28a..d7ff5e79182f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2766,15 +2766,6 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
>   	req->ctx  = ctx;
>   	i915_gem_context_reference(req->ctx);
>
> -	if (i915.enable_execlists)
> -		ret = intel_logical_ring_alloc_request_extras(req);
> -	else
> -		ret = intel_ring_alloc_request_extras(req);
> -	if (ret) {
> -		i915_gem_context_unreference(req->ctx);
> -		goto err;
> -	}
> -
>   	/*
>   	 * Reserve space in the ring buffer for all the commands required to
>   	 * eventually emit this request. This is to guarantee that the
> @@ -2783,20 +2774,19 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
>   	 * away, e.g. because a GPU scheduler has deferred it.
>   	 */
>   	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
> -	ret = intel_ring_begin(req, 0);
> -	if (ret) {
> -		/*
> -		 * At this point, the request is fully allocated even if not
> -		 * fully prepared. Thus it can be cleaned up using the proper
> -		 * free code.
> -		 */
> -		i915_gem_request_unreference(req);
> -		return ret;
> -	}
> +
> +	if (i915.enable_execlists)
> +		ret = intel_logical_ring_alloc_request_extras(req);
> +	else
> +		ret = intel_ring_alloc_request_extras(req);
> +	if (ret)
> +		goto err_ctx;
>
>   	*req_out = req;
>   	return 0;
>
> +err_ctx:
> +	i915_gem_context_unreference(ctx);
>   err:
>   	kmem_cache_free(dev_priv->requests, req);
>   	return ret;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 910044cf143e..01517dd7069b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -698,7 +698,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
>
>   int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
>   {
> -	int ret = 0;
> +	int ret;
>
>   	request->ringbuf = request->ctx->engine[request->engine->id].ringbuf;
>
> @@ -715,9 +715,21 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
>   			return ret;
>   	}
>
> -	if (request->ctx != request->i915->kernel_context)
> +	if (request->ctx != request->i915->kernel_context) {
>   		ret = intel_lr_context_pin(request->ctx, request->engine);
> +		if (ret)
> +			return ret;
> +	}
>
> +	ret = intel_ring_begin(request, 0);
> +	if (ret)
> +		goto err_unpin;
> +
> +	return 0;
> +
> +err_unpin:
> +	if (request->ctx != request->i915->kernel_context)
> +		intel_lr_context_unpin(request->ctx, request->engine);
>   	return ret;
>   }
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index ba5946b9fa06..1193372f74fd 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2333,7 +2333,7 @@ int intel_engine_idle(struct intel_engine_cs *engine)
>   int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
>   {
>   	request->ringbuf = request->engine->buffer;
> -	return 0;
> +	return intel_ring_begin(request, 0);
>   }
>
>   static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
>
Chris Wilson April 27, 2016, 12:58 p.m. UTC | #4
On Wed, Apr 27, 2016 at 01:51:38PM +0100, Tvrtko Ursulin wrote:
> 
> On 26/04/16 21:06, Chris Wilson wrote:
> >In the next patches, we want to move the work out of freeing the request
> >and into its retirement (so that we can free the request without
> >requiring the struct_mutex). This means that we cannot rely on
> >unreferencing the request to completely tear down the request any more
> >and so we need to manually unwind the failed allocation. In doing so, we
> >reorder the allocation in order to make the unwind simple (and ensure
> >that we don't try to unwind a partial request that may have modified
> >global state) and so we end up pushing the initial preallocation down
> >into the engine request initialisation functions where we have the
> >requisite control over the state of the request.
> >
> >Moving the initial preallocation into the engine is less than ideal: it
> >moves logic to handle a specific problem with request handling out of
> >the common code. On the other hand, it does allow those backends
> >significantly more flexibility in performing their allocations.
> 
> Could we add a _free_request_extras which would only be allowed to
> be called from _alloc_request? That would avoid polluting the
> engine with common code, I think.

If you look at where I think it should be placed inside lrc, then we
need multiple phases. Not that it's much of a big deal:

request_alloc:

	engine->pin_request()

	/* prep */

	engine->init_context()

are more or less what we need; it will take a bit of organisation to
align legacy / execlists, but it can be done.
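
Sketched out, with hypothetical pin_request/unpin_request hooks alongside
the existing engine->init_context vfunc:

/*
 * Sketch of the phased request_alloc flow outlined above; the
 * pin_request/unpin_request hooks do not exist yet.
 */
static int request_alloc_phased(struct drm_i915_gem_request *req)
{
	int ret;

	/* Phase 1: backend acquires the context image and ring. */
	ret = req->engine->pin_request(req);
	if (ret)
		return ret;

	/* Phase 2: common prep, e.g. reserving ring space. */
	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;

	/* Phase 3: one-shot per-context initialisation. */
	ret = req->engine->init_context(req);
	if (ret)
		goto err_unpin;

	return 0;

err_unpin:
	req->engine->unpin_request(req);
	return ret;
}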
-Chris
Tvrtko Ursulin April 27, 2016, 1:03 p.m. UTC | #5
On 27/04/16 13:58, Chris Wilson wrote:
> On Wed, Apr 27, 2016 at 01:51:38PM +0100, Tvrtko Ursulin wrote:
>>
>> On 26/04/16 21:06, Chris Wilson wrote:
>>> In the next patches, we want to move the work out of freeing the request
>>> and into its retirement (so that we can free the request without
>>> requiring the struct_mutex). This means that we cannot rely on
>>> unreferencing the request to completely tear down the request any more
>>> and so we need to manually unwind the failed allocation. In doing so, we
>>> reorder the allocation in order to make the unwind simple (and ensure
>>> that we don't try to unwind a partial request that may have modified
>>> global state) and so we end up pushing the initial preallocation down
>>> into the engine request initialisation functions where we have the
>>> requisite control over the state of the request.
>>>
>>> Moving the initial preallocation into the engine is less than ideal: it
>>> moves logic to handle a specific problem with request handling out of
>>> the common code. On the other hand, it does allow those backends
>>> significantly more flexibility in performing their allocations.
>>
>> Could we add a _free_request_extras which would only be allowed to
>> be called from _alloc_request? That would avoid polluting the
>> engine with common code, I think.
>
> If you look at where I think it should be placed inside lrc, then we
> need multiple phases. Not that it's much of a big deal:
>
> request_alloc:
>
> 	engine->pin_request()
>
> 	/* prep */
>
> 	engine->init_context()
>
> are more or less what we need; it will take a bit of organisation to
> align legacy / execlists, but it can be done.

Forgot to say, the patch looks correct to me as it is, so r-b from that
point of view anyway. In my mind, solving the big performance sloppiness
this series fixes is much more important than one (more) slight,
temporary design inelegance.

Regards,

Tvrtko

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0e27484bd28a..d7ff5e79182f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2766,15 +2766,6 @@  __i915_gem_request_alloc(struct intel_engine_cs *engine,
 	req->ctx  = ctx;
 	i915_gem_context_reference(req->ctx);
 
-	if (i915.enable_execlists)
-		ret = intel_logical_ring_alloc_request_extras(req);
-	else
-		ret = intel_ring_alloc_request_extras(req);
-	if (ret) {
-		i915_gem_context_unreference(req->ctx);
-		goto err;
-	}
-
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
 	 * eventually emit this request. This is to guarantee that the
@@ -2783,20 +2774,19 @@  __i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * away, e.g. because a GPU scheduler has deferred it.
 	 */
 	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
-	ret = intel_ring_begin(req, 0);
-	if (ret) {
-		/*
-		 * At this point, the request is fully allocated even if not
-		 * fully prepared. Thus it can be cleaned up using the proper
-		 * free code.
-		 */
-		i915_gem_request_unreference(req);
-		return ret;
-	}
+
+	if (i915.enable_execlists)
+		ret = intel_logical_ring_alloc_request_extras(req);
+	else
+		ret = intel_ring_alloc_request_extras(req);
+	if (ret)
+		goto err_ctx;
 
 	*req_out = req;
 	return 0;
 
+err_ctx:
+	i915_gem_context_unreference(ctx);
 err:
 	kmem_cache_free(dev_priv->requests, req);
 	return ret;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 910044cf143e..01517dd7069b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -698,7 +698,7 @@  static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
 
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
 {
-	int ret = 0;
+	int ret;
 
 	request->ringbuf = request->ctx->engine[request->engine->id].ringbuf;
 
@@ -715,9 +715,21 @@  int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
 			return ret;
 	}
 
-	if (request->ctx != request->i915->kernel_context)
+	if (request->ctx != request->i915->kernel_context) {
 		ret = intel_lr_context_pin(request->ctx, request->engine);
+		if (ret)
+			return ret;
+	}
 
+	ret = intel_ring_begin(request, 0);
+	if (ret)
+		goto err_unpin;
+
+	return 0;
+
+err_unpin:
+	if (request->ctx != request->i915->kernel_context)
+		intel_lr_context_unpin(request->ctx, request->engine);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ba5946b9fa06..1193372f74fd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2333,7 +2333,7 @@  int intel_engine_idle(struct intel_engine_cs *engine)
 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
 {
 	request->ringbuf = request->engine->buffer;
-	return 0;
+	return intel_ring_begin(request, 0);
 }
 
 static int wait_for_space(struct drm_i915_gem_request *req, int bytes)