
[15/18] drm/i915: Nonblocking request submission

Message ID 20160830081812.4110-16-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson Aug. 30, 2016, 8:18 a.m. UTC
Now that we have fences in place to drive request submission, we can
employ those to queue requests after their dependencies as opposed to
stalling in the middle of an execbuf ioctl. (However, we still choose to
spin before enabling the IRQ as that is faster - though contentious.)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 +++++++------
 drivers/gpu/drm/i915/i915_gem_request.c    | 14 +++++++++++++-
 2 files changed, 20 insertions(+), 7 deletions(-)
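
In essence, the change replaces a blocking i915_wait_request() in the
middle of the execbuf ioctl with an asynchronous fence wait. A minimal
sketch of the non-blocking path, written as a hypothetical standalone
helper for readability (the real code sits inline in eb_await_request;
see the diff below):

    static int queue_after(struct drm_i915_gem_request *to,
                           struct drm_i915_gem_request *from)
    {
            /* Briefly busy-wait first; most dependencies retire quickly. */
            if (i915_spin_request(from, TASK_INTERRUPTIBLE, 2))
                    return 0; /* dependency already complete */

            /*
             * Otherwise queue rather than stall: to's submit fence is
             * held back until from's completion fence has signalled.
             */
            return i915_sw_fence_await_dma_fence(&to->submit,
                                                 &from->fence,
                                                 GFP_KERNEL);
    }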

Comments

John Harrison Sept. 2, 2016, 3:49 p.m. UTC | #1
On 30/08/2016 09:18, Chris Wilson wrote:
> Now that we have fences in place to drive request submission, we can
> employ those to queue requests after their dependencies as opposed to
> stalling in the middle of an execbuf ioctl. (However, we still choose to
> spin before enabling the IRQ as that is faster - though contentious.)
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 +++++++------
>   drivers/gpu/drm/i915/i915_gem_request.c    | 14 +++++++++++++-
>   2 files changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 1685f4aaa4c2..0c8e447ffdbb 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1136,12 +1136,13 @@ eb_await_request(struct drm_i915_gem_request *to,
>   
>   	trace_i915_gem_ring_sync_to(to, from);
>   	if (!i915.semaphores) {
> -		ret = i915_wait_request(from,
> -					I915_WAIT_INTERRUPTIBLE |
> -					I915_WAIT_LOCKED,
> -					NULL, NO_WAITBOOST);
> -		if (ret)
> -			return ret;
> +		if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
> +			ret = i915_sw_fence_await_dma_fence(&to->submit,
> +							    &from->fence,
> +							    GFP_KERNEL);
To finish the discussion here:

> > Why not use 'i915_sw_fence_await_sw_fence(from->submit)' as below?
> > Or conversely, why not use '_await_dma_fence(prev->fence)' below?
>
> On the same engine the requests are in execution order and so once the
> first is ready to submit so are its dependents. This extends naturally
> to timelines. Between engines, we have to wait until the request is
> complete before we can submit the second (note this applies to the
> !semaphore branch).

Doh. Yes, req->submit is signalled when the request is submitted, but 
req->fence is signalled when the request completes. For some reason, I 
was thinking the two were actually signalled together.
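
That distinction is exactly what the two awaits in this patch rely on.
Roughly (illustrative fragment assembled from the patch; error handling
elided for brevity):

    /*
     * Same engine: requests execute in submission order, so a new
     * request need only wait for its predecessor to be *submitted*.
     */
    i915_sw_fence_await_sw_fence(&req->submit, &prev->submit, GFP_KERNEL);

    /*
     * Across engines (the !semaphore branch): the dependency must
     * *complete* before the dependent request may be submitted.
     */
    i915_sw_fence_await_dma_fence(&to->submit, &from->fence, GFP_KERNEL);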


> +			if (ret < 0)
> +				return ret;
> +		}
>   	} else {
>   		ret = to->engine->semaphore.sync_to(to, from);
>   		if (ret)
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 89ed66275d95..5837660502cd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -352,7 +352,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>   		       struct i915_gem_context *ctx)
>   {
>   	struct drm_i915_private *dev_priv = engine->i915;
> -	struct drm_i915_gem_request *req;
> +	struct drm_i915_gem_request *req, *prev;
>   	u32 seqno;
>   	int ret;
>   
> @@ -448,6 +448,18 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>   	 */
>   	req->head = req->ring->tail;
>   
> +	prev = i915_gem_active_peek(&engine->last_request,
> +				    &req->i915->drm.struct_mutex);
> +	if (prev) {
> +		ret = i915_sw_fence_await_sw_fence(&req->submit,
> +						   &prev->submit,
> +						   GFP_KERNEL);
> +		if (ret < 0) {
> +			i915_add_request(req);
Isn't this an old version of the patch? I thought you re-issued it with 
the ordering changed to avoid this add_request() on sync failure?


> +			return ERR_PTR(ret);
> +		}
> +	}
> +
>   	return req;
>   
>   err_ctx:

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1685f4aaa4c2..0c8e447ffdbb 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1136,12 +1136,13 @@  eb_await_request(struct drm_i915_gem_request *to,
 
 	trace_i915_gem_ring_sync_to(to, from);
 	if (!i915.semaphores) {
-		ret = i915_wait_request(from,
-					I915_WAIT_INTERRUPTIBLE |
-					I915_WAIT_LOCKED,
-					NULL, NO_WAITBOOST);
-		if (ret)
-			return ret;
+		if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
+			ret = i915_sw_fence_await_dma_fence(&to->submit,
+							    &from->fence,
+							    GFP_KERNEL);
+			if (ret < 0)
+				return ret;
+		}
 	} else {
 		ret = to->engine->semaphore.sync_to(to, from);
 		if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 89ed66275d95..5837660502cd 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -352,7 +352,7 @@  i915_gem_request_alloc(struct intel_engine_cs *engine,
 		       struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	struct drm_i915_gem_request *req;
+	struct drm_i915_gem_request *req, *prev;
 	u32 seqno;
 	int ret;
 
@@ -448,6 +448,18 @@  i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 */
 	req->head = req->ring->tail;
 
+	prev = i915_gem_active_peek(&engine->last_request,
+				    &req->i915->drm.struct_mutex);
+	if (prev) {
+		ret = i915_sw_fence_await_sw_fence(&req->submit,
+						   &prev->submit,
+						   GFP_KERNEL);
+		if (ret < 0) {
+			i915_add_request(req);
+			return ERR_PTR(ret);
+		}
+	}
+
 	return req;
 
 err_ctx: