diff mbox

[06/42] drm/i915: Support asynchronous waits on struct fence from i915_gem_request

Message ID 20161007094635.28319-7-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Oct. 7, 2016, 9:45 a.m. UTC
We will need to wait on DMA completion (as signaled via struct fence)
before executing our i915_gem_request. Therefore we want to expose a
method for adding the await on the fence itself to the request.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 40 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_request.h |  2 ++
 2 files changed, 42 insertions(+)

Comments

Joonas Lahtinen Oct. 7, 2016, 9:56 a.m. UTC | #1
On pe, 2016-10-07 at 10:45 +0100, Chris Wilson wrote:
> +	if (!fence_is_array(fence)) {
> +		ret = i915_sw_fence_await_dma_fence(&req->submit,
> +						    fence, 10*HZ,
> +						    GFP_KERNEL);
> +		return ret < 0 ? ret : 0;
> +	}
> +

With the magic removed in two spots,

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
Tvrtko Ursulin Oct. 7, 2016, 3:51 p.m. UTC | #2
On 07/10/2016 10:45, Chris Wilson wrote:
> We will need to wait on DMA completion (as signaled via struct fence)
> before executing our i915_gem_request. Therefore we want to expose a
> method for adding the await on the fence itself to the request.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_request.c | 40 +++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_request.h |  2 ++
>   2 files changed, 42 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 8832f8ec1583..e1f7a32d4080 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -23,6 +23,7 @@
>    */
>   
>   #include <linux/prefetch.h>
> +#include <linux/fence-array.h>
>   
>   #include "i915_drv.h"
>   
> @@ -495,6 +496,45 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
>   	return 0;
>   }
>   
> +int
> +i915_gem_request_await_fence(struct drm_i915_gem_request *req,
> +			     struct fence *fence)
> +{
> +	struct fence_array *array;
> +	int ret;
> +	int i;
> +
> +	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
> +		return 0;
> +
> +	if (fence_is_i915(fence))
> +		return i915_gem_request_await_request(req, to_request(fence));
> +
> +	if (!fence_is_array(fence)) {
> +		ret = i915_sw_fence_await_dma_fence(&req->submit,
> +						    fence, 10*HZ,
> +						    GFP_KERNEL);
> +		return ret < 0 ? ret : 0;
> +	}
> +
> +	array = to_fence_array(fence);
> +	for (i = 0; i < array->num_fences; i++) {
> +		struct fence *child = array->fences[i];

1. What is the advantage in manually waiting on array elements rather 
than just the array? Is the existence of signal_on_any a problem?

2. Can child be another array?

Regards,

Tvrtko

> +
> +		if (fence_is_i915(child))
> +			ret = i915_gem_request_await_request(req,
> +							     to_request(child));
> +		else
> +			ret = i915_sw_fence_await_dma_fence(&req->submit,
> +							    child, 10*HZ,
> +							    GFP_KERNEL);
> +		if (ret < 0)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
>   /**
>    * i915_gem_request_await_object - set this request to (async) wait upon a bo
>    *
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 974bd7bcc801..c85a3d82febf 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -214,6 +214,8 @@ int
>   i915_gem_request_await_object(struct drm_i915_gem_request *to,
>   			      struct drm_i915_gem_object *obj,
>   			      bool write);
> +int i915_gem_request_await_fence(struct drm_i915_gem_request *req,
> +				 struct fence *fence);
>   
>   void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
>   #define i915_add_request(req) \
Chris Wilson Oct. 7, 2016, 4:12 p.m. UTC | #3
On Fri, Oct 07, 2016 at 04:51:14PM +0100, Tvrtko Ursulin wrote:
> 
> On 07/10/2016 10:45, Chris Wilson wrote:
> >We will need to wait on DMA completion (as signaled via struct fence)
> >before executing our i915_gem_request. Therefore we want to expose a
> >method for adding the await on the fence itself to the request.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> >  drivers/gpu/drm/i915/i915_gem_request.c | 40 +++++++++++++++++++++++++++++++++
> >  drivers/gpu/drm/i915/i915_gem_request.h |  2 ++
> >  2 files changed, 42 insertions(+)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> >index 8832f8ec1583..e1f7a32d4080 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_request.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_request.c
> >@@ -23,6 +23,7 @@
> >   */
> >  #include <linux/prefetch.h>
> >+#include <linux/fence-array.h>
> >  #include "i915_drv.h"
> >@@ -495,6 +496,45 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
> >  	return 0;
> >  }
> >+int
> >+i915_gem_request_await_fence(struct drm_i915_gem_request *req,
> >+			     struct fence *fence)
> >+{
> >+	struct fence_array *array;
> >+	int ret;
> >+	int i;
> >+
> >+	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
> >+		return 0;
> >+
> >+	if (fence_is_i915(fence))
> >+		return i915_gem_request_await_request(req, to_request(fence));
> >+
> >+	if (!fence_is_array(fence)) {
> >+		ret = i915_sw_fence_await_dma_fence(&req->submit,
> >+						    fence, 10*HZ,
> >+						    GFP_KERNEL);
> >+		return ret < 0 ? ret : 0;
> >+	}
> >+
> >+	array = to_fence_array(fence);
> >+	for (i = 0; i < array->num_fences; i++) {
> >+		struct fence *child = array->fences[i];
> 
> 1. What is the advantage in manually waiting on array elements
> rather than just the array? Is the existence of signal_on_any a
> problem?

We can wait on native fences much more efficiently. In the typical case,
we don't even need the wait at all since the ordering is imposed by the
timeline. Or we can hook into the submission fence to avoid the
interrupt. Failing that we have to respond to the interrupt. Also note
that foreign fences handle autosignaling differently to native fences,
as we cannot rely on third parties having a working hangcheck.
 
> 2. Can child be another array?

Yes, but we don't want to recurse (or at least need to bound the
recursion).
-Chris
Tvrtko Ursulin Oct. 7, 2016, 4:16 p.m. UTC | #4
On 07/10/2016 17:12, Chris Wilson wrote:
> On Fri, Oct 07, 2016 at 04:51:14PM +0100, Tvrtko Ursulin wrote:
>> On 07/10/2016 10:45, Chris Wilson wrote:
>>> We will need to wait on DMA completion (as signaled via struct fence)
>>> before executing our i915_gem_request. Therefore we want to expose a
>>> method for adding the await on the fence itself to the request.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>   drivers/gpu/drm/i915/i915_gem_request.c | 40 +++++++++++++++++++++++++++++++++
>>>   drivers/gpu/drm/i915/i915_gem_request.h |  2 ++
>>>   2 files changed, 42 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
>>> index 8832f8ec1583..e1f7a32d4080 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_request.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
>>> @@ -23,6 +23,7 @@
>>>    */
>>>   #include <linux/prefetch.h>
>>> +#include <linux/fence-array.h>
>>>   #include "i915_drv.h"
>>> @@ -495,6 +496,45 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
>>>   	return 0;
>>>   }
>>> +int
>>> +i915_gem_request_await_fence(struct drm_i915_gem_request *req,
>>> +			     struct fence *fence)
>>> +{
>>> +	struct fence_array *array;
>>> +	int ret;
>>> +	int i;
>>> +
>>> +	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
>>> +		return 0;
>>> +
>>> +	if (fence_is_i915(fence))
>>> +		return i915_gem_request_await_request(req, to_request(fence));
>>> +
>>> +	if (!fence_is_array(fence)) {
>>> +		ret = i915_sw_fence_await_dma_fence(&req->submit,
>>> +						    fence, 10*HZ,
>>> +						    GFP_KERNEL);
>>> +		return ret < 0 ? ret : 0;
>>> +	}
>>> +
>>> +	array = to_fence_array(fence);
>>> +	for (i = 0; i < array->num_fences; i++) {
>>> +		struct fence *child = array->fences[i];
>> 1. What is the advantage in manually waiting on array elements
>> rather than just the array? Is the existence of signal_on_any a
>> problem?
> We can wait on native fences much more efficiently. In the typical case,
> we don't even need the wait at all since the ordering is imposed by the
> timeline. Or we can hook into the submission fence to avoid the
> interrupt. Failing that we have to respond to the interrupt. Also note
> that foreign fences handle autosignaling differently to native fences,
> as we cannot rely on third parties having a working hangcheck.
>   
>> 2. Can child be another array?
> Yes, but we don't want to recurse (or at least need to bound the
> recursion).

In that case could the array have been created in the signal_on_any mode 
and how would we handle that?

Regards,

Tvrtko
Chris Wilson Oct. 7, 2016, 4:37 p.m. UTC | #5
On Fri, Oct 07, 2016 at 05:16:17PM +0100, Tvrtko Ursulin wrote:
> 
> On 07/10/2016 17:12, Chris Wilson wrote:
> >>2. Can child be another array?
> >Yes, but we don't want to recurse (or at least need to bound the
> >recursion).
> 
> In that case could the array have been created in the signal_on_any
> mode and how would we handle that?

Hmm. Not along our paths, I think. The only fence-array we have are from
sync-file which are signal-on-all (except... in the case where it
wraps a single fence, that fence could be a composite). signal-on-any is
icky, to be complete we should not decompose it into its elements -
however, whether or not a fence-array is operating in signal_on_any mode
is not stored. So at the moment, the best I can do is offer a comment.

The only user of it at the moment is amdgpu waiting for the first of
many VM manager to become available, not something we'll see
immediately via sync-file.
-Chris
Tvrtko Ursulin Oct. 8, 2016, 8:23 a.m. UTC | #6
On 07/10/2016 17:37, Chris Wilson wrote:
> On Fri, Oct 07, 2016 at 05:16:17PM +0100, Tvrtko Ursulin wrote:
>> On 07/10/2016 17:12, Chris Wilson wrote:
>>>> 2. Can child be another array?
>>> Yes, but we don't want to recurse (or at least need to bound the
>>> recursion).
>> In that case could the array have been created in the signal_on_any
>> mode and how would we handle that?
> Hmm. Not along our paths, I think. The only fence-array we have are from
> sync-file which are signal-on-all (except... in the case of where it
> wraps a single fence, that fence could be a composite). signal-on-any is
> icky, to be complete we should not decompose it into its elements -
> however, whether or not a fence-array is operating in signal_on_any mode
> is not stored. So at the moment, the best I can do is offer a comment.

Yes signal-on-any sounds extremely weird. It mandates you decompose 3rd 
party fences in all cases, with recursion.

> The only user of it at the moment is amdgpu waiting for the first of
> many VM manager to become available, not something we'll see
> immediately via sync-file.

So userspace cannot engineer it to happen with some funky operations 
before giving us a fence?

Regards,

Tvrtko
Chris Wilson Oct. 8, 2016, 8:58 a.m. UTC | #7
On Sat, Oct 08, 2016 at 09:23:25AM +0100, Tvrtko Ursulin wrote:
> 
> On 07/10/2016 17:37, Chris Wilson wrote:
> >On Fri, Oct 07, 2016 at 05:16:17PM +0100, Tvrtko Ursulin wrote:
> >>On 07/10/2016 17:12, Chris Wilson wrote:
> >>>>2. Can child be another array?
> >>>Yes, but we don't want to recurse (or at least need to bound the
> >>>recursion).
> >>In that case could the array have been created in the signal_on_any
> >>mode and how would we handle that?
> >Hmm. Not along our paths, I think. The only fence-array we have are from
> >sync-file which are signal-on-all (except... in the case of where it
> >wraps a single fence, that fence could be a composite). signal-on-any is
> >icky, to be complete we should not decompose it into its elements -
> >however, whether or not a fence-array is operating in signal_on_any mode
> >is not stored. So at the moment, the best I can do is offer a comment.
> 
> Yes signal-on-any sounds extremely weird. It mandates you decompose
> 3rd party fences in all cases, with recursion.
> 
> >The only user of it at the moment is amdgpu waiting for the first of
> >many VM manager to become available, not something we'll see
> >immediately via sync-file.
> 
> So userspace cannot engineer it to happen with some funky operations
> before giving us a fence?

No. sync_file is a fence-array (in normal signal-on-all mode) or a
single fence (which via merging sync_files can not be another fence-array,
though the sync_file could be constructed pointing to a single fence-array
fence). If that is a signal-on-any fence, it is broken if the sync_file
is ever merged.... And reviewing the existing code, no one should be
creating sync_file from a signal-on-any fence-array. There is no ABI to
allow the user to create signal-on-any sync_file. I'm feeling safer that
there are quite a few issues to resolve before signal-on-any fences are
allowed to pollute the sync_file ABI.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 8832f8ec1583..e1f7a32d4080 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -23,6 +23,7 @@ 
  */
 
 #include <linux/prefetch.h>
+#include <linux/fence-array.h>
 
 #include "i915_drv.h"
 
@@ -495,6 +496,45 @@  i915_gem_request_await_request(struct drm_i915_gem_request *to,
 	return 0;
 }
 
+int
+i915_gem_request_await_fence(struct drm_i915_gem_request *req,
+			     struct fence *fence)
+{
+	struct fence_array *array;
+	int ret;
+	int i;
+
+	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return 0;
+
+	if (fence_is_i915(fence))
+		return i915_gem_request_await_request(req, to_request(fence));
+
+	if (!fence_is_array(fence)) {
+		ret = i915_sw_fence_await_dma_fence(&req->submit,
+						    fence, 10*HZ,
+						    GFP_KERNEL);
+		return ret < 0 ? ret : 0;
+	}
+
+	array = to_fence_array(fence);
+	for (i = 0; i < array->num_fences; i++) {
+		struct fence *child = array->fences[i];
+
+		if (fence_is_i915(child))
+			ret = i915_gem_request_await_request(req,
+							     to_request(child));
+		else
+			ret = i915_sw_fence_await_dma_fence(&req->submit,
+							    child, 10*HZ,
+							    GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
 /**
  * i915_gem_request_await_object - set this request to (async) wait upon a bo
  *
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 974bd7bcc801..c85a3d82febf 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -214,6 +214,8 @@  int
 i915_gem_request_await_object(struct drm_i915_gem_request *to,
 			      struct drm_i915_gem_object *obj,
 			      bool write);
+int i915_gem_request_await_fence(struct drm_i915_gem_request *req,
+				 struct fence *fence);
 
 void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 #define i915_add_request(req) \