diff mbox series

[23/26] drm/i915: Make request conflict tracking understand parallel submits

Message ID 20211004220637.14746-24-matthew.brost@intel.com (mailing list archive)
State New, archived
Headers show
Series Parallel submission aka multi-bb execbuf | expand

Commit Message

Matthew Brost Oct. 4, 2021, 10:06 p.m. UTC
If an object in the excl or shared slot is a composite fence from a
parallel submit and the current request in the conflict tracking is from
the same parallel context there is no need to enforce ordering as the
ordering is already implicit. Make the request conflict tracking understand
this by comparing the parent's parallel fence values and skipping the
conflict insertion if the values match.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/i915_request.c | 43 +++++++++++++++++++----------
 1 file changed, 29 insertions(+), 14 deletions(-)

Comments

John Harrison Oct. 12, 2021, 10:08 p.m. UTC | #1
On 10/4/2021 15:06, Matthew Brost wrote:
> If an object in the excl or shared slot is a composite fence from a
> parallel submit and the current request in the conflict tracking is from
> the same parallel context there is no need to enforce ordering as the
> ordering already implicit. Make the request conflict tracking understand
ordering already -> ordering is already

> this by comparing the parents parallel fence values and skipping the
parents -> parent's

> conflict insertion if the values match.
Presumably, this is to cope with the fact that the parallel submit 
fences do not look like regular submission fences. And hence the 
existing code that says 'new fence belongs to same context as old fence, 
so safe to ignore' does not work with parallel submission. However, this 
change does not appear to be adding parallel submit support to an 
existing 'same context' check. It seems to be a brand new check that 
does not exist for single submission. What makes parallel submit 
different? If we aren't skipping same context fences for single submits, 
why do we need it for parallel? Conversely, if we need it for parallel 
then why don't we need it for single?

And if the single submission version is simply somewhere else in the 
code, why do the parallel version here instead of at the same place?

John.

>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_request.c | 43 +++++++++++++++++++----------
>   1 file changed, 29 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index e9bfa32f9270..cf89624020ad 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1325,6 +1325,25 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
>   	return err;
>   }
>   
> +static inline bool is_parallel_rq(struct i915_request *rq)
> +{
> +	return intel_context_is_parallel(rq->context);
> +}
> +
> +static inline struct intel_context *request_to_parent(struct i915_request *rq)
> +{
> +	return intel_context_to_parent(rq->context);
> +}
> +
> +static bool is_same_parallel_context(struct i915_request *to,
> +				     struct i915_request *from)
> +{
> +	if (is_parallel_rq(to))
Should this not say '&& is_parallel_rq(from)'?

> +		return request_to_parent(to) == request_to_parent(from);
> +
> +	return false;
> +}
> +
>   int
>   i915_request_await_execution(struct i915_request *rq,
>   			     struct dma_fence *fence)
> @@ -1356,11 +1375,14 @@ i915_request_await_execution(struct i915_request *rq,
>   		 * want to run our callback in all cases.
>   		 */
>   
> -		if (dma_fence_is_i915(fence))
> +		if (dma_fence_is_i915(fence)) {
> +			if (is_same_parallel_context(rq, to_request(fence)))
> +				continue;
>   			ret = __i915_request_await_execution(rq,
>   							     to_request(fence));
> -		else
> +		} else {
>   			ret = i915_request_await_external(rq, fence);
> +		}
>   		if (ret < 0)
>   			return ret;
>   	} while (--nchild);
> @@ -1461,10 +1483,13 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
>   						 fence))
>   			continue;
>   
> -		if (dma_fence_is_i915(fence))
> +		if (dma_fence_is_i915(fence)) {
> +			if (is_same_parallel_context(rq, to_request(fence)))
> +				continue;
>   			ret = i915_request_await_request(rq, to_request(fence));
> -		else
> +		} else {
>   			ret = i915_request_await_external(rq, fence);
> +		}
>   		if (ret < 0)
>   			return ret;
>   
> @@ -1539,16 +1564,6 @@ i915_request_await_object(struct i915_request *to,
>   	return ret;
>   }
>   
> -static inline bool is_parallel_rq(struct i915_request *rq)
> -{
> -	return intel_context_is_parallel(rq->context);
> -}
> -
> -static inline struct intel_context *request_to_parent(struct i915_request *rq)
> -{
> -	return intel_context_to_parent(rq->context);
> -}
> -
>   static struct i915_request *
>   __i915_request_ensure_parallel_ordering(struct i915_request *rq,
>   					struct intel_timeline *timeline)
Matthew Brost Oct. 13, 2021, 12:32 a.m. UTC | #2
On Tue, Oct 12, 2021 at 03:08:05PM -0700, John Harrison wrote:
> On 10/4/2021 15:06, Matthew Brost wrote:
> > If an object in the excl or shared slot is a composite fence from a
> > parallel submit and the current request in the conflict tracking is from
> > the same parallel context there is no need to enforce ordering as the
> > ordering already implicit. Make the request conflict tracking understand
> ordering already -> ordering is already
> 

Yep.

> > this by comparing the parents parallel fence values and skipping the
> parents -> parent's
>

Yep.

> > conflict insertion if the values match.
> Presumably, this is to cope with the fact that the parallel submit fences do
> not look like regular submission fences. And hence the existing code that
> says 'new fence belongs to same context as old fence, so safe to ignore'
> does not work with parallel submission. However, this change does not appear

Yes. The check for 'if (fence->context == rq->fence.context)' doesn't
work with parallel submission as each rq->fence.context corresponds to a
timeline. With parallel submission each intel_context in the parallel
submit has its own timeline (seqno) so the compare fails for different
intel_context within the same parallel submit. This is the reason for
the additional compare on the parallel submits' parents: if they have the
same parent it is the same parallel submission and there is no need to
enforce additional ordering.

> to be adding parallel submit support to an existing 'same context' check. It
> seems to be a brand new check that does not exist for single submission.
> What makes parallel submit different? If we aren't skipping same context
> fences for single submits, why do we need it for parallel? Conversely, if we
> need it for parallel then why don't we need it for single?
>

I'm confused by what you are asking here. The existing same context
check is fine for parallel submits - it will just return true when we
compare requests with the same intel_context, and the new additional check
is only true for parallel submissions with the same parent.

> And if the single submission version is simply somewhere else in the code,
> why do the parallel version here instead of at the same place?
>

Again I'm confused by what you are asking. We might just need to sync on
a quick call.

Matt
 
> John.
> 
> > 
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_request.c | 43 +++++++++++++++++++----------
> >   1 file changed, 29 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index e9bfa32f9270..cf89624020ad 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -1325,6 +1325,25 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
> >   	return err;
> >   }
> > +static inline bool is_parallel_rq(struct i915_request *rq)
> > +{
> > +	return intel_context_is_parallel(rq->context);
> > +}
> > +
> > +static inline struct intel_context *request_to_parent(struct i915_request *rq)
> > +{
> > +	return intel_context_to_parent(rq->context);
> > +}
> > +
> > +static bool is_same_parallel_context(struct i915_request *to,
> > +				     struct i915_request *from)
> > +{
> > +	if (is_parallel_rq(to))
> Should this not say '&& is_parallel_rq(from)'?
> 
> > +		return request_to_parent(to) == request_to_parent(from);
> > +
> > +	return false;
> > +}
> > +
> >   int
> >   i915_request_await_execution(struct i915_request *rq,
> >   			     struct dma_fence *fence)
> > @@ -1356,11 +1375,14 @@ i915_request_await_execution(struct i915_request *rq,
> >   		 * want to run our callback in all cases.
> >   		 */
> > -		if (dma_fence_is_i915(fence))
> > +		if (dma_fence_is_i915(fence)) {
> > +			if (is_same_parallel_context(rq, to_request(fence)))
> > +				continue;
> >   			ret = __i915_request_await_execution(rq,
> >   							     to_request(fence));
> > -		else
> > +		} else {
> >   			ret = i915_request_await_external(rq, fence);
> > +		}
> >   		if (ret < 0)
> >   			return ret;
> >   	} while (--nchild);
> > @@ -1461,10 +1483,13 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
> >   						 fence))
> >   			continue;
> > -		if (dma_fence_is_i915(fence))
> > +		if (dma_fence_is_i915(fence)) {
> > +			if (is_same_parallel_context(rq, to_request(fence)))
> > +				continue;
> >   			ret = i915_request_await_request(rq, to_request(fence));
> > -		else
> > +		} else {
> >   			ret = i915_request_await_external(rq, fence);
> > +		}
> >   		if (ret < 0)
> >   			return ret;
> > @@ -1539,16 +1564,6 @@ i915_request_await_object(struct i915_request *to,
> >   	return ret;
> >   }
> > -static inline bool is_parallel_rq(struct i915_request *rq)
> > -{
> > -	return intel_context_is_parallel(rq->context);
> > -}
> > -
> > -static inline struct intel_context *request_to_parent(struct i915_request *rq)
> > -{
> > -	return intel_context_to_parent(rq->context);
> > -}
> > -
> >   static struct i915_request *
> >   __i915_request_ensure_parallel_ordering(struct i915_request *rq,
> >   					struct intel_timeline *timeline)
>
Matthew Brost Oct. 13, 2021, 5:51 p.m. UTC | #3
On Tue, Oct 12, 2021 at 03:08:05PM -0700, John Harrison wrote:
> On 10/4/2021 15:06, Matthew Brost wrote:
> > If an object in the excl or shared slot is a composite fence from a
> > parallel submit and the current request in the conflict tracking is from
> > the same parallel context there is no need to enforce ordering as the
> > ordering already implicit. Make the request conflict tracking understand
> ordering already -> ordering is already
> 
> > this by comparing the parents parallel fence values and skipping the
> parents -> parent's
> 
> > conflict insertion if the values match.
> Presumably, this is to cope with the fact that the parallel submit fences do
> not look like regular submission fences. And hence the existing code that
> says 'new fence belongs to same context as old fence, so safe to ignore'
> does not work with parallel submission. However, this change does not appear
> to be adding parallel submit support to an existing 'same context' check. It
> seems to be a brand new check that does not exist for single submission.
> What makes parallel submit different? If we aren't skipping same context
> fences for single submits, why do we need it for parallel? Conversely, if we
> need it for parallel then why don't we need it for single?
> 
> And if the single submission version is simply somewhere else in the code,
> why do the parallel version here instead of at the same place?
> 
> John.
> 
> > 
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_request.c | 43 +++++++++++++++++++----------
> >   1 file changed, 29 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index e9bfa32f9270..cf89624020ad 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -1325,6 +1325,25 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
> >   	return err;
> >   }
> > +static inline bool is_parallel_rq(struct i915_request *rq)
> > +{
> > +	return intel_context_is_parallel(rq->context);
> > +}
> > +
> > +static inline struct intel_context *request_to_parent(struct i915_request *rq)
> > +{
> > +	return intel_context_to_parent(rq->context);
> > +}
> > +
> > +static bool is_same_parallel_context(struct i915_request *to,
> > +				     struct i915_request *from)
> > +{
> > +	if (is_parallel_rq(to))
> Should this not say '&& is_parallel_rq(from)'?
> 

Missed this one. That isn't necessary: if 'from' is not a parallel
submit, the following compare of parents will always return false. I
could add it if you insist, as either way works.

Matt 

> > +		return request_to_parent(to) == request_to_parent(from);
> > +
> > +	return false;
> > +}
> > +
> >   int
> >   i915_request_await_execution(struct i915_request *rq,
> >   			     struct dma_fence *fence)
> > @@ -1356,11 +1375,14 @@ i915_request_await_execution(struct i915_request *rq,
> >   		 * want to run our callback in all cases.
> >   		 */
> > -		if (dma_fence_is_i915(fence))
> > +		if (dma_fence_is_i915(fence)) {
> > +			if (is_same_parallel_context(rq, to_request(fence)))
> > +				continue;
> >   			ret = __i915_request_await_execution(rq,
> >   							     to_request(fence));
> > -		else
> > +		} else {
> >   			ret = i915_request_await_external(rq, fence);
> > +		}
> >   		if (ret < 0)
> >   			return ret;
> >   	} while (--nchild);
> > @@ -1461,10 +1483,13 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
> >   						 fence))
> >   			continue;
> > -		if (dma_fence_is_i915(fence))
> > +		if (dma_fence_is_i915(fence)) {
> > +			if (is_same_parallel_context(rq, to_request(fence)))
> > +				continue;
> >   			ret = i915_request_await_request(rq, to_request(fence));
> > -		else
> > +		} else {
> >   			ret = i915_request_await_external(rq, fence);
> > +		}
> >   		if (ret < 0)
> >   			return ret;
> > @@ -1539,16 +1564,6 @@ i915_request_await_object(struct i915_request *to,
> >   	return ret;
> >   }
> > -static inline bool is_parallel_rq(struct i915_request *rq)
> > -{
> > -	return intel_context_is_parallel(rq->context);
> > -}
> > -
> > -static inline struct intel_context *request_to_parent(struct i915_request *rq)
> > -{
> > -	return intel_context_to_parent(rq->context);
> > -}
> > -
> >   static struct i915_request *
> >   __i915_request_ensure_parallel_ordering(struct i915_request *rq,
> >   					struct intel_timeline *timeline)
>
John Harrison Oct. 13, 2021, 7:25 p.m. UTC | #4
On 10/13/2021 10:51, Matthew Brost wrote:
> On Tue, Oct 12, 2021 at 03:08:05PM -0700, John Harrison wrote:
>> On 10/4/2021 15:06, Matthew Brost wrote:
>>> If an object in the excl or shared slot is a composite fence from a
>>> parallel submit and the current request in the conflict tracking is from
>>> the same parallel context there is no need to enforce ordering as the
>>> ordering already implicit. Make the request conflict tracking understand
>> ordering already -> ordering is already
>>
>>> this by comparing the parents parallel fence values and skipping the
>> parents -> parent's
>>
>>> conflict insertion if the values match.
>> Presumably, this is to cope with the fact that the parallel submit fences do
>> not look like regular submission fences. And hence the existing code that
>> says 'new fence belongs to same context as old fence, so safe to ignore'
>> does not work with parallel submission. However, this change does not appear
>> to be adding parallel submit support to an existing 'same context' check. It
>> seems to be a brand new check that does not exist for single submission.
>> What makes parallel submit different? If we aren't skipping same context
>> fences for single submits, why do we need it for parallel? Conversely, if we
>> need it for parallel then why don't we need it for single?
>>
>> And if the single submission version is simply somewhere else in the code,
>> why do the parallel version here instead of at the same place?
>>
>> John.
>>
>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_request.c | 43 +++++++++++++++++++----------
>>>    1 file changed, 29 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>>> index e9bfa32f9270..cf89624020ad 100644
>>> --- a/drivers/gpu/drm/i915/i915_request.c
>>> +++ b/drivers/gpu/drm/i915/i915_request.c
>>> @@ -1325,6 +1325,25 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
>>>    	return err;
>>>    }
>>> +static inline bool is_parallel_rq(struct i915_request *rq)
>>> +{
>>> +	return intel_context_is_parallel(rq->context);
>>> +}
>>> +
>>> +static inline struct intel_context *request_to_parent(struct i915_request *rq)
>>> +{
>>> +	return intel_context_to_parent(rq->context);
>>> +}
>>> +
>>> +static bool is_same_parallel_context(struct i915_request *to,
>>> +				     struct i915_request *from)
>>> +{
>>> +	if (is_parallel_rq(to))
>> Should this not say '&& is_parallel_rq(from)'?
>>
> Missed this one. That isn't necessary as if from is not a parallel
> submit the following compare of parents will always return false. I
> could add if you insist as either way works.
>
> Matt
It was more a question of whether request_to_parent() works fine 
irrespective of whether the rq is a parent, child or single?

John.

>
>>> +		return request_to_parent(to) == request_to_parent(from);
>>> +
>>> +	return false;
>>> +}
>>> +
>>>    int
>>>    i915_request_await_execution(struct i915_request *rq,
>>>    			     struct dma_fence *fence)
>>> @@ -1356,11 +1375,14 @@ i915_request_await_execution(struct i915_request *rq,
>>>    		 * want to run our callback in all cases.
>>>    		 */
>>> -		if (dma_fence_is_i915(fence))
>>> +		if (dma_fence_is_i915(fence)) {
>>> +			if (is_same_parallel_context(rq, to_request(fence)))
>>> +				continue;
>>>    			ret = __i915_request_await_execution(rq,
>>>    							     to_request(fence));
>>> -		else
>>> +		} else {
>>>    			ret = i915_request_await_external(rq, fence);
>>> +		}
>>>    		if (ret < 0)
>>>    			return ret;
>>>    	} while (--nchild);
>>> @@ -1461,10 +1483,13 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
>>>    						 fence))
>>>    			continue;
>>> -		if (dma_fence_is_i915(fence))
>>> +		if (dma_fence_is_i915(fence)) {
>>> +			if (is_same_parallel_context(rq, to_request(fence)))
>>> +				continue;
>>>    			ret = i915_request_await_request(rq, to_request(fence));
>>> -		else
>>> +		} else {
>>>    			ret = i915_request_await_external(rq, fence);
>>> +		}
>>>    		if (ret < 0)
>>>    			return ret;
>>> @@ -1539,16 +1564,6 @@ i915_request_await_object(struct i915_request *to,
>>>    	return ret;
>>>    }
>>> -static inline bool is_parallel_rq(struct i915_request *rq)
>>> -{
>>> -	return intel_context_is_parallel(rq->context);
>>> -}
>>> -
>>> -static inline struct intel_context *request_to_parent(struct i915_request *rq)
>>> -{
>>> -	return intel_context_to_parent(rq->context);
>>> -}
>>> -
>>>    static struct i915_request *
>>>    __i915_request_ensure_parallel_ordering(struct i915_request *rq,
>>>    					struct intel_timeline *timeline)
John Harrison Oct. 13, 2021, 7:35 p.m. UTC | #5
On 10/12/2021 17:32, Matthew Brost wrote:
> On Tue, Oct 12, 2021 at 03:08:05PM -0700, John Harrison wrote:
>> On 10/4/2021 15:06, Matthew Brost wrote:
>>> If an object in the excl or shared slot is a composite fence from a
>>> parallel submit and the current request in the conflict tracking is from
>>> the same parallel context there is no need to enforce ordering as the
>>> ordering already implicit. Make the request conflict tracking understand
>> ordering already -> ordering is already
>>
> Yep.
>
>>> this by comparing the parents parallel fence values and skipping the
>> parents -> parent's
>>
> Yep.
>
>>> conflict insertion if the values match.
>> Presumably, this is to cope with the fact that the parallel submit fences do
>> not look like regular submission fences. And hence the existing code that
>> says 'new fence belongs to same context as old fence, so safe to ignore'
>> does not work with parallel submission. However, this change does not appear
> Yes. The check for 'if (fence->context == rq->fence.context)' doesn't
> work with parallel submission as each rq->fence.context corresponds to a
> timeline. With parallel submission each intel_context in the parallel
> submit has its own timeline (seqno) so the compare fails for different
> intel_context within the same parallel submit. This is the reason for
> the additional compare on parallel submits parents, if they have the
> same parent it is the same parallel submission and there is no need to
> enforce additional ordering.
>
>> to be adding parallel submit support to an existing 'same context' check. It
>> seems to be a brand new check that does not exist for single submission.
>> What makes parallel submit different? If we aren't skipping same context
>> fences for single submits, why do we need it for parallel? Conversely, if we
>> need it for parallel then why don't we need it for single?
>>
> I'm confused by what you are asking here. The existing same context
> check is fine for parallel submits - it will just return true when we
> compare requests with the same intel_context and new additional check
> only true parallel submissions with the same parent.
>
>> And if the single submission version is simply somewhere else in the code,
>> why do the parallel version here instead of at the same place?
>>
> Again I'm confused by what you are asking. We might just need to sync on
> a quick call.
That's okay. I think I had partly confused myself ;).

I was just meaning that the parallel compliant version of the 'ctxtA == 
ctxtB -> skip' test should be coded adjacent to the single submission 
version of the same test. I had somehow completely missed that the 
single submission version is indeed the line above in 
i915_request_await_execution(). So the two are indeed very definitely 
next to each other.

It's all good :).

John.


>
> Matt
>   
>> John.
>>
>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_request.c | 43 +++++++++++++++++++----------
>>>    1 file changed, 29 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>>> index e9bfa32f9270..cf89624020ad 100644
>>> --- a/drivers/gpu/drm/i915/i915_request.c
>>> +++ b/drivers/gpu/drm/i915/i915_request.c
>>> @@ -1325,6 +1325,25 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
>>>    	return err;
>>>    }
>>> +static inline bool is_parallel_rq(struct i915_request *rq)
>>> +{
>>> +	return intel_context_is_parallel(rq->context);
>>> +}
>>> +
>>> +static inline struct intel_context *request_to_parent(struct i915_request *rq)
>>> +{
>>> +	return intel_context_to_parent(rq->context);
>>> +}
>>> +
>>> +static bool is_same_parallel_context(struct i915_request *to,
>>> +				     struct i915_request *from)
>>> +{
>>> +	if (is_parallel_rq(to))
>> Should this not say '&& is_parallel_rq(from)'?
>>
>>> +		return request_to_parent(to) == request_to_parent(from);
>>> +
>>> +	return false;
>>> +}
>>> +
>>>    int
>>>    i915_request_await_execution(struct i915_request *rq,
>>>    			     struct dma_fence *fence)
>>> @@ -1356,11 +1375,14 @@ i915_request_await_execution(struct i915_request *rq,
>>>    		 * want to run our callback in all cases.
>>>    		 */
>>> -		if (dma_fence_is_i915(fence))
>>> +		if (dma_fence_is_i915(fence)) {
>>> +			if (is_same_parallel_context(rq, to_request(fence)))
>>> +				continue;
>>>    			ret = __i915_request_await_execution(rq,
>>>    							     to_request(fence));
>>> -		else
>>> +		} else {
>>>    			ret = i915_request_await_external(rq, fence);
>>> +		}
>>>    		if (ret < 0)
>>>    			return ret;
>>>    	} while (--nchild);
>>> @@ -1461,10 +1483,13 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
>>>    						 fence))
>>>    			continue;
>>> -		if (dma_fence_is_i915(fence))
>>> +		if (dma_fence_is_i915(fence)) {
>>> +			if (is_same_parallel_context(rq, to_request(fence)))
>>> +				continue;
>>>    			ret = i915_request_await_request(rq, to_request(fence));
>>> -		else
>>> +		} else {
>>>    			ret = i915_request_await_external(rq, fence);
>>> +		}
>>>    		if (ret < 0)
>>>    			return ret;
>>> @@ -1539,16 +1564,6 @@ i915_request_await_object(struct i915_request *to,
>>>    	return ret;
>>>    }
>>> -static inline bool is_parallel_rq(struct i915_request *rq)
>>> -{
>>> -	return intel_context_is_parallel(rq->context);
>>> -}
>>> -
>>> -static inline struct intel_context *request_to_parent(struct i915_request *rq)
>>> -{
>>> -	return intel_context_to_parent(rq->context);
>>> -}
>>> -
>>>    static struct i915_request *
>>>    __i915_request_ensure_parallel_ordering(struct i915_request *rq,
>>>    					struct intel_timeline *timeline)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e9bfa32f9270..cf89624020ad 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1325,6 +1325,25 @@  i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
 	return err;
 }
 
+static inline bool is_parallel_rq(struct i915_request *rq)
+{
+	return intel_context_is_parallel(rq->context);
+}
+
+static inline struct intel_context *request_to_parent(struct i915_request *rq)
+{
+	return intel_context_to_parent(rq->context);
+}
+
+static bool is_same_parallel_context(struct i915_request *to,
+				     struct i915_request *from)
+{
+	if (is_parallel_rq(to))
+		return request_to_parent(to) == request_to_parent(from);
+
+	return false;
+}
+
 int
 i915_request_await_execution(struct i915_request *rq,
 			     struct dma_fence *fence)
@@ -1356,11 +1375,14 @@  i915_request_await_execution(struct i915_request *rq,
 		 * want to run our callback in all cases.
 		 */
 
-		if (dma_fence_is_i915(fence))
+		if (dma_fence_is_i915(fence)) {
+			if (is_same_parallel_context(rq, to_request(fence)))
+				continue;
 			ret = __i915_request_await_execution(rq,
 							     to_request(fence));
-		else
+		} else {
 			ret = i915_request_await_external(rq, fence);
+		}
 		if (ret < 0)
 			return ret;
 	} while (--nchild);
@@ -1461,10 +1483,13 @@  i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 						 fence))
 			continue;
 
-		if (dma_fence_is_i915(fence))
+		if (dma_fence_is_i915(fence)) {
+			if (is_same_parallel_context(rq, to_request(fence)))
+				continue;
 			ret = i915_request_await_request(rq, to_request(fence));
-		else
+		} else {
 			ret = i915_request_await_external(rq, fence);
+		}
 		if (ret < 0)
 			return ret;
 
@@ -1539,16 +1564,6 @@  i915_request_await_object(struct i915_request *to,
 	return ret;
 }
 
-static inline bool is_parallel_rq(struct i915_request *rq)
-{
-	return intel_context_is_parallel(rq->context);
-}
-
-static inline struct intel_context *request_to_parent(struct i915_request *rq)
-{
-	return intel_context_to_parent(rq->context);
-}
-
 static struct i915_request *
 __i915_request_ensure_parallel_ordering(struct i915_request *rq,
 					struct intel_timeline *timeline)