[09/66] drm/i915: Provide a fastpath for waiting on vma bindings

Message ID 20200715115147.11866-9-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [01/66] drm/i915: Reduce i915_request.lock contention for i915_request_wait

Commit Message

Chris Wilson July 15, 2020, 11:50 a.m. UTC
Before we can execute a request, we must wait for all of its vma to be
bound. This is a frequent operation for which we can optimise away a
few atomic operations (notably a cmpxchg) in lieu of the RCU protection.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_active.h | 15 +++++++++++++++
 drivers/gpu/drm/i915/i915_vma.c    |  9 +++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)
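
The saving comes from i915_active_fence_get() being able to take a
reference on the exclusive fence under RCU alone, without pinning the
whole i915_active via acquire/release: i915_active_acquire_if_busy() is
an atomic_add_unless(), i.e. a cmpxchg loop, and the matching release
adds further atomic operations. For context, a sketch of the shape of
such an RCU-protected getter (an approximation, not necessarily the
verbatim i915_active.h body):

	static inline struct dma_fence *
	i915_active_fence_get(struct i915_active_fence *active)
	{
		struct dma_fence *fence;

		/*
		 * The fence pointer is RCU-protected: a read-side
		 * critical section plus dma_fence_get_rcu_safe() is
		 * enough to take a stable reference, with no locking
		 * of the surrounding i915_active.
		 */
		rcu_read_lock();
		fence = dma_fence_get_rcu_safe(&active->fence);
		rcu_read_unlock();

		return fence;
	}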

Comments

Tvrtko Ursulin July 17, 2020, 1:23 p.m. UTC | #1
On 15/07/2020 12:50, Chris Wilson wrote:
> Before we can execute a request, we must wait for all of its vma to be
> bound. This is a frequent operation for which we can optimise away a
> few atomic operations (notably a cmpxchg) in lieu of the RCU protection.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_active.h | 15 +++++++++++++++
>   drivers/gpu/drm/i915/i915_vma.c    |  9 +++++++--
>   2 files changed, 22 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index b9e0394e2975..fb165d3f01cf 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -231,4 +231,19 @@ struct i915_active *i915_active_create(void);
>   struct i915_active *i915_active_get(struct i915_active *ref);
>   void i915_active_put(struct i915_active *ref);
>   
> +static inline int __i915_request_await_exclusive(struct i915_request *rq,
> +						 struct i915_active *active)
> +{
> +	struct dma_fence *fence;
> +	int err = 0;
> +
> +	fence = i915_active_fence_get(&active->excl);
> +	if (fence) {
> +		err = i915_request_await_dma_fence(rq, fence);
> +		dma_fence_put(fence);
> +	}
> +
> +	return err;
> +}
> +
>   #endif /* _I915_ACTIVE_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index bc64f773dcdb..cd12047c7791 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -1167,6 +1167,12 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
>   		list_del(&vma->obj->userfault_link);
>   }
>   
> +static int
> +__i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma)
> +{
> +	return __i915_request_await_exclusive(rq, &vma->active);
> +}
> +
>   int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
>   {
>   	int err;
> @@ -1174,8 +1180,7 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
>   	GEM_BUG_ON(!i915_vma_is_pinned(vma));
>   
>   	/* Wait for the vma to be bound before we start! */
> -	err = i915_request_await_active(rq, &vma->active,
> -					I915_ACTIVE_AWAIT_EXCL);
> +	err = __i915_request_await_bind(rq, vma);
>   	if (err)
>   		return err;
>   
> 

Looks like for like, apart from missing i915_active_acquire_if_busy 
across the operation. Remind me please what is acquire/release 
protecting against? :)

Regards,

Tvrtko
Thomas Hellström (Intel) July 22, 2020, 3:07 p.m. UTC | #2
On 2020-07-15 13:50, Chris Wilson wrote:
> Before we can execute a request, we must wait for all of its vma to be
> bound. This is a frequent operation for which we can optimise away a
> few atomic operations (notably a cmpxchg) in lieu of the RCU protection.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

LGTM. Reviewed-by: Thomas Hellström <thomas.hellstrom@intel.com>
Chris Wilson July 28, 2020, 2:35 p.m. UTC | #3
Quoting Tvrtko Ursulin (2020-07-17 14:23:22)
> 
> On 15/07/2020 12:50, Chris Wilson wrote:
> > [snip]
> 
> Looks like for like, apart from missing i915_active_acquire_if_busy 
> across the operation. Remind me please what is acquire/release 
> protecting against? :)

To protect the rbtree walk. So, this is the function we started with for
active_await, but when we added the option to walk the entire rbtree as
well, we pulled it all under a single acquire/release. perf suggests
that was a mistake if all we frequently want to do is grab the exclusive
fence for an await.
-Chris
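
To make the contrast concrete, here is a simplified sketch of the
generic await path that the fastpath bypasses (approximating the
i915_active.c code of the time; await_node() stands in for the
per-node await helper and is hypothetical):

	static int await_active(struct i915_request *rq,
				struct i915_active *ref,
				unsigned int flags)
	{
		int err = 0;

		/* Pin ref->count so the rbtree stays stable for the walk. */
		if (!i915_active_acquire_if_busy(ref))
			return 0;

		if (flags & I915_ACTIVE_AWAIT_EXCL) {
			struct dma_fence *fence =
				i915_active_fence_get(&ref->excl);

			if (fence) {
				err = i915_request_await_dma_fence(rq, fence);
				dma_fence_put(fence);
			}
		}

		if (!err && flags & I915_ACTIVE_AWAIT_ACTIVE) {
			struct active_node *it, *n;

			/* The walk that the acquire above protects. */
			rbtree_postorder_for_each_entry_safe(it, n,
							     &ref->tree,
							     node) {
				err = await_node(rq, it);
				if (err)
					break;
			}
		}

		/* This acquire/release pair is the cost the fastpath skips. */
		i915_active_release(ref);
		return err;
	}

Waiting only for the vma to be bound needs just the excl slot, so
__i915_request_await_exclusive() keeps the middle block and drops the
acquire/release bracketing around it.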
Tvrtko Ursulin July 29, 2020, 12:43 p.m. UTC | #4
On 28/07/2020 15:35, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-07-17 14:23:22)
>>
>> On 15/07/2020 12:50, Chris Wilson wrote:
>>> [snip]
>>
>> Looks like for like, apart from missing i915_active_acquire_if_busy
>> across the operation. Remind me please what is acquire/release
>> protecting against? :)
> 
> To protect the rbtree walk. So, this is the function we started with for
> active_await, but when we added the option to walk the entire rbtree as
> well, we pulled it all under a single acquire/release. perf suggests
> that was a mistake if all we frequently want to do is grab the exclusive
> fence for an await.

Ok!

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Patch

diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index b9e0394e2975..fb165d3f01cf 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -231,4 +231,19 @@ struct i915_active *i915_active_create(void);
 struct i915_active *i915_active_get(struct i915_active *ref);
 void i915_active_put(struct i915_active *ref);
 
+static inline int __i915_request_await_exclusive(struct i915_request *rq,
+						 struct i915_active *active)
+{
+	struct dma_fence *fence;
+	int err = 0;
+
+	fence = i915_active_fence_get(&active->excl);
+	if (fence) {
+		err = i915_request_await_dma_fence(rq, fence);
+		dma_fence_put(fence);
+	}
+
+	return err;
+}
+
 #endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index bc64f773dcdb..cd12047c7791 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1167,6 +1167,12 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
 		list_del(&vma->obj->userfault_link);
 }
 
+static int
+__i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma)
+{
+	return __i915_request_await_exclusive(rq, &vma->active);
+}
+
 int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
 {
 	int err;
@@ -1174,8 +1180,7 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
 	GEM_BUG_ON(!i915_vma_is_pinned(vma));
 
 	/* Wait for the vma to be bound before we start! */
-	err = i915_request_await_active(rq, &vma->active,
-					I915_ACTIVE_AWAIT_EXCL);
+	err = __i915_request_await_bind(rq, vma);
 	if (err)
 		return err;