Message ID | 20200715115147.11866-9-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/66] drm/i915: Reduce i915_request.lock contention for i915_request_wait | expand |
On 15/07/2020 12:50, Chris Wilson wrote: > Before we can execute a request, we must wait for all of its vma to be > bound. This is a frequent operation for which we can optimise away a > few atomic operations (notably a cmpxchg) in lieu of the RCU protection. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_active.h | 15 +++++++++++++++ > drivers/gpu/drm/i915/i915_vma.c | 9 +++++++-- > 2 files changed, 22 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h > index b9e0394e2975..fb165d3f01cf 100644 > --- a/drivers/gpu/drm/i915/i915_active.h > +++ b/drivers/gpu/drm/i915/i915_active.h > @@ -231,4 +231,19 @@ struct i915_active *i915_active_create(void); > struct i915_active *i915_active_get(struct i915_active *ref); > void i915_active_put(struct i915_active *ref); > > +static inline int __i915_request_await_exclusive(struct i915_request *rq, > + struct i915_active *active) > +{ > + struct dma_fence *fence; > + int err = 0; > + > + fence = i915_active_fence_get(&active->excl); > + if (fence) { > + err = i915_request_await_dma_fence(rq, fence); > + dma_fence_put(fence); > + } > + > + return err; > +} > + > #endif /* _I915_ACTIVE_H_ */ > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > index bc64f773dcdb..cd12047c7791 100644 > --- a/drivers/gpu/drm/i915/i915_vma.c > +++ b/drivers/gpu/drm/i915/i915_vma.c > @@ -1167,6 +1167,12 @@ void i915_vma_revoke_mmap(struct i915_vma *vma) > list_del(&vma->obj->userfault_link); > } > > +static int > +__i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma) > +{ > + return __i915_request_await_exclusive(rq, &vma->active); > +} > + > int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) > { > int err; > @@ -1174,8 +1180,7 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) > GEM_BUG_ON(!i915_vma_is_pinned(vma)); > > /* Wait for the vma to be bound before we start! */ > - err = i915_request_await_active(rq, &vma->active, > - I915_ACTIVE_AWAIT_EXCL); > + err = __i915_request_await_bind(rq, vma); > if (err) > return err; > > Looks like for like, apart from missing i915_active_acquire_if_busy across the operation. Remind me please what is acquire/release protecting against? :) Regards, Tvrtko
On 2020-07-15 13:50, Chris Wilson wrote: > Before we can execute a request, we must wait for all of its vma to be > bound. This is a frequent operation for which we can optimise away a > few atomic operations (notably a cmpxchg) in lieu of the RCU protection. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> LGTM. Reviewed-by: Thomas Hellström <thomas.hellstrom@intel.com> > --- > drivers/gpu/drm/i915/i915_active.h | 15 +++++++++++++++ > drivers/gpu/drm/i915/i915_vma.c | 9 +++++++-- > 2 files changed, 22 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h > index b9e0394e2975..fb165d3f01cf 100644 > --- a/drivers/gpu/drm/i915/i915_active.h > +++ b/drivers/gpu/drm/i915/i915_active.h > @@ -231,4 +231,19 @@ struct i915_active *i915_active_create(void); > struct i915_active *i915_active_get(struct i915_active *ref); > void i915_active_put(struct i915_active *ref); > > +static inline int __i915_request_await_exclusive(struct i915_request *rq, > + struct i915_active *active) > +{ > + struct dma_fence *fence; > + int err = 0; > + > + fence = i915_active_fence_get(&active->excl); > + if (fence) { > + err = i915_request_await_dma_fence(rq, fence); > + dma_fence_put(fence); > + } > + > + return err; > +} > + > #endif /* _I915_ACTIVE_H_ */ > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > index bc64f773dcdb..cd12047c7791 100644 > --- a/drivers/gpu/drm/i915/i915_vma.c > +++ b/drivers/gpu/drm/i915/i915_vma.c > @@ -1167,6 +1167,12 @@ void i915_vma_revoke_mmap(struct i915_vma *vma) > list_del(&vma->obj->userfault_link); > } > > +static int > +__i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma) > +{ > + return __i915_request_await_exclusive(rq, &vma->active); > +} > + > int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) > { > int err; > @@ -1174,8 +1180,7 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) > GEM_BUG_ON(!i915_vma_is_pinned(vma)); > > /* Wait for the vma to be bound before we start! */ > - err = i915_request_await_active(rq, &vma->active, > - I915_ACTIVE_AWAIT_EXCL); > + err = __i915_request_await_bind(rq, vma); > if (err) > return err; >
Quoting Tvrtko Ursulin (2020-07-17 14:23:22) > > On 15/07/2020 12:50, Chris Wilson wrote: > > Before we can execute a request, we must wait for all of its vma to be > > bound. This is a frequent operation for which we can optimise away a > > few atomic operations (notably a cmpxchg) in lieu of the RCU protection. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > drivers/gpu/drm/i915/i915_active.h | 15 +++++++++++++++ > > drivers/gpu/drm/i915/i915_vma.c | 9 +++++++-- > > 2 files changed, 22 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h > > index b9e0394e2975..fb165d3f01cf 100644 > > --- a/drivers/gpu/drm/i915/i915_active.h > > +++ b/drivers/gpu/drm/i915/i915_active.h > > @@ -231,4 +231,19 @@ struct i915_active *i915_active_create(void); > > struct i915_active *i915_active_get(struct i915_active *ref); > > void i915_active_put(struct i915_active *ref); > > > > +static inline int __i915_request_await_exclusive(struct i915_request *rq, > > + struct i915_active *active) > > +{ > > + struct dma_fence *fence; > > + int err = 0; > > + > > + fence = i915_active_fence_get(&active->excl); > > + if (fence) { > > + err = i915_request_await_dma_fence(rq, fence); > > + dma_fence_put(fence); > > + } > > + > > + return err; > > +} > > + > > #endif /* _I915_ACTIVE_H_ */ > > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > > index bc64f773dcdb..cd12047c7791 100644 > > --- a/drivers/gpu/drm/i915/i915_vma.c > > +++ b/drivers/gpu/drm/i915/i915_vma.c > > @@ -1167,6 +1167,12 @@ void i915_vma_revoke_mmap(struct i915_vma *vma) > > list_del(&vma->obj->userfault_link); > > } > > > > +static int > > +__i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma) > > +{ > > + return __i915_request_await_exclusive(rq, &vma->active); > > +} > > + > > int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) > > { > > int err; > > @@ -1174,8 +1180,7 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) > > GEM_BUG_ON(!i915_vma_is_pinned(vma)); > > > > /* Wait for the vma to be bound before we start! */ > > - err = i915_request_await_active(rq, &vma->active, > > - I915_ACTIVE_AWAIT_EXCL); > > + err = __i915_request_await_bind(rq, vma); > > if (err) > > return err; > > > > > > Looks like for like, apart from missing i915_active_acquire_if_busy > across the operation. Remind me please what is acquire/release > protecting against? :) To protect the rbtree walk. So, this is the function we started with for active_await, but when we added the option to walk the entire rbtree as well, we pulled it all under a single acquire/release. perf suggests that was a mistake if all we frequently want to do is grab the exclusive fence for an await. -Chris
On 28/07/2020 15:35, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2020-07-17 14:23:22) >> >> On 15/07/2020 12:50, Chris Wilson wrote: >>> Before we can execute a request, we must wait for all of its vma to be >>> bound. This is a frequent operation for which we can optimise away a >>> few atomic operations (notably a cmpxchg) in lieu of the RCU protection. >>> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >>> --- >>> drivers/gpu/drm/i915/i915_active.h | 15 +++++++++++++++ >>> drivers/gpu/drm/i915/i915_vma.c | 9 +++++++-- >>> 2 files changed, 22 insertions(+), 2 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h >>> index b9e0394e2975..fb165d3f01cf 100644 >>> --- a/drivers/gpu/drm/i915/i915_active.h >>> +++ b/drivers/gpu/drm/i915/i915_active.h >>> @@ -231,4 +231,19 @@ struct i915_active *i915_active_create(void); >>> struct i915_active *i915_active_get(struct i915_active *ref); >>> void i915_active_put(struct i915_active *ref); >>> >>> +static inline int __i915_request_await_exclusive(struct i915_request *rq, >>> + struct i915_active *active) >>> +{ >>> + struct dma_fence *fence; >>> + int err = 0; >>> + >>> + fence = i915_active_fence_get(&active->excl); >>> + if (fence) { >>> + err = i915_request_await_dma_fence(rq, fence); >>> + dma_fence_put(fence); >>> + } >>> + >>> + return err; >>> +} >>> + >>> #endif /* _I915_ACTIVE_H_ */ >>> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c >>> index bc64f773dcdb..cd12047c7791 100644 >>> --- a/drivers/gpu/drm/i915/i915_vma.c >>> +++ b/drivers/gpu/drm/i915/i915_vma.c >>> @@ -1167,6 +1167,12 @@ void i915_vma_revoke_mmap(struct i915_vma *vma) >>> list_del(&vma->obj->userfault_link); >>> } >>> >>> +static int >>> +__i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma) >>> +{ >>> + return __i915_request_await_exclusive(rq, &vma->active); >>> +} >>> + >>> int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) >>> { >>> int err; >>> @@ -1174,8 +1180,7 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) >>> GEM_BUG_ON(!i915_vma_is_pinned(vma)); >>> >>> /* Wait for the vma to be bound before we start! */ >>> - err = i915_request_await_active(rq, &vma->active, >>> - I915_ACTIVE_AWAIT_EXCL); >>> + err = __i915_request_await_bind(rq, vma); >>> if (err) >>> return err; >>> >>> >> >> Looks like for like, apart from missing i915_active_acquire_if_busy >> across the operation. Remind me please what is acquire/release >> protecting against? :) > > To protect the rbtree walk. So, this is the function we started with for > active_await, but when we added the option to walk the entire rbtree as > well, we pulled it all under a single acquire/release. perf suggests > that was a mistake if all we frequently want to do is grab the exclusive > fence for an await. Ok! Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index b9e0394e2975..fb165d3f01cf 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -231,4 +231,19 @@ struct i915_active *i915_active_create(void); struct i915_active *i915_active_get(struct i915_active *ref); void i915_active_put(struct i915_active *ref); +static inline int __i915_request_await_exclusive(struct i915_request *rq, + struct i915_active *active) +{ + struct dma_fence *fence; + int err = 0; + + fence = i915_active_fence_get(&active->excl); + if (fence) { + err = i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } + + return err; +} + #endif /* _I915_ACTIVE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index bc64f773dcdb..cd12047c7791 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1167,6 +1167,12 @@ void i915_vma_revoke_mmap(struct i915_vma *vma) list_del(&vma->obj->userfault_link); } +static int +__i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma) +{ + return __i915_request_await_exclusive(rq, &vma->active); +} + int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) { int err; @@ -1174,8 +1180,7 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) GEM_BUG_ON(!i915_vma_is_pinned(vma)); /* Wait for the vma to be bound before we start! */ - err = i915_request_await_active(rq, &vma->active, - I915_ACTIVE_AWAIT_EXCL); + err = __i915_request_await_bind(rq, vma); if (err) return err;
Before we can execute a request, we must wait for all of its vma to be bound. This is a frequent operation for which we can optimise away a few atomic operations (notably a cmpxchg) in lieu of the RCU protection. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_active.h | 15 +++++++++++++++ drivers/gpu/drm/i915/i915_vma.c | 9 +++++++-- 2 files changed, 22 insertions(+), 2 deletions(-)