Message ID | 20180814144058.19286-7-tvrtko.ursulin@linux.intel.com (mailing list archive)
---|---
State | New, archived
Series | Per context dynamic (sub)slice power-gating
Chris, for this one please let me know if it is okay to give you authorship
and to add your S-o-B.

Tvrtko

On 14/08/2018 15:40, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> The global barrier is a facility which allows serialization between
> different timelines.
>
> After calling i915_gem_set_global_barrier on a request, all following
> submissions on any engine will be set up as depending on this global
> barrier. Once the global barrier has been completed it automatically gets
> cleared and things continue as normal.
>
> This facility will be used by the upcoming context SSEU code.
>
> -------------------------------------------------------------------------
> This code was part of the larger SSEU patch but I extracted it to be
> separate for ease of review and clarity. I think it originates from Chris
> Wilson so, permission pending, I will change the author and add the
> appropriate S-o-B.
> -------------------------------------------------------------------------
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h     | 27 +++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem.c     |  2 ++
>  drivers/gpu/drm/i915/i915_request.c | 16 ++++++++++++++++
>  3 files changed, 45 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5c12d2676435..643089ba01b9 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2098,6 +2098,16 @@ struct drm_i915_private {
>  	u32 active_requests;
>  	u32 request_serial;
>
> +	/**
> +	 * Global barrier used to serialize ordering between
> +	 * different timelines.
> +	 *
> +	 * Users can call i915_gem_set_global_barrier which will make
> +	 * all subsequent submissions execute only after this barrier
> +	 * has been completed.
> +	 */
> +	struct i915_gem_active global_barrier;
> +
>  	/**
>  	 * Is the GPU currently considered idle, or busy executing
>  	 * userspace requests? Whilst idle, we allow runtime power
> @@ -3230,6 +3240,23 @@ i915_vm_to_ppgtt(struct i915_address_space *vm)
>  	return container_of(vm, struct i915_hw_ppgtt, vm);
>  }
>
> +/**
> + * i915_gem_set_global_barrier - orders submissions across different timelines
> + * @i915: i915 device private
> + * @rq: request after which new submissions can proceed
> + *
> + * Sets the passed-in request as the serialization point for all subsequent
> + * submissions, regardless of the engine/timeline. Subsequent requests will
> + * not be submitted to the GPU until the global barrier has been completed.
> + */
> +static inline void
> +i915_gem_set_global_barrier(struct drm_i915_private *i915,
> +			    struct i915_request *rq)
> +{
> +	lockdep_assert_held(&i915->drm.struct_mutex);
> +	i915_gem_active_set(&i915->gt.global_barrier, rq);
> +}
> +
>  /* i915_gem_fence_reg.c */
>  struct drm_i915_fence_reg *
>  i915_reserve_fence(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0453eb42a1a3..be462ef65786 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5752,6 +5752,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
>  	if (!dev_priv->priorities)
>  		goto err_dependencies;
>
> +	init_request_active(&dev_priv->gt.global_barrier, NULL);
> +
>  	INIT_LIST_HEAD(&dev_priv->gt.timelines);
>  	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
>  	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 09ed48833b54..8b45f74dc748 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -644,6 +644,18 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
>  	return NOTIFY_DONE;
>  }
>
> +static int add_global_barrier(struct i915_request *rq)
> +{
> +	struct i915_request *barrier;
> +
> +	barrier = i915_gem_active_raw(&rq->i915->gt.global_barrier,
> +				      &rq->i915->drm.struct_mutex);
> +	if (barrier)
> +		return i915_request_await_dma_fence(rq, &barrier->fence);
> +
> +	return 0;
> +}
> +
>  /**
>   * i915_request_alloc - allocate a request structure
>   *
> @@ -806,6 +818,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>  	 */
>  	rq->head = rq->ring->emit;
>
> +	ret = add_global_barrier(rq);
> +	if (ret)
> +		goto err_unwind;
> +
>  	/* Unconditionally invalidate GPU caches and TLBs. */
>  	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
>  	if (ret)
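
For a concrete picture of the intended pattern, here is a minimal caller
sketch. It is hypothetical and not part of this patch: the function name
update_global_state and the emitted commands are invented for illustration,
standing in for whatever the upcoming context SSEU code will actually emit.
The pattern is: build a request that changes some global hardware state, mark
it as the global barrier, then submit it. Every request allocated afterwards,
on any engine, picks up a fence dependency on it via add_global_barrier().

	/*
	 * Hypothetical usage sketch, not part of this patch: the caller
	 * name and the emitted commands are invented for illustration.
	 */
	static int update_global_state(struct drm_i915_private *i915,
				       struct intel_engine_cs *engine,
				       struct i915_gem_context *ctx)
	{
		struct i915_request *rq;

		lockdep_assert_held(&i915->drm.struct_mutex);

		rq = i915_request_alloc(engine, ctx);
		if (IS_ERR(rq))
			return PTR_ERR(rq);

		/* ... emit commands reconfiguring global state into rq ... */

		/*
		 * Requests allocated from this point on, on any engine,
		 * will await rq's fence (see add_global_barrier()). rq
		 * itself was allocated before the barrier was set, so it
		 * does not wait on itself.
		 */
		i915_gem_set_global_barrier(i915, rq);

		i915_request_add(rq);

		return 0;
	}

Note that no explicit teardown is needed: once rq retires, the active tracker
drops the reference, i915_gem_active_raw() returns NULL and the barrier
clears automatically.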