Message ID | 1461172195-3959-2-git-send-email-John.C.Harrison@Intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Op 20-04-16 om 19:09 schreef John.C.Harrison@Intel.com: > From: John Harrison <John.C.Harrison@Intel.com> > > There is a construct in the linux kernel called 'struct fence' that is > intended to keep track of work that is executed on hardware. I.e. it > solves the basic problem that the drivers 'struct > drm_i915_gem_request' is trying to address. The request structure does > quite a lot more than simply track the execution progress so is very > definitely still required. However, the basic completion status side > could be updated to use the ready made fence implementation and gain > all the advantages that provides. > > This patch makes the first step of integrating a struct fence into the > request. It replaces the explicit reference count with that of the > fence. It also replaces the 'is completed' test with the fence's > equivalent. Currently, that simply chains on to the original request > implementation. A future patch will improve this. > > v3: Updated after review comments by Tvrtko Ursulin. Added fence > context/seqno pair to the debugfs request info. Renamed fence 'driver > name' to just 'i915'. Removed BUG_ONs. > > v5: Changed seqno format in debugfs to %x rather than %u as that is > apparently the preferred appearance. Line wrapped some long lines to > keep the style checker happy. > > v6: Updated to newer nightly and resolved conflicts. The biggest issue > was with the re-worked busy spin precursor to waiting on a request. In > particular, the addition of a 'request_started' helper function. This > has no corresponding concept within the fence framework. However, it > is only ever used in one place and the whole point of that place is to > always directly read the seqno for absolutely lowest latency possible. > So the simple solution is to just make the seqno test explicit at that > point now rather than later in the series (it was previously being > done anyway when fences become interrupt driven). 
> > v7: Rebased to newer nightly - lots of ring -> engine renaming and > interface change to get_seqno(). > > For: VIZ-5190 > Signed-off-by: John Harrison <John.C.Harrison@Intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 5 ++- > drivers/gpu/drm/i915/i915_drv.h | 51 ++++++++++------------- > drivers/gpu/drm/i915/i915_gem.c | 72 +++++++++++++++++++++++++++++---- > drivers/gpu/drm/i915/intel_lrc.c | 1 + > drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + > drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++ > 6 files changed, 94 insertions(+), 39 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 2d11b49..6917515 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -707,11 +707,12 @@ static int i915_gem_request_info(struct seq_file *m, void *data) > task = NULL; > if (req->pid) > task = pid_task(req->pid, PIDTYPE_PID); > - seq_printf(m, " %x @ %d: %s [%d]\n", > + seq_printf(m, " %x @ %d: %s [%d], fence = %x:%x\n", > req->seqno, > (int) (jiffies - req->emitted_jiffies), > task ? task->comm : "<unknown>", > - task ? task->pid : -1); > + task ? task->pid : -1, > + req->fence.context, req->fence.seqno); > rcu_read_unlock(); > } > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index d1e6e58..e5f49f3 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -54,6 +54,7 @@ > #include <linux/pm_qos.h> > #include "intel_guc.h" > #include "intel_dpll_mgr.h" > +#include <linux/fence.h> > > /* General customization: > */ > @@ -2242,7 +2243,17 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, > * initial reference taken using kref_init > */ > struct drm_i915_gem_request { > - struct kref ref; > + /** > + * Underlying object for implementing the signal/wait stuff. 
> + * NB: Never call fence_later() or return this fence object to user > + * land! Due to lazy allocation, scheduler re-ordering, pre-emption, > + * etc., there is no guarantee at all about the validity or > + * sequentiality of the fence's seqno! It is also unsafe to let > + * anything outside of the i915 driver get hold of the fence object > + * as the clean up when decrementing the reference count requires > + * holding the driver mutex lock. > + */ > + struct fence fence; > > /** On Which ring this request was generated */ > struct drm_i915_private *i915; > @@ -2328,7 +2339,13 @@ struct drm_i915_gem_request * __must_check > i915_gem_request_alloc(struct intel_engine_cs *engine, > struct intel_context *ctx); > void i915_gem_request_cancel(struct drm_i915_gem_request *req); > -void i915_gem_request_free(struct kref *req_ref); > + > +static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, > + bool lazy_coherency) > +{ > + return fence_is_signaled(&req->fence); > +} > + > int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, > struct drm_file *file); > > @@ -2348,7 +2365,7 @@ static inline struct drm_i915_gem_request * > i915_gem_request_reference(struct drm_i915_gem_request *req) > { > if (req) > - kref_get(&req->ref); > + fence_get(&req->fence); > return req; > } > > @@ -2356,7 +2373,7 @@ static inline void > i915_gem_request_unreference(struct drm_i915_gem_request *req) > { > WARN_ON(!mutex_is_locked(&req->engine->dev->struct_mutex)); > - kref_put(&req->ref, i915_gem_request_free); > + fence_put(&req->fence); > } > > static inline void > @@ -2368,7 +2385,7 @@ i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req) > return; > > dev = req->engine->dev; > - if (kref_put_mutex(&req->ref, i915_gem_request_free, &dev->struct_mutex)) > + if (kref_put_mutex(&req->fence.refcount, fence_release, &dev->struct_mutex)) > mutex_unlock(&dev->struct_mutex); > } > > @@ -2385,12 +2402,6 @@ static inline void 
i915_gem_request_assign(struct drm_i915_gem_request **pdst, > } > > /* > - * XXX: i915_gem_request_completed should be here but currently needs the > - * definition of i915_seqno_passed() which is below. It will be moved in > - * a later patch when the call to i915_seqno_passed() is obsoleted... > - */ > - > -/* > * A command that requires special handling by the command parser. > */ > struct drm_i915_cmd_descriptor { > @@ -3055,24 +3066,6 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) > return (int32_t)(seq1 - seq2) >= 0; > } > > -static inline bool i915_gem_request_started(struct drm_i915_gem_request *req, > - bool lazy_coherency) > -{ > - if (!lazy_coherency && req->engine->irq_seqno_barrier) > - req->engine->irq_seqno_barrier(req->engine); > - return i915_seqno_passed(req->engine->get_seqno(req->engine), > - req->previous_seqno); > -} > - > -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, > - bool lazy_coherency) > -{ > - if (!lazy_coherency && req->engine->irq_seqno_barrier) > - req->engine->irq_seqno_barrier(req->engine); > - return i915_seqno_passed(req->engine->get_seqno(req->engine), > - req->seqno); > -} > - > int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno); > int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index ebef03b..1add29a9 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1183,6 +1183,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state) > { > unsigned long timeout; > unsigned cpu; > + uint32_t seqno; > > /* When waiting for high frequency requests, e.g. 
during synchronous > * rendering split between the CPU and GPU, the finite amount of time > @@ -1198,12 +1199,14 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state) > return -EBUSY; > > /* Only spin if we know the GPU is processing this request */ > - if (!i915_gem_request_started(req, true)) > + seqno = req->engine->get_seqno(req->engine); > + if (!i915_seqno_passed(seqno, req->previous_seqno)) > return -EAGAIN; > > timeout = local_clock_us(&cpu) + 5; > while (!need_resched()) { > - if (i915_gem_request_completed(req, true)) > + seqno = req->engine->get_seqno(req->engine); > + if (i915_seqno_passed(seqno, req->seqno)) > return 0; > > if (signal_pending_state(state, current)) > @@ -1215,7 +1218,10 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state) > cpu_relax_lowlatency(); > } > > - if (i915_gem_request_completed(req, false)) > + if (req->engine->irq_seqno_barrier) > + req->engine->irq_seqno_barrier(req->engine); > + seqno = req->engine->get_seqno(req->engine); > + if (i915_seqno_passed(seqno, req->seqno)) > return 0; > > return -EAGAIN; > @@ -2721,12 +2727,14 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv, > } > } > > -void i915_gem_request_free(struct kref *req_ref) > +static void i915_gem_request_free(struct fence *req_fence) > { > - struct drm_i915_gem_request *req = container_of(req_ref, > - typeof(*req), ref); > + struct drm_i915_gem_request *req = container_of(req_fence, > + typeof(*req), fence); > struct intel_context *ctx = req->ctx; > > + WARN_ON(!mutex_is_locked(&req->engine->dev->struct_mutex)); > + > if (req->file_priv) > i915_gem_request_remove_from_client(req); > > Is kmem_cache_free rcu-safe? I don't think it is, and that would cause some hard to debug issues. Adding SLAB_DESTROY_BY_RCU to flags wouldn't do what you would expect here, so your best bet would be to do a call_rcu(&fence->rcu, wrapper_for_kmem_cache_free); ~Maarten
Op 20-04-16 om 19:09 schreef John.C.Harrison@Intel.com: > <snip> > +/* if (!lazy_coherency && req->engine->irq_seqno_barrier) > + req->engine->irq_seqno_barrier(req->engine);*/ > Why keep this hunk in i915_gem_request_is_completed?
On 21/04/2016 08:06, Maarten Lankhorst wrote: > Op 20-04-16 om 19:09 schreef John.C.Harrison@Intel.com: >> From: John Harrison <John.C.Harrison@Intel.com> >> >> There is a construct in the linux kernel called 'struct fence' that is >> intended to keep track of work that is executed on hardware. I.e. it >> solves the basic problem that the drivers 'struct >> drm_i915_gem_request' is trying to address. The request structure does >> quite a lot more than simply track the execution progress so is very >> definitely still required. However, the basic completion status side >> could be updated to use the ready made fence implementation and gain >> all the advantages that provides. >> >> This patch makes the first step of integrating a struct fence into the >> request. It replaces the explicit reference count with that of the >> fence. It also replaces the 'is completed' test with the fence's >> equivalent. Currently, that simply chains on to the original request >> implementation. A future patch will improve this. >> >> v3: Updated after review comments by Tvrtko Ursulin. Added fence >> context/seqno pair to the debugfs request info. Renamed fence 'driver >> name' to just 'i915'. Removed BUG_ONs. >> >> v5: Changed seqno format in debugfs to %x rather than %u as that is >> apparently the preferred appearance. Line wrapped some long lines to >> keep the style checker happy. >> >> v6: Updated to newer nigthly and resolved conflicts. The biggest issue >> was with the re-worked busy spin precursor to waiting on a request. In >> particular, the addition of a 'request_started' helper function. This >> has no corresponding concept within the fence framework. However, it >> is only ever used in one place and the whole point of that place is to >> always directly read the seqno for absolutely lowest latency possible. 
>> So the simple solution is to just make the seqno test explicit at that >> point now rather than later in the series (it was previously being >> done anyway when fences become interrupt driven). >> >> v7: Rebased to newer nightly - lots of ring -> engine renaming and >> interface change to get_seqno(). >> >> For: VIZ-5190 >> Signed-off-by: John Harrison <John.C.Harrison@Intel.com> >> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> >> --- >> drivers/gpu/drm/i915/i915_debugfs.c | 5 ++- >> drivers/gpu/drm/i915/i915_drv.h | 51 ++++++++++------------- >> drivers/gpu/drm/i915/i915_gem.c | 72 +++++++++++++++++++++++++++++---- >> drivers/gpu/drm/i915/intel_lrc.c | 1 + >> drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + >> drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++ >> 6 files changed, 94 insertions(+), 39 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c >> index 2d11b49..6917515 100644 >> --- a/drivers/gpu/drm/i915/i915_debugfs.c >> +++ b/drivers/gpu/drm/i915/i915_debugfs.c >> @@ -707,11 +707,12 @@ static int i915_gem_request_info(struct seq_file *m, void *data) >> task = NULL; >> if (req->pid) >> task = pid_task(req->pid, PIDTYPE_PID); >> - seq_printf(m, " %x @ %d: %s [%d]\n", >> + seq_printf(m, " %x @ %d: %s [%d], fence = %x:%x\n", >> req->seqno, >> (int) (jiffies - req->emitted_jiffies), >> task ? task->comm : "<unknown>", >> - task ? task->pid : -1); >> + task ? 
task->pid : -1, >> + req->fence.context, req->fence.seqno); >> rcu_read_unlock(); >> } >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index d1e6e58..e5f49f3 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -54,6 +54,7 @@ >> #include <linux/pm_qos.h> >> #include "intel_guc.h" >> #include "intel_dpll_mgr.h" >> +#include <linux/fence.h> >> >> /* General customization: >> */ >> @@ -2242,7 +2243,17 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, >> * initial reference taken using kref_init >> */ >> struct drm_i915_gem_request { >> - struct kref ref; >> + /** >> + * Underlying object for implementing the signal/wait stuff. >> + * NB: Never call fence_later() or return this fence object to user >> + * land! Due to lazy allocation, scheduler re-ordering, pre-emption, >> + * etc., there is no guarantee at all about the validity or >> + * sequentiality of the fence's seqno! It is also unsafe to let >> + * anything outside of the i915 driver get hold of the fence object >> + * as the clean up when decrementing the reference count requires >> + * holding the driver mutex lock. 
>> + */ >> + struct fence fence; >> >> /** On Which ring this request was generated */ >> struct drm_i915_private *i915; >> @@ -2328,7 +2339,13 @@ struct drm_i915_gem_request * __must_check >> i915_gem_request_alloc(struct intel_engine_cs *engine, >> struct intel_context *ctx); >> void i915_gem_request_cancel(struct drm_i915_gem_request *req); >> -void i915_gem_request_free(struct kref *req_ref); >> + >> +static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, >> + bool lazy_coherency) >> +{ >> + return fence_is_signaled(&req->fence); >> +} >> + >> int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, >> struct drm_file *file); >> >> @@ -2348,7 +2365,7 @@ static inline struct drm_i915_gem_request * >> i915_gem_request_reference(struct drm_i915_gem_request *req) >> { >> if (req) >> - kref_get(&req->ref); >> + fence_get(&req->fence); >> return req; >> } >> >> @@ -2356,7 +2373,7 @@ static inline void >> i915_gem_request_unreference(struct drm_i915_gem_request *req) >> { >> WARN_ON(!mutex_is_locked(&req->engine->dev->struct_mutex)); >> - kref_put(&req->ref, i915_gem_request_free); >> + fence_put(&req->fence); >> } >> >> static inline void >> @@ -2368,7 +2385,7 @@ i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req) >> return; >> >> dev = req->engine->dev; >> - if (kref_put_mutex(&req->ref, i915_gem_request_free, &dev->struct_mutex)) >> + if (kref_put_mutex(&req->fence.refcount, fence_release, &dev->struct_mutex)) >> mutex_unlock(&dev->struct_mutex); >> } >> >> @@ -2385,12 +2402,6 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, >> } >> >> /* >> - * XXX: i915_gem_request_completed should be here but currently needs the >> - * definition of i915_seqno_passed() which is below. It will be moved in >> - * a later patch when the call to i915_seqno_passed() is obsoleted... >> - */ >> - >> -/* >> * A command that requires special handling by the command parser. 
>> */ >> struct drm_i915_cmd_descriptor { >> @@ -3055,24 +3066,6 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) >> return (int32_t)(seq1 - seq2) >= 0; >> } >> >> -static inline bool i915_gem_request_started(struct drm_i915_gem_request *req, >> - bool lazy_coherency) >> -{ >> - if (!lazy_coherency && req->engine->irq_seqno_barrier) >> - req->engine->irq_seqno_barrier(req->engine); >> - return i915_seqno_passed(req->engine->get_seqno(req->engine), >> - req->previous_seqno); >> -} >> - >> -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, >> - bool lazy_coherency) >> -{ >> - if (!lazy_coherency && req->engine->irq_seqno_barrier) >> - req->engine->irq_seqno_barrier(req->engine); >> - return i915_seqno_passed(req->engine->get_seqno(req->engine), >> - req->seqno); >> -} >> - >> int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno); >> int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); >> >> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c >> index ebef03b..1add29a9 100644 >> --- a/drivers/gpu/drm/i915/i915_gem.c >> +++ b/drivers/gpu/drm/i915/i915_gem.c >> @@ -1183,6 +1183,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state) >> { >> unsigned long timeout; >> unsigned cpu; >> + uint32_t seqno; >> >> /* When waiting for high frequency requests, e.g. 
during synchronous >> * rendering split between the CPU and GPU, the finite amount of time >> @@ -1198,12 +1199,14 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state) >> return -EBUSY; >> >> /* Only spin if we know the GPU is processing this request */ >> - if (!i915_gem_request_started(req, true)) >> + seqno = req->engine->get_seqno(req->engine); >> + if (!i915_seqno_passed(seqno, req->previous_seqno)) >> return -EAGAIN; >> >> timeout = local_clock_us(&cpu) + 5; >> while (!need_resched()) { >> - if (i915_gem_request_completed(req, true)) >> + seqno = req->engine->get_seqno(req->engine); >> + if (i915_seqno_passed(seqno, req->seqno)) >> return 0; >> >> if (signal_pending_state(state, current)) >> @@ -1215,7 +1218,10 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state) >> cpu_relax_lowlatency(); >> } >> >> - if (i915_gem_request_completed(req, false)) >> + if (req->engine->irq_seqno_barrier) >> + req->engine->irq_seqno_barrier(req->engine); >> + seqno = req->engine->get_seqno(req->engine); >> + if (i915_seqno_passed(seqno, req->seqno)) >> return 0; >> >> return -EAGAIN; >> @@ -2721,12 +2727,14 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv, >> } >> } >> >> -void i915_gem_request_free(struct kref *req_ref) >> +static void i915_gem_request_free(struct fence *req_fence) >> { >> - struct drm_i915_gem_request *req = container_of(req_ref, >> - typeof(*req), ref); >> + struct drm_i915_gem_request *req = container_of(req_fence, >> + typeof(*req), fence); >> struct intel_context *ctx = req->ctx; >> >> + WARN_ON(!mutex_is_locked(&req->engine->dev->struct_mutex)); >> + >> if (req->file_priv) >> i915_gem_request_remove_from_client(req); >> >> > Is kmem_cache_free rcu-safe? > > I don't think it is, and that would cause some hard to debug issues. 
> > Adding SLAB_DESTROY_BY_RCU to flags wouldn't do what you would expect here, > so your best bet would be to do a call_rcu(&fence->rcu, wrapper_for_kmem_cache_free); > > ~Maarten I don't understand what you mean? Are you referring to the kmem_cache_free that frees the request object at the end of the above function (which you have actually deleted from your reply)? Or are you referring to something inside the i915_gem_request_remove_from_client() call that your comments seem to be in reply to? If you mean the free of the request itself, then the only usages of that particular kmem_cache are within the driver mutex lock. Does that not make it safe? If you mean the client remove, then where is the kmem_cache_free in that call path?
Op 21-04-16 om 12:26 schreef John Harrison: > On 21/04/2016 08:06, Maarten Lankhorst wrote: >> Op 20-04-16 om 19:09 schreef John.C.Harrison@Intel.com: >>> From: John Harrison <John.C.Harrison@Intel.com> >>> >>> There is a construct in the linux kernel called 'struct fence' that is >>> intended to keep track of work that is executed on hardware. I.e. it >>> solves the basic problem that the drivers 'struct >>> drm_i915_gem_request' is trying to address. The request structure does >>> quite a lot more than simply track the execution progress so is very >>> definitely still required. However, the basic completion status side >>> could be updated to use the ready made fence implementation and gain >>> all the advantages that provides. >>> >>> This patch makes the first step of integrating a struct fence into the >>> request. It replaces the explicit reference count with that of the >>> fence. It also replaces the 'is completed' test with the fence's >>> equivalent. Currently, that simply chains on to the original request >>> implementation. A future patch will improve this. >>> >>> v3: Updated after review comments by Tvrtko Ursulin. Added fence >>> context/seqno pair to the debugfs request info. Renamed fence 'driver >>> name' to just 'i915'. Removed BUG_ONs. >>> >>> v5: Changed seqno format in debugfs to %x rather than %u as that is >>> apparently the preferred appearance. Line wrapped some long lines to >>> keep the style checker happy. >>> >>> v6: Updated to newer nigthly and resolved conflicts. The biggest issue >>> was with the re-worked busy spin precursor to waiting on a request. In >>> particular, the addition of a 'request_started' helper function. This >>> has no corresponding concept within the fence framework. However, it >>> is only ever used in one place and the whole point of that place is to >>> always directly read the seqno for absolutely lowest latency possible. 
>>> So the simple solution is to just make the seqno test explicit at that >>> point now rather than later in the series (it was previously being >>> done anyway when fences become interrupt driven). >>> >>> v7: Rebased to newer nightly - lots of ring -> engine renaming and >>> interface change to get_seqno(). >>> >>> For: VIZ-5190 >>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com> >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >>> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> >>> --- >>> drivers/gpu/drm/i915/i915_debugfs.c | 5 ++- >>> drivers/gpu/drm/i915/i915_drv.h | 51 ++++++++++------------- >>> drivers/gpu/drm/i915/i915_gem.c | 72 +++++++++++++++++++++++++++++---- >>> drivers/gpu/drm/i915/intel_lrc.c | 1 + >>> drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + >>> drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++ >>> 6 files changed, 94 insertions(+), 39 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c >>> index 2d11b49..6917515 100644 >>> --- a/drivers/gpu/drm/i915/i915_debugfs.c >>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c >>> @@ -707,11 +707,12 @@ static int i915_gem_request_info(struct seq_file *m, void *data) >>> task = NULL; >>> if (req->pid) >>> task = pid_task(req->pid, PIDTYPE_PID); >>> - seq_printf(m, " %x @ %d: %s [%d]\n", >>> + seq_printf(m, " %x @ %d: %s [%d], fence = %x:%x\n", >>> req->seqno, >>> (int) (jiffies - req->emitted_jiffies), >>> task ? task->comm : "<unknown>", >>> - task ? task->pid : -1); >>> + task ? 
task->pid : -1, >>> + req->fence.context, req->fence.seqno); >>> rcu_read_unlock(); >>> } >>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >>> index d1e6e58..e5f49f3 100644 >>> --- a/drivers/gpu/drm/i915/i915_drv.h >>> +++ b/drivers/gpu/drm/i915/i915_drv.h >>> @@ -54,6 +54,7 @@ >>> #include <linux/pm_qos.h> >>> #include "intel_guc.h" >>> #include "intel_dpll_mgr.h" >>> +#include <linux/fence.h> >>> /* General customization: >>> */ >>> @@ -2242,7 +2243,17 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, >>> * initial reference taken using kref_init >>> */ >>> struct drm_i915_gem_request { >>> - struct kref ref; >>> + /** >>> + * Underlying object for implementing the signal/wait stuff. >>> + * NB: Never call fence_later() or return this fence object to user >>> + * land! Due to lazy allocation, scheduler re-ordering, pre-emption, >>> + * etc., there is no guarantee at all about the validity or >>> + * sequentiality of the fence's seqno! It is also unsafe to let >>> + * anything outside of the i915 driver get hold of the fence object >>> + * as the clean up when decrementing the reference count requires >>> + * holding the driver mutex lock. 
>>> + */ >>> + struct fence fence; >>> /** On Which ring this request was generated */ >>> struct drm_i915_private *i915; >>> @@ -2328,7 +2339,13 @@ struct drm_i915_gem_request * __must_check >>> i915_gem_request_alloc(struct intel_engine_cs *engine, >>> struct intel_context *ctx); >>> void i915_gem_request_cancel(struct drm_i915_gem_request *req); >>> -void i915_gem_request_free(struct kref *req_ref); >>> + >>> +static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, >>> + bool lazy_coherency) >>> +{ >>> + return fence_is_signaled(&req->fence); >>> +} >>> + >>> int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, >>> struct drm_file *file); >>> @@ -2348,7 +2365,7 @@ static inline struct drm_i915_gem_request * >>> i915_gem_request_reference(struct drm_i915_gem_request *req) >>> { >>> if (req) >>> - kref_get(&req->ref); >>> + fence_get(&req->fence); >>> return req; >>> } >>> @@ -2356,7 +2373,7 @@ static inline void >>> i915_gem_request_unreference(struct drm_i915_gem_request *req) >>> { >>> WARN_ON(!mutex_is_locked(&req->engine->dev->struct_mutex)); >>> - kref_put(&req->ref, i915_gem_request_free); >>> + fence_put(&req->fence); >>> } >>> static inline void >>> @@ -2368,7 +2385,7 @@ i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req) >>> return; >>> dev = req->engine->dev; >>> - if (kref_put_mutex(&req->ref, i915_gem_request_free, &dev->struct_mutex)) >>> + if (kref_put_mutex(&req->fence.refcount, fence_release, &dev->struct_mutex)) >>> mutex_unlock(&dev->struct_mutex); >>> } >>> @@ -2385,12 +2402,6 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, >>> } >>> /* >>> - * XXX: i915_gem_request_completed should be here but currently needs the >>> - * definition of i915_seqno_passed() which is below. It will be moved in >>> - * a later patch when the call to i915_seqno_passed() is obsoleted... 
>>> - */ >>> - >>> -/* >>> * A command that requires special handling by the command parser. >>> */ >>> struct drm_i915_cmd_descriptor { >>> @@ -3055,24 +3066,6 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) >>> return (int32_t)(seq1 - seq2) >= 0; >>> } >>> -static inline bool i915_gem_request_started(struct drm_i915_gem_request *req, >>> - bool lazy_coherency) >>> -{ >>> - if (!lazy_coherency && req->engine->irq_seqno_barrier) >>> - req->engine->irq_seqno_barrier(req->engine); >>> - return i915_seqno_passed(req->engine->get_seqno(req->engine), >>> - req->previous_seqno); >>> -} >>> - >>> -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, >>> - bool lazy_coherency) >>> -{ >>> - if (!lazy_coherency && req->engine->irq_seqno_barrier) >>> - req->engine->irq_seqno_barrier(req->engine); >>> - return i915_seqno_passed(req->engine->get_seqno(req->engine), >>> - req->seqno); >>> -} >>> - >>> int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno); >>> int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); >>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c >>> index ebef03b..1add29a9 100644 >>> --- a/drivers/gpu/drm/i915/i915_gem.c >>> +++ b/drivers/gpu/drm/i915/i915_gem.c >>> @@ -1183,6 +1183,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state) >>> { >>> unsigned long timeout; >>> unsigned cpu; >>> + uint32_t seqno; >>> /* When waiting for high frequency requests, e.g. 
during synchronous >>> * rendering split between the CPU and GPU, the finite amount of time >>> @@ -1198,12 +1199,14 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state) >>> return -EBUSY; >>> /* Only spin if we know the GPU is processing this request */ >>> - if (!i915_gem_request_started(req, true)) >>> + seqno = req->engine->get_seqno(req->engine); >>> + if (!i915_seqno_passed(seqno, req->previous_seqno)) >>> return -EAGAIN; >>> timeout = local_clock_us(&cpu) + 5; >>> while (!need_resched()) { >>> - if (i915_gem_request_completed(req, true)) >>> + seqno = req->engine->get_seqno(req->engine); >>> + if (i915_seqno_passed(seqno, req->seqno)) >>> return 0; >>> if (signal_pending_state(state, current)) >>> @@ -1215,7 +1218,10 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state) >>> cpu_relax_lowlatency(); >>> } >>> - if (i915_gem_request_completed(req, false)) >>> + if (req->engine->irq_seqno_barrier) >>> + req->engine->irq_seqno_barrier(req->engine); >>> + seqno = req->engine->get_seqno(req->engine); >>> + if (i915_seqno_passed(seqno, req->seqno)) >>> return 0; >>> return -EAGAIN; >>> @@ -2721,12 +2727,14 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv, >>> } >>> } >>> -void i915_gem_request_free(struct kref *req_ref) >>> +static void i915_gem_request_free(struct fence *req_fence) >>> { >>> - struct drm_i915_gem_request *req = container_of(req_ref, >>> - typeof(*req), ref); >>> + struct drm_i915_gem_request *req = container_of(req_fence, >>> + typeof(*req), fence); >>> struct intel_context *ctx = req->ctx; >>> + WARN_ON(!mutex_is_locked(&req->engine->dev->struct_mutex)); >>> + >>> if (req->file_priv) >>> i915_gem_request_remove_from_client(req); >>> >> Is kmem_cache_free rcu-safe? >> >> I don't think it is, and that would cause some hard to debug issues. 
>> >> Adding SLAB_DESTROY_BY_RCU to flags wouldn't do what you would expect here, >> so your best bet would be to do a call_rcu(&fence->rcu, wrapper_for_kmem_cache_free); >> >> ~Maarten > I don't understand what you mean? Are you referring to the kmem_cache_free that frees the request object at the end of the above function (which you have actually deleted from your reply)? Or are you referring to something inside the i915_gem_request_remove_from_client() call that your comments seem to be in reply to? > > If you mean the free of the request itself, then the only usage of that particular kmem_cache are within the driver mutex lock. Does that not make it safe? If you mean the client remove, then where is the kmem_cache_free in that call path? Just because a function is locked doesn't make it RCU safe. The fence api has a function called fence_get_rcu and it requires that the memory backing the fence should be freed with kfree_rcu, call_rcu, or by calling synchronize_rcu before freeing. In particular kmem_cache_free wouldn't work as intended, which results in another fence possibly re-using the memory. Needs a __rcu annotated version of https://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=tasklet&id=59f63caac74bbea817225e134e51ca97ecd06568 ~Maarten
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 2d11b49..6917515 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -707,11 +707,12 @@ static int i915_gem_request_info(struct seq_file *m, void *data) task = NULL; if (req->pid) task = pid_task(req->pid, PIDTYPE_PID); - seq_printf(m, " %x @ %d: %s [%d]\n", + seq_printf(m, " %x @ %d: %s [%d], fence = %x:%x\n", req->seqno, (int) (jiffies - req->emitted_jiffies), task ? task->comm : "<unknown>", - task ? task->pid : -1); + task ? task->pid : -1, + req->fence.context, req->fence.seqno); rcu_read_unlock(); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d1e6e58..e5f49f3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -54,6 +54,7 @@ #include <linux/pm_qos.h> #include "intel_guc.h" #include "intel_dpll_mgr.h" +#include <linux/fence.h> /* General customization: */ @@ -2242,7 +2243,17 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, * initial reference taken using kref_init */ struct drm_i915_gem_request { - struct kref ref; + /** + * Underlying object for implementing the signal/wait stuff. + * NB: Never call fence_later() or return this fence object to user + * land! Due to lazy allocation, scheduler re-ordering, pre-emption, + * etc., there is no guarantee at all about the validity or + * sequentiality of the fence's seqno! It is also unsafe to let + * anything outside of the i915 driver get hold of the fence object + * as the clean up when decrementing the reference count requires + * holding the driver mutex lock. 
+ */ + struct fence fence; /** On Which ring this request was generated */ struct drm_i915_private *i915; @@ -2328,7 +2339,13 @@ struct drm_i915_gem_request * __must_check i915_gem_request_alloc(struct intel_engine_cs *engine, struct intel_context *ctx); void i915_gem_request_cancel(struct drm_i915_gem_request *req); -void i915_gem_request_free(struct kref *req_ref); + +static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, + bool lazy_coherency) +{ + return fence_is_signaled(&req->fence); +} + int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, struct drm_file *file); @@ -2348,7 +2365,7 @@ static inline struct drm_i915_gem_request * i915_gem_request_reference(struct drm_i915_gem_request *req) { if (req) - kref_get(&req->ref); + fence_get(&req->fence); return req; } @@ -2356,7 +2373,7 @@ static inline void i915_gem_request_unreference(struct drm_i915_gem_request *req) { WARN_ON(!mutex_is_locked(&req->engine->dev->struct_mutex)); - kref_put(&req->ref, i915_gem_request_free); + fence_put(&req->fence); } static inline void @@ -2368,7 +2385,7 @@ i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req) return; dev = req->engine->dev; - if (kref_put_mutex(&req->ref, i915_gem_request_free, &dev->struct_mutex)) + if (kref_put_mutex(&req->fence.refcount, fence_release, &dev->struct_mutex)) mutex_unlock(&dev->struct_mutex); } @@ -2385,12 +2402,6 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, } /* - * XXX: i915_gem_request_completed should be here but currently needs the - * definition of i915_seqno_passed() which is below. It will be moved in - * a later patch when the call to i915_seqno_passed() is obsoleted... - */ - -/* * A command that requires special handling by the command parser. 
*/ struct drm_i915_cmd_descriptor { @@ -3055,24 +3066,6 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) return (int32_t)(seq1 - seq2) >= 0; } -static inline bool i915_gem_request_started(struct drm_i915_gem_request *req, - bool lazy_coherency) -{ - if (!lazy_coherency && req->engine->irq_seqno_barrier) - req->engine->irq_seqno_barrier(req->engine); - return i915_seqno_passed(req->engine->get_seqno(req->engine), - req->previous_seqno); -} - -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, - bool lazy_coherency) -{ - if (!lazy_coherency && req->engine->irq_seqno_barrier) - req->engine->irq_seqno_barrier(req->engine); - return i915_seqno_passed(req->engine->get_seqno(req->engine), - req->seqno); -} - int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno); int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ebef03b..1add29a9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1183,6 +1183,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state) { unsigned long timeout; unsigned cpu; + uint32_t seqno; /* When waiting for high frequency requests, e.g. 
during synchronous * rendering split between the CPU and GPU, the finite amount of time @@ -1198,12 +1199,14 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state) return -EBUSY; /* Only spin if we know the GPU is processing this request */ - if (!i915_gem_request_started(req, true)) + seqno = req->engine->get_seqno(req->engine); + if (!i915_seqno_passed(seqno, req->previous_seqno)) return -EAGAIN; timeout = local_clock_us(&cpu) + 5; while (!need_resched()) { - if (i915_gem_request_completed(req, true)) + seqno = req->engine->get_seqno(req->engine); + if (i915_seqno_passed(seqno, req->seqno)) return 0; if (signal_pending_state(state, current)) @@ -1215,7 +1218,10 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state) cpu_relax_lowlatency(); } - if (i915_gem_request_completed(req, false)) + if (req->engine->irq_seqno_barrier) + req->engine->irq_seqno_barrier(req->engine); + seqno = req->engine->get_seqno(req->engine); + if (i915_seqno_passed(seqno, req->seqno)) return 0; return -EAGAIN; @@ -2721,12 +2727,14 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv, } } -void i915_gem_request_free(struct kref *req_ref) +static void i915_gem_request_free(struct fence *req_fence) { - struct drm_i915_gem_request *req = container_of(req_ref, - typeof(*req), ref); + struct drm_i915_gem_request *req = container_of(req_fence, + typeof(*req), fence); struct intel_context *ctx = req->ctx; + WARN_ON(!mutex_is_locked(&req->engine->dev->struct_mutex)); + if (req->file_priv) i915_gem_request_remove_from_client(req); @@ -2740,6 +2748,48 @@ void i915_gem_request_free(struct kref *req_ref) kmem_cache_free(req->i915->requests, req); } +static bool i915_gem_request_enable_signaling(struct fence *req_fence) +{ + /* Interrupt driven fences are not implemented yet.*/ + WARN(true, "This should not be called!"); + return true; +} + +static bool i915_gem_request_is_completed(struct fence *req_fence) +{ + struct 
drm_i915_gem_request *req = container_of(req_fence, + typeof(*req), fence); + u32 seqno; + +/* if (!lazy_coherency && req->engine->irq_seqno_barrier) + req->engine->irq_seqno_barrier(req->engine);*/ + + seqno = req->engine->get_seqno(req->engine); + + return i915_seqno_passed(seqno, req->seqno); +} + +static const char *i915_gem_request_get_driver_name(struct fence *req_fence) +{ + return "i915"; +} + +static const char *i915_gem_request_get_timeline_name(struct fence *req_fence) +{ + struct drm_i915_gem_request *req = container_of(req_fence, + typeof(*req), fence); + return req->engine->name; +} + +static const struct fence_ops i915_gem_request_fops = { + .enable_signaling = i915_gem_request_enable_signaling, + .signaled = i915_gem_request_is_completed, + .wait = fence_default_wait, + .release = i915_gem_request_free, + .get_driver_name = i915_gem_request_get_driver_name, + .get_timeline_name = i915_gem_request_get_timeline_name, +}; + static inline int __i915_gem_request_alloc(struct intel_engine_cs *engine, struct intel_context *ctx, @@ -2762,7 +2812,6 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) goto err; - kref_init(&req->ref); req->i915 = dev_priv; req->engine = engine; req->ctx = ctx; @@ -2777,6 +2826,9 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, goto err; } + fence_init(&req->fence, &i915_gem_request_fops, &engine->fence_lock, + engine->fence_context, req->seqno); + /* * Reserve space in the ring buffer for all the commands required to * eventually emit this request. 
This is to guarantee that the @@ -4887,7 +4939,7 @@ i915_gem_init_hw(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *engine; - int ret, j; + int ret, j, fence_base; if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) return -EIO; @@ -4957,10 +5009,14 @@ i915_gem_init_hw(struct drm_device *dev) if (ret) goto out; + fence_base = fence_context_alloc(I915_NUM_ENGINES); + /* Now it is safe to go back round and do everything else: */ for_each_engine(engine, dev_priv) { struct drm_i915_gem_request *req; + engine->fence_context = fence_base + engine->id; + req = i915_gem_request_alloc(engine, NULL); if (IS_ERR(req)) { ret = PTR_ERR(req); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5e08ea5..268e024 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2133,6 +2133,7 @@ logical_ring_init(struct drm_device *dev, struct intel_engine_cs *engine) engine->dev = dev; INIT_LIST_HEAD(&engine->active_list); INIT_LIST_HEAD(&engine->request_list); + spin_lock_init(&engine->fence_lock); i915_gem_batch_pool_init(dev, &engine->batch_pool); init_waitqueue_head(&engine->irq_queue); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 19ebe77..a52b81c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2229,6 +2229,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, INIT_LIST_HEAD(&engine->request_list); INIT_LIST_HEAD(&engine->execlist_queue); INIT_LIST_HEAD(&engine->buffers); + spin_lock_init(&engine->fence_lock); i915_gem_batch_pool_init(dev, &engine->batch_pool); memset(engine->semaphore.sync_seqno, 0, sizeof(engine->semaphore.sync_seqno)); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2ade194..2cd9d2c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ 
b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -348,6 +348,9 @@ struct intel_engine_cs { * to encode the command length in the header). */ u32 (*get_cmd_length_mask)(u32 cmd_header); + + unsigned fence_context; + spinlock_t fence_lock; }; static inline bool