Message ID | 1464971847-15809-14-git-send-email-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Fri, Jun 03, 2016 at 05:36:38PM +0100, Chris Wilson wrote: > dma-buf provides a generic fence class for interoperation between > drivers. Internally we use the request structure as a fence, and so with > only a little bit of interfacing we can rebase those requests on top of > dma-buf fences. This will allow us, in the future, to pass those fences > back to userspace or between drivers. > > v2: The fence_context needs to be globally unique, not just unique to > this device. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Jesse Barnes <jbarnes@virtuousgeek.org> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 2 +- > drivers/gpu/drm/i915/i915_gem_request.c | 116 ++++++++++++++++++++++++++--- > drivers/gpu/drm/i915/i915_gem_request.h | 33 ++++---- > drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- > drivers/gpu/drm/i915/i915_guc_submission.c | 4 +- > drivers/gpu/drm/i915/i915_trace.h | 10 +-- > drivers/gpu/drm/i915/intel_breadcrumbs.c | 7 +- > drivers/gpu/drm/i915/intel_lrc.c | 3 +- > drivers/gpu/drm/i915/intel_ringbuffer.c | 11 +-- > drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + > 10 files changed, 143 insertions(+), 46 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 8f576b443ff6..8e37315443f3 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -768,7 +768,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data) > if (req->pid) > task = pid_task(req->pid, PIDTYPE_PID); > seq_printf(m, " %x @ %d: %s [%d]\n", > - req->seqno, > + req->fence.seqno, > (int) (jiffies - req->emitted_jiffies), > task ? task->comm : "<unknown>", > task ? 
task->pid : -1); > diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c > index 34b2f151cdfc..512b15153ac6 100644 > --- a/drivers/gpu/drm/i915/i915_gem_request.c > +++ b/drivers/gpu/drm/i915/i915_gem_request.c > @@ -24,6 +24,98 @@ > > #include "i915_drv.h" > > +static inline struct drm_i915_gem_request * > +to_i915_request(struct fence *fence) > +{ > + return container_of(fence, struct drm_i915_gem_request, fence); > +} > + > +static const char *i915_fence_get_driver_name(struct fence *fence) > +{ > + return "i915"; > +} > + > +static const char *i915_fence_get_timeline_name(struct fence *fence) > +{ > + /* Timelines are bound by eviction to a VM. However, since > + * we only have a global seqno at the moment, we only have > + * a single timeline. Note that each timeline will have > + * multiple execution contexts (fence contexts) as we allow > + * engines within a single timeline to execute in parallel. > + */ > + return "global"; > +} > + > +static bool i915_fence_signaled(struct fence *fence) > +{ > + return i915_gem_request_completed(to_i915_request(fence)); > +} > + > +static bool i915_fence_enable_signaling(struct fence *fence) > +{ > + if (i915_fence_signaled(fence)) > + return false; > + > + return intel_engine_enable_signaling(to_i915_request(fence)) == 0; > +} > + > +static signed long i915_fence_wait(struct fence *fence, > + bool interruptible, > + signed long timeout_jiffies) > +{ > + s64 timeout_ns, *timeout; > + int ret; > + > + if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) { > + timeout_ns = jiffies_to_nsecs(timeout_jiffies); > + timeout = &timeout_ns; > + } else > + timeout = NULL; > + > + ret = __i915_wait_request(to_i915_request(fence), > + interruptible, timeout, > + NULL); > + if (ret == -ETIME) > + return 0; > + > + if (ret < 0) > + return ret; > + > + if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) > + timeout_jiffies = nsecs_to_jiffies(timeout_ns); > + > + return timeout_jiffies; > +} > + > +static void 
i915_fence_value_str(struct fence *fence, char *str, int size) > +{ > + snprintf(str, size, "%u", fence->seqno); > +} > + > +static void i915_fence_timeline_value_str(struct fence *fence, char *str, > + int size) > +{ > + snprintf(str, size, "%u", > + intel_engine_get_seqno(to_i915_request(fence)->engine)); > +} > + > +static void i915_fence_release(struct fence *fence) > +{ > + struct drm_i915_gem_request *req = to_i915_request(fence); > + kmem_cache_free(req->i915->requests, req); > +} > + > +static const struct fence_ops i915_fence_ops = { > + .get_driver_name = i915_fence_get_driver_name, > + .get_timeline_name = i915_fence_get_timeline_name, > + .enable_signaling = i915_fence_enable_signaling, > + .signaled = i915_fence_signaled, > + .wait = i915_fence_wait, > + .release = i915_fence_release, > + .fence_value_str = i915_fence_value_str, > + .timeline_value_str = i915_fence_timeline_value_str, > +}; > + > static int i915_gem_check_wedge(unsigned reset_counter, bool interruptible) > { > if (__i915_terminally_wedged(reset_counter)) > @@ -117,6 +209,7 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, > struct drm_i915_private *dev_priv = engine->i915; > unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); > struct drm_i915_gem_request *req; > + u32 seqno; > int ret; > > if (!req_out) > @@ -136,11 +229,17 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, > if (req == NULL) > return -ENOMEM; > > - ret = i915_gem_get_seqno(dev_priv, &req->seqno); > + ret = i915_gem_get_seqno(dev_priv, &seqno); > if (ret) > goto err; > > - kref_init(&req->ref); > + spin_lock_init(&req->lock); > + fence_init(&req->fence, > + &i915_fence_ops, > + &req->lock, > + engine->fence_context, > + seqno); > + > req->i915 = dev_priv; > req->engine = engine; > req->reset_counter = reset_counter; > @@ -376,7 +475,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, > */ > request->emitted_jiffies = jiffies; > request->previous_seqno = 
engine->last_submitted_seqno; > - smp_store_mb(engine->last_submitted_seqno, request->seqno); > + smp_store_mb(engine->last_submitted_seqno, request->fence.seqno); > list_add_tail(&request->list, &engine->request_list); > > /* Record the position of the start of the request so that > @@ -543,7 +642,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, > if (i915_spin_request(req, state, 5)) > goto complete; > > - intel_wait_init(&wait, req->seqno); > + intel_wait_init(&wait, req->fence.seqno); > set_current_state(state); > if (intel_engine_add_wait(req->engine, &wait)) > /* In order to check that we haven't missed the interrupt > @@ -609,7 +708,7 @@ complete: > *timeout = 0; > } > > - if (rps && req->seqno == req->engine->last_submitted_seqno) { > + if (rps && req->fence.seqno == req->engine->last_submitted_seqno) { > /* The GPU is now idle and this client has stalled. > * Since no other client has submitted a request in the > * meantime, assume that this client is the only one > @@ -650,10 +749,3 @@ int i915_wait_request(struct drm_i915_gem_request *req) > > return 0; > } > - > -void i915_gem_request_free(struct kref *req_ref) > -{ > - struct drm_i915_gem_request *req = > - container_of(req_ref, typeof(*req), ref); > - kmem_cache_free(req->i915->requests, req); > -} > diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h > index 166e0733d2d8..248aec2c09b7 100644 > --- a/drivers/gpu/drm/i915/i915_gem_request.h > +++ b/drivers/gpu/drm/i915/i915_gem_request.h > @@ -25,6 +25,8 @@ > #ifndef I915_GEM_REQUEST_H > #define I915_GEM_REQUEST_H > > +#include <linux/fence.h> > + > /** > * Request queue structure. > * > @@ -36,11 +38,11 @@ > * emission time to be associated with the request for tracking how far ahead > * of the GPU the submission is. > * > - * The requests are reference counted, so upon creation they should have an > - * initial reference taken using kref_init > + * The requests are reference counted. 
> */ > struct drm_i915_gem_request { > - struct kref ref; > + struct fence fence; > + spinlock_t lock; > > /** On Which ring this request was generated */ > struct drm_i915_private *i915; > @@ -68,12 +70,6 @@ struct drm_i915_gem_request { > */ > u32 previous_seqno; > > - /** GEM sequence number associated with this request, > - * when the HWS breadcrumb is equal or greater than this the GPU > - * has finished processing this request. > - */ > - u32 seqno; > - > /** Position in the ringbuffer of the start of the request */ > u32 head; > > @@ -152,7 +148,6 @@ __request_to_i915(const struct drm_i915_gem_request *request) > struct drm_i915_gem_request * __must_check > i915_gem_request_alloc(struct intel_engine_cs *engine, > struct i915_gem_context *ctx); > -void i915_gem_request_free(struct kref *req_ref); > int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, > struct drm_file *file); > void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); > @@ -160,7 +155,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); > static inline uint32_t > i915_gem_request_get_seqno(struct drm_i915_gem_request *req) > { > - return req ? req->seqno : 0; > + return req ? req->fence.seqno : 0; > } > > static inline struct intel_engine_cs * > @@ -170,17 +165,23 @@ i915_gem_request_get_engine(struct drm_i915_gem_request *req) > } > > static inline struct drm_i915_gem_request * > +to_request(struct fence *fence) > +{ > + /* We assume that NULL fence/request are interoperable */ > + BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0); > + return container_of(fence, struct drm_i915_gem_request, fence); For future-proofing to make sure we don't accidentally call this on a foreign fence: BUG_ON(fence->ops != i915_fence_ops); BUG_ON since I don't want to splatter all callers with handlers for this. And if we ever get this wrong debugging it with just some randomy corruption would be serious pain, so I think the overhead is justified. 
-Daniel > +} > + > +static inline struct drm_i915_gem_request * > i915_gem_request_reference(struct drm_i915_gem_request *req) > { > - if (req) > - kref_get(&req->ref); > - return req; > + return to_request(fence_get(&req->fence)); > } > > static inline void > i915_gem_request_unreference(struct drm_i915_gem_request *req) > { > - kref_put(&req->ref, i915_gem_request_free); > + fence_put(&req->fence); > } > > static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, > @@ -230,7 +231,7 @@ static inline bool i915_gem_request_started(const struct drm_i915_gem_request *r > static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) > { > return i915_seqno_passed(intel_engine_get_seqno(req->engine), > - req->seqno); > + req->fence.seqno); > } > > bool __i915_spin_request(const struct drm_i915_gem_request *request, > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c > index 3ba5302ce19f..5332bd32c555 100644 > --- a/drivers/gpu/drm/i915/i915_gpu_error.c > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c > @@ -1181,7 +1181,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, > } > > erq = &error->ring[i].requests[count++]; > - erq->seqno = request->seqno; > + erq->seqno = request->fence.seqno; > erq->jiffies = request->emitted_jiffies; > erq->tail = request->postfix; > } > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c > index ac72451c571c..629111d42ce0 100644 > --- a/drivers/gpu/drm/i915/i915_guc_submission.c > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c > @@ -538,7 +538,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc, > rq->engine); > > wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; > - wqi->fence_id = rq->seqno; > + wqi->fence_id = rq->fence.seqno; > > kunmap_atomic(base); > } > @@ -578,7 +578,7 @@ int i915_guc_submit(struct drm_i915_gem_request *rq) > client->b_fail += 1; > > 
guc->submissions[engine_id] += 1; > - guc->last_seqno[engine_id] = rq->seqno; > + guc->last_seqno[engine_id] = rq->fence.seqno; > > return b_ret; > } > diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h > index f59cf07184ae..0296a77b586a 100644 > --- a/drivers/gpu/drm/i915/i915_trace.h > +++ b/drivers/gpu/drm/i915/i915_trace.h > @@ -465,7 +465,7 @@ TRACE_EVENT(i915_gem_ring_sync_to, > __entry->dev = from->i915->dev->primary->index; > __entry->sync_from = from->id; > __entry->sync_to = to_req->engine->id; > - __entry->seqno = i915_gem_request_get_seqno(req); > + __entry->seqno = req->fence.seqno; > ), > > TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u", > @@ -488,9 +488,9 @@ TRACE_EVENT(i915_gem_ring_dispatch, > TP_fast_assign( > __entry->dev = req->i915->dev->primary->index; > __entry->ring = req->engine->id; > - __entry->seqno = req->seqno; > + __entry->seqno = req->fence.seqno; > __entry->flags = flags; > - intel_engine_enable_signaling(req); > + fence_enable_sw_signaling(&req->fence); > ), > > TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x", > @@ -533,7 +533,7 @@ DECLARE_EVENT_CLASS(i915_gem_request, > TP_fast_assign( > __entry->dev = req->i915->dev->primary->index; > __entry->ring = req->engine->id; > - __entry->seqno = req->seqno; > + __entry->seqno = req->fence.seqno; > ), > > TP_printk("dev=%u, ring=%u, seqno=%u", > @@ -595,7 +595,7 @@ TRACE_EVENT(i915_gem_request_wait_begin, > TP_fast_assign( > __entry->dev = req->i915->dev->primary->index; > __entry->ring = req->engine->id; > - __entry->seqno = req->seqno; > + __entry->seqno = req->fence.seqno; > __entry->blocking = > mutex_is_locked(&req->i915->dev->struct_mutex); > ), > diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c > index dc65a007fa20..05f62f706897 100644 > --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c > +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c > @@ -396,6 +396,7 @@ static int intel_breadcrumbs_signaler(void 
*arg) > */ > intel_engine_remove_wait(engine, > &request->signaling.wait); > + fence_signal(&request->fence); > > /* Find the next oldest signal. Note that as we have > * not been holding the lock, another client may > @@ -444,7 +445,7 @@ int intel_engine_enable_signaling(struct drm_i915_gem_request *request) > } > > request->signaling.wait.task = b->signaler; > - request->signaling.wait.seqno = request->seqno; > + request->signaling.wait.seqno = request->fence.seqno; > i915_gem_request_reference(request); > > /* First add ourselves into the list of waiters, but register our > @@ -466,8 +467,8 @@ int intel_engine_enable_signaling(struct drm_i915_gem_request *request) > p = &b->signals.rb_node; > while (*p) { > parent = *p; > - if (i915_seqno_passed(request->seqno, > - to_signal(parent)->seqno)) { > + if (i915_seqno_passed(request->fence.seqno, > + to_signal(parent)->fence.seqno)) { > p = &parent->rb_right; > first = false; > } else > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 0742a849acce..c7a9ebdb0811 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -1731,7 +1731,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request) > intel_hws_seqno_address(request->engine) | > MI_FLUSH_DW_USE_GTT); > intel_logical_ring_emit(ringbuf, 0); > - intel_logical_ring_emit(ringbuf, request->seqno); > + intel_logical_ring_emit(ringbuf, request->fence.seqno); > intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); > intel_logical_ring_emit(ringbuf, MI_NOOP); > return intel_logical_ring_advance_and_submit(request); > @@ -1964,6 +1964,7 @@ logical_ring_setup(struct drm_device *dev, enum intel_engine_id id) > engine->exec_id = info->exec_id; > engine->guc_id = info->guc_id; > engine->mmio_base = info->mmio_base; > + engine->fence_context = fence_context_alloc(1); > > engine->i915 = dev_priv; > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 327ad7fdf118..c3d6345aa2c1 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -1266,7 +1266,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, > PIPE_CONTROL_CS_STALL); > intel_ring_emit(signaller, lower_32_bits(gtt_offset)); > intel_ring_emit(signaller, upper_32_bits(gtt_offset)); > - intel_ring_emit(signaller, signaller_req->seqno); > + intel_ring_emit(signaller, signaller_req->fence.seqno); > intel_ring_emit(signaller, 0); > intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | > MI_SEMAPHORE_TARGET(waiter->hw_id)); > @@ -1304,7 +1304,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, > intel_ring_emit(signaller, lower_32_bits(gtt_offset) | > MI_FLUSH_DW_USE_GTT); > intel_ring_emit(signaller, upper_32_bits(gtt_offset)); > - intel_ring_emit(signaller, signaller_req->seqno); > + intel_ring_emit(signaller, signaller_req->fence.seqno); > intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | > MI_SEMAPHORE_TARGET(waiter->hw_id)); > intel_ring_emit(signaller, 0); > @@ -1337,7 +1337,7 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req, > if (i915_mmio_reg_valid(mbox_reg)) { > intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); > intel_ring_emit_reg(signaller, mbox_reg); > - intel_ring_emit(signaller, signaller_req->seqno); > + intel_ring_emit(signaller, signaller_req->fence.seqno); > } > } > > @@ -1373,7 +1373,7 @@ gen6_add_request(struct drm_i915_gem_request *req) > intel_ring_emit(engine, MI_STORE_DWORD_INDEX); > intel_ring_emit(engine, > I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); > - intel_ring_emit(engine, req->seqno); > + intel_ring_emit(engine, req->fence.seqno); > intel_ring_emit(engine, MI_USER_INTERRUPT); > __intel_ring_advance(engine); > > @@ -1623,7 +1623,7 @@ i9xx_add_request(struct drm_i915_gem_request *req) > intel_ring_emit(engine, MI_STORE_DWORD_INDEX); > intel_ring_emit(engine, > 
I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); > - intel_ring_emit(engine, req->seqno); > + intel_ring_emit(engine, req->fence.seqno); > intel_ring_emit(engine, MI_USER_INTERRUPT); > __intel_ring_advance(engine); > > @@ -2092,6 +2092,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, > WARN_ON(engine->buffer); > > engine->i915 = dev_priv; > + engine->fence_context = fence_context_alloc(1); > INIT_LIST_HEAD(&engine->active_list); > INIT_LIST_HEAD(&engine->request_list); > INIT_LIST_HEAD(&engine->execlist_queue); > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 6017367e94fb..b041fb6a6d01 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -158,6 +158,7 @@ struct intel_engine_cs { > unsigned int exec_id; > unsigned int hw_id; > unsigned int guc_id; /* XXX same as hw_id? */ > + unsigned fence_context; > u32 mmio_base; > struct intel_ringbuffer *buffer; > struct list_head buffers; > -- > 2.8.1 >
On Wed, Jun 08, 2016 at 11:14:23AM +0200, Daniel Vetter wrote: > On Fri, Jun 03, 2016 at 05:36:38PM +0100, Chris Wilson wrote: > > static inline struct drm_i915_gem_request * > > +to_request(struct fence *fence) > > +{ > > + /* We assume that NULL fence/request are interoperable */ > > + BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0); > > + return container_of(fence, struct drm_i915_gem_request, fence); > > For future-proofing to make sure we don't accidentally call this on a > foreign fence: > > BUG_ON(fence->ops != &i915_fence_ops); > > BUG_ON since I don't want to splatter all callers with handlers for this. > And if we ever get this wrong debugging it with just some random > corruption would be serious pain, so I think the overhead is justified. How about I just remove the function? It is only used on known requests today, or call it __to_request_from_fence() ? -Chris
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 8f576b443ff6..8e37315443f3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -768,7 +768,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data) if (req->pid) task = pid_task(req->pid, PIDTYPE_PID); seq_printf(m, " %x @ %d: %s [%d]\n", - req->seqno, + req->fence.seqno, (int) (jiffies - req->emitted_jiffies), task ? task->comm : "<unknown>", task ? task->pid : -1); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 34b2f151cdfc..512b15153ac6 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -24,6 +24,98 @@ #include "i915_drv.h" +static inline struct drm_i915_gem_request * +to_i915_request(struct fence *fence) +{ + return container_of(fence, struct drm_i915_gem_request, fence); +} + +static const char *i915_fence_get_driver_name(struct fence *fence) +{ + return "i915"; +} + +static const char *i915_fence_get_timeline_name(struct fence *fence) +{ + /* Timelines are bound by eviction to a VM. However, since + * we only have a global seqno at the moment, we only have + * a single timeline. Note that each timeline will have + * multiple execution contexts (fence contexts) as we allow + * engines within a single timeline to execute in parallel. 
+ */ + return "global"; +} + +static bool i915_fence_signaled(struct fence *fence) +{ + return i915_gem_request_completed(to_i915_request(fence)); +} + +static bool i915_fence_enable_signaling(struct fence *fence) +{ + if (i915_fence_signaled(fence)) + return false; + + return intel_engine_enable_signaling(to_i915_request(fence)) == 0; +} + +static signed long i915_fence_wait(struct fence *fence, + bool interruptible, + signed long timeout_jiffies) +{ + s64 timeout_ns, *timeout; + int ret; + + if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) { + timeout_ns = jiffies_to_nsecs(timeout_jiffies); + timeout = &timeout_ns; + } else + timeout = NULL; + + ret = __i915_wait_request(to_i915_request(fence), + interruptible, timeout, + NULL); + if (ret == -ETIME) + return 0; + + if (ret < 0) + return ret; + + if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) + timeout_jiffies = nsecs_to_jiffies(timeout_ns); + + return timeout_jiffies; +} + +static void i915_fence_value_str(struct fence *fence, char *str, int size) +{ + snprintf(str, size, "%u", fence->seqno); +} + +static void i915_fence_timeline_value_str(struct fence *fence, char *str, + int size) +{ + snprintf(str, size, "%u", + intel_engine_get_seqno(to_i915_request(fence)->engine)); +} + +static void i915_fence_release(struct fence *fence) +{ + struct drm_i915_gem_request *req = to_i915_request(fence); + kmem_cache_free(req->i915->requests, req); +} + +static const struct fence_ops i915_fence_ops = { + .get_driver_name = i915_fence_get_driver_name, + .get_timeline_name = i915_fence_get_timeline_name, + .enable_signaling = i915_fence_enable_signaling, + .signaled = i915_fence_signaled, + .wait = i915_fence_wait, + .release = i915_fence_release, + .fence_value_str = i915_fence_value_str, + .timeline_value_str = i915_fence_timeline_value_str, +}; + static int i915_gem_check_wedge(unsigned reset_counter, bool interruptible) { if (__i915_terminally_wedged(reset_counter)) @@ -117,6 +209,7 @@ __i915_gem_request_alloc(struct 
intel_engine_cs *engine, struct drm_i915_private *dev_priv = engine->i915; unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); struct drm_i915_gem_request *req; + u32 seqno; int ret; if (!req_out) @@ -136,11 +229,17 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, if (req == NULL) return -ENOMEM; - ret = i915_gem_get_seqno(dev_priv, &req->seqno); + ret = i915_gem_get_seqno(dev_priv, &seqno); if (ret) goto err; - kref_init(&req->ref); + spin_lock_init(&req->lock); + fence_init(&req->fence, + &i915_fence_ops, + &req->lock, + engine->fence_context, + seqno); + req->i915 = dev_priv; req->engine = engine; req->reset_counter = reset_counter; @@ -376,7 +475,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, */ request->emitted_jiffies = jiffies; request->previous_seqno = engine->last_submitted_seqno; - smp_store_mb(engine->last_submitted_seqno, request->seqno); + smp_store_mb(engine->last_submitted_seqno, request->fence.seqno); list_add_tail(&request->list, &engine->request_list); /* Record the position of the start of the request so that @@ -543,7 +642,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, if (i915_spin_request(req, state, 5)) goto complete; - intel_wait_init(&wait, req->seqno); + intel_wait_init(&wait, req->fence.seqno); set_current_state(state); if (intel_engine_add_wait(req->engine, &wait)) /* In order to check that we haven't missed the interrupt @@ -609,7 +708,7 @@ complete: *timeout = 0; } - if (rps && req->seqno == req->engine->last_submitted_seqno) { + if (rps && req->fence.seqno == req->engine->last_submitted_seqno) { /* The GPU is now idle and this client has stalled. 
* Since no other client has submitted a request in the * meantime, assume that this client is the only one @@ -650,10 +749,3 @@ int i915_wait_request(struct drm_i915_gem_request *req) return 0; } - -void i915_gem_request_free(struct kref *req_ref) -{ - struct drm_i915_gem_request *req = - container_of(req_ref, typeof(*req), ref); - kmem_cache_free(req->i915->requests, req); -} diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 166e0733d2d8..248aec2c09b7 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -25,6 +25,8 @@ #ifndef I915_GEM_REQUEST_H #define I915_GEM_REQUEST_H +#include <linux/fence.h> + /** * Request queue structure. * @@ -36,11 +38,11 @@ * emission time to be associated with the request for tracking how far ahead * of the GPU the submission is. * - * The requests are reference counted, so upon creation they should have an - * initial reference taken using kref_init + * The requests are reference counted. */ struct drm_i915_gem_request { - struct kref ref; + struct fence fence; + spinlock_t lock; /** On Which ring this request was generated */ struct drm_i915_private *i915; @@ -68,12 +70,6 @@ struct drm_i915_gem_request { */ u32 previous_seqno; - /** GEM sequence number associated with this request, - * when the HWS breadcrumb is equal or greater than this the GPU - * has finished processing this request. 
- */ - u32 seqno; - /** Position in the ringbuffer of the start of the request */ u32 head; @@ -152,7 +148,6 @@ __request_to_i915(const struct drm_i915_gem_request *request) struct drm_i915_gem_request * __must_check i915_gem_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx); -void i915_gem_request_free(struct kref *req_ref); int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, struct drm_file *file); void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); @@ -160,7 +155,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); static inline uint32_t i915_gem_request_get_seqno(struct drm_i915_gem_request *req) { - return req ? req->seqno : 0; + return req ? req->fence.seqno : 0; } static inline struct intel_engine_cs * @@ -170,17 +165,23 @@ i915_gem_request_get_engine(struct drm_i915_gem_request *req) } static inline struct drm_i915_gem_request * +to_request(struct fence *fence) +{ + /* We assume that NULL fence/request are interoperable */ + BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0); + return container_of(fence, struct drm_i915_gem_request, fence); +} + +static inline struct drm_i915_gem_request * i915_gem_request_reference(struct drm_i915_gem_request *req) { - if (req) - kref_get(&req->ref); - return req; + return to_request(fence_get(&req->fence)); } static inline void i915_gem_request_unreference(struct drm_i915_gem_request *req) { - kref_put(&req->ref, i915_gem_request_free); + fence_put(&req->fence); } static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, @@ -230,7 +231,7 @@ static inline bool i915_gem_request_started(const struct drm_i915_gem_request *r static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) { return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->seqno); + req->fence.seqno); } bool __i915_spin_request(const struct drm_i915_gem_request *request, diff --git 
a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 3ba5302ce19f..5332bd32c555 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1181,7 +1181,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, } erq = &error->ring[i].requests[count++]; - erq->seqno = request->seqno; + erq->seqno = request->fence.seqno; erq->jiffies = request->emitted_jiffies; erq->tail = request->postfix; } diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index ac72451c571c..629111d42ce0 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -538,7 +538,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc, rq->engine); wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; - wqi->fence_id = rq->seqno; + wqi->fence_id = rq->fence.seqno; kunmap_atomic(base); } @@ -578,7 +578,7 @@ int i915_guc_submit(struct drm_i915_gem_request *rq) client->b_fail += 1; guc->submissions[engine_id] += 1; - guc->last_seqno[engine_id] = rq->seqno; + guc->last_seqno[engine_id] = rq->fence.seqno; return b_ret; } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index f59cf07184ae..0296a77b586a 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -465,7 +465,7 @@ TRACE_EVENT(i915_gem_ring_sync_to, __entry->dev = from->i915->dev->primary->index; __entry->sync_from = from->id; __entry->sync_to = to_req->engine->id; - __entry->seqno = i915_gem_request_get_seqno(req); + __entry->seqno = req->fence.seqno; ), TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u", @@ -488,9 +488,9 @@ TRACE_EVENT(i915_gem_ring_dispatch, TP_fast_assign( __entry->dev = req->i915->dev->primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->seqno; + __entry->seqno = req->fence.seqno; __entry->flags = flags; - intel_engine_enable_signaling(req); + 
fence_enable_sw_signaling(&req->fence); ), TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x", @@ -533,7 +533,7 @@ DECLARE_EVENT_CLASS(i915_gem_request, TP_fast_assign( __entry->dev = req->i915->dev->primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->seqno; + __entry->seqno = req->fence.seqno; ), TP_printk("dev=%u, ring=%u, seqno=%u", @@ -595,7 +595,7 @@ TRACE_EVENT(i915_gem_request_wait_begin, TP_fast_assign( __entry->dev = req->i915->dev->primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->seqno; + __entry->seqno = req->fence.seqno; __entry->blocking = mutex_is_locked(&req->i915->dev->struct_mutex); ), diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index dc65a007fa20..05f62f706897 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -396,6 +396,7 @@ static int intel_breadcrumbs_signaler(void *arg) */ intel_engine_remove_wait(engine, &request->signaling.wait); + fence_signal(&request->fence); /* Find the next oldest signal. 
Note that as we have * not been holding the lock, another client may @@ -444,7 +445,7 @@ int intel_engine_enable_signaling(struct drm_i915_gem_request *request) } request->signaling.wait.task = b->signaler; - request->signaling.wait.seqno = request->seqno; + request->signaling.wait.seqno = request->fence.seqno; i915_gem_request_reference(request); /* First add ourselves into the list of waiters, but register our @@ -466,8 +467,8 @@ int intel_engine_enable_signaling(struct drm_i915_gem_request *request) p = &b->signals.rb_node; while (*p) { parent = *p; - if (i915_seqno_passed(request->seqno, - to_signal(parent)->seqno)) { + if (i915_seqno_passed(request->fence.seqno, + to_signal(parent)->fence.seqno)) { p = &parent->rb_right; first = false; } else diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0742a849acce..c7a9ebdb0811 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1731,7 +1731,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request) intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT); intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, request->seqno); + intel_logical_ring_emit(ringbuf, request->fence.seqno); intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); intel_logical_ring_emit(ringbuf, MI_NOOP); return intel_logical_ring_advance_and_submit(request); @@ -1964,6 +1964,7 @@ logical_ring_setup(struct drm_device *dev, enum intel_engine_id id) engine->exec_id = info->exec_id; engine->guc_id = info->guc_id; engine->mmio_base = info->mmio_base; + engine->fence_context = fence_context_alloc(1); engine->i915 = dev_priv; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 327ad7fdf118..c3d6345aa2c1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1266,7 +1266,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, 
PIPE_CONTROL_CS_STALL); intel_ring_emit(signaller, lower_32_bits(gtt_offset)); intel_ring_emit(signaller, upper_32_bits(gtt_offset)); - intel_ring_emit(signaller, signaller_req->seqno); + intel_ring_emit(signaller, signaller_req->fence.seqno); intel_ring_emit(signaller, 0); intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | MI_SEMAPHORE_TARGET(waiter->hw_id)); @@ -1304,7 +1304,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, intel_ring_emit(signaller, lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT); intel_ring_emit(signaller, upper_32_bits(gtt_offset)); - intel_ring_emit(signaller, signaller_req->seqno); + intel_ring_emit(signaller, signaller_req->fence.seqno); intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | MI_SEMAPHORE_TARGET(waiter->hw_id)); intel_ring_emit(signaller, 0); @@ -1337,7 +1337,7 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req, if (i915_mmio_reg_valid(mbox_reg)) { intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit_reg(signaller, mbox_reg); - intel_ring_emit(signaller, signaller_req->seqno); + intel_ring_emit(signaller, signaller_req->fence.seqno); } } @@ -1373,7 +1373,7 @@ gen6_add_request(struct drm_i915_gem_request *req) intel_ring_emit(engine, MI_STORE_DWORD_INDEX); intel_ring_emit(engine, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(engine, req->seqno); + intel_ring_emit(engine, req->fence.seqno); intel_ring_emit(engine, MI_USER_INTERRUPT); __intel_ring_advance(engine); @@ -1623,7 +1623,7 @@ i9xx_add_request(struct drm_i915_gem_request *req) intel_ring_emit(engine, MI_STORE_DWORD_INDEX); intel_ring_emit(engine, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(engine, req->seqno); + intel_ring_emit(engine, req->fence.seqno); intel_ring_emit(engine, MI_USER_INTERRUPT); __intel_ring_advance(engine); @@ -2092,6 +2092,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, WARN_ON(engine->buffer); engine->i915 = dev_priv; + 
engine->fence_context = fence_context_alloc(1); INIT_LIST_HEAD(&engine->active_list); INIT_LIST_HEAD(&engine->request_list); INIT_LIST_HEAD(&engine->execlist_queue); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 6017367e94fb..b041fb6a6d01 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -158,6 +158,7 @@ struct intel_engine_cs { unsigned int exec_id; unsigned int hw_id; unsigned int guc_id; /* XXX same as hw_id? */ + unsigned fence_context; u32 mmio_base; struct intel_ringbuffer *buffer; struct list_head buffers;
dma-buf provides a generic fence class for interoperation between drivers. Internally we use the request structure as a fence, and so with only a little bit of interfacing we can rebase those requests on top of dma-buf fences. This will allow us, in the future, to pass those fences back to userspace or between drivers. v2: The fence_context needs to be globally unique, not just unique to this device. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Jesse Barnes <jbarnes@virtuousgeek.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_gem_request.c | 116 ++++++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_gem_request.h | 33 ++++---- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/i915_guc_submission.c | 4 +- drivers/gpu/drm/i915/i915_trace.h | 10 +-- drivers/gpu/drm/i915/intel_breadcrumbs.c | 7 +- drivers/gpu/drm/i915/intel_lrc.c | 3 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 11 +-- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 10 files changed, 143 insertions(+), 46 deletions(-)