Message ID | 1402673891-14618-27-git-send-email-oscar.mateo@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Fri, Jun 13, 2014 at 08:37:44AM -0700, oscar.mateo@intel.com wrote: > From: Oscar Mateo <oscar.mateo@intel.com> > > Well, new-ish: if all this code looks familiar, that's because it's > a clone of the existing submission mechanism (with some modifications > here and there to adapt it to LRCs and Execlists). > > And why did we do this? Execlists offer several advantages, like > control over when the GPU is done with a given workload, that can > help simplify the submission mechanism, no doubt, but I am interested > in getting Execlists to work first and foremost. As we are creating > a parallel submission mechanism (even if itñ? just a clone), we can > now start improving it without the fear of breaking old gens. > > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> > --- > drivers/gpu/drm/i915/intel_lrc.c | 214 +++++++++++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_lrc.h | 18 ++++ > 2 files changed, 232 insertions(+) > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 02fc3d0..89aed7a 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -86,6 +86,220 @@ bool intel_enable_execlists(struct drm_device *dev) > return HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev); > } > > +static inline struct intel_ringbuffer * > +logical_ringbuf_get(struct intel_engine_cs *ring, struct intel_context *ctx) > +{ > + return ctx->engine[ring->id].ringbuf; > +} > + > +void intel_logical_ring_advance_and_submit(struct intel_engine_cs *ring, > + struct intel_context *ctx) > +{ > + struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx); > + > + intel_logical_ring_advance(ringbuf); > + > + if (intel_ring_stopped(ring)) > + return; > + > + ring->submit_ctx(ring, ctx, ringbuf->tail); > +} > + > +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, > + struct intel_context *ctx) > +{ > + if (ring->outstanding_lazy_seqno) > + return 0; > + > + if 
(ring->preallocated_lazy_request == NULL) { > + struct drm_i915_gem_request *request; > + > + request = kmalloc(sizeof(*request), GFP_KERNEL); > + if (request == NULL) > + return -ENOMEM; > + > + ring->preallocated_lazy_request = request; > + } > + > + return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno); > +} > + > +static int logical_ring_wait_request(struct intel_engine_cs *ring, > + struct intel_ringbuffer *ringbuf, > + struct intel_context *ctx, > + int bytes) > +{ > + struct drm_i915_gem_request *request; > + u32 seqno = 0; > + int ret; > + > + if (ringbuf->last_retired_head != -1) { > + ringbuf->head = ringbuf->last_retired_head; > + ringbuf->last_retired_head = -1; > + > + ringbuf->space = intel_ring_space(ringbuf); > + if (ringbuf->space >= bytes) > + return 0; > + } > + > + list_for_each_entry(request, &ring->request_list, list) { > + if (__intel_ring_space(request->tail, ringbuf->tail, > + ringbuf->size) >= bytes) { > + seqno = request->seqno; > + break; > + } > + } > + > + if (seqno == 0) > + return -ENOSPC; > + > + ret = i915_wait_seqno(ring, seqno); > + if (ret) > + return ret; > + > + /* TODO: make sure we update the right ringbuffer's last_retired_head > + * when retiring requests */ > + i915_gem_retire_requests_ring(ring); > + ringbuf->head = ringbuf->last_retired_head; > + ringbuf->last_retired_head = -1; > + > + ringbuf->space = intel_ring_space(ringbuf); > + return 0; > +} > + > +static int logical_ring_wait_for_space(struct intel_engine_cs *ring, > + struct intel_ringbuffer *ringbuf, > + struct intel_context *ctx, > + int bytes) > +{ > + struct drm_device *dev = ring->dev; > + struct drm_i915_private *dev_priv = dev->dev_private; > + unsigned long end; > + int ret; > + > + ret = logical_ring_wait_request(ring, ringbuf, ctx, bytes); > + if (ret != -ENOSPC) > + return ret; > + > + /* Force the context submission in case we have been skipping it */ > + intel_logical_ring_advance_and_submit(ring, ctx); > + > + /* With GEM the 
hangcheck timer should kick us out of the loop, > + * leaving it early runs the risk of corrupting GEM state (due > + * to running on almost untested codepaths). But on resume > + * timers don't work yet, so prevent a complete hang in that > + * case by choosing an insanely large timeout. */ > + end = jiffies + 60 * HZ; > + In the legacy ringbuffer version, there are tracepoints around the do loop. Should we keep those? Or add lrc specific equivalents? > + do { > + ringbuf->head = I915_READ_HEAD(ring); > + ringbuf->space = intel_ring_space(ringbuf); > + if (ringbuf->space >= bytes) { > + ret = 0; > + break; > + } > + > + if (!drm_core_check_feature(dev, DRIVER_MODESET) && > + dev->primary->master) { > + struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv; > + if (master_priv->sarea_priv) > + master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT; > + } > + > + msleep(1); > + > + if (dev_priv->mm.interruptible && signal_pending(current)) { > + ret = -ERESTARTSYS; > + break; > + } > + > + ret = i915_gem_check_wedge(&dev_priv->gpu_error, > + dev_priv->mm.interruptible); > + if (ret) > + break; > + > + if (time_after(jiffies, end)) { > + ret = -EBUSY; > + break; > + } > + } while (1); > + > + return ret; > +} > + > +static int logical_ring_wrap_buffer(struct intel_engine_cs *ring, > + struct intel_ringbuffer *ringbuf, > + struct intel_context *ctx) > +{ > + uint32_t __iomem *virt; > + int rem = ringbuf->size - ringbuf->tail; > + > + if (ringbuf->space < rem) { > + int ret = logical_ring_wait_for_space(ring, ringbuf, ctx, rem); > + if (ret) > + return ret; > + } > + > + virt = ringbuf->virtual_start + ringbuf->tail; > + rem /= 4; > + while (rem--) > + iowrite32(MI_NOOP, virt++); > + > + ringbuf->tail = 0; > + ringbuf->space = intel_ring_space(ringbuf); > + > + return 0; > +} > + > +static int logical_ring_prepare(struct intel_engine_cs *ring, > + struct intel_ringbuffer *ringbuf, > + struct intel_context *ctx, > + int bytes) > +{ > + int ret; > 
+ > + if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) { > + ret = logical_ring_wrap_buffer(ring, ringbuf, ctx); > + if (unlikely(ret)) > + return ret; > + } > + > + if (unlikely(ringbuf->space < bytes)) { > + ret = logical_ring_wait_for_space(ring, ringbuf, ctx, bytes); > + if (unlikely(ret)) > + return ret; > + } > + > + return 0; > +} > + > +int intel_logical_ring_begin(struct intel_engine_cs *ring, > + struct intel_context *ctx, > + int num_dwords) > +{ > + struct drm_i915_private *dev_priv = ring->dev->dev_private; > + struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx); > + int ret; > + > + ret = i915_gem_check_wedge(&dev_priv->gpu_error, > + dev_priv->mm.interruptible); > + if (ret) > + return ret; > + > + ret = logical_ring_prepare(ring, ringbuf, ctx, > + num_dwords * sizeof(uint32_t)); > + if (ret) > + return ret; > + > + /* Preallocate the olr before touching the ring */ > + ret = logical_ring_alloc_seqno(ring, ctx); > + if (ret) > + return ret; > + > + ringbuf->space -= num_dwords * sizeof(uint32_t); > + return 0; > +} > + > static int gen8_init_common_ring(struct intel_engine_cs *ring) > { > struct drm_device *dev = ring->dev; > diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h > index 26b0949..686ebf5 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.h > +++ b/drivers/gpu/drm/i915/intel_lrc.h > @@ -5,6 +5,24 @@ > void intel_logical_ring_cleanup(struct intel_engine_cs *ring); > int intel_logical_rings_init(struct drm_device *dev); > > +void intel_logical_ring_advance_and_submit(struct intel_engine_cs *ring, > + struct intel_context *ctx); > + > +static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) > +{ > + ringbuf->tail &= ringbuf->size - 1; > +} > + > +static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, u32 data) > +{ > + iowrite32(data, ringbuf->virtual_start + ringbuf->tail); > + ringbuf->tail += 4; > +} > + > +int 
intel_logical_ring_begin(struct intel_engine_cs *ring, > + struct intel_context *ctx, > + int num_dwords); > + I think all of these are only used in intel_lrc.c, so don't need to be in the header and could all be static. Right? Brad > /* Logical Ring Contexts */ > void intel_lr_context_free(struct intel_context *ctx); > int intel_lr_context_deferred_create(struct intel_context *ctx, > -- > 1.9.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> -----Original Message----- > From: Volkin, Bradley D > Sent: Friday, June 20, 2014 10:01 PM > To: Mateo Lozano, Oscar > Cc: intel-gfx@lists.freedesktop.org > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical ring > submission mechanism > > On Fri, Jun 13, 2014 at 08:37:44AM -0700, oscar.mateo@intel.com wrote: > > From: Oscar Mateo <oscar.mateo@intel.com> > > > > Well, new-ish: if all this code looks familiar, that's because it's a > > clone of the existing submission mechanism (with some modifications > > here and there to adapt it to LRCs and Execlists). > > > > And why did we do this? Execlists offer several advantages, like > > control over when the GPU is done with a given workload, that can help > > simplify the submission mechanism, no doubt, but I am interested in > > getting Execlists to work first and foremost. As we are creating a > > parallel submission mechanism (even if itñ? just a clone), we can now > > start improving it without the fear of breaking old gens. 
> > > > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> > > --- > > drivers/gpu/drm/i915/intel_lrc.c | 214 > > +++++++++++++++++++++++++++++++++++++++ > > drivers/gpu/drm/i915/intel_lrc.h | 18 ++++ > > 2 files changed, 232 insertions(+) > > > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c > > b/drivers/gpu/drm/i915/intel_lrc.c > > index 02fc3d0..89aed7a 100644 > > --- a/drivers/gpu/drm/i915/intel_lrc.c > > +++ b/drivers/gpu/drm/i915/intel_lrc.c > > @@ -86,6 +86,220 @@ bool intel_enable_execlists(struct drm_device > *dev) > > return HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev); } > > > > +static inline struct intel_ringbuffer * logical_ringbuf_get(struct > > +intel_engine_cs *ring, struct intel_context *ctx) { > > + return ctx->engine[ring->id].ringbuf; } > > + > > +void intel_logical_ring_advance_and_submit(struct intel_engine_cs *ring, > > + struct intel_context *ctx) > > +{ > > + struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx); > > + > > + intel_logical_ring_advance(ringbuf); > > + > > + if (intel_ring_stopped(ring)) > > + return; > > + > > + ring->submit_ctx(ring, ctx, ringbuf->tail); } > > + > > +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, > > + struct intel_context *ctx) > > +{ > > + if (ring->outstanding_lazy_seqno) > > + return 0; > > + > > + if (ring->preallocated_lazy_request == NULL) { > > + struct drm_i915_gem_request *request; > > + > > + request = kmalloc(sizeof(*request), GFP_KERNEL); > > + if (request == NULL) > > + return -ENOMEM; > > + > > + ring->preallocated_lazy_request = request; > > + } > > + > > + return i915_gem_get_seqno(ring->dev, &ring- > >outstanding_lazy_seqno); > > +} > > + > > +static int logical_ring_wait_request(struct intel_engine_cs *ring, > > + struct intel_ringbuffer *ringbuf, > > + struct intel_context *ctx, > > + int bytes) > > +{ > > + struct drm_i915_gem_request *request; > > + u32 seqno = 0; > > + int ret; > > + > > + if (ringbuf->last_retired_head != -1) { > > + 
ringbuf->head = ringbuf->last_retired_head; > > + ringbuf->last_retired_head = -1; > > + > > + ringbuf->space = intel_ring_space(ringbuf); > > + if (ringbuf->space >= bytes) > > + return 0; > > + } > > + > > + list_for_each_entry(request, &ring->request_list, list) { > > + if (__intel_ring_space(request->tail, ringbuf->tail, > > + ringbuf->size) >= bytes) { > > + seqno = request->seqno; > > + break; > > + } > > + } > > + > > + if (seqno == 0) > > + return -ENOSPC; > > + > > + ret = i915_wait_seqno(ring, seqno); > > + if (ret) > > + return ret; > > + > > + /* TODO: make sure we update the right ringbuffer's > last_retired_head > > + * when retiring requests */ > > + i915_gem_retire_requests_ring(ring); > > + ringbuf->head = ringbuf->last_retired_head; > > + ringbuf->last_retired_head = -1; > > + > > + ringbuf->space = intel_ring_space(ringbuf); > > + return 0; > > +} > > + > > +static int logical_ring_wait_for_space(struct intel_engine_cs *ring, > > + struct intel_ringbuffer > *ringbuf, > > + struct intel_context *ctx, > > + int bytes) > > +{ > > + struct drm_device *dev = ring->dev; > > + struct drm_i915_private *dev_priv = dev->dev_private; > > + unsigned long end; > > + int ret; > > + > > + ret = logical_ring_wait_request(ring, ringbuf, ctx, bytes); > > + if (ret != -ENOSPC) > > + return ret; > > + > > + /* Force the context submission in case we have been skipping it */ > > + intel_logical_ring_advance_and_submit(ring, ctx); > > + > > + /* With GEM the hangcheck timer should kick us out of the loop, > > + * leaving it early runs the risk of corrupting GEM state (due > > + * to running on almost untested codepaths). But on resume > > + * timers don't work yet, so prevent a complete hang in that > > + * case by choosing an insanely large timeout. */ > > + end = jiffies + 60 * HZ; > > + > > In the legacy ringbuffer version, there are tracepoints around the do loop. > Should we keep those? Or add lrc specific equivalents? 
> > > + do { > > + ringbuf->head = I915_READ_HEAD(ring); > > + ringbuf->space = intel_ring_space(ringbuf); > > + if (ringbuf->space >= bytes) { > > + ret = 0; > > + break; > > + } > > + > > + if (!drm_core_check_feature(dev, DRIVER_MODESET) && > > + dev->primary->master) { > > + struct drm_i915_master_private *master_priv = dev- > >primary->master->driver_priv; > > + if (master_priv->sarea_priv) > > + master_priv->sarea_priv->perf_boxes |= > I915_BOX_WAIT; > > + } > > + > > + msleep(1); > > + > > + if (dev_priv->mm.interruptible && signal_pending(current)) { > > + ret = -ERESTARTSYS; > > + break; > > + } > > + > > + ret = i915_gem_check_wedge(&dev_priv->gpu_error, > > + dev_priv->mm.interruptible); > > + if (ret) > > + break; > > + > > + if (time_after(jiffies, end)) { > > + ret = -EBUSY; > > + break; > > + } > > + } while (1); > > + > > + return ret; > > +} > > + > > +static int logical_ring_wrap_buffer(struct intel_engine_cs *ring, > > + struct intel_ringbuffer > *ringbuf, > > + struct intel_context *ctx) > > +{ > > + uint32_t __iomem *virt; > > + int rem = ringbuf->size - ringbuf->tail; > > + > > + if (ringbuf->space < rem) { > > + int ret = logical_ring_wait_for_space(ring, ringbuf, ctx, rem); > > + if (ret) > > + return ret; > > + } > > + > > + virt = ringbuf->virtual_start + ringbuf->tail; > > + rem /= 4; > > + while (rem--) > > + iowrite32(MI_NOOP, virt++); > > + > > + ringbuf->tail = 0; > > + ringbuf->space = intel_ring_space(ringbuf); > > + > > + return 0; > > +} > > + > > +static int logical_ring_prepare(struct intel_engine_cs *ring, > > + struct intel_ringbuffer *ringbuf, > > + struct intel_context *ctx, > > + int bytes) > > +{ > > + int ret; > > + > > + if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) { > > + ret = logical_ring_wrap_buffer(ring, ringbuf, ctx); > > + if (unlikely(ret)) > > + return ret; > > + } > > + > > + if (unlikely(ringbuf->space < bytes)) { > > + ret = logical_ring_wait_for_space(ring, ringbuf, ctx, bytes); > > + if 
(unlikely(ret)) > > + return ret; > > + } > > + > > + return 0; > > +} > > + > > +int intel_logical_ring_begin(struct intel_engine_cs *ring, > > + struct intel_context *ctx, > > + int num_dwords) > > +{ > > + struct drm_i915_private *dev_priv = ring->dev->dev_private; > > + struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx); > > + int ret; > > + > > + ret = i915_gem_check_wedge(&dev_priv->gpu_error, > > + dev_priv->mm.interruptible); > > + if (ret) > > + return ret; > > + > > + ret = logical_ring_prepare(ring, ringbuf, ctx, > > + num_dwords * sizeof(uint32_t)); > > + if (ret) > > + return ret; > > + > > + /* Preallocate the olr before touching the ring */ > > + ret = logical_ring_alloc_seqno(ring, ctx); > > + if (ret) > > + return ret; > > + > > + ringbuf->space -= num_dwords * sizeof(uint32_t); > > + return 0; > > +} > > + > > static int gen8_init_common_ring(struct intel_engine_cs *ring) { > > struct drm_device *dev = ring->dev; > > diff --git a/drivers/gpu/drm/i915/intel_lrc.h > > b/drivers/gpu/drm/i915/intel_lrc.h > > index 26b0949..686ebf5 100644 > > --- a/drivers/gpu/drm/i915/intel_lrc.h > > +++ b/drivers/gpu/drm/i915/intel_lrc.h > > @@ -5,6 +5,24 @@ > > void intel_logical_ring_cleanup(struct intel_engine_cs *ring); int > > intel_logical_rings_init(struct drm_device *dev); > > > > +void intel_logical_ring_advance_and_submit(struct intel_engine_cs *ring, > > + struct intel_context *ctx); > > + > > +static inline void intel_logical_ring_advance(struct intel_ringbuffer > > +*ringbuf) { > > + ringbuf->tail &= ringbuf->size - 1; > > +} > > + > > +static inline void intel_logical_ring_emit(struct intel_ringbuffer > > +*ringbuf, u32 data) { > > + iowrite32(data, ringbuf->virtual_start + ringbuf->tail); > > + ringbuf->tail += 4; > > +} > > + > > +int intel_logical_ring_begin(struct intel_engine_cs *ring, > > + struct intel_context *ctx, > > + int num_dwords); > > + > > I think all of these are only used in intel_lrc.c, so don't need to be in the > 
header and could all be static. Right? > > Brad So far, yes, but that's only because I artificially made intel_lrc.c self-contained, as Daniel requested. What if we need to execute commands from somewhere else, like in intel_gen7_queue_flip()? And this takes me to another discussion: this logical ring vs legacy ring split is probably a good idea (time will tell), but we should provide a way of sending commands for execution without knowing if Execlists are enabled or not. In the early series that was easy because we reused the ring_begin, ring_emit & ring_advance functions, but this is not the case anymore. And without this, sooner or later somebody will break legacy or execlists (this already happened last week, when somebody here was implementing native sync without knowing about Execlists). So, the question is: how do you feel about a dev_priv.gt vfunc that takes a context, a ring, an array of DWORDS and a BB length and does the intel_(logical)_ring_begin/emit/advance based on i915.enable_execlists? -- Oscar
On Mon, Jun 23, 2014 at 01:09:37PM +0000, Mateo Lozano, Oscar wrote: > So far, yes, but that's only because I artificially made intel_lrc.c self-contained, as Daniel requested. What if we need to execute commands from somewhere else, like in intel_gen7_queue_flip()? > > And this takes me to another discussion: this logical ring vs legacy ring split is probably a good idea (time will tell), but we should provide a way of sending commands for execution without knowing if Execlists are enabled or not. In the early series that was easy because we reused the ring_begin, ring_emit & ring_advance functions, but this is not the case anymore. And without this, sooner or later somebody will break legacy or execlists (this already happened last week, when somebody here was implementing native sync without knowing about Execlists). > > So, the question is: how do you feel about a dev_priv.gt vfunc that takes a context, a ring, an array of DWORDS and a BB length and does the intel_(logical)_ring_begin/emit/advance based on i915.enable_execlists? I'm still baffled by the design. intel_ring_begin() and friends should be able to find their context (logical or legacy) from the ring and dtrt. -Chris
> -----Original Message----- > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > Sent: Monday, June 23, 2014 2:14 PM > To: Mateo Lozano, Oscar > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical ring > submission mechanism > > On Mon, Jun 23, 2014 at 01:09:37PM +0000, Mateo Lozano, Oscar wrote: > > So far, yes, but that´s only because I artificially made intel_lrc.c self- > contained, as Daniel requested. What if we need to execute commands from > somewhere else, like in intel_gen7_queue_flip()? > > > > And this takes me to another discussion: this logical ring vs legacy ring split > is probably a good idea (time will tell), but we should provide a way of > sending commands for execution without knowing if Execlists are enabled or > not. In the early series that was easy because we reused the ring_begin, > ring_emit & ring_advance functions, but this is not the case anymore. And > without this, sooner or later somebody will break legacy or execlists (this > already happened last week, when somebody here was implementing native > sync without knowing about Execlists). > > > > So, the questions is: how do you feel about a dev_priv.gt vfunc that takes a > context, a ring, an array of DWORDS and a BB length and does the > intel_(logical)_ring_begin/emit/advance based on i915.enable_execlists? > > I'm still baffled by the design. intel_ring_begin() and friends should be able to > find their context (logical or legacy) from the ring and dtrt. > -Chris Sorry, Chris, I obviously don´t have the same experience with 915 you have: how do you propose to extract the right context from the ring?
On Mon, Jun 23, 2014 at 01:18:35PM +0000, Mateo Lozano, Oscar wrote: > > -----Original Message----- > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > Sent: Monday, June 23, 2014 2:14 PM > > To: Mateo Lozano, Oscar > > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical ring > > submission mechanism > > > > On Mon, Jun 23, 2014 at 01:09:37PM +0000, Mateo Lozano, Oscar wrote: > > > So far, yes, but that´s only because I artificially made intel_lrc.c self- > > contained, as Daniel requested. What if we need to execute commands from > > somewhere else, like in intel_gen7_queue_flip()? > > > > > > And this takes me to another discussion: this logical ring vs legacy ring split > > is probably a good idea (time will tell), but we should provide a way of > > sending commands for execution without knowing if Execlists are enabled or > > not. In the early series that was easy because we reused the ring_begin, > > ring_emit & ring_advance functions, but this is not the case anymore. And > > without this, sooner or later somebody will break legacy or execlists (this > > already happened last week, when somebody here was implementing native > > sync without knowing about Execlists). > > > > > > So, the questions is: how do you feel about a dev_priv.gt vfunc that takes a > > context, a ring, an array of DWORDS and a BB length and does the > > intel_(logical)_ring_begin/emit/advance based on i915.enable_execlists? > > > > I'm still baffled by the design. intel_ring_begin() and friends should be able to > > find their context (logical or legacy) from the ring and dtrt. > > -Chris > > Sorry, Chris, I obviously don´t have the same experience with 915 you have: how do you propose to extract the right context from the ring? The rings are a set of buffers and vfuncs that are associated with a context. 
Before you can call intel_ring_begin() you must know what context you want to operate on and therefore can pick the right logical/legacy ring and interface for RCS/BCS/VCS/etc -Chris
> -----Original Message----- > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > Sent: Monday, June 23, 2014 2:27 PM > To: Mateo Lozano, Oscar > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical ring > submission mechanism > > On Mon, Jun 23, 2014 at 01:18:35PM +0000, Mateo Lozano, Oscar wrote: > > > -----Original Message----- > > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > > Sent: Monday, June 23, 2014 2:14 PM > > > To: Mateo Lozano, Oscar > > > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > > > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical > > > ring submission mechanism > > > > > > On Mon, Jun 23, 2014 at 01:09:37PM +0000, Mateo Lozano, Oscar > wrote: > > > > So far, yes, but that´s only because I artificially made > > > > intel_lrc.c self- > > > contained, as Daniel requested. What if we need to execute commands > > > from somewhere else, like in intel_gen7_queue_flip()? > > > > > > > > And this takes me to another discussion: this logical ring vs > > > > legacy ring split > > > is probably a good idea (time will tell), but we should provide a > > > way of sending commands for execution without knowing if Execlists > > > are enabled or not. In the early series that was easy because we > > > reused the ring_begin, ring_emit & ring_advance functions, but this > > > is not the case anymore. And without this, sooner or later somebody > > > will break legacy or execlists (this already happened last week, > > > when somebody here was implementing native sync without knowing > about Execlists). > > > > > > > > So, the questions is: how do you feel about a dev_priv.gt vfunc > > > > that takes a > > > context, a ring, an array of DWORDS and a BB length and does the > > > intel_(logical)_ring_begin/emit/advance based on i915.enable_execlists? > > > > > > I'm still baffled by the design. 
intel_ring_begin() and friends > > > should be able to find their context (logical or legacy) from the ring and > dtrt. > > > -Chris > > > > Sorry, Chris, I obviously don´t have the same experience with 915 you have: > how do you propose to extract the right context from the ring? > > The rings are a set of buffers and vfuncs that are associated with a context. > Before you can call intel_ring_begin() you must know what context you want > to operate on and therefore can pick the right logical/legacy ring and > interface for RCS/BCS/VCS/etc -Chris Ok, but then you need to pass some extra stuff down together with the intel_engine_cs, either intel_context or intel_ringbuffer, right? Because that´s exactly what I did in previous versions, plumbing intel_context everywhere where it was needed (I could have plumbed intel_ringbuffer instead, it really doesn´t matter). This was rejected for being too intrusive and not allowing easy maintenance in the future. -- Oscar
On Mon, Jun 23, 2014 at 01:36:07PM +0000, Mateo Lozano, Oscar wrote: > > -----Original Message----- > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > Sent: Monday, June 23, 2014 2:27 PM > > To: Mateo Lozano, Oscar > > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical ring > > submission mechanism > > > > On Mon, Jun 23, 2014 at 01:18:35PM +0000, Mateo Lozano, Oscar wrote: > > > > -----Original Message----- > > > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > > > Sent: Monday, June 23, 2014 2:14 PM > > > > To: Mateo Lozano, Oscar > > > > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > > > > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical > > > > ring submission mechanism > > > > > > > > On Mon, Jun 23, 2014 at 01:09:37PM +0000, Mateo Lozano, Oscar > > wrote: > > > > > So far, yes, but that´s only because I artificially made > > > > > intel_lrc.c self- > > > > contained, as Daniel requested. What if we need to execute commands > > > > from somewhere else, like in intel_gen7_queue_flip()? > > > > > > > > > > And this takes me to another discussion: this logical ring vs > > > > > legacy ring split > > > > is probably a good idea (time will tell), but we should provide a > > > > way of sending commands for execution without knowing if Execlists > > > > are enabled or not. In the early series that was easy because we > > > > reused the ring_begin, ring_emit & ring_advance functions, but this > > > > is not the case anymore. And without this, sooner or later somebody > > > > will break legacy or execlists (this already happened last week, > > > > when somebody here was implementing native sync without knowing > > about Execlists). 
> > > > > > > > > > So, the questions is: how do you feel about a dev_priv.gt vfunc > > > > > that takes a > > > > context, a ring, an array of DWORDS and a BB length and does the > > > > intel_(logical)_ring_begin/emit/advance based on i915.enable_execlists? > > > > > > > > I'm still baffled by the design. intel_ring_begin() and friends > > > > should be able to find their context (logical or legacy) from the ring and > > dtrt. > > > > -Chris > > > > > > Sorry, Chris, I obviously don´t have the same experience with 915 you have: > > how do you propose to extract the right context from the ring? > > > > The rings are a set of buffers and vfuncs that are associated with a context. > > Before you can call intel_ring_begin() you must know what context you want > > to operate on and therefore can pick the right logical/legacy ring and > > interface for RCS/BCS/VCS/etc -Chris > > Ok, but then you need to pass some extra stuff down together with the intel_engine_cs, either intel_context or intel_ringbuffer, right? Because that´s exactly what I did in previous versions, plumbing intel_context everywhere where it was needed (I could have plumbed intel_ringbuffer instead, it really doesn´t matter). This was rejected for being too intrusive and not allowing easy maintenance in the future. Nope. You didn't redesign the ringbuffers to function as we expected but tacked on extra information and layering violations. -Chris
> -----Original Message----- > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > Sent: Monday, June 23, 2014 2:42 PM > To: Mateo Lozano, Oscar > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical ring > submission mechanism > > On Mon, Jun 23, 2014 at 01:36:07PM +0000, Mateo Lozano, Oscar wrote: > > > -----Original Message----- > > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > > Sent: Monday, June 23, 2014 2:27 PM > > > To: Mateo Lozano, Oscar > > > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > > > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical > > > ring submission mechanism > > > > > > On Mon, Jun 23, 2014 at 01:18:35PM +0000, Mateo Lozano, Oscar > wrote: > > > > > -----Original Message----- > > > > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > > > > Sent: Monday, June 23, 2014 2:14 PM > > > > > To: Mateo Lozano, Oscar > > > > > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > > > > > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical > > > > > ring submission mechanism > > > > > > > > > > On Mon, Jun 23, 2014 at 01:09:37PM +0000, Mateo Lozano, Oscar > > > wrote: > > > > > > So far, yes, but that´s only because I artificially made > > > > > > intel_lrc.c self- > > > > > contained, as Daniel requested. What if we need to execute > > > > > commands from somewhere else, like in intel_gen7_queue_flip()? > > > > > > > > > > > > And this takes me to another discussion: this logical ring vs > > > > > > legacy ring split > > > > > is probably a good idea (time will tell), but we should provide > > > > > a way of sending commands for execution without knowing if > > > > > Execlists are enabled or not. In the early series that was easy > > > > > because we reused the ring_begin, ring_emit & ring_advance > > > > > functions, but this is not the case anymore. 
And without this, > > > > > sooner or later somebody will break legacy or execlists (this > > > > > already happened last week, when somebody here was implementing > > > > > native sync without knowing > > > about Execlists). > > > > > > > > > > > > So, the questions is: how do you feel about a dev_priv.gt > > > > > > vfunc that takes a > > > > > context, a ring, an array of DWORDS and a BB length and does the > > > > > intel_(logical)_ring_begin/emit/advance based on > i915.enable_execlists? > > > > > > > > > > I'm still baffled by the design. intel_ring_begin() and friends > > > > > should be able to find their context (logical or legacy) from > > > > > the ring and > > > dtrt. > > > > > -Chris > > > > > > > > Sorry, Chris, I obviously don´t have the same experience with 915 you > have: > > > how do you propose to extract the right context from the ring? > > > > > > The rings are a set of buffers and vfuncs that are associated with a > context. > > > Before you can call intel_ring_begin() you must know what context > > > you want to operate on and therefore can pick the right > > > logical/legacy ring and interface for RCS/BCS/VCS/etc -Chris > > > > Ok, but then you need to pass some extra stuff down together with the > intel_engine_cs, either intel_context or intel_ringbuffer, right? Because > that´s exactly what I did in previous versions, plumbing intel_context > everywhere where it was needed (I could have plumbed intel_ringbuffer > instead, it really doesn´t matter). This was rejected for being too intrusive > and not allowing easy maintenance in the future. > > Nope. You didn't redesign the ringbuffers to function as we expected but > tacked on extra information and layering violations. > -Chris I know is no excuse, but as I said, I don´t know the code as well as you do. 
Let me explain to you the history of this one and maybe you can help me out discovering where I got it all wrong: - The original design I inherited from Ben and Jesse created a completely new "struct intel_ring_buffer" per context, and passed that one on instead of passing one of the &dev_priv->ring[i] ones. When it was time to submit a context to the ELSP, they simply took it from ring->context. The problem with that was that creating an unbound number of "struct intel_ring_buffer" meant there were also an unbound number of things like "struct list_head active_list" or "u32 irq_enable_mask", which made little sense. - What we really needed, I naively thought, is to get rid of the concept of ring: there are no rings, there are engines (like the render engine, the vebox, etc...) and there are ringbuffers (as in circular buffer with a head offset, tail offset, and control information). So I went on and renamed the old "intel_ring_buffer" to "intel_engine_cs", then extracting a few things into a new "intel_ringbuffer" struct. Pre-Execlists, there was a 1:1 relationship between the ringbuffers and the engines. With Execlists, however, this 1:1 relationship is between the ringbuffers and the contexts. - I remember explaining this problem in a face-to-face meeting in the UK with some OTC people back in February (I think they tried to get you on the phone but they didn´t manage. I do remember they got Daniel though). Here, I proposed that an easy solution (easy, but maybe not right) was to plumb the context down: in Execlists mode, we would retrieve the ringbuffer from the context while, in legacy mode, we would get it from the engine. Everybody seemed to agree with this. - I worked on this premise for several versions that I sent to the mailing list for review (first the internal, then intel-gfx). Daniel only complained last month, when he pointed out that he asked, a long long time ago, for a completely separate execution path for Execlists. 
And this is where we are now... And now back to the problem at hand: what do you suggest I do now (other than ritual seppuku, I mean)? I need to know the engine (for a lot of reasons), the ringbuffer (to know where to place commands) and the context (to know what to submit to the ELSP). I can spin it a thousand different ways, but I still need to know those three things. At the same time, I cannot reuse the old intel_ringbuffer code because Daniel won't approve. What would you do?
On Mon, Jun 23, 2014 at 07:35:38AM -0700, Mateo Lozano, Oscar wrote: > > -----Original Message----- > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > Sent: Monday, June 23, 2014 2:42 PM > > To: Mateo Lozano, Oscar > > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical ring > > submission mechanism > > > > On Mon, Jun 23, 2014 at 01:36:07PM +0000, Mateo Lozano, Oscar wrote: > > > > -----Original Message----- > > > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > > > Sent: Monday, June 23, 2014 2:27 PM > > > > To: Mateo Lozano, Oscar > > > > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > > > > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical > > > > ring submission mechanism > > > > > > > > On Mon, Jun 23, 2014 at 01:18:35PM +0000, Mateo Lozano, Oscar > > wrote: > > > > > > -----Original Message----- > > > > > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > > > > > Sent: Monday, June 23, 2014 2:14 PM > > > > > > To: Mateo Lozano, Oscar > > > > > > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > > > > > > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical > > > > > > ring submission mechanism > > > > > > > > > > > > On Mon, Jun 23, 2014 at 01:09:37PM +0000, Mateo Lozano, Oscar > > > > wrote: > > > > > > > So far, yes, but that´s only because I artificially made > > > > > > > intel_lrc.c self- > > > > > > contained, as Daniel requested. What if we need to execute > > > > > > commands from somewhere else, like in intel_gen7_queue_flip()? > > > > > > > > > > > > > > And this takes me to another discussion: this logical ring vs > > > > > > > legacy ring split > > > > > > is probably a good idea (time will tell), but we should provide > > > > > > a way of sending commands for execution without knowing if > > > > > > Execlists are enabled or not. 
In the early series that was easy > > > > > > because we reused the ring_begin, ring_emit & ring_advance > > > > > > functions, but this is not the case anymore. And without this, > > > > > > sooner or later somebody will break legacy or execlists (this > > > > > > already happened last week, when somebody here was implementing > > > > > > native sync without knowing > > > > about Execlists). > > > > > > > > > > > > > > So, the questions is: how do you feel about a dev_priv.gt > > > > > > > vfunc that takes a > > > > > > context, a ring, an array of DWORDS and a BB length and does the > > > > > > intel_(logical)_ring_begin/emit/advance based on > > i915.enable_execlists? There are 3 cases of non-execbuffer submissions that I can think of: flips, render state, and clear-buffer (proposed patches on the list). I wonder if the right approach might be to use batchbuffers with a small wrapper around the dispatch_execbuffer/emit_bb_start vfuncs. Basically the rule would be to only touch a ringbuffer from within the intel_engine_cs vfuncs, which always know which set of functions to use. For flips, we could use MMIO flips. Render state already uses the existing dispatch_execbuffer() and add_request(). The clear code could potentially do the same. There would obviously be some overhead in using a batch buffer for what could end up being just a few commands. Perhaps the batch buffer pool code from the command parser would help though. > > > > > > > > > > > > I'm still baffled by the design. intel_ring_begin() and friends > > > > > > should be able to find their context (logical or legacy) from > > > > > > the ring and > > > > dtrt. > > > > > > -Chris > > > > > > > > > > Sorry, Chris, I obviously don´t have the same experience with 915 you > > have: > > > > how do you propose to extract the right context from the ring? > > > > > > > > The rings are a set of buffers and vfuncs that are associated with a > > context. 
> > > > Before you can call intel_ring_begin() you must know what context > > > > you want to operate on and therefore can pick the right > > > > logical/legacy ring and interface for RCS/BCS/VCS/etc -Chris > > > > > > Ok, but then you need to pass some extra stuff down together with the > > intel_engine_cs, either intel_context or intel_ringbuffer, right? Because > > that´s exactly what I did in previous versions, plumbing intel_context > > everywhere where it was needed (I could have plumbed intel_ringbuffer > > instead, it really doesn´t matter). This was rejected for being too intrusive > > and not allowing easy maintenance in the future. > > > > Nope. You didn't redesign the ringbuffers to function as we expected but > > tacked on extra information and layering violations. Not sure what your proposed alternative is here Chris. I'll elaborate on what I had proposed r.e. plumbing intel_ringbuffer instead of intel_context, which might be along the lines of what you envision. At this point, we're starting to go in circles, so I don't know if it's worth revisting beyond that. The earlier versions of the series modified all of the intel_ring_* functions to accept (engine, context) as parameters. At or near the bottom of the call chain (e.g. in the engine vfuncs), we called a new function, intel_ringbuffer_get(engine, context), to return the appropriate ringbuffer in both legacy and lrc modes. However, a given struct intel_ringbuffer is only ever used with a particular engine (for both legacy and lrc) and with a particular context (for lrc only). 
So I suggested that we: - Add a back pointer from struct intel_rinbuffer to intel_context (would only be valid for lrc mode) - Move the intel_ringbuffer_get(engine, context) calls up to the callers - Pass (engine, ringbuf) instead of (engine, context) to intel_ring_* functions - Have the vfunc implemenations get the context from the ringbuffer where needed and ignore it where not Looking again, we could probably add a back pointer to the intel_engine_cs as well and then just pass around the ringbuf. In any case, we went back and forth on this a bit and decided to just stick with passing (engine, context). Which we then decided was too invasive, and here we are. So is that closer to what you're thinking of, or did you have something else in mind? Thanks, Brad > > -Chris > > I know is no excuse, but as I said, I don´t know the code as well as you do. Let me explain to you the history of this one and maybe you can help me out discovering where I got it all wrong: > > - The original design I inherited from Ben and Jesse created a completely new "struct intel_ring_buffer" per context, and passed that one on instead of passing one of the &dev_priv->ring[i] ones. When it was time to submit a context to the ELSP, they simply took it from ring->context. The problem with that was that creating an unbound number of "struct intel_ring_buffer" meant there were also an unbound number of things like "struct list_head active_list" or "u32 irq_enable_mask", which made little sense. > - What we really needed, I naively thought, is to get rid of the concept of ring: there are no rings, there are engines (like the render engine, the vebox, etc...) and there are ringbuffers (as in circular buffer with a head offset, tail offset, and control information). So I went on and renamed the old "intel_ring_buffer" to "intel_engine_cs", then extracting a few things into a new "intel_ringbuffer" struct. Pre-Execlists, there was a 1:1 relationship between the ringbuffers and the engines. 
With Execlists, however, this 1:1 relationship is between the ringbuffers and the contexts. > - I remember explaining this problem in a face-to-face meeting in the UK with some OTC people back in February (I think they tried to get you on the phone but they didn´t manage. I do remember they got Daniel though). Here, I proposed that an easy solution (easy, but maybe not right) was to plumb the context down: in Execlists mode, we would retrieve the ringbuffer from the context while, in legacy mode, we would get it from the engine. Everybody seemed to agree with this. > - I worked on this premise for several versions that I sent to the mailing list for review (first the internal, then intel-gfx). Daniel only complained last month, when he pointed out that he asked, a long long time ago, for a completely separate execution path for Execlists. And this is where we are now... > > And now back to the problem at hand: what do you suggest I do now (other than ritual seppuku, I mean)? I need to know the engine (for a lot of reasons), the ringbuffer (to know where to place commands) and the context (to know what to submit to the ELSP). I can spin it a thousand different ways, but I still need to know those three things. At the same time, I cannot reuse the old intel_ringbuffer code because Daniel won´t approve. What would you do?
On Mon, Jun 23, 2014 at 02:35:38PM +0000, Mateo Lozano, Oscar wrote: > > -----Original Message----- > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > Sent: Monday, June 23, 2014 2:42 PM > > To: Mateo Lozano, Oscar > > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical ring > > submission mechanism > > > > On Mon, Jun 23, 2014 at 01:36:07PM +0000, Mateo Lozano, Oscar wrote: > > > > -----Original Message----- > > > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > > > Sent: Monday, June 23, 2014 2:27 PM > > > > To: Mateo Lozano, Oscar > > > > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > > > > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical > > > > ring submission mechanism > > > > > > > > On Mon, Jun 23, 2014 at 01:18:35PM +0000, Mateo Lozano, Oscar > > wrote: > > > > > > -----Original Message----- > > > > > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > > > > > Sent: Monday, June 23, 2014 2:14 PM > > > > > > To: Mateo Lozano, Oscar > > > > > > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > > > > > > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical > > > > > > ring submission mechanism > > > > > > > > > > > > On Mon, Jun 23, 2014 at 01:09:37PM +0000, Mateo Lozano, Oscar > > > > wrote: > > > > > > > So far, yes, but that´s only because I artificially made > > > > > > > intel_lrc.c self- > > > > > > contained, as Daniel requested. What if we need to execute > > > > > > commands from somewhere else, like in intel_gen7_queue_flip()? > > > > > > > > > > > > > > And this takes me to another discussion: this logical ring vs > > > > > > > legacy ring split > > > > > > is probably a good idea (time will tell), but we should provide > > > > > > a way of sending commands for execution without knowing if > > > > > > Execlists are enabled or not. 
In the early series that was easy > > > > > > because we reused the ring_begin, ring_emit & ring_advance > > > > > > functions, but this is not the case anymore. And without this, > > > > > > sooner or later somebody will break legacy or execlists (this > > > > > > already happened last week, when somebody here was implementing > > > > > > native sync without knowing > > > > about Execlists). > > > > > > > > > > > > > > So, the questions is: how do you feel about a dev_priv.gt > > > > > > > vfunc that takes a > > > > > > context, a ring, an array of DWORDS and a BB length and does the > > > > > > intel_(logical)_ring_begin/emit/advance based on > > i915.enable_execlists? > > > > > > > > > > > > I'm still baffled by the design. intel_ring_begin() and friends > > > > > > should be able to find their context (logical or legacy) from > > > > > > the ring and > > > > dtrt. > > > > > > -Chris > > > > > > > > > > Sorry, Chris, I obviously don´t have the same experience with 915 you > > have: > > > > how do you propose to extract the right context from the ring? > > > > > > > > The rings are a set of buffers and vfuncs that are associated with a > > context. > > > > Before you can call intel_ring_begin() you must know what context > > > > you want to operate on and therefore can pick the right > > > > logical/legacy ring and interface for RCS/BCS/VCS/etc -Chris > > > > > > Ok, but then you need to pass some extra stuff down together with the > > intel_engine_cs, either intel_context or intel_ringbuffer, right? Because > > that´s exactly what I did in previous versions, plumbing intel_context > > everywhere where it was needed (I could have plumbed intel_ringbuffer > > instead, it really doesn´t matter). This was rejected for being too intrusive > > and not allowing easy maintenance in the future. > > > > Nope. You didn't redesign the ringbuffers to function as we expected but > > tacked on extra information and layering violations. 
> > -Chris > > I know is no excuse, but as I said, I don´t know the code as well as you do. Let me explain to you the history of this one and maybe you can help me out discovering where I got it all wrong: > > - The original design I inherited from Ben and Jesse created a completely new "struct intel_ring_buffer" per context, and passed that one on instead of passing one of the &dev_priv->ring[i] ones. When it was time to submit a context to the ELSP, they simply took it from ring->context. The problem with that was that creating an unbound number of "struct intel_ring_buffer" meant there were also an unbound number of things like "struct list_head active_list" or "u32 irq_enable_mask", which made little sense. > - What we really needed, I naively thought, is to get rid of the concept of ring: there are no rings, there are engines (like the render engine, the vebox, etc...) and there are ringbuffers (as in circular buffer with a head offset, tail offset, and control information). So I went on and renamed the old "intel_ring_buffer" to "intel_engine_cs", then extracting a few things into a new "intel_ringbuffer" struct. Pre-Execlists, there was a 1:1 relationship between the ringbuffers and the engines. With Execlists, however, this 1:1 relationship is between the ringbuffers and the contexts. > - I remember explaining this problem in a face-to-face meeting in the UK with some OTC people back in February (I think they tried to get you on the phone but they didn´t manage. I do remember they got Daniel though). Here, I proposed that an easy solution (easy, but maybe not right) was to plumb the context down: in Execlists mode, we would retrieve the ringbuffer from the context while, in legacy mode, we would get it from the engine. Everybody seemed to agree with this. > - I worked on this premise for several versions that I sent to the mailing list for review (first the internal, then intel-gfx). 
Daniel only complained last month, when he pointed out that he asked, a long long time ago, for a completely separate execution path for Execlists. And this is where we are now... > > And now back to the problem at hand: what do you suggest I do now (other than ritual seppuku, I mean)? I need to know the engine (for a lot of reasons), the ringbuffer (to know where to place commands) and the context (to know what to submit to the ELSP). I can spin it a thousand different ways, but I still need to know those three things. At the same time, I cannot reuse the old intel_ringbuffer code because Daniel won´t approve. What would you do? I think what Chris is trying to say is that all of your operations should be context driven. The ringbuffer is always a derivative of the context. If I understand Chris correctly, I agree with him, but he is being characteristically terse. There should be two data structures: intel_engine_cs, formerly intel_ringbuffer - for legacy intel_context, formerly intel_hw_context - for execlist You did that. Then there should be a data structure to represent the ringbuffer within the execlist context. You did that, too. I don't think the fact that there is a separate execbuf path makes any difference in this conversation, but perhaps I missed some detail. So at least from what I can tell, the data structures are right. The problem is that we're mixing and matching intel_engine_cs with the new [and I wish we could have used a different name] intel_ringbuffer. As an example from near the top of the patch: + intel_logical_ring_advance(ringbuf); + + if (intel_ring_stopped(ring)) + return; You're advancing the ringbuf, but checking the ring? That's confusing to me. I think the only solution for what Chris is asking for is to implement this as 1 context per engine, as opposed to 1 context with a context object per engine. As you correctly stated, I think we all agreed the latter was fine when we met. 
Functionally, I see no difference, but it does allow you to always use a context as the sole mechanism for making any decisions and performing any operations. Now without writing all the code, I can't promise it actually will look better, but I think it's likely going to be a lot cleaner. Before you do any changes though... On to what I see as the real problem: fundamentally, if Daniel put Chris in charge of giving the thumbs up or down, then you should get Daniel to agree that he will defer to Chris, and you should do whatever Chris says. You need not be caught in the middle of Daniel and Chris - it is a bad place to be (I know from much experience). If Daniel is not okay with that, then he needs to find a different reviewer.
Ok, let´s try to extract something positive out of all this. OPTION A (Ben´s proposal): > I think the only solution for what Chris is asking for is to implement this as 1 > context per engine, as opposed to 1 context with a context object per > engine. As you correctly stated, I think we all agreed the latter was fine when > we met. Functionally, I see no difference, but it does allow you to always use > a context as the sole mechanism for making any decisions and performing > any operations. Now without writing all the code, I can't promise it actually > will look better, but I think it's likely going to be a lot cleaner. Before you do > any changes though... We agreed on this early on (v1), yes, but then the idea was frowned upon by Brad and then by Daniel. I cannot recall exactly why anymore, but one big reason was that the idr mechanism makes it difficult to track several contexts with the same id (and userspace only has one context handle) and something about ctx->hang_stats. From v2 on, we agreed to multiplex different engines inside one intel_context (and that´s why we renamed i915_hw_context to intel_context). OPTION B (Brad´s proposal): > So I suggested that we: > > - Add a back pointer from struct intel_rinbuffer to intel_context (would only > be valid for lrc mode) > - Move the intel_ringbuffer_get(engine, context) calls up to the callers > - Pass (engine, ringbuf) instead of (engine, context) to intel_ring_* functions > - Have the vfunc implemenations get the context from the ringbuffer where > needed and ignore it where not > > Looking again, we could probably add a back pointer to the intel_engine_cs > as well and then just pass around the ringbuf. Sounds fine by me: intel_ringbuffer is only related to exactly one intel_engine_cs and one intel_context, so having pointers to those two makes sense. 
As before, this could be easily done within the existing code (passing intel_ringbuffer instead of intel_engine_cs), but Daniel wants a code split, so I can only do it for the logical ring functions.
> -----Original Message----- > From: Volkin, Bradley D > Sent: Monday, June 23, 2014 8:10 PM > To: Mateo Lozano, Oscar > Cc: Chris Wilson; intel-gfx@lists.freedesktop.org > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical ring > submission mechanism > > On Mon, Jun 23, 2014 at 07:35:38AM -0700, Mateo Lozano, Oscar wrote: > > > -----Original Message----- > > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > > Sent: Monday, June 23, 2014 2:42 PM > > > To: Mateo Lozano, Oscar > > > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > > > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical > > > ring submission mechanism > > > > > > On Mon, Jun 23, 2014 at 01:36:07PM +0000, Mateo Lozano, Oscar > wrote: > > > > > -----Original Message----- > > > > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > > > > Sent: Monday, June 23, 2014 2:27 PM > > > > > To: Mateo Lozano, Oscar > > > > > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > > > > > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical > > > > > ring submission mechanism > > > > > > > > > > On Mon, Jun 23, 2014 at 01:18:35PM +0000, Mateo Lozano, Oscar > > > wrote: > > > > > > > -----Original Message----- > > > > > > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > > > > > > Sent: Monday, June 23, 2014 2:14 PM > > > > > > > To: Mateo Lozano, Oscar > > > > > > > Cc: Volkin, Bradley D; intel-gfx@lists.freedesktop.org > > > > > > > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New > > > > > > > logical ring submission mechanism > > > > > > > > > > > > > > On Mon, Jun 23, 2014 at 01:09:37PM +0000, Mateo Lozano, > > > > > > > Oscar > > > > > wrote: > > > > > > > > So far, yes, but that´s only because I artificially made > > > > > > > > intel_lrc.c self- > > > > > > > contained, as Daniel requested. What if we need to execute > > > > > > > commands from somewhere else, like in intel_gen7_queue_flip()? 
> > > > > > > > > > > > > > > > And this takes me to another discussion: this logical ring > > > > > > > > vs legacy ring split > > > > > > > is probably a good idea (time will tell), but we should > > > > > > > provide a way of sending commands for execution without > > > > > > > knowing if Execlists are enabled or not. In the early series > > > > > > > that was easy because we reused the ring_begin, ring_emit & > > > > > > > ring_advance functions, but this is not the case anymore. > > > > > > > And without this, sooner or later somebody will break legacy > > > > > > > or execlists (this already happened last week, when somebody > > > > > > > here was implementing native sync without knowing > > > > > about Execlists). > > > > > > > > > > > > > > > > So, the questions is: how do you feel about a dev_priv.gt > > > > > > > > vfunc that takes a > > > > > > > context, a ring, an array of DWORDS and a BB length and does > > > > > > > the intel_(logical)_ring_begin/emit/advance based on > > > i915.enable_execlists? > > There are 3 cases of non-execbuffer submissions that I can think of: flips, > render state, and clear-buffer (proposed patches on the list). I wonder if the > right approach might be to use batchbuffers with a small wrapper around the > dispatch_execbuffer/emit_bb_start vfuncs. Basically the rule would be to > only touch a ringbuffer from within the intel_engine_cs vfuncs, which always > know which set of functions to use. > > For flips, we could use MMIO flips. Render state already uses the existing > dispatch_execbuffer() and add_request(). The clear code could potentially do > the same. There would obviously be some overhead in using a batch buffer > for what could end up being just a few commands. Perhaps the batch buffer > pool code from the command parser would help though. This has another positive side-effect: the scheduler guys do not like things inside the ring without a proper batchbuffer & request, because it makes their life more complex.
On Tue, Jun 24, 2014 at 04:45:05AM -0700, Mateo Lozano, Oscar wrote: > Ok, let´s try to extract something positive out of all this. > > OPTION A (Ben´s proposal): > > > I think the only solution for what Chris is asking for is to implement this as 1 > > context per engine, as opposed to 1 context with a context object per > > engine. As you correctly stated, I think we all agreed the latter was fine when > > we met. Functionally, I see no difference, but it does allow you to always use > > a context as the sole mechanism for making any decisions and performing > > any operations. Now without writing all the code, I can't promise it actually > > will look better, but I think it's likely going to be a lot cleaner. Before you do > > any changes though... > > We agreed on this early on (v1), yes, but then the idea was frowned upon by Brad and then by Daniel. I cannot recall exactly why anymore, but one big reason was that the idr mechanism makes it difficult to track several contexts with the same id (and userspace only has one context handle) and something about ctx->hang_stats. From v2 on, we agreed to multiplex different engines inside one intel_context (and that´s why we renamed i915_hw_context to intel_context). Yeah, at least for me, the reason was that the multiple structs per context id code felt awkward given that most/all of the fields in a struct intel_context are logically per-context rather than per-engine (vm, hang_stats, etc). It didn't seem like the right approach to me at the time. 
Brad > > OPTION B (Brad's proposal): > > > So I suggested that we: > > > > - Add a back pointer from struct intel_ringbuffer to intel_context (would only > > be valid for lrc mode) > > - Move the intel_ringbuffer_get(engine, context) calls up to the callers > > - Pass (engine, ringbuf) instead of (engine, context) to intel_ring_* functions > > - Have the vfunc implementations get the context from the ringbuffer where > > needed and ignore it where not > > > > Looking again, we could probably add a back pointer to the intel_engine_cs > > as well and then just pass around the ringbuf. > > Sounds fine by me: intel_ringbuffer is only related to exactly one intel_engine_cs and one intel_context, so having pointers to those two makes sense. > As before, this could be easily done within the existing code (passing intel_ringbuffer instead of intel_engine_cs), but Daniel wants a code split, so I can only do it for the logical ring functions.
On Mon, 23 Jun 2014 14:13:55 +0100 Chris Wilson <chris@chris-wilson.co.uk> wrote: > On Mon, Jun 23, 2014 at 01:09:37PM +0000, Mateo Lozano, Oscar wrote: > > So far, yes, but that´s only because I artificially made > > intel_lrc.c self-contained, as Daniel requested. What if we need to > > execute commands from somewhere else, like in > > intel_gen7_queue_flip()? > > > > And this takes me to another discussion: this logical ring vs > > legacy ring split is probably a good idea (time will tell), but we > > should provide a way of sending commands for execution without > > knowing if Execlists are enabled or not. In the early series that > > was easy because we reused the ring_begin, ring_emit & ring_advance > > functions, but this is not the case anymore. And without this, > > sooner or later somebody will break legacy or execlists (this > > already happened last week, when somebody here was implementing > > native sync without knowing about Execlists). > > > > So, the questions is: how do you feel about a dev_priv.gt vfunc > > that takes a context, a ring, an array of DWORDS and a BB length > > and does the intel_(logical)_ring_begin/emit/advance based on > > i915.enable_execlists? > > I'm still baffled by the design. intel_ring_begin() and friends should > be able to find their context (logical or legacy) from the ring and > dtrt. To me, given that the LRC contains the ring, the right thing to do would be to do what Oscar did in the first place: pass the context around everywhere. If there were cases where that wasn't ideal (the layering violations you mention later?) we should fix them up instead. But given that this is a standalone file, it's easy to fix up however we want incrementally, as long as things work well to begin with and it's reasonably easy to review it... Jesse
> -----Original Message----- > From: Jesse Barnes [mailto:jbarnes@virtuousgeek.org] > Sent: Tuesday, June 24, 2014 6:20 PM > To: Chris Wilson > Cc: Mateo Lozano, Oscar; intel-gfx@lists.freedesktop.org > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical ring > submission mechanism > > On Mon, 23 Jun 2014 14:13:55 +0100 > Chris Wilson <chris@chris-wilson.co.uk> wrote: > > > On Mon, Jun 23, 2014 at 01:09:37PM +0000, Mateo Lozano, Oscar wrote: > > > So far, yes, but that´s only because I artificially made intel_lrc.c > > > self-contained, as Daniel requested. What if we need to execute > > > commands from somewhere else, like in intel_gen7_queue_flip()? > > > > > > And this takes me to another discussion: this logical ring vs legacy > > > ring split is probably a good idea (time will tell), but we should > > > provide a way of sending commands for execution without knowing if > > > Execlists are enabled or not. In the early series that was easy > > > because we reused the ring_begin, ring_emit & ring_advance > > > functions, but this is not the case anymore. And without this, > > > sooner or later somebody will break legacy or execlists (this > > > already happened last week, when somebody here was implementing > > > native sync without knowing about Execlists). > > > > > > So, the questions is: how do you feel about a dev_priv.gt vfunc that > > > takes a context, a ring, an array of DWORDS and a BB length and does > > > the intel_(logical)_ring_begin/emit/advance based on > > > i915.enable_execlists? > > > > I'm still baffled by the design. intel_ring_begin() and friends should > > be able to find their context (logical or legacy) from the ring and > > dtrt. > > To me, given that the LRC contains the ring, the right thing to do would be to > do what Oscar did in the first place: pass the context around everywhere. If > there were cases where that wasn't ideal (the layering violations you > mention later?) we should fix them up instead. 
> > But given that this is a standalone file, it's easy to fix up however we want > incrementally, as long as things work well to begin with and it's reasonably > easy to review it... Hi Jesse, I spoke with Chris on IRC, and he told me that he is currently rewriting this stuff to show (and prove) what his approach is. In the meantime, I'll keep working on the review comments I have received from Brad and Daniel and I'll also send some early patches with prep-work that (hopefully) can be merged without too much fuss. -- Oscar
On Tue, Jun 24, 2014 at 12:29:45PM +0000, Mateo Lozano, Oscar wrote: > > -----Original Message----- > > From: Volkin, Bradley D > > Sent: Monday, June 23, 2014 8:10 PM > > To: Mateo Lozano, Oscar > > Cc: Chris Wilson; intel-gfx@lists.freedesktop.org > > Subject: Re: [Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical ring > > submission mechanism > > There are 3 cases of non-execbuffer submissions that I can think of: flips, > > render state, and clear-buffer (proposed patches on the list). I wonder if the > > right approach might be to use batchbuffers with a small wrapper around the > > dispatch_execbuffer/emit_bb_start vfuncs. Basically the rule would be to > > only touch a ringbuffer from within the intel_engine_cs vfuncs, which always > > know which set of functions to use. > > > > For flips, we could use MMIO flips. Render state already uses the existing > > dispatch_execbuffer() and add_request(). The clear code could potentially do > > the same. There would obviously be some overhead in using a batch buffer > > for what could end up being just a few commands. Perhaps the batch buffer > > pool code from the command parser would help though. > > This has another positive side-effect: the scheduler guys do not like > things inside the ring without a proper batchbuffer & request, because > it makes their life more complex. I'm probably missing all the context here but I've thought this is the plan forward: We'll use mmio flips with execlists and otherwise we'll submit everything with the right context/engine whaterever using execlist-specific functions. Since the gpu clear code isn't merged yet we can imo ignore it for now and merge execlists first. -Daniel
On Mon, Jun 23, 2014 at 02:13:55PM +0100, Chris Wilson wrote: > On Mon, Jun 23, 2014 at 01:09:37PM +0000, Mateo Lozano, Oscar wrote: > > So far, yes, but that´s only because I artificially made intel_lrc.c self-contained, as Daniel requested. What if we need to execute commands from somewhere else, like in intel_gen7_queue_flip()? > > > > And this takes me to another discussion: this logical ring vs legacy ring split is probably a good idea (time will tell), but we should provide a way of sending commands for execution without knowing if Execlists are enabled or not. In the early series that was easy because we reused the ring_begin, ring_emit & ring_advance functions, but this is not the case anymore. And without this, sooner or later somebody will break legacy or execlists (this already happened last week, when somebody here was implementing native sync without knowing about Execlists). > > > > So, the questions is: how do you feel about a dev_priv.gt vfunc that takes a context, a ring, an array of DWORDS and a BB length and does the intel_(logical)_ring_begin/emit/advance based on i915.enable_execlists? > > I'm still baffled by the design. intel_ring_begin() and friends should > be able to find their context (logical or legacy) from the ring and > dtrt. Well I'm opting for a the different approach of presuming that the callers knows whether we're running with execlists or legacy rings and so will have a clean (and full) split. If we really need to submit massive amounts of cs commands from the kernel we should launch that as a batch, which should be fairly unform for both legacy ring and execlists mode. -Daniel
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 02fc3d0..89aed7a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -86,6 +86,220 @@ bool intel_enable_execlists(struct drm_device *dev) return HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev); } +static inline struct intel_ringbuffer * +logical_ringbuf_get(struct intel_engine_cs *ring, struct intel_context *ctx) +{ + return ctx->engine[ring->id].ringbuf; +} + +void intel_logical_ring_advance_and_submit(struct intel_engine_cs *ring, + struct intel_context *ctx) +{ + struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx); + + intel_logical_ring_advance(ringbuf); + + if (intel_ring_stopped(ring)) + return; + + ring->submit_ctx(ring, ctx, ringbuf->tail); +} + +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, + struct intel_context *ctx) +{ + if (ring->outstanding_lazy_seqno) + return 0; + + if (ring->preallocated_lazy_request == NULL) { + struct drm_i915_gem_request *request; + + request = kmalloc(sizeof(*request), GFP_KERNEL); + if (request == NULL) + return -ENOMEM; + + ring->preallocated_lazy_request = request; + } + + return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno); +} + +static int logical_ring_wait_request(struct intel_engine_cs *ring, + struct intel_ringbuffer *ringbuf, + struct intel_context *ctx, + int bytes) +{ + struct drm_i915_gem_request *request; + u32 seqno = 0; + int ret; + + if (ringbuf->last_retired_head != -1) { + ringbuf->head = ringbuf->last_retired_head; + ringbuf->last_retired_head = -1; + + ringbuf->space = intel_ring_space(ringbuf); + if (ringbuf->space >= bytes) + return 0; + } + + list_for_each_entry(request, &ring->request_list, list) { + if (__intel_ring_space(request->tail, ringbuf->tail, + ringbuf->size) >= bytes) { + seqno = request->seqno; + break; + } + } + + if (seqno == 0) + return -ENOSPC; + + ret = i915_wait_seqno(ring, seqno); + if (ret) + return ret; + + /* 
TODO: make sure we update the right ringbuffer's last_retired_head + * when retiring requests */ + i915_gem_retire_requests_ring(ring); + ringbuf->head = ringbuf->last_retired_head; + ringbuf->last_retired_head = -1; + + ringbuf->space = intel_ring_space(ringbuf); + return 0; +} + +static int logical_ring_wait_for_space(struct intel_engine_cs *ring, + struct intel_ringbuffer *ringbuf, + struct intel_context *ctx, + int bytes) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long end; + int ret; + + ret = logical_ring_wait_request(ring, ringbuf, ctx, bytes); + if (ret != -ENOSPC) + return ret; + + /* Force the context submission in case we have been skipping it */ + intel_logical_ring_advance_and_submit(ring, ctx); + + /* With GEM the hangcheck timer should kick us out of the loop, + * leaving it early runs the risk of corrupting GEM state (due + * to running on almost untested codepaths). But on resume + * timers don't work yet, so prevent a complete hang in that + * case by choosing an insanely large timeout. 
*/ + end = jiffies + 60 * HZ; + + do { + ringbuf->head = I915_READ_HEAD(ring); + ringbuf->space = intel_ring_space(ringbuf); + if (ringbuf->space >= bytes) { + ret = 0; + break; + } + + if (!drm_core_check_feature(dev, DRIVER_MODESET) && + dev->primary->master) { + struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv; + if (master_priv->sarea_priv) + master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT; + } + + msleep(1); + + if (dev_priv->mm.interruptible && signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + ret = i915_gem_check_wedge(&dev_priv->gpu_error, + dev_priv->mm.interruptible); + if (ret) + break; + + if (time_after(jiffies, end)) { + ret = -EBUSY; + break; + } + } while (1); + + return ret; +} + +static int logical_ring_wrap_buffer(struct intel_engine_cs *ring, + struct intel_ringbuffer *ringbuf, + struct intel_context *ctx) +{ + uint32_t __iomem *virt; + int rem = ringbuf->size - ringbuf->tail; + + if (ringbuf->space < rem) { + int ret = logical_ring_wait_for_space(ring, ringbuf, ctx, rem); + if (ret) + return ret; + } + + virt = ringbuf->virtual_start + ringbuf->tail; + rem /= 4; + while (rem--) + iowrite32(MI_NOOP, virt++); + + ringbuf->tail = 0; + ringbuf->space = intel_ring_space(ringbuf); + + return 0; +} + +static int logical_ring_prepare(struct intel_engine_cs *ring, + struct intel_ringbuffer *ringbuf, + struct intel_context *ctx, + int bytes) +{ + int ret; + + if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) { + ret = logical_ring_wrap_buffer(ring, ringbuf, ctx); + if (unlikely(ret)) + return ret; + } + + if (unlikely(ringbuf->space < bytes)) { + ret = logical_ring_wait_for_space(ring, ringbuf, ctx, bytes); + if (unlikely(ret)) + return ret; + } + + return 0; +} + +int intel_logical_ring_begin(struct intel_engine_cs *ring, + struct intel_context *ctx, + int num_dwords) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + struct intel_ringbuffer *ringbuf = 
logical_ringbuf_get(ring, ctx); + int ret; + + ret = i915_gem_check_wedge(&dev_priv->gpu_error, + dev_priv->mm.interruptible); + if (ret) + return ret; + + ret = logical_ring_prepare(ring, ringbuf, ctx, + num_dwords * sizeof(uint32_t)); + if (ret) + return ret; + + /* Preallocate the olr before touching the ring */ + ret = logical_ring_alloc_seqno(ring, ctx); + if (ret) + return ret; + + ringbuf->space -= num_dwords * sizeof(uint32_t); + return 0; +} + static int gen8_init_common_ring(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 26b0949..686ebf5 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -5,6 +5,24 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring); int intel_logical_rings_init(struct drm_device *dev); +void intel_logical_ring_advance_and_submit(struct intel_engine_cs *ring, + struct intel_context *ctx); + +static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) +{ + ringbuf->tail &= ringbuf->size - 1; +} + +static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, u32 data) +{ + iowrite32(data, ringbuf->virtual_start + ringbuf->tail); + ringbuf->tail += 4; +} + +int intel_logical_ring_begin(struct intel_engine_cs *ring, + struct intel_context *ctx, + int num_dwords); + /* Logical Ring Contexts */ void intel_lr_context_free(struct intel_context *ctx); int intel_lr_context_deferred_create(struct intel_context *ctx,