Message ID | 1435932578-32209-7-git-send-email-mika.kuoppala@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 07/03/2015 07:09 AM, Mika Kuoppala wrote: > Pass around requests to carry context deeper in callchain. > > Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com> > --- > drivers/gpu/drm/i915/intel_lrc.c | 14 ++++++-------- > 1 file changed, 6 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index c3c029a..67ff460 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -261,10 +261,11 @@ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj) > return lrca >> 12; > } > > -static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring, > - struct drm_i915_gem_object *ctx_obj) > +static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_request *rq) > { GuC patch series tries to utilize this function. However, changing from ring/context to request makes it unusable in the case where request is not needed (not available). This context descriptor has nothing to do with drm_i915_gem_request IMO. And it is waste of time to call it for each batch submission. This value is fixed when context is pinned. Maybe add a member ctx_desc into intel_context->engine; initialize the value within intel_lr_context_pin; then use it whenever needed. Thanks, Alex > + struct intel_engine_cs *ring = rq->ring; > struct drm_device *dev = ring->dev; > + struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state; > uint64_t desc; > uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj); > > @@ -299,21 +300,18 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0, > struct intel_engine_cs *ring = rq0->ring; > struct drm_device *dev = ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > - struct drm_i915_gem_object *ctx_obj0 = rq0->ctx->engine[ring->id].state; > - struct drm_i915_gem_object *ctx_obj1 = rq1 ? > - rq1->ctx->engine[ring->id].state : NULL; > uint64_t temp = 0; > uint32_t desc[4]; > > /* XXX: You must always write both descriptors in the order below. */ > - if (ctx_obj1) > - temp = execlists_ctx_descriptor(ring, ctx_obj1); > + if (rq1) > + temp = execlists_ctx_descriptor(rq1); > else > temp = 0; > desc[1] = (u32)(temp >> 32); > desc[0] = (u32)temp; > > - temp = execlists_ctx_descriptor(ring, ctx_obj0); > + temp = execlists_ctx_descriptor(rq0); > desc[3] = (u32)(temp >> 32); > desc[2] = (u32)temp; >
On 08/07/15 17:40, Yu Dai wrote: > On 07/03/2015 07:09 AM, Mika Kuoppala wrote: >> Pass around requests to carry context deeper in callchain. >> >> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com> >> --- >> drivers/gpu/drm/i915/intel_lrc.c | 14 ++++++-------- >> 1 file changed, 6 insertions(+), 8 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/intel_lrc.c >> b/drivers/gpu/drm/i915/intel_lrc.c >> index c3c029a..67ff460 100644 >> --- a/drivers/gpu/drm/i915/intel_lrc.c >> +++ b/drivers/gpu/drm/i915/intel_lrc.c >> @@ -261,10 +261,11 @@ u32 intel_execlists_ctx_id(struct >> drm_i915_gem_object *ctx_obj) >> return lrca >> 12; >> } >> -static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring, >> - struct drm_i915_gem_object *ctx_obj) >> +static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_request >> *rq) >> { > > GuC patch series tries to utilize this function. However, changing from > ring/context to request makes it unusable in the case where request is > not needed (not available). This context descriptor has nothing to do > with drm_i915_gem_request IMO. And it is waste of time to call it for > each batch submission. This value is fixed when context is pinned. Maybe > add a member ctx_desc into intel_context->engine; initialize the value > within intel_lr_context_pin; then use it whenever needed. > > Thanks, > Alex Yes, I've effectively reversed this in the next GuC submission series, since we don't have a request during setup, and this function doesn't actually need one either; it's only being used as a handle for extracting the context and ring. So in my version it will take an intel_context and a ring so it's up to the caller to extract those from the drm_i915_gem_request (rq->ctx) and (rq->ring) and pass them separately. Then the GuC can use it during setup as well as at runtime :) .Dave.
On Wed Jul 08 2015 at 18:04:44 +0100, Dave Gordon wrote: > On 08/07/15 17:40, Yu Dai wrote: > >On 07/03/2015 07:09 AM, Mika Kuoppala wrote: > >>Pass around requests to carry context deeper in callchain. > >> > >>Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com> > >>--- > >> drivers/gpu/drm/i915/intel_lrc.c | 14 ++++++-------- > >> 1 file changed, 6 insertions(+), 8 deletions(-) > >> > >>diff --git a/drivers/gpu/drm/i915/intel_lrc.c > >>b/drivers/gpu/drm/i915/intel_lrc.c > >>index c3c029a..67ff460 100644 > >>--- a/drivers/gpu/drm/i915/intel_lrc.c > >>+++ b/drivers/gpu/drm/i915/intel_lrc.c > >>@@ -261,10 +261,11 @@ u32 intel_execlists_ctx_id(struct > >>drm_i915_gem_object *ctx_obj) > >> return lrca >> 12; > >> } > >>-static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring, > >>- struct drm_i915_gem_object *ctx_obj) > >>+static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_request > >>*rq) > >> { > > > >GuC patch series tries to utilize this function. However, changing from > >ring/context to request makes it unusable in the case where request is > >not needed (not available). This context descriptor has nothing to do > >with drm_i915_gem_request IMO. And it is waste of time to call it for > >each batch submission. This value is fixed when context is pinned. Maybe > >add a member ctx_desc into intel_context->engine; initialize the value > >within intel_lr_context_pin; then use it whenever needed. > > > >Thanks, > >Alex > > Yes, I've effectively reversed this in the next GuC submission > series, since we don't have a request during setup, and this > function doesn't actually need one either; it's only being used as a > handle for extracting the context and ring. > > So in my version it will take an intel_context and a ring so it's up > to the caller to extract those from the drm_i915_gem_request > (rq->ctx) and (rq->ring) and pass them separately. Then the GuC can > use it during setup as well as at runtime :) > I admit I went one step too far on this one in the series. The bspec says the context ids created should be unique across all engines and all execlists. I was concerned that our lrca only setup and resubmission for pre-emption would break this rule. (and be the reason for skl hickups) The bspec has a note about creating context id with ring/counter(seqno)/lrca combination. Thus the request -> descriptor creation. I didn't post that patch as it didn't cure the skl hang. But it made debugging the context status queue easier as seqnos were part of context_id. -Mika
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c3c029a..67ff460 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -261,10 +261,11 @@ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj) return lrca >> 12; } -static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring, - struct drm_i915_gem_object *ctx_obj) +static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_request *rq) { + struct intel_engine_cs *ring = rq->ring; struct drm_device *dev = ring->dev; + struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state; uint64_t desc; uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj); @@ -299,21 +300,18 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0, struct intel_engine_cs *ring = rq0->ring; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_gem_object *ctx_obj0 = rq0->ctx->engine[ring->id].state; - struct drm_i915_gem_object *ctx_obj1 = rq1 ? - rq1->ctx->engine[ring->id].state : NULL; uint64_t temp = 0; uint32_t desc[4]; /* XXX: You must always write both descriptors in the order below. */ - if (ctx_obj1) - temp = execlists_ctx_descriptor(ring, ctx_obj1); + if (rq1) + temp = execlists_ctx_descriptor(rq1); else temp = 0; desc[1] = (u32)(temp >> 32); desc[0] = (u32)temp; - temp = execlists_ctx_descriptor(ring, ctx_obj0); + temp = execlists_ctx_descriptor(rq0); desc[3] = (u32)(temp >> 32); desc[2] = (u32)temp;
Pass around requests to carry context deeper in callchain. Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com> --- drivers/gpu/drm/i915/intel_lrc.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-)