Message ID | 20190705131642.9246-1-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Series | [v2] drm/i915/oa: Reconfigure contexts on the fly |

Looks good, probably best to have someone more familiar with the i915
codebase look at it too. Thanks a bunch!

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

On 05/07/2019 16:16, Chris Wilson wrote:
> Avoid a global idle barrier by reconfiguring each context by rewriting
> them with MI_STORE_DWORD from the kernel context.
>
> v2: We only need to determine the desired register values once, they are
> the same for all contexts.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_context.c |   2 +
>  drivers/gpu/drm/i915/gt/intel_lrc.c         |   7 +-
>  drivers/gpu/drm/i915/i915_perf.c            | 244 +++++++++++++++-----
>  3 files changed, 190 insertions(+), 63 deletions(-)
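
As an aside for readers following along, here is a minimal, self-contained sketch of the mechanism the changelog describes: rather than idling the GPU and CPU-mapping every context image, a request submitted on the kernel context emits one MI_STORE_DWORD_IMM per register to patch the saved image in place. The opcode encodings, register offset and GGTT address below are illustrative placeholders that mirror gen8_store_flex() in the diff further down; they are not taken verbatim from the kernel headers.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative MI encodings (assumed, not copied from intel_gpu_commands.h). */
#define MI_STORE_DWORD_IMM_GEN4 ((0x20u << 23) | 2) /* 4-dword store-immediate */
#define MI_USE_GGTT             (1u << 22)          /* address is a GGTT offset */

struct flex {
        uint32_t offset; /* dword index of the register slot in the context image */
        uint32_t value;  /* value to write into the dword after that slot */
};

/* Emit one MI_STORE_DWORD_IMM per entry, poking the saved context image in place. */
static uint32_t *emit_store_flex(uint32_t *cs, uint64_t lrc_ggtt_offset,
                                 const struct flex *flex, unsigned int count)
{
        do {
                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
                /* "+ 1": the image holds (reg, value) pairs; the value follows the reg */
                *cs++ = (uint32_t)(lrc_ggtt_offset + (flex->offset + 1) * sizeof(uint32_t));
                *cs++ = 0; /* upper 32 bits of the address */
                *cs++ = flex->value;
        } while (flex++, --count);

        return cs;
}

int main(void)
{
        /* Hypothetical register slot and value, e.g. an OACTXCONTROL update. */
        const struct flex regs[] = { { .offset = 0x122, .value = 0x2 } };
        uint32_t ring[8];
        const uint32_t *end = emit_store_flex(ring, 0x100000, regs, 1);

        for (const uint32_t *p = ring; p < end; p++)
                printf("0x%08" PRIx32 "\n", *p);
        return 0;
}

The actual gen8_modify_context() in the patch additionally serialises the stores against the target context's timeline and keeps that context alive while the request is in flight, as the diff below shows.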
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e367dce2a696..1f0d10bb88c1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -624,7 +624,9 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
         ctx->sched.priority = I915_USER_PRIORITY(prio);
         ctx->ring_size = PAGE_SIZE;
 
+        /* Isolate the kernel context from prying eyes and sticky fingers */
         GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
+        list_del_init(&ctx->link);
 
         return ctx;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index e1ae1399c72b..9cc5374401e1 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1552,9 +1552,12 @@ __execlists_update_reg_state(struct intel_context *ce,
         regs[CTX_RING_TAIL + 1] = ring->tail;
 
         /* RPCS */
-        if (engine->class == RENDER_CLASS)
+        if (engine->class == RENDER_CLASS) {
                 regs[CTX_R_PWR_CLK_STATE + 1] =
                         intel_sseu_make_rpcs(engine->i915, &ce->sseu);
+
+                i915_oa_init_reg_state(engine, ce, regs);
+        }
 }
 
 static int
@@ -2966,8 +2969,6 @@ static void execlists_init_reg_state(u32 *regs,
         if (rcs) {
                 regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
                 CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
-
-                i915_oa_init_reg_state(engine, ce, regs);
         }
 
         regs[CTX_END] = MI_BATCH_BUFFER_END;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 357e63beb373..8353589ee31b 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1630,6 +1630,27 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
                                       ~GT_NOA_ENABLE));
 }
 
+static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
+                              i915_reg_t reg)
+{
+        u32 mmio = i915_mmio_reg_offset(reg);
+        int i;
+
+        /*
+         * This arbitrary default will select the 'EU FPU0 Pipeline
+         * Active' event. In the future it's anticipated that there
+         * will be an explicit 'No Event' we can select, but not yet...
+         */
+        if (!oa_config)
+                return 0;
+
+        for (i = 0; i < oa_config->flex_regs_len; i++) {
+                if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio)
+                        return oa_config->flex_regs[i].value;
+        }
+
+        return 0;
+}
 /*
  * NB: It must always remain pointer safe to run this even if the OA unit
  * has been disabled.
@@ -1663,28 +1684,8 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
                       GEN8_OA_COUNTER_RESUME);
 
         for (i = 0; i < ARRAY_SIZE(flex_regs); i++) {
-                u32 state_offset = ctx_flexeu0 + i * 2;
-                u32 mmio = i915_mmio_reg_offset(flex_regs[i]);
-
-                /*
-                 * This arbitrary default will select the 'EU FPU0 Pipeline
-                 * Active' event. In the future it's anticipated that there
-                 * will be an explicit 'No Event' we can select, but not yet...
-                 */
-                u32 value = 0;
-
-                if (oa_config) {
-                        u32 j;
-
-                        for (j = 0; j < oa_config->flex_regs_len; j++) {
-                                if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
-                                        value = oa_config->flex_regs[j].value;
-                                        break;
-                                }
-                        }
-                }
-
-                CTX_REG(reg_state, state_offset, flex_regs[i], value);
+                CTX_REG(reg_state, ctx_flexeu0 + i * 2, flex_regs[i],
+                        oa_config_flex_reg(oa_config, flex_regs[i]));
         }
 
         CTX_REG(reg_state,
@@ -1692,6 +1693,107 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
                 intel_sseu_make_rpcs(i915, &ce->sseu));
 }
 
+struct flex {
+        i915_reg_t reg;
+        u32 offset;
+        u32 value;
+};
+
+static int
+gen8_store_flex(struct i915_request *rq,
+                struct intel_context *ce,
+                const struct flex *flex, unsigned int count)
+{
+        u32 offset;
+        u32 *cs;
+
+        cs = intel_ring_begin(rq, 4 * count);
+        if (IS_ERR(cs))
+                return PTR_ERR(cs);
+
+        offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
+        do {
+                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+                *cs++ = offset + (flex->offset + 1) * sizeof(u32);
+                *cs++ = 0;
+                *cs++ = flex->value;
+        } while (flex++, --count);
+
+        intel_ring_advance(rq, cs);
+
+        return 0;
+}
+
+static int
+gen8_load_flex(struct i915_request *rq,
+               struct intel_context *ce,
+               const struct flex *flex, unsigned int count)
+{
+        u32 *cs;
+
+        GEM_BUG_ON(!count || count > 63);
+
+        cs = intel_ring_begin(rq, 2 * count + 2);
+        if (IS_ERR(cs))
+                return PTR_ERR(cs);
+
+        *cs++ = MI_LOAD_REGISTER_IMM(count);
+        do {
+                *cs++ = i915_mmio_reg_offset(flex->reg);
+                *cs++ = flex->value;
+        } while (flex++, --count);
+        *cs++ = MI_NOOP;
+
+        intel_ring_advance(rq, cs);
+
+        return 0;
+}
+
+static int gen8_modify_context(struct intel_context *ce,
+                               const struct flex *flex, unsigned int count)
+{
+        struct i915_request *rq;
+        int err;
+
+        lockdep_assert_held(&ce->pin_mutex);
+
+        rq = i915_request_create(ce->engine->kernel_context);
+        if (IS_ERR(rq))
+                return PTR_ERR(rq);
+
+        /* Serialise with the remote context */
+        err = i915_active_request_set(&ce->ring->timeline->last_request, rq);
+        if (err)
+                goto out_add;
+
+        /* Keep the remote context alive until after we finish editing */
+        err = i915_active_ref(&ce->active, rq->fence.context, rq);
+        if (err)
+                goto out_add;
+
+        err = gen8_store_flex(rq, ce, flex, count);
+
+out_add:
+        i915_request_add(rq);
+        return err;
+}
+
+static int gen8_modify_self(struct intel_context *ce,
+                            const struct flex *flex, unsigned int count)
+{
+        struct i915_request *rq;
+        int err;
+
+        rq = i915_request_create(ce);
+        if (IS_ERR(rq))
+                return PTR_ERR(rq);
+
+        err = gen8_load_flex(rq, ce, flex, count);
+
+        i915_request_add(rq);
+        return err;
+}
+
 /*
  * Manages updating the per-context aspects of the OA stream
  * configuration across all contexts.
@@ -1716,15 +1818,43 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
  *
  * Note: it's only the RCS/Render context that has any OA state.
  */
-static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
+static int gen8_configure_all_contexts(struct drm_i915_private *i915,
                                        const struct i915_oa_config *oa_config)
 {
-        unsigned int map_type = i915_coherent_map_type(dev_priv);
+        /* The MMIO offsets for Flex EU registers aren't contiguous */
+        const u32 ctx_flexeu0 = i915->perf.oa.ctx_flexeu0_offset;
+#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N))
+        struct flex regs[] = {
+                {
+                        GEN8_R_PWR_CLK_STATE,
+                        CTX_R_PWR_CLK_STATE,
+                },
+                {
+                        GEN8_OACTXCONTROL,
+                        i915->perf.oa.ctx_oactxctrl_offset,
+                        ((i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+                         (i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+                         GEN8_OA_COUNTER_RESUME)
+                },
+                { EU_PERF_CNTL0, ctx_flexeuN(0) },
+                { EU_PERF_CNTL1, ctx_flexeuN(1) },
+                { EU_PERF_CNTL2, ctx_flexeuN(2) },
+                { EU_PERF_CNTL3, ctx_flexeuN(3) },
+                { EU_PERF_CNTL4, ctx_flexeuN(4) },
+                { EU_PERF_CNTL5, ctx_flexeuN(5) },
+                { EU_PERF_CNTL6, ctx_flexeuN(6) },
+        };
+#undef ctx_flexeuN
+        struct intel_engine_cs *engine;
         struct i915_gem_context *ctx;
-        struct i915_request *rq;
-        int ret;
+        enum intel_engine_id id;
+        int err;
+        int i;
 
-        lockdep_assert_held(&dev_priv->drm.struct_mutex);
+        for (i = 2; i < ARRAY_SIZE(regs); i++)
+                regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
+
+        lockdep_assert_held(&i915->drm.struct_mutex);
 
         /*
          * The OA register config is setup through the context image. This image
@@ -1735,59 +1865,53 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
          * We could emit the OA register config through the batch buffer but
          * this might leave small interval of time where the OA unit is
          * configured at an invalid sampling period.
-         *
-         * So far the best way to work around this issue seems to be draining
-         * the GPU from any submitted work.
          */
-        ret = i915_gem_wait_for_idle(dev_priv,
-                                     I915_WAIT_LOCKED,
-                                     MAX_SCHEDULE_TIMEOUT);
-        if (ret)
-                return ret;
-
-        /* Update all contexts now that we've stalled the submission. */
-        list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
+        list_for_each_entry(ctx, &i915->contexts.list, link) {
                 struct i915_gem_engines_iter it;
                 struct intel_context *ce;
 
                 for_each_gem_engine(ce,
                                     i915_gem_context_lock_engines(ctx),
                                     it) {
-                        u32 *regs;
-
                         if (ce->engine->class != RENDER_CLASS)
                                 continue;
 
-                        /* OA settings will be set upon first use */
-                        if (!ce->state)
-                                continue;
-
-                        regs = i915_gem_object_pin_map(ce->state->obj,
-                                                       map_type);
-                        if (IS_ERR(regs)) {
-                                i915_gem_context_unlock_engines(ctx);
-                                return PTR_ERR(regs);
-                        }
+                        err = intel_context_lock_pinned(ce);
+                        if (err)
+                                break;
 
-                        ce->state->obj->mm.dirty = true;
-                        regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
+                        regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
 
-                        gen8_update_reg_state_unlocked(ce, regs, oa_config);
+                        /* Otherwise OA settings will be set upon first use */
+                        if (intel_context_is_pinned(ce))
+                                err = gen8_modify_context(ce, regs, ARRAY_SIZE(regs));
 
-                        i915_gem_object_unpin_map(ce->state->obj);
+                        intel_context_unlock_pinned(ce);
+                        if (err)
+                                break;
                 }
                 i915_gem_context_unlock_engines(ctx);
+                if (err)
+                        return err;
         }
 
         /*
-         * Apply the configuration by doing one context restore of the edited
-         * context image.
+         * After updating all other contexts, we need to modify ourselves.
+         * If we don't modify the kernel_context, we do not get events while
+         * idle.
          */
-        rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context);
-        if (IS_ERR(rq))
-                return PTR_ERR(rq);
+        for_each_engine(engine, i915, id) {
+                struct intel_context *ce = engine->kernel_context;
 
-        i915_request_add(rq);
+                if (engine->class != RENDER_CLASS)
+                        continue;
+
+                regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
+
+                err = gen8_modify_self(ce, regs, ARRAY_SIZE(regs));
+                if (err)
+                        return err;
+        }
 
         return 0;
 }

Avoid a global idle barrier by reconfiguring each context by rewriting
them with MI_STORE_DWORD from the kernel context.

v2: We only need to determine the desired register values once, they are
the same for all contexts.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c |   2 +
 drivers/gpu/drm/i915/gt/intel_lrc.c         |   7 +-
 drivers/gpu/drm/i915/i915_perf.c            | 244 +++++++++++++++-----
 3 files changed, 190 insertions(+), 63 deletions(-)
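
For completeness, the path above is driven from the i915 perf UAPI: gen8_configure_all_contexts() runs when userspace opens (or reconfigures) an OA stream, and with this patch that no longer implies a global wait for the GPU to go idle. Below is a rough userspace sketch, assuming a libdrm-style include path and placeholder values for the metrics set and sampling exponent; it is not taken from the patch or from any particular tool.

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h> /* may be <libdrm/i915_drm.h> depending on the install */

/* Open an i915 OA stream on an already-open DRM fd; returns the stream fd. */
static int open_oa_stream(int drm_fd, uint64_t metrics_set_id)
{
        uint64_t properties[] = {
                /* (property, value) pairs */
                DRM_I915_PERF_PROP_SAMPLE_OA,      1,
                DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
                DRM_I915_PERF_PROP_OA_FORMAT,      I915_OA_FORMAT_A32u40_A4u32_B8_C8,
                DRM_I915_PERF_PROP_OA_EXPONENT,    16, /* placeholder sampling exponent */
        };
        struct drm_i915_perf_open_param param = {
                .flags = I915_PERF_FLAG_FD_CLOEXEC,
                .num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
                .properties_ptr = (uintptr_t)properties,
        };

        return ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
}

Reading the returned fd yields the periodic OA reports.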