Message ID | 20200327112212.16046-3-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/3] drm/i915: Allow for different modes of interruptible i915_active_wait | expand |
On 27/03/2020 13:22, Chris Wilson wrote: > We wish that the scheduler emit the context modification commands prior > to enabling the oa_config, for which we must explicitly inform it of the > ordering constraints. This is especially important as we now wait for > the final oa_config setup to be completed and as this wait may be on a > distinct context to the state modifications, we need that command packet > to be always last in the queue. > > We borrow the i915_active for its ability to track multiple timelines > and the last dma_fence on each; a flexible dma_resv. Keeping track of > each dma_fence is important for us so that we can efficiently schedule > the requests and reprioritise as required. > > Reported-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > --- > drivers/gpu/drm/i915/i915_perf.c | 154 ++++++++++++++++--------- > drivers/gpu/drm/i915/i915_perf_types.h | 5 +- > 2 files changed, 102 insertions(+), 57 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 3222f6cd8255..faf4b0970775 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -1961,10 +1961,11 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) > return i915_vma_get(oa_bo->vma); > } > > -static struct i915_request * > +static int > emit_oa_config(struct i915_perf_stream *stream, > struct i915_oa_config *oa_config, > - struct intel_context *ce) > + struct intel_context *ce, > + struct i915_active *active) > { > struct i915_request *rq; > struct i915_vma *vma; > @@ -1972,7 +1973,7 @@ emit_oa_config(struct i915_perf_stream *stream, > > vma = get_oa_vma(stream, oa_config); > if (IS_ERR(vma)) > - return ERR_CAST(vma); > + return PTR_ERR(vma); > > err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); > if (err) > @@ -1986,6 +1987,18 @@ emit_oa_config(struct i915_perf_stream *stream, > goto err_vma_unpin; > } > > + if (!IS_ERR_OR_NULL(active)) { > + /* After all individual context modifications */ > + err = i915_request_await_active(rq, active, > + I915_ACTIVE_AWAIT_ALL); > + if (err) > + goto err_add_request; > + > + err = i915_active_add_request(active, rq); > + if (err) > + goto err_add_request; > + } > + > i915_vma_lock(vma); > err = i915_request_await_object(rq, vma->obj, 0); > if (!err) > @@ -2000,14 +2013,13 @@ emit_oa_config(struct i915_perf_stream *stream, > if (err) > goto err_add_request; > > - i915_request_get(rq); > err_add_request: > i915_request_add(rq); > err_vma_unpin: > i915_vma_unpin(vma); > err_vma_put: > i915_vma_put(vma); > - return err ? ERR_PTR(err) : rq; > + return err; > } > > static struct intel_context *oa_context(struct i915_perf_stream *stream) > @@ -2015,8 +2027,9 @@ static struct intel_context *oa_context(struct i915_perf_stream *stream) > return stream->pinned_ctx ?: stream->engine->kernel_context; > } > > -static struct i915_request * > -hsw_enable_metric_set(struct i915_perf_stream *stream) > +static int > +hsw_enable_metric_set(struct i915_perf_stream *stream, > + struct i915_active *active) > { > struct intel_uncore *uncore = stream->uncore; > > @@ -2035,7 +2048,9 @@ hsw_enable_metric_set(struct i915_perf_stream *stream) > intel_uncore_rmw(uncore, GEN6_UCGCTL1, > 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE); > > - return emit_oa_config(stream, stream->oa_config, oa_context(stream)); > + return emit_oa_config(stream, > + stream->oa_config, oa_context(stream), > + active); > } > > static void hsw_disable_metric_set(struct i915_perf_stream *stream) > @@ -2182,8 +2197,10 @@ static int gen8_modify_context(struct intel_context *ce, > return err; > } > > -static int gen8_modify_self(struct intel_context *ce, > - const struct flex *flex, unsigned int count) > +static int > +gen8_modify_self(struct intel_context *ce, > + const struct flex *flex, unsigned int count, > + struct i915_active *active) > { > struct i915_request *rq; > int err; > @@ -2194,8 +2211,17 @@ static int gen8_modify_self(struct intel_context *ce, > if (IS_ERR(rq)) > return PTR_ERR(rq); > > + if (!IS_ERR_OR_NULL(active)) { > + err = i915_active_add_request(active, rq); > + if (err) > + goto err_add_request; > + } > + > err = gen8_load_flex(rq, ce, flex, count); > + if (err) > + goto err_add_request; > > +err_add_request: > i915_request_add(rq); > return err; > } > @@ -2229,7 +2255,8 @@ static int gen8_configure_context(struct i915_gem_context *ctx, > return err; > } > > -static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable) > +static int gen12_configure_oar_context(struct i915_perf_stream *stream, > + struct i915_active *active) > { > int err; > struct intel_context *ce = stream->pinned_ctx; > @@ -2238,7 +2265,7 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena > { > GEN8_OACTXCONTROL, > stream->perf->ctx_oactxctrl_offset + 1, > - enable ? GEN8_OA_COUNTER_RESUME : 0, > + active ? GEN8_OA_COUNTER_RESUME : 0, > }, > }; > /* Offsets in regs_lri are not used since this configuration is only > @@ -2250,13 +2277,13 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena > GEN12_OAR_OACONTROL, > GEN12_OAR_OACONTROL_OFFSET + 1, > (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | > - (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0) > + (active ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0) > }, > { > RING_CONTEXT_CONTROL(ce->engine->mmio_base), > CTX_CONTEXT_CONTROL, > _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, > - enable ? > + active ? > GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : > 0) > }, > @@ -2273,7 +2300,7 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena > return err; > > /* Apply regs_lri using LRI with pinned context */ > - return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri)); > + return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active); > } > > /* > @@ -2301,9 +2328,11 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena > * Note: it's only the RCS/Render context that has any OA state. > * Note: the first flex register passed must always be R_PWR_CLK_STATE > */ > -static int oa_configure_all_contexts(struct i915_perf_stream *stream, > - struct flex *regs, > - size_t num_regs) > +static int > +oa_configure_all_contexts(struct i915_perf_stream *stream, > + struct flex *regs, > + size_t num_regs, > + struct i915_active *active) > { > struct drm_i915_private *i915 = stream->perf->i915; > struct intel_engine_cs *engine; > @@ -2360,7 +2389,7 @@ static int oa_configure_all_contexts(struct i915_perf_stream *stream, > > regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); > > - err = gen8_modify_self(ce, regs, num_regs); > + err = gen8_modify_self(ce, regs, num_regs, active); > if (err) > return err; > } > @@ -2368,8 +2397,10 @@ static int oa_configure_all_contexts(struct i915_perf_stream *stream, > return 0; > } > > -static int gen12_configure_all_contexts(struct i915_perf_stream *stream, > - const struct i915_oa_config *oa_config) > +static int > +gen12_configure_all_contexts(struct i915_perf_stream *stream, > + const struct i915_oa_config *oa_config, > + struct i915_active *active) > { > struct flex regs[] = { > { > @@ -2378,11 +2409,15 @@ static int gen12_configure_all_contexts(struct i915_perf_stream *stream, > }, > }; > > - return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); > + return oa_configure_all_contexts(stream, > + regs, ARRAY_SIZE(regs), > + active); > } > > -static int lrc_configure_all_contexts(struct i915_perf_stream *stream, > - const struct i915_oa_config *oa_config) > +static int > +lrc_configure_all_contexts(struct i915_perf_stream *stream, > + const struct i915_oa_config *oa_config, > + struct i915_active *active) > { > /* The MMIO offsets for Flex EU registers aren't contiguous */ > const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; > @@ -2415,11 +2450,14 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, > for (i = 2; i < ARRAY_SIZE(regs); i++) > regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); > > - return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); > + return oa_configure_all_contexts(stream, > + regs, ARRAY_SIZE(regs), > + active); > } > > -static struct i915_request * > -gen8_enable_metric_set(struct i915_perf_stream *stream) > +static int > +gen8_enable_metric_set(struct i915_perf_stream *stream, > + struct i915_active *active) > { > struct intel_uncore *uncore = stream->uncore; > struct i915_oa_config *oa_config = stream->oa_config; > @@ -2459,11 +2497,13 @@ gen8_enable_metric_set(struct i915_perf_stream *stream) > * to make sure all slices/subslices are ON before writing to NOA > * registers. > */ > - ret = lrc_configure_all_contexts(stream, oa_config); > + ret = lrc_configure_all_contexts(stream, oa_config, active); > if (ret) > - return ERR_PTR(ret); > + return ret; > > - return emit_oa_config(stream, oa_config, oa_context(stream)); > + return emit_oa_config(stream, > + stream->oa_config, oa_context(stream), > + active); > } > > static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream) > @@ -2473,8 +2513,9 @@ static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream) > 0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); > } > > -static struct i915_request * > -gen12_enable_metric_set(struct i915_perf_stream *stream) > +static int > +gen12_enable_metric_set(struct i915_perf_stream *stream, > + struct i915_active *active) > { > struct intel_uncore *uncore = stream->uncore; > struct i915_oa_config *oa_config = stream->oa_config; > @@ -2503,9 +2544,9 @@ gen12_enable_metric_set(struct i915_perf_stream *stream) > * to make sure all slices/subslices are ON before writing to NOA > * registers. > */ > - ret = gen12_configure_all_contexts(stream, oa_config); > + ret = gen12_configure_all_contexts(stream, oa_config, active); > if (ret) > - return ERR_PTR(ret); > + return ret; > > /* > * For Gen12, performance counters are context > @@ -2513,12 +2554,14 @@ gen12_enable_metric_set(struct i915_perf_stream *stream) > * requested this. > */ > if (stream->ctx) { > - ret = gen12_configure_oar_context(stream, true); > + ret = gen12_configure_oar_context(stream, active); > if (ret) > - return ERR_PTR(ret); > + return ret; > } > > - return emit_oa_config(stream, oa_config, oa_context(stream)); > + return emit_oa_config(stream, > + stream->oa_config, oa_context(stream), > + active); > } > > static void gen8_disable_metric_set(struct i915_perf_stream *stream) > @@ -2526,7 +2569,7 @@ static void gen8_disable_metric_set(struct i915_perf_stream *stream) > struct intel_uncore *uncore = stream->uncore; > > /* Reset all contexts' slices/subslices configurations. */ > - lrc_configure_all_contexts(stream, NULL); > + lrc_configure_all_contexts(stream, NULL, NULL); > > intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); > } > @@ -2536,7 +2579,7 @@ static void gen10_disable_metric_set(struct i915_perf_stream *stream) > struct intel_uncore *uncore = stream->uncore; > > /* Reset all contexts' slices/subslices configurations. */ > - lrc_configure_all_contexts(stream, NULL); > + lrc_configure_all_contexts(stream, NULL, NULL); > > /* Make sure we disable noa to save power. */ > intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); > @@ -2547,11 +2590,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) > struct intel_uncore *uncore = stream->uncore; > > /* Reset all contexts' slices/subslices configurations. */ > - gen12_configure_all_contexts(stream, NULL); > + gen12_configure_all_contexts(stream, NULL, NULL); > > /* disable the context save/restore or OAR counters */ > if (stream->ctx) > - gen12_configure_oar_context(stream, false); > + gen12_configure_oar_context(stream, NULL); > > /* Make sure we disable noa to save power. */ > intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); > @@ -2723,16 +2766,19 @@ static const struct i915_perf_stream_ops i915_oa_stream_ops = { > > static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream) > { > - struct i915_request *rq; > + struct i915_active *active; > + int err; > > - rq = stream->perf->ops.enable_metric_set(stream); > - if (IS_ERR(rq)) > - return PTR_ERR(rq); > + active = i915_active_create(); > + if (!active) > + return -ENOMEM; > > - i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); > - i915_request_put(rq); > + err = stream->perf->ops.enable_metric_set(stream, active); > + if (err == 0) > + __i915_active_wait(active, TASK_UNINTERRUPTIBLE); > > - return 0; > + i915_active_put(active); > + return err; > } > > static void > @@ -3217,7 +3263,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream, > return -EINVAL; > > if (config != stream->oa_config) { > - struct i915_request *rq; > + int err; > > /* > * If OA is bound to a specific context, emit the > @@ -3228,13 +3274,11 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream, > * When set globally, we use a low priority kernel context, > * so it will effectively take effect when idle. > */ > - rq = emit_oa_config(stream, config, oa_context(stream)); > - if (!IS_ERR(rq)) { > + err = emit_oa_config(stream, config, oa_context(stream), NULL); > + if (!err) > config = xchg(&stream->oa_config, config); > - i915_request_put(rq); > - } else { > - ret = PTR_ERR(rq); > - } > + else > + ret = err; > } > > i915_oa_config_put(config); > diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h > index 32289cbda648..de5cbb40fddf 100644 > --- a/drivers/gpu/drm/i915/i915_perf_types.h > +++ b/drivers/gpu/drm/i915/i915_perf_types.h > @@ -22,6 +22,7 @@ > > struct drm_i915_private; > struct file; > +struct i915_active; > struct i915_gem_context; > struct i915_perf; > struct i915_vma; > @@ -340,8 +341,8 @@ struct i915_oa_ops { > * counter reports being sampled. May apply system constraints such as > * disabling EU clock gating as required. > */ > - struct i915_request * > - (*enable_metric_set)(struct i915_perf_stream *stream); > + int (*enable_metric_set)(struct i915_perf_stream *stream, > + struct i915_active *active); > > /** > * @disable_metric_set: Remove system constraints associated with using
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 3222f6cd8255..faf4b0970775 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1961,10 +1961,11 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) return i915_vma_get(oa_bo->vma); } -static struct i915_request * +static int emit_oa_config(struct i915_perf_stream *stream, struct i915_oa_config *oa_config, - struct intel_context *ce) + struct intel_context *ce, + struct i915_active *active) { struct i915_request *rq; struct i915_vma *vma; @@ -1972,7 +1973,7 @@ emit_oa_config(struct i915_perf_stream *stream, vma = get_oa_vma(stream, oa_config); if (IS_ERR(vma)) - return ERR_CAST(vma); + return PTR_ERR(vma); err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) @@ -1986,6 +1987,18 @@ emit_oa_config(struct i915_perf_stream *stream, goto err_vma_unpin; } + if (!IS_ERR_OR_NULL(active)) { + /* After all individual context modifications */ + err = i915_request_await_active(rq, active, + I915_ACTIVE_AWAIT_ALL); + if (err) + goto err_add_request; + + err = i915_active_add_request(active, rq); + if (err) + goto err_add_request; + } + i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, 0); if (!err) @@ -2000,14 +2013,13 @@ emit_oa_config(struct i915_perf_stream *stream, if (err) goto err_add_request; - i915_request_get(rq); err_add_request: i915_request_add(rq); err_vma_unpin: i915_vma_unpin(vma); err_vma_put: i915_vma_put(vma); - return err ? ERR_PTR(err) : rq; + return err; } static struct intel_context *oa_context(struct i915_perf_stream *stream) @@ -2015,8 +2027,9 @@ static struct intel_context *oa_context(struct i915_perf_stream *stream) return stream->pinned_ctx ?: stream->engine->kernel_context; } -static struct i915_request * -hsw_enable_metric_set(struct i915_perf_stream *stream) +static int +hsw_enable_metric_set(struct i915_perf_stream *stream, + struct i915_active *active) { struct intel_uncore *uncore = stream->uncore; @@ -2035,7 +2048,9 @@ hsw_enable_metric_set(struct i915_perf_stream *stream) intel_uncore_rmw(uncore, GEN6_UCGCTL1, 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE); - return emit_oa_config(stream, stream->oa_config, oa_context(stream)); + return emit_oa_config(stream, + stream->oa_config, oa_context(stream), + active); } static void hsw_disable_metric_set(struct i915_perf_stream *stream) @@ -2182,8 +2197,10 @@ static int gen8_modify_context(struct intel_context *ce, return err; } -static int gen8_modify_self(struct intel_context *ce, - const struct flex *flex, unsigned int count) +static int +gen8_modify_self(struct intel_context *ce, + const struct flex *flex, unsigned int count, + struct i915_active *active) { struct i915_request *rq; int err; @@ -2194,8 +2211,17 @@ static int gen8_modify_self(struct intel_context *ce, if (IS_ERR(rq)) return PTR_ERR(rq); + if (!IS_ERR_OR_NULL(active)) { + err = i915_active_add_request(active, rq); + if (err) + goto err_add_request; + } + err = gen8_load_flex(rq, ce, flex, count); + if (err) + goto err_add_request; +err_add_request: i915_request_add(rq); return err; } @@ -2229,7 +2255,8 @@ static int gen8_configure_context(struct i915_gem_context *ctx, return err; } -static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable) +static int gen12_configure_oar_context(struct i915_perf_stream *stream, + struct i915_active *active) { int err; struct intel_context *ce = stream->pinned_ctx; @@ -2238,7 +2265,7 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena { GEN8_OACTXCONTROL, stream->perf->ctx_oactxctrl_offset + 1, - enable ? GEN8_OA_COUNTER_RESUME : 0, + active ? GEN8_OA_COUNTER_RESUME : 0, }, }; /* Offsets in regs_lri are not used since this configuration is only @@ -2250,13 +2277,13 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena GEN12_OAR_OACONTROL, GEN12_OAR_OACONTROL_OFFSET + 1, (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | - (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0) + (active ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0) }, { RING_CONTEXT_CONTROL(ce->engine->mmio_base), CTX_CONTEXT_CONTROL, _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, - enable ? + active ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0) }, @@ -2273,7 +2300,7 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena return err; /* Apply regs_lri using LRI with pinned context */ - return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri)); + return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active); } /* @@ -2301,9 +2328,11 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena * Note: it's only the RCS/Render context that has any OA state. * Note: the first flex register passed must always be R_PWR_CLK_STATE */ -static int oa_configure_all_contexts(struct i915_perf_stream *stream, - struct flex *regs, - size_t num_regs) +static int +oa_configure_all_contexts(struct i915_perf_stream *stream, + struct flex *regs, + size_t num_regs, + struct i915_active *active) { struct drm_i915_private *i915 = stream->perf->i915; struct intel_engine_cs *engine; @@ -2360,7 +2389,7 @@ static int oa_configure_all_contexts(struct i915_perf_stream *stream, regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); - err = gen8_modify_self(ce, regs, num_regs); + err = gen8_modify_self(ce, regs, num_regs, active); if (err) return err; } @@ -2368,8 +2397,10 @@ static int oa_configure_all_contexts(struct i915_perf_stream *stream, return 0; } -static int gen12_configure_all_contexts(struct i915_perf_stream *stream, - const struct i915_oa_config *oa_config) +static int +gen12_configure_all_contexts(struct i915_perf_stream *stream, + const struct i915_oa_config *oa_config, + struct i915_active *active) { struct flex regs[] = { { @@ -2378,11 +2409,15 @@ static int gen12_configure_all_contexts(struct i915_perf_stream *stream, }, }; - return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); + return oa_configure_all_contexts(stream, + regs, ARRAY_SIZE(regs), + active); } -static int lrc_configure_all_contexts(struct i915_perf_stream *stream, - const struct i915_oa_config *oa_config) +static int +lrc_configure_all_contexts(struct i915_perf_stream *stream, + const struct i915_oa_config *oa_config, + struct i915_active *active) { /* The MMIO offsets for Flex EU registers aren't contiguous */ const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; @@ -2415,11 +2450,14 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, for (i = 2; i < ARRAY_SIZE(regs); i++) regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); - return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); + return oa_configure_all_contexts(stream, + regs, ARRAY_SIZE(regs), + active); } -static struct i915_request * -gen8_enable_metric_set(struct i915_perf_stream *stream) +static int +gen8_enable_metric_set(struct i915_perf_stream *stream, + struct i915_active *active) { struct intel_uncore *uncore = stream->uncore; struct i915_oa_config *oa_config = stream->oa_config; @@ -2459,11 +2497,13 @@ gen8_enable_metric_set(struct i915_perf_stream *stream) * to make sure all slices/subslices are ON before writing to NOA * registers. */ - ret = lrc_configure_all_contexts(stream, oa_config); + ret = lrc_configure_all_contexts(stream, oa_config, active); if (ret) - return ERR_PTR(ret); + return ret; - return emit_oa_config(stream, oa_config, oa_context(stream)); + return emit_oa_config(stream, + stream->oa_config, oa_context(stream), + active); } static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream) @@ -2473,8 +2513,9 @@ static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream) 0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); } -static struct i915_request * -gen12_enable_metric_set(struct i915_perf_stream *stream) +static int +gen12_enable_metric_set(struct i915_perf_stream *stream, + struct i915_active *active) { struct intel_uncore *uncore = stream->uncore; struct i915_oa_config *oa_config = stream->oa_config; @@ -2503,9 +2544,9 @@ gen12_enable_metric_set(struct i915_perf_stream *stream) * to make sure all slices/subslices are ON before writing to NOA * registers. */ - ret = gen12_configure_all_contexts(stream, oa_config); + ret = gen12_configure_all_contexts(stream, oa_config, active); if (ret) - return ERR_PTR(ret); + return ret; /* * For Gen12, performance counters are context @@ -2513,12 +2554,14 @@ gen12_enable_metric_set(struct i915_perf_stream *stream) * requested this. */ if (stream->ctx) { - ret = gen12_configure_oar_context(stream, true); + ret = gen12_configure_oar_context(stream, active); if (ret) - return ERR_PTR(ret); + return ret; } - return emit_oa_config(stream, oa_config, oa_context(stream)); + return emit_oa_config(stream, + stream->oa_config, oa_context(stream), + active); } static void gen8_disable_metric_set(struct i915_perf_stream *stream) @@ -2526,7 +2569,7 @@ static void gen8_disable_metric_set(struct i915_perf_stream *stream) struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ - lrc_configure_all_contexts(stream, NULL); + lrc_configure_all_contexts(stream, NULL, NULL); intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); } @@ -2536,7 +2579,7 @@ static void gen10_disable_metric_set(struct i915_perf_stream *stream) struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ - lrc_configure_all_contexts(stream, NULL); + lrc_configure_all_contexts(stream, NULL, NULL); /* Make sure we disable noa to save power. */ intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); @@ -2547,11 +2590,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ - gen12_configure_all_contexts(stream, NULL); + gen12_configure_all_contexts(stream, NULL, NULL); /* disable the context save/restore or OAR counters */ if (stream->ctx) - gen12_configure_oar_context(stream, false); + gen12_configure_oar_context(stream, NULL); /* Make sure we disable noa to save power. */ intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); @@ -2723,16 +2766,19 @@ static const struct i915_perf_stream_ops i915_oa_stream_ops = { static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream) { - struct i915_request *rq; + struct i915_active *active; + int err; - rq = stream->perf->ops.enable_metric_set(stream); - if (IS_ERR(rq)) - return PTR_ERR(rq); + active = i915_active_create(); + if (!active) + return -ENOMEM; - i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); + err = stream->perf->ops.enable_metric_set(stream, active); + if (err == 0) + __i915_active_wait(active, TASK_UNINTERRUPTIBLE); - return 0; + i915_active_put(active); + return err; } static void @@ -3217,7 +3263,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream, return -EINVAL; if (config != stream->oa_config) { - struct i915_request *rq; + int err; /* * If OA is bound to a specific context, emit the @@ -3228,13 +3274,11 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream, * When set globally, we use a low priority kernel context, * so it will effectively take effect when idle. */ - rq = emit_oa_config(stream, config, oa_context(stream)); - if (!IS_ERR(rq)) { + err = emit_oa_config(stream, config, oa_context(stream), NULL); + if (!err) config = xchg(&stream->oa_config, config); - i915_request_put(rq); - } else { - ret = PTR_ERR(rq); - } + else + ret = err; } i915_oa_config_put(config); diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 32289cbda648..de5cbb40fddf 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -22,6 +22,7 @@ struct drm_i915_private; struct file; +struct i915_active; struct i915_gem_context; struct i915_perf; struct i915_vma; @@ -340,8 +341,8 @@ struct i915_oa_ops { * counter reports being sampled. May apply system constraints such as * disabling EU clock gating as required. */ - struct i915_request * - (*enable_metric_set)(struct i915_perf_stream *stream); + int (*enable_metric_set)(struct i915_perf_stream *stream, + struct i915_active *active); /** * @disable_metric_set: Remove system constraints associated with using
We wish that the scheduler emit the context modification commands prior to enabling the oa_config, for which we must explicitly inform it of the ordering constraints. This is especially important as we now wait for the final oa_config setup to be completed and as this wait may be on a distinct context to the state modifications, we need that command packet to be always last in the queue. We borrow the i915_active for its ability to track multiple timelines and the last dma_fence on each; a flexible dma_resv. Keeping track of each dma_fence is important for us so that we can efficiently schedule the requests and reprioritise as required. Reported-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> --- drivers/gpu/drm/i915/i915_perf.c | 154 ++++++++++++++++--------- drivers/gpu/drm/i915/i915_perf_types.h | 5 +- 2 files changed, 102 insertions(+), 57 deletions(-)