diff mbox series

[2/2] drm/i915/perf: Configure OAR for specific context

Message ID 20191114192114.61547-2-umesh.nerlige.ramappa@intel.com (mailing list archive)
State New, archived
Headers show
Series [1/2] drm/i915/perf: Allow non-privileged access when OA buffer is not sampled | expand

Commit Message

Umesh Nerlige Ramappa Nov. 14, 2019, 7:21 p.m. UTC
Gen12 supports saving/restoring render counters per context. Apply OAR
configuration only for the context that is passed in to perf.

v2:
- Fix OACTXCONTROL value to only stop/resume counters.
- Remove gen12_update_reg_state_unlocked as power state is already
  applied by the caller.

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 193 +++++++++++++++++--------------
 1 file changed, 108 insertions(+), 85 deletions(-)

Comments

Umesh Nerlige Ramappa Nov. 14, 2019, 7:45 p.m. UTC | #1
On Thu, Nov 14, 2019 at 11:21:14AM -0800, Umesh Nerlige Ramappa wrote:
>Gen12 supports saving/restoring render counters per context. Apply OAR
>configuration only for the context that is passed in to perf.
>
>v2:
>- Fix OACTXCONTROL value to only stop/resume counters.
>- Remove gen12_update_reg_state_unlocked as power state is already
>  applied by the caller.
>
>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>

IGT Tests in this series:
https://patchwork.freedesktop.org/series/69322/

gen12-mi-rpc
gen12-unprivileged-single-ctx-counters

Thanks,
Umesh

>---
> drivers/gpu/drm/i915/i915_perf.c | 193 +++++++++++++++++--------------
> 1 file changed, 108 insertions(+), 85 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>index 221c1090ae93..2f0be5fbef4b 100644
>--- a/drivers/gpu/drm/i915/i915_perf.c
>+++ b/drivers/gpu/drm/i915/i915_perf.c
>@@ -2078,20 +2078,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
> 	u32 *reg_state = ce->lrc_reg_state;
> 	int i;
>
>-	if (IS_GEN(stream->perf->i915, 12)) {
>-		u32 format = stream->oa_buffer.format;
>+	reg_state[ctx_oactxctrl + 1] =
>+		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>+		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>+		GEN8_OA_COUNTER_RESUME;
>
>-		reg_state[ctx_oactxctrl + 1] =
>-			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
>-			(stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
>-	} else {
>-		reg_state[ctx_oactxctrl + 1] =
>-			(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>-			(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>-			GEN8_OA_COUNTER_RESUME;
>-	}
>-
>-	for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++)
>+	for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
> 		reg_state[ctx_flexeu0 + i * 2 + 1] =
> 			oa_config_flex_reg(stream->oa_config, flex_regs[i]);
>
>@@ -2224,34 +2216,49 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
> 	return err;
> }
>
>-static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
>+static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
> {
>-	struct i915_request *rq;
>-	u32 *cs;
>-	int err = 0;
>-
>-	rq = i915_request_create(ce);
>-	if (IS_ERR(rq))
>-		return PTR_ERR(rq);
>+	int err;
>+	struct intel_context *ce = stream->pinned_ctx;
>+	struct flex regs_context[] = {
>+		{
>+			GEN8_OACTXCONTROL,
>+			stream->perf->ctx_oactxctrl_offset + 1,
>+			enable ? GEN8_OA_COUNTER_RESUME : 0,
>+		},
>+	};
>+	struct flex regs_lri[] = {
>+		{
>+			GEN12_OAR_OACONTROL,
>+		},
>+		{
>+			RING_CONTEXT_CONTROL(ce->engine->mmio_base),
>+		},
>+	};
>+	u32 format = stream->oa_buffer.format;
>
>-	cs = intel_ring_begin(rq, 4);
>-	if (IS_ERR(cs)) {
>-		err = PTR_ERR(cs);
>-		goto out;
>-	}
>+	regs_lri[0].value =
>+		(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
>+		(enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
>
>-	*cs++ = MI_LOAD_REGISTER_IMM(1);
>-	*cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
>-	*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
>-			      enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
>-	*cs++ = MI_NOOP;
>+	regs_lri[1].value =
>+		_MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
>+			      enable ?
>+			      GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
>+			      0);
>
>-	intel_ring_advance(rq, cs);
>+	/* Modify the context image of pinned context with regs_context*/
>+	err = intel_context_lock_pinned(ce);
>+	if (err)
>+		return err;
>
>-out:
>-	i915_request_add(rq);
>+	err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
>+	intel_context_unlock_pinned(ce);
>+	if (err)
>+		return err;
>
>-	return err;
>+	/* Apply regs_lri using LRI with pinned context */
>+	return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
> }
>
> /*
>@@ -2277,53 +2284,16 @@ static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
>  *   per-context OA state.
>  *
>  * Note: it's only the RCS/Render context that has any OA state.
>+ * Note: the first flex register passed must always be R_PWR_CLK_STATE
>  */
>-static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>-				      const struct i915_oa_config *oa_config)
>+static int oa_configure_all_contexts(struct i915_perf_stream *stream,
>+				     struct flex *regs,
>+				     size_t num_regs)
> {
> 	struct drm_i915_private *i915 = stream->perf->i915;
>-	/* The MMIO offsets for Flex EU registers aren't contiguous */
>-	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
>-#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
>-	struct flex regs[] = {
>-		{
>-			GEN8_R_PWR_CLK_STATE,
>-			CTX_R_PWR_CLK_STATE,
>-		},
>-		{
>-			IS_GEN(i915, 12) ?
>-			GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL,
>-			stream->perf->ctx_oactxctrl_offset + 1,
>-		},
>-		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
>-		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
>-		{ EU_PERF_CNTL2, ctx_flexeuN(2) },
>-		{ EU_PERF_CNTL3, ctx_flexeuN(3) },
>-		{ EU_PERF_CNTL4, ctx_flexeuN(4) },
>-		{ EU_PERF_CNTL5, ctx_flexeuN(5) },
>-		{ EU_PERF_CNTL6, ctx_flexeuN(6) },
>-	};
>-#undef ctx_flexeuN
> 	struct intel_engine_cs *engine;
> 	struct i915_gem_context *ctx, *cn;
>-	size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs);
>-	int i, err;
>-
>-	if (IS_GEN(i915, 12)) {
>-		u32 format = stream->oa_buffer.format;
>-
>-		regs[1].value =
>-			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
>-			(oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
>-	} else {
>-		regs[1].value =
>-			(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>-			(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>-			GEN8_OA_COUNTER_RESUME;
>-	}
>-
>-	for (i = 2; !!ctx_flexeu0 && i < array_size; i++)
>-		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
>+	int err;
>
> 	lockdep_assert_held(&stream->perf->lock);
>
>@@ -2353,7 +2323,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>
> 		spin_unlock(&i915->gem.contexts.lock);
>
>-		err = gen8_configure_context(ctx, regs, array_size);
>+		err = gen8_configure_context(ctx, regs, num_regs);
> 		if (err) {
> 			i915_gem_context_put(ctx);
> 			return err;
>@@ -2378,7 +2348,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>
> 		regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
>
>-		err = gen8_modify_self(ce, regs, array_size);
>+		err = gen8_modify_self(ce, regs, num_regs);
> 		if (err)
> 			return err;
> 	}
>@@ -2386,6 +2356,56 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
> 	return 0;
> }
>
>+static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
>+					const struct i915_oa_config *oa_config)
>+{
>+	struct flex regs[] = {
>+		{
>+			GEN8_R_PWR_CLK_STATE,
>+			CTX_R_PWR_CLK_STATE,
>+		},
>+	};
>+
>+	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
>+}
>+
>+static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>+				      const struct i915_oa_config *oa_config)
>+{
>+	/* The MMIO offsets for Flex EU registers aren't contiguous */
>+	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
>+#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
>+	struct flex regs[] = {
>+		{
>+			GEN8_R_PWR_CLK_STATE,
>+			CTX_R_PWR_CLK_STATE,
>+		},
>+		{
>+			GEN8_OACTXCONTROL,
>+			stream->perf->ctx_oactxctrl_offset + 1,
>+		},
>+		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
>+		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
>+		{ EU_PERF_CNTL2, ctx_flexeuN(2) },
>+		{ EU_PERF_CNTL3, ctx_flexeuN(3) },
>+		{ EU_PERF_CNTL4, ctx_flexeuN(4) },
>+		{ EU_PERF_CNTL5, ctx_flexeuN(5) },
>+		{ EU_PERF_CNTL6, ctx_flexeuN(6) },
>+	};
>+#undef ctx_flexeuN
>+	int i;
>+
>+	regs[1].value =
>+		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>+		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>+		GEN8_OA_COUNTER_RESUME;
>+
>+	for (i = 2; i < ARRAY_SIZE(regs); i++)
>+		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
>+
>+	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
>+}
>+
> static int gen8_enable_metric_set(struct i915_perf_stream *stream)
> {
> 	struct intel_uncore *uncore = stream->uncore;
>@@ -2469,7 +2489,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
> 	 * to make sure all slices/subslices are ON before writing to NOA
> 	 * registers.
> 	 */
>-	ret = lrc_configure_all_contexts(stream, oa_config);
>+	ret = gen12_configure_all_contexts(stream, oa_config);
> 	if (ret)
> 		return ret;
>
>@@ -2479,8 +2499,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
> 	 * requested this.
> 	 */
> 	if (stream->ctx) {
>-		ret = gen12_emit_oar_config(stream->pinned_ctx,
>-					    oa_config != NULL);
>+		ret = gen12_configure_oar_context(stream, oa_config != NULL);
> 		if (ret)
> 			return ret;
> 	}
>@@ -2514,11 +2533,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
> 	struct intel_uncore *uncore = stream->uncore;
>
> 	/* Reset all contexts' slices/subslices configurations. */
>-	lrc_configure_all_contexts(stream, NULL);
>+	gen12_configure_all_contexts(stream, NULL);
>
> 	/* disable the context save/restore or OAR counters */
> 	if (stream->ctx)
>-		gen12_emit_oar_config(stream->pinned_ctx, false);
>+		gen12_configure_oar_context(stream, false);
>
> 	/* Make sure we disable noa to save power. */
> 	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
>@@ -2860,7 +2879,11 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
> 		return;
>
> 	stream = engine->i915->perf.exclusive_stream;
>-	if (stream)
>+	/*
>+	 * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller
>+	 * is already doing that, so nothing to be done for gen12 here.
>+	 */
>+	if (stream && INTEL_GEN(stream->perf->i915) < 12)
> 		gen8_update_reg_state_unlocked(ce, stream);
> }
>
>-- 
>2.20.1
>
>_______________________________________________
>Intel-gfx mailing list
>Intel-gfx@lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Lionel Landwerlin Nov. 18, 2019, 1:42 p.m. UTC | #2
On 14/11/2019 21:21, Umesh Nerlige Ramappa wrote:
> Gen12 supports saving/restoring render counters per context. Apply OAR
> configuration only for the context that is passed in to perf.
>
> v2:
> - Fix OACTXCONTROL value to only stop/resume counters.
> - Remove gen12_update_reg_state_unlocked as power state is already
>    applied by the caller.
>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_perf.c | 193 +++++++++++++++++--------------
>   1 file changed, 108 insertions(+), 85 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 221c1090ae93..2f0be5fbef4b 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -2078,20 +2078,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
>   	u32 *reg_state = ce->lrc_reg_state;
>   	int i;
>   
> -	if (IS_GEN(stream->perf->i915, 12)) {
> -		u32 format = stream->oa_buffer.format;
> +	reg_state[ctx_oactxctrl + 1] =
> +		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> +		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> +		GEN8_OA_COUNTER_RESUME;
>   
> -		reg_state[ctx_oactxctrl + 1] =
> -			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
> -			(stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
> -	} else {
> -		reg_state[ctx_oactxctrl + 1] =
> -			(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> -			(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> -			GEN8_OA_COUNTER_RESUME;
> -	}
> -
> -	for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++)
> +	for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
>   		reg_state[ctx_flexeu0 + i * 2 + 1] =
>   			oa_config_flex_reg(stream->oa_config, flex_regs[i]);
>   
> @@ -2224,34 +2216,49 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
>   	return err;
>   }
>   
> -static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
> +static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
>   {
> -	struct i915_request *rq;
> -	u32 *cs;
> -	int err = 0;
> -
> -	rq = i915_request_create(ce);
> -	if (IS_ERR(rq))
> -		return PTR_ERR(rq);
> +	int err;
> +	struct intel_context *ce = stream->pinned_ctx;
> +	struct flex regs_context[] = {
> +		{
> +			GEN8_OACTXCONTROL,
> +			stream->perf->ctx_oactxctrl_offset + 1,
> +			enable ? GEN8_OA_COUNTER_RESUME : 0,
> +		},
> +	};


When do we configure the Flex registers?


> +	struct flex regs_lri[] = {
> +		{
> +			GEN12_OAR_OACONTROL,
> +		},
> +		{
> +			RING_CONTEXT_CONTROL(ce->engine->mmio_base),
> +		},
> +	};
> +	u32 format = stream->oa_buffer.format;
>   
> -	cs = intel_ring_begin(rq, 4);
> -	if (IS_ERR(cs)) {
> -		err = PTR_ERR(cs);
> -		goto out;
> -	}
> +	regs_lri[0].value =
> +		(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
> +		(enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
>   
> -	*cs++ = MI_LOAD_REGISTER_IMM(1);
> -	*cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
> -	*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
> -			      enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
> -	*cs++ = MI_NOOP;
> +	regs_lri[1].value =
> +		_MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
> +			      enable ?
> +			      GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
> +			      0);


Can't we put those values in the array above?


>   
> -	intel_ring_advance(rq, cs);
> +	/* Modify the context image of pinned context with regs_context*/
> +	err = intel_context_lock_pinned(ce);
> +	if (err)
> +		return err;
>   
> -out:
> -	i915_request_add(rq);
> +	err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
> +	intel_context_unlock_pinned(ce);
> +	if (err)
> +		return err;
>   
> -	return err;
> +	/* Apply regs_lri using LRI with pinned context */
> +	return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
>   }
>   
>   /*
> @@ -2277,53 +2284,16 @@ static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
>    *   per-context OA state.
>    *
>    * Note: it's only the RCS/Render context that has any OA state.
> + * Note: the first flex register passed must always be R_PWR_CLK_STATE
>    */
> -static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
> -				      const struct i915_oa_config *oa_config)
> +static int oa_configure_all_contexts(struct i915_perf_stream *stream,
> +				     struct flex *regs,
> +				     size_t num_regs)
>   {
>   	struct drm_i915_private *i915 = stream->perf->i915;
> -	/* The MMIO offsets for Flex EU registers aren't contiguous */
> -	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
> -#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
> -	struct flex regs[] = {
> -		{
> -			GEN8_R_PWR_CLK_STATE,
> -			CTX_R_PWR_CLK_STATE,
> -		},
> -		{
> -			IS_GEN(i915, 12) ?
> -			GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL,
> -			stream->perf->ctx_oactxctrl_offset + 1,
> -		},
> -		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
> -		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
> -		{ EU_PERF_CNTL2, ctx_flexeuN(2) },
> -		{ EU_PERF_CNTL3, ctx_flexeuN(3) },
> -		{ EU_PERF_CNTL4, ctx_flexeuN(4) },
> -		{ EU_PERF_CNTL5, ctx_flexeuN(5) },
> -		{ EU_PERF_CNTL6, ctx_flexeuN(6) },
> -	};
> -#undef ctx_flexeuN
>   	struct intel_engine_cs *engine;
>   	struct i915_gem_context *ctx, *cn;
> -	size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs);
> -	int i, err;
> -
> -	if (IS_GEN(i915, 12)) {
> -		u32 format = stream->oa_buffer.format;
> -
> -		regs[1].value =
> -			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
> -			(oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
> -	} else {
> -		regs[1].value =
> -			(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> -			(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> -			GEN8_OA_COUNTER_RESUME;
> -	}
> -
> -	for (i = 2; !!ctx_flexeu0 && i < array_size; i++)
> -		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
> +	int err;
>   
>   	lockdep_assert_held(&stream->perf->lock);
>   
> @@ -2353,7 +2323,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>   
>   		spin_unlock(&i915->gem.contexts.lock);
>   
> -		err = gen8_configure_context(ctx, regs, array_size);
> +		err = gen8_configure_context(ctx, regs, num_regs);
>   		if (err) {
>   			i915_gem_context_put(ctx);
>   			return err;
> @@ -2378,7 +2348,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>   
>   		regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
>   
> -		err = gen8_modify_self(ce, regs, array_size);
> +		err = gen8_modify_self(ce, regs, num_regs);
>   		if (err)
>   			return err;
>   	}
> @@ -2386,6 +2356,56 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>   	return 0;
>   }
>   
> +static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
> +					const struct i915_oa_config *oa_config)
> +{
> +	struct flex regs[] = {
> +		{
> +			GEN8_R_PWR_CLK_STATE,
> +			CTX_R_PWR_CLK_STATE,
> +		},
> +	};
> +
> +	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
> +}
> +
> +static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
> +				      const struct i915_oa_config *oa_config)
> +{
> +	/* The MMIO offsets for Flex EU registers aren't contiguous */
> +	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
> +#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
> +	struct flex regs[] = {
> +		{
> +			GEN8_R_PWR_CLK_STATE,
> +			CTX_R_PWR_CLK_STATE,
> +		},
> +		{
> +			GEN8_OACTXCONTROL,
> +			stream->perf->ctx_oactxctrl_offset + 1,
> +		},
> +		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
> +		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
> +		{ EU_PERF_CNTL2, ctx_flexeuN(2) },
> +		{ EU_PERF_CNTL3, ctx_flexeuN(3) },
> +		{ EU_PERF_CNTL4, ctx_flexeuN(4) },
> +		{ EU_PERF_CNTL5, ctx_flexeuN(5) },
> +		{ EU_PERF_CNTL6, ctx_flexeuN(6) },
> +	};
> +#undef ctx_flexeuN
> +	int i;
> +
> +	regs[1].value =
> +		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> +		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> +		GEN8_OA_COUNTER_RESUME;
> +
> +	for (i = 2; i < ARRAY_SIZE(regs); i++)
> +		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
> +
> +	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
> +}
> +
>   static int gen8_enable_metric_set(struct i915_perf_stream *stream)
>   {
>   	struct intel_uncore *uncore = stream->uncore;
> @@ -2469,7 +2489,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
>   	 * to make sure all slices/subslices are ON before writing to NOA
>   	 * registers.
>   	 */
> -	ret = lrc_configure_all_contexts(stream, oa_config);
> +	ret = gen12_configure_all_contexts(stream, oa_config);
>   	if (ret)
>   		return ret;
>   
> @@ -2479,8 +2499,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
>   	 * requested this.
>   	 */
>   	if (stream->ctx) {
> -		ret = gen12_emit_oar_config(stream->pinned_ctx,
> -					    oa_config != NULL);
> +		ret = gen12_configure_oar_context(stream, oa_config != NULL);


I think you can assume oa_config is going to be != NULL in the 
enable_metric_set vfunc.


>   		if (ret)
>   			return ret;
>   	}
> @@ -2514,11 +2533,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
>   	struct intel_uncore *uncore = stream->uncore;
>   
>   	/* Reset all contexts' slices/subslices configurations. */
> -	lrc_configure_all_contexts(stream, NULL);
> +	gen12_configure_all_contexts(stream, NULL);
>   
>   	/* disable the context save/restore or OAR counters */
>   	if (stream->ctx)
> -		gen12_emit_oar_config(stream->pinned_ctx, false);
> +		gen12_configure_oar_context(stream, false);
>   
>   	/* Make sure we disable noa to save power. */
>   	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
> @@ -2860,7 +2879,11 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
>   		return;
>   
>   	stream = engine->i915->perf.exclusive_stream;
> -	if (stream)
> +	/*
> +	 * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller
> +	 * is already doing that, so nothing to be done for gen12 here.
> +	 */
> +	if (stream && INTEL_GEN(stream->perf->i915) < 12)
>   		gen8_update_reg_state_unlocked(ce, stream);
>   }
>
Umesh Nerlige Ramappa Nov. 18, 2019, 6:01 p.m. UTC | #3
On Mon, Nov 18, 2019 at 03:42:28PM +0200, Lionel Landwerlin wrote:
>On 14/11/2019 21:21, Umesh Nerlige Ramappa wrote:
>>Gen12 supports saving/restoring render counters per context. Apply OAR
>>configuration only for the context that is passed in to perf.
>>
>>v2:
>>- Fix OACTXCONTROL value to only stop/resume counters.
>>- Remove gen12_update_reg_state_unlocked as power state is already
>>   applied by the caller.
>>
>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>---
>>  drivers/gpu/drm/i915/i915_perf.c | 193 +++++++++++++++++--------------
>>  1 file changed, 108 insertions(+), 85 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>>index 221c1090ae93..2f0be5fbef4b 100644
>>--- a/drivers/gpu/drm/i915/i915_perf.c
>>+++ b/drivers/gpu/drm/i915/i915_perf.c
>>@@ -2078,20 +2078,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
>>  	u32 *reg_state = ce->lrc_reg_state;
>>  	int i;
>>-	if (IS_GEN(stream->perf->i915, 12)) {
>>-		u32 format = stream->oa_buffer.format;
>>+	reg_state[ctx_oactxctrl + 1] =
>>+		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>>+		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>>+		GEN8_OA_COUNTER_RESUME;
>>-		reg_state[ctx_oactxctrl + 1] =
>>-			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
>>-			(stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
>>-	} else {
>>-		reg_state[ctx_oactxctrl + 1] =
>>-			(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>>-			(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>>-			GEN8_OA_COUNTER_RESUME;
>>-	}
>>-
>>-	for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++)
>>+	for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
>>  		reg_state[ctx_flexeu0 + i * 2 + 1] =
>>  			oa_config_flex_reg(stream->oa_config, flex_regs[i]);
>>@@ -2224,34 +2216,49 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
>>  	return err;
>>  }
>>-static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
>>+static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
>>  {
>>-	struct i915_request *rq;
>>-	u32 *cs;
>>-	int err = 0;
>>-
>>-	rq = i915_request_create(ce);
>>-	if (IS_ERR(rq))
>>-		return PTR_ERR(rq);
>>+	int err;
>>+	struct intel_context *ce = stream->pinned_ctx;
>>+	struct flex regs_context[] = {
>>+		{
>>+			GEN8_OACTXCONTROL,
>>+			stream->perf->ctx_oactxctrl_offset + 1,
>>+			enable ? GEN8_OA_COUNTER_RESUME : 0,
>>+		},
>>+	};
>
>
>When do we configure the Flex registers?

I don't see flex registers in the context image dump or in the spec, so 
I guess we would only configure them if they are part of the metric set.

Will post a new version with below comments.

Thanks,
Umesh

>
>
>>+	struct flex regs_lri[] = {
>>+		{
>>+			GEN12_OAR_OACONTROL,
>>+		},
>>+		{
>>+			RING_CONTEXT_CONTROL(ce->engine->mmio_base),
>>+		},
>>+	};
>>+	u32 format = stream->oa_buffer.format;
>>-	cs = intel_ring_begin(rq, 4);
>>-	if (IS_ERR(cs)) {
>>-		err = PTR_ERR(cs);
>>-		goto out;
>>-	}
>>+	regs_lri[0].value =
>>+		(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
>>+		(enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
>>-	*cs++ = MI_LOAD_REGISTER_IMM(1);
>>-	*cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
>>-	*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
>>-			      enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
>>-	*cs++ = MI_NOOP;
>>+	regs_lri[1].value =
>>+		_MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
>>+			      enable ?
>>+			      GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
>>+			      0);
>
>
>Can't we put those values in the array above?
>
>
>>-	intel_ring_advance(rq, cs);
>>+	/* Modify the context image of pinned context with regs_context*/
>>+	err = intel_context_lock_pinned(ce);
>>+	if (err)
>>+		return err;
>>-out:
>>-	i915_request_add(rq);
>>+	err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
>>+	intel_context_unlock_pinned(ce);
>>+	if (err)
>>+		return err;
>>-	return err;
>>+	/* Apply regs_lri using LRI with pinned context */
>>+	return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
>>  }
>>  /*
>>@@ -2277,53 +2284,16 @@ static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
>>   *   per-context OA state.
>>   *
>>   * Note: it's only the RCS/Render context that has any OA state.
>>+ * Note: the first flex register passed must always be R_PWR_CLK_STATE
>>   */
>>-static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>>-				      const struct i915_oa_config *oa_config)
>>+static int oa_configure_all_contexts(struct i915_perf_stream *stream,
>>+				     struct flex *regs,
>>+				     size_t num_regs)
>>  {
>>  	struct drm_i915_private *i915 = stream->perf->i915;
>>-	/* The MMIO offsets for Flex EU registers aren't contiguous */
>>-	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
>>-#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
>>-	struct flex regs[] = {
>>-		{
>>-			GEN8_R_PWR_CLK_STATE,
>>-			CTX_R_PWR_CLK_STATE,
>>-		},
>>-		{
>>-			IS_GEN(i915, 12) ?
>>-			GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL,
>>-			stream->perf->ctx_oactxctrl_offset + 1,
>>-		},
>>-		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
>>-		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
>>-		{ EU_PERF_CNTL2, ctx_flexeuN(2) },
>>-		{ EU_PERF_CNTL3, ctx_flexeuN(3) },
>>-		{ EU_PERF_CNTL4, ctx_flexeuN(4) },
>>-		{ EU_PERF_CNTL5, ctx_flexeuN(5) },
>>-		{ EU_PERF_CNTL6, ctx_flexeuN(6) },
>>-	};
>>-#undef ctx_flexeuN
>>  	struct intel_engine_cs *engine;
>>  	struct i915_gem_context *ctx, *cn;
>>-	size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs);
>>-	int i, err;
>>-
>>-	if (IS_GEN(i915, 12)) {
>>-		u32 format = stream->oa_buffer.format;
>>-
>>-		regs[1].value =
>>-			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
>>-			(oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
>>-	} else {
>>-		regs[1].value =
>>-			(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>>-			(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>>-			GEN8_OA_COUNTER_RESUME;
>>-	}
>>-
>>-	for (i = 2; !!ctx_flexeu0 && i < array_size; i++)
>>-		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
>>+	int err;
>>  	lockdep_assert_held(&stream->perf->lock);
>>@@ -2353,7 +2323,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>>  		spin_unlock(&i915->gem.contexts.lock);
>>-		err = gen8_configure_context(ctx, regs, array_size);
>>+		err = gen8_configure_context(ctx, regs, num_regs);
>>  		if (err) {
>>  			i915_gem_context_put(ctx);
>>  			return err;
>>@@ -2378,7 +2348,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>>  		regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
>>-		err = gen8_modify_self(ce, regs, array_size);
>>+		err = gen8_modify_self(ce, regs, num_regs);
>>  		if (err)
>>  			return err;
>>  	}
>>@@ -2386,6 +2356,56 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>>  	return 0;
>>  }
>>+static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
>>+					const struct i915_oa_config *oa_config)
>>+{
>>+	struct flex regs[] = {
>>+		{
>>+			GEN8_R_PWR_CLK_STATE,
>>+			CTX_R_PWR_CLK_STATE,
>>+		},
>>+	};
>>+
>>+	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
>>+}
>>+
>>+static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
>>+				      const struct i915_oa_config *oa_config)
>>+{
>>+	/* The MMIO offsets for Flex EU registers aren't contiguous */
>>+	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
>>+#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
>>+	struct flex regs[] = {
>>+		{
>>+			GEN8_R_PWR_CLK_STATE,
>>+			CTX_R_PWR_CLK_STATE,
>>+		},
>>+		{
>>+			GEN8_OACTXCONTROL,
>>+			stream->perf->ctx_oactxctrl_offset + 1,
>>+		},
>>+		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
>>+		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
>>+		{ EU_PERF_CNTL2, ctx_flexeuN(2) },
>>+		{ EU_PERF_CNTL3, ctx_flexeuN(3) },
>>+		{ EU_PERF_CNTL4, ctx_flexeuN(4) },
>>+		{ EU_PERF_CNTL5, ctx_flexeuN(5) },
>>+		{ EU_PERF_CNTL6, ctx_flexeuN(6) },
>>+	};
>>+#undef ctx_flexeuN
>>+	int i;
>>+
>>+	regs[1].value =
>>+		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>>+		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>>+		GEN8_OA_COUNTER_RESUME;
>>+
>>+	for (i = 2; i < ARRAY_SIZE(regs); i++)
>>+		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
>>+
>>+	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
>>+}
>>+
>>  static int gen8_enable_metric_set(struct i915_perf_stream *stream)
>>  {
>>  	struct intel_uncore *uncore = stream->uncore;
>>@@ -2469,7 +2489,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
>>  	 * to make sure all slices/subslices are ON before writing to NOA
>>  	 * registers.
>>  	 */
>>-	ret = lrc_configure_all_contexts(stream, oa_config);
>>+	ret = gen12_configure_all_contexts(stream, oa_config);
>>  	if (ret)
>>  		return ret;
>>@@ -2479,8 +2499,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
>>  	 * requested this.
>>  	 */
>>  	if (stream->ctx) {
>>-		ret = gen12_emit_oar_config(stream->pinned_ctx,
>>-					    oa_config != NULL);
>>+		ret = gen12_configure_oar_context(stream, oa_config != NULL);
>
>
>I think you can assume oa_config is going to be != NULL in the 
>enable_metric_set vfunc.
>
>
>>  		if (ret)
>>  			return ret;
>>  	}
>>@@ -2514,11 +2533,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
>>  	struct intel_uncore *uncore = stream->uncore;
>>  	/* Reset all contexts' slices/subslices configurations. */
>>-	lrc_configure_all_contexts(stream, NULL);
>>+	gen12_configure_all_contexts(stream, NULL);
>>  	/* disable the context save/restore or OAR counters */
>>  	if (stream->ctx)
>>-		gen12_emit_oar_config(stream->pinned_ctx, false);
>>+		gen12_configure_oar_context(stream, false);
>>  	/* Make sure we disable noa to save power. */
>>  	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
>>@@ -2860,7 +2879,11 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
>>  		return;
>>  	stream = engine->i915->perf.exclusive_stream;
>>-	if (stream)
>>+	/*
>>+	 * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller
>>+	 * is already doing that, so nothing to be done for gen12 here.
>>+	 */
>>+	if (stream && INTEL_GEN(stream->perf->i915) < 12)
>>  		gen8_update_reg_state_unlocked(ce, stream);
>>  }
>
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 221c1090ae93..2f0be5fbef4b 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2078,20 +2078,12 @@  gen8_update_reg_state_unlocked(const struct intel_context *ce,
 	u32 *reg_state = ce->lrc_reg_state;
 	int i;
 
-	if (IS_GEN(stream->perf->i915, 12)) {
-		u32 format = stream->oa_buffer.format;
+	reg_state[ctx_oactxctrl + 1] =
+		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+		GEN8_OA_COUNTER_RESUME;
 
-		reg_state[ctx_oactxctrl + 1] =
-			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
-			(stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
-	} else {
-		reg_state[ctx_oactxctrl + 1] =
-			(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
-			(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
-			GEN8_OA_COUNTER_RESUME;
-	}
-
-	for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++)
+	for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
 		reg_state[ctx_flexeu0 + i * 2 + 1] =
 			oa_config_flex_reg(stream->oa_config, flex_regs[i]);
 
@@ -2224,34 +2216,49 @@  static int gen8_configure_context(struct i915_gem_context *ctx,
 	return err;
 }
 
-static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
+static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
 {
-	struct i915_request *rq;
-	u32 *cs;
-	int err = 0;
-
-	rq = i915_request_create(ce);
-	if (IS_ERR(rq))
-		return PTR_ERR(rq);
+	int err;
+	struct intel_context *ce = stream->pinned_ctx;
+	struct flex regs_context[] = {
+		{
+			GEN8_OACTXCONTROL,
+			stream->perf->ctx_oactxctrl_offset + 1,
+			enable ? GEN8_OA_COUNTER_RESUME : 0,
+		},
+	};
+	struct flex regs_lri[] = {
+		{
+			GEN12_OAR_OACONTROL,
+		},
+		{
+			RING_CONTEXT_CONTROL(ce->engine->mmio_base),
+		},
+	};
+	u32 format = stream->oa_buffer.format;
 
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs)) {
-		err = PTR_ERR(cs);
-		goto out;
-	}
+	regs_lri[0].value =
+		(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
+		(enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
-	*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
-			      enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
-	*cs++ = MI_NOOP;
+	regs_lri[1].value =
+		_MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
+			      enable ?
+			      GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
+			      0);
 
-	intel_ring_advance(rq, cs);
+	/* Modify the context image of pinned context with regs_context*/
+	err = intel_context_lock_pinned(ce);
+	if (err)
+		return err;
 
-out:
-	i915_request_add(rq);
+	err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
+	intel_context_unlock_pinned(ce);
+	if (err)
+		return err;
 
-	return err;
+	/* Apply regs_lri using LRI with pinned context */
+	return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
 }
 
 /*
@@ -2277,53 +2284,16 @@  static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
  *   per-context OA state.
  *
  * Note: it's only the RCS/Render context that has any OA state.
+ * Note: the first flex register passed must always be R_PWR_CLK_STATE
  */
-static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
-				      const struct i915_oa_config *oa_config)
+static int oa_configure_all_contexts(struct i915_perf_stream *stream,
+				     struct flex *regs,
+				     size_t num_regs)
 {
 	struct drm_i915_private *i915 = stream->perf->i915;
-	/* The MMIO offsets for Flex EU registers aren't contiguous */
-	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
-#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
-	struct flex regs[] = {
-		{
-			GEN8_R_PWR_CLK_STATE,
-			CTX_R_PWR_CLK_STATE,
-		},
-		{
-			IS_GEN(i915, 12) ?
-			GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL,
-			stream->perf->ctx_oactxctrl_offset + 1,
-		},
-		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
-		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
-		{ EU_PERF_CNTL2, ctx_flexeuN(2) },
-		{ EU_PERF_CNTL3, ctx_flexeuN(3) },
-		{ EU_PERF_CNTL4, ctx_flexeuN(4) },
-		{ EU_PERF_CNTL5, ctx_flexeuN(5) },
-		{ EU_PERF_CNTL6, ctx_flexeuN(6) },
-	};
-#undef ctx_flexeuN
 	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx, *cn;
-	size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs);
-	int i, err;
-
-	if (IS_GEN(i915, 12)) {
-		u32 format = stream->oa_buffer.format;
-
-		regs[1].value =
-			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
-			(oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
-	} else {
-		regs[1].value =
-			(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
-			(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
-			GEN8_OA_COUNTER_RESUME;
-	}
-
-	for (i = 2; !!ctx_flexeu0 && i < array_size; i++)
-		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
+	int err;
 
 	lockdep_assert_held(&stream->perf->lock);
 
@@ -2353,7 +2323,7 @@  static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
 
 		spin_unlock(&i915->gem.contexts.lock);
 
-		err = gen8_configure_context(ctx, regs, array_size);
+		err = gen8_configure_context(ctx, regs, num_regs);
 		if (err) {
 			i915_gem_context_put(ctx);
 			return err;
@@ -2378,7 +2348,7 @@  static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
 
 		regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
 
-		err = gen8_modify_self(ce, regs, array_size);
+		err = gen8_modify_self(ce, regs, num_regs);
 		if (err)
 			return err;
 	}
@@ -2386,6 +2356,56 @@  static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
 	return 0;
 }
 
+static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
+					const struct i915_oa_config *oa_config)
+{
+	struct flex regs[] = {
+		{
+			GEN8_R_PWR_CLK_STATE,
+			CTX_R_PWR_CLK_STATE,
+		},
+	};
+
+	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
+}
+
+static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
+				      const struct i915_oa_config *oa_config)
+{
+	/* The MMIO offsets for Flex EU registers aren't contiguous */
+	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
+#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
+	struct flex regs[] = {
+		{
+			GEN8_R_PWR_CLK_STATE,
+			CTX_R_PWR_CLK_STATE,
+		},
+		{
+			GEN8_OACTXCONTROL,
+			stream->perf->ctx_oactxctrl_offset + 1,
+		},
+		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
+		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
+		{ EU_PERF_CNTL2, ctx_flexeuN(2) },
+		{ EU_PERF_CNTL3, ctx_flexeuN(3) },
+		{ EU_PERF_CNTL4, ctx_flexeuN(4) },
+		{ EU_PERF_CNTL5, ctx_flexeuN(5) },
+		{ EU_PERF_CNTL6, ctx_flexeuN(6) },
+	};
+#undef ctx_flexeuN
+	int i;
+
+	regs[1].value =
+		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+		GEN8_OA_COUNTER_RESUME;
+
+	for (i = 2; i < ARRAY_SIZE(regs); i++)
+		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
+
+	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
+}
+
 static int gen8_enable_metric_set(struct i915_perf_stream *stream)
 {
 	struct intel_uncore *uncore = stream->uncore;
@@ -2469,7 +2489,7 @@  static int gen12_enable_metric_set(struct i915_perf_stream *stream)
 	 * to make sure all slices/subslices are ON before writing to NOA
 	 * registers.
 	 */
-	ret = lrc_configure_all_contexts(stream, oa_config);
+	ret = gen12_configure_all_contexts(stream, oa_config);
 	if (ret)
 		return ret;
 
@@ -2479,8 +2499,7 @@  static int gen12_enable_metric_set(struct i915_perf_stream *stream)
 	 * requested this.
 	 */
 	if (stream->ctx) {
-		ret = gen12_emit_oar_config(stream->pinned_ctx,
-					    oa_config != NULL);
+		ret = gen12_configure_oar_context(stream, oa_config != NULL);
 		if (ret)
 			return ret;
 	}
@@ -2514,11 +2533,11 @@  static void gen12_disable_metric_set(struct i915_perf_stream *stream)
 	struct intel_uncore *uncore = stream->uncore;
 
 	/* Reset all contexts' slices/subslices configurations. */
-	lrc_configure_all_contexts(stream, NULL);
+	gen12_configure_all_contexts(stream, NULL);
 
 	/* disable the context save/restore or OAR counters */
 	if (stream->ctx)
-		gen12_emit_oar_config(stream->pinned_ctx, false);
+		gen12_configure_oar_context(stream, false);
 
 	/* Make sure we disable noa to save power. */
 	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
@@ -2860,7 +2879,11 @@  void i915_oa_init_reg_state(const struct intel_context *ce,
 		return;
 
 	stream = engine->i915->perf.exclusive_stream;
-	if (stream)
+	/*
+	 * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller
+	 * is already doing that, so nothing to be done for gen12 here.
+	 */
+	if (stream && INTEL_GEN(stream->perf->i915) < 12)
 		gen8_update_reg_state_unlocked(ce, stream);
 }