diff mbox

[v6,5/7] drm/i915/perf: lock powergating configuration to default when active

Message ID 20180522180002.11522-6-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Lionel Landwerlin May 22, 2018, 6 p.m. UTC
If some of the contexts submitting workloads to the GPU have been
configured to shutdown slices/subslices, we might loose the NOA
configurations written in the NOA muxes.

One possible solution to this problem is to reprogram the NOA muxes
when we switch to a new context. We initially tried this in the
workaround batchbuffer but some concerns where raised about the cost
of reprogramming at every context switch. This solution is also not
without consequences from the userspace point of view. Reprogramming
of the muxes can only happen once the powergating configuration has
changed (which happens after context switch). This means for a window
of time during the recording, counters recorded by the OA unit might
be invalid. This requires userspace dealing with OA reports to discard
the invalid values.

Minimizing the reprogramming could be implemented by tracking of the
last programmed configuration somewhere in GGTT and use MI_PREDICATE
to discard some of the programming commands, but the command streamer
would still have to parse all the MI_LRI instructions in the
workaround batchbuffer.

Another solution, which this change implements, is to simply disregard
the user requested configuration for the period of time when i915/perf
is active. There is no known issue with this apart from a performance
penality for some media workloads that benefit from running on a
partially powergated GPU. We already prevent RC6 from affecting the
programming so it doesn't sound completely unreasonable to hold on
powergating for the same reason.

v2: Leave RPCS programming in intel_lrc.c (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  | 16 ++++++++++++++++
 drivers/gpu/drm/i915/i915_perf.c | 24 +++++++++++++++++++-----
 drivers/gpu/drm/i915/intel_lrc.c | 11 +++++++----
 drivers/gpu/drm/i915/intel_lrc.h |  3 +++
 4 files changed, 45 insertions(+), 9 deletions(-)

Comments

Tvrtko Ursulin May 23, 2018, 3:04 p.m. UTC | #1
On 22/05/2018 19:00, Lionel Landwerlin wrote:
> If some of the contexts submitting workloads to the GPU have been
> configured to shutdown slices/subslices, we might loose the NOA
> configurations written in the NOA muxes.
> 
> One possible solution to this problem is to reprogram the NOA muxes
> when we switch to a new context. We initially tried this in the
> workaround batchbuffer but some concerns where raised about the cost
> of reprogramming at every context switch. This solution is also not
> without consequences from the userspace point of view. Reprogramming
> of the muxes can only happen once the powergating configuration has
> changed (which happens after context switch). This means for a window
> of time during the recording, counters recorded by the OA unit might
> be invalid. This requires userspace dealing with OA reports to discard
> the invalid values.
> 
> Minimizing the reprogramming could be implemented by tracking of the
> last programmed configuration somewhere in GGTT and use MI_PREDICATE
> to discard some of the programming commands, but the command streamer
> would still have to parse all the MI_LRI instructions in the
> workaround batchbuffer.
> 
> Another solution, which this change implements, is to simply disregard
> the user requested configuration for the period of time when i915/perf
> is active. There is no known issue with this apart from a performance
> penality for some media workloads that benefit from running on a
> partially powergated GPU. We already prevent RC6 from affecting the
> programming so it doesn't sound completely unreasonable to hold on
> powergating for the same reason.
> 
> v2: Leave RPCS programming in intel_lrc.c (Lionel)
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h  | 16 ++++++++++++++++
>   drivers/gpu/drm/i915/i915_perf.c | 24 +++++++++++++++++++-----
>   drivers/gpu/drm/i915/intel_lrc.c | 11 +++++++----
>   drivers/gpu/drm/i915/intel_lrc.h |  3 +++
>   4 files changed, 45 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index b86ed6401120..21631b51b37b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2743,6 +2743,22 @@ int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
>   int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
>   int intel_engines_init(struct drm_i915_private *dev_priv);
>   
> +static inline union intel_sseu
> +intel_engine_prepare_sseu(struct intel_engine_cs *engine,
> +			  union intel_sseu sseu)
> +{
> +	struct drm_i915_private *dev_priv = engine->i915;
> +
> +	/*
> +	 * If i915/perf is active, we want a stable powergating configuration
> +	 * on the system. The most natural configuration to take in that case
> +	 * is the default (i.e maximum the hardware can do).
> +	 */
> +	return dev_priv->perf.oa.exclusive_stream ?
> +		intel_sseu_from_device_sseu(&INTEL_INFO(dev_priv)->sseu) :
> +		sseu;
> +}
> +
>   /* intel_hotplug.c */
>   void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
>   			   u32 pin_mask, u32 long_mask);
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index a5d98bda5c2e..7ba8a3ff744c 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1574,7 +1574,8 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
>    */
>   static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
>   					   u32 *reg_state,
> -					   const struct i915_oa_config *oa_config)
> +					   const struct i915_oa_config *oa_config,
> +					   union intel_sseu sseu)
>   {
>   	struct drm_i915_private *dev_priv = ctx->i915;
>   	u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
> @@ -1620,6 +1621,9 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
>   
>   		CTX_REG(reg_state, state_offset, flex_regs[i], value);
>   	}
> +
> +	CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
> +		gen8_make_rpcs(&INTEL_INFO(dev_priv)->sseu, sseu));
>   }
>   
>   /*
> @@ -1751,6 +1755,8 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>   				       const struct i915_oa_config *oa_config)
>   {
>   	struct intel_engine_cs *engine = dev_priv->engine[RCS];
> +	union intel_sseu default_sseu =
> +		intel_sseu_from_device_sseu(&INTEL_INFO(dev_priv)->sseu);
>   	struct i915_gem_context *ctx;
>   	int ret;
>   	unsigned int wait_flags = I915_WAIT_LOCKED;
> @@ -1795,7 +1801,8 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>   		ce->state->obj->mm.dirty = true;
>   		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
>   
> -		gen8_update_reg_state_unlocked(ctx, regs, oa_config);
> +		gen8_update_reg_state_unlocked(ctx, regs, oa_config,
> +					       oa_config ? default_sseu : ce->sseu);
>   
>   		i915_gem_object_unpin_map(ce->state->obj);
>   	}
> @@ -2167,14 +2174,21 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
>   			    struct i915_gem_context *ctx,
>   			    u32 *reg_state)
>   {
> +	struct drm_i915_private *dev_priv = engine->i915;
>   	struct i915_perf_stream *stream;
>   
>   	if (engine->id != RCS)
>   		return;
>   
> -	stream = engine->i915->perf.oa.exclusive_stream;
> -	if (stream)
> -		gen8_update_reg_state_unlocked(ctx, reg_state, stream->oa_config);
> +	stream = dev_priv->perf.oa.exclusive_stream;
> +	if (stream) {
> +		union intel_sseu default_sseu =
> +			intel_sseu_from_device_sseu(&INTEL_INFO(dev_priv)->sseu);
> +
> +		gen8_update_reg_state_unlocked(ctx, reg_state,
> +					       stream->oa_config,
> +					       default_sseu);
> +	}
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index f875be03eadb..8fb6e66a7a84 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2481,8 +2481,8 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine)
>   	return logical_ring_init(engine);
>   }
>   
> -static u32 make_rpcs(const struct sseu_dev_info *sseu,
> -		     union intel_sseu ctx_sseu)
> +u32 gen8_make_rpcs(const struct sseu_dev_info *sseu,
> +		   union intel_sseu ctx_sseu)
>   {
>   	u32 rpcs = 0;
>   
> @@ -2630,10 +2630,13 @@ static void execlists_init_reg_state(u32 *regs,
>   	}
>   
>   	if (rcs) {
> +		union intel_sseu sseu = ctx->__engine[engine->id].sseu;

to_intel_context

> +
>   		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
>   		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
> -			make_rpcs(&INTEL_INFO(dev_priv)->sseu,
> -				  ctx->__engine[engine->id].sseu));
> +			gen8_make_rpcs(&INTEL_INFO(dev_priv)->sseu,
> +				       intel_engine_prepare_sseu(engine,
> +								 sseu)));
>   
>   		i915_oa_init_reg_state(engine, ctx, regs);
>   	}
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 1593194e930c..265da7228869 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -104,4 +104,7 @@ struct i915_gem_context;
>   
>   void intel_lr_context_resume(struct drm_i915_private *dev_priv);
>   
> +u32 gen8_make_rpcs(const struct sseu_dev_info *sseu,
> +		   union intel_sseu ctx_sseu);
> +
>   #endif /* _INTEL_LRC_H_ */
> 

If you'll want me to review this I'll need to study i915_perf.c a bit. :)

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b86ed6401120..21631b51b37b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2743,6 +2743,22 @@  int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
 int intel_engines_init(struct drm_i915_private *dev_priv);
 
+static inline union intel_sseu
+intel_engine_prepare_sseu(struct intel_engine_cs *engine,
+			  union intel_sseu sseu)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	/*
+	 * If i915/perf is active, we want a stable powergating configuration
+	 * on the system. The most natural configuration to take in that case
+	 * is the default (i.e maximum the hardware can do).
+	 */
+	return dev_priv->perf.oa.exclusive_stream ?
+		intel_sseu_from_device_sseu(&INTEL_INFO(dev_priv)->sseu) :
+		sseu;
+}
+
 /* intel_hotplug.c */
 void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
 			   u32 pin_mask, u32 long_mask);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index a5d98bda5c2e..7ba8a3ff744c 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1574,7 +1574,8 @@  static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
  */
 static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
 					   u32 *reg_state,
-					   const struct i915_oa_config *oa_config)
+					   const struct i915_oa_config *oa_config,
+					   union intel_sseu sseu)
 {
 	struct drm_i915_private *dev_priv = ctx->i915;
 	u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
@@ -1620,6 +1621,9 @@  static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
 
 		CTX_REG(reg_state, state_offset, flex_regs[i], value);
 	}
+
+	CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
+		gen8_make_rpcs(&INTEL_INFO(dev_priv)->sseu, sseu));
 }
 
 /*
@@ -1751,6 +1755,8 @@  static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 				       const struct i915_oa_config *oa_config)
 {
 	struct intel_engine_cs *engine = dev_priv->engine[RCS];
+	union intel_sseu default_sseu =
+		intel_sseu_from_device_sseu(&INTEL_INFO(dev_priv)->sseu);
 	struct i915_gem_context *ctx;
 	int ret;
 	unsigned int wait_flags = I915_WAIT_LOCKED;
@@ -1795,7 +1801,8 @@  static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 		ce->state->obj->mm.dirty = true;
 		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
 
-		gen8_update_reg_state_unlocked(ctx, regs, oa_config);
+		gen8_update_reg_state_unlocked(ctx, regs, oa_config,
+					       oa_config ? default_sseu : ce->sseu);
 
 		i915_gem_object_unpin_map(ce->state->obj);
 	}
@@ -2167,14 +2174,21 @@  void i915_oa_init_reg_state(struct intel_engine_cs *engine,
 			    struct i915_gem_context *ctx,
 			    u32 *reg_state)
 {
+	struct drm_i915_private *dev_priv = engine->i915;
 	struct i915_perf_stream *stream;
 
 	if (engine->id != RCS)
 		return;
 
-	stream = engine->i915->perf.oa.exclusive_stream;
-	if (stream)
-		gen8_update_reg_state_unlocked(ctx, reg_state, stream->oa_config);
+	stream = dev_priv->perf.oa.exclusive_stream;
+	if (stream) {
+		union intel_sseu default_sseu =
+			intel_sseu_from_device_sseu(&INTEL_INFO(dev_priv)->sseu);
+
+		gen8_update_reg_state_unlocked(ctx, reg_state,
+					       stream->oa_config,
+					       default_sseu);
+	}
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f875be03eadb..8fb6e66a7a84 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2481,8 +2481,8 @@  int logical_xcs_ring_init(struct intel_engine_cs *engine)
 	return logical_ring_init(engine);
 }
 
-static u32 make_rpcs(const struct sseu_dev_info *sseu,
-		     union intel_sseu ctx_sseu)
+u32 gen8_make_rpcs(const struct sseu_dev_info *sseu,
+		   union intel_sseu ctx_sseu)
 {
 	u32 rpcs = 0;
 
@@ -2630,10 +2630,13 @@  static void execlists_init_reg_state(u32 *regs,
 	}
 
 	if (rcs) {
+		union intel_sseu sseu = ctx->__engine[engine->id].sseu;
+
 		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
 		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
-			make_rpcs(&INTEL_INFO(dev_priv)->sseu,
-				  ctx->__engine[engine->id].sseu));
+			gen8_make_rpcs(&INTEL_INFO(dev_priv)->sseu,
+				       intel_engine_prepare_sseu(engine,
+								 sseu)));
 
 		i915_oa_init_reg_state(engine, ctx, regs);
 	}
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 1593194e930c..265da7228869 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -104,4 +104,7 @@  struct i915_gem_context;
 
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 
+u32 gen8_make_rpcs(const struct sseu_dev_info *sseu,
+		   union intel_sseu ctx_sseu);
+
 #endif /* _INTEL_LRC_H_ */