diff mbox series

[v6,2/3] drm/i915: set optimum eu/slice/sub-slice configuration based on load type

Message ID 1574743899-17638-3-git-send-email-ankit.p.navik@intel.com (mailing list archive)
State New, archived
Headers show
Series Dynamic EU configuration of Slice/Sub-slice/EU | expand

Commit Message

Ankit Navik Nov. 26, 2019, 4:51 a.m. UTC
This patch selects the optimum EU/slice/sub-slice configuration based on
the type of load (low, medium, high) given as input.
Based on our readings and experiments, we have a predefined set of optimum
configurations for each platform (CHT, KBL).
i915_gem_context_set_load_type selects the optimum configuration from the
pre-defined optimum configuration table (opt_config).

It also introduces a flag, update_render_config, which can be set by any governor.

v2:
 * Move static optimum_config to device init time.
 * Rename function to appropriate name, fix data types and patch ordering.
 * Rename prev_load_type to pending_load_type. (Tvrtko Ursulin)

v3:
 * Add safe guard check in i915_gem_context_set_load_type.
 * Rename struct from optimum_config to i915_sseu_optimum_config to
   avoid namespace clashes.
 * Reduce memcpy usage for space efficiency.
 * Rebase.
 * Improved commit message. (Tvrtko Ursulin)

v4:
 * Move optimum config table to file scope. (Tvrtko Ursulin)

v5:
 * Adds optimal table of slice/sub-slice/EU for Gen 9 GT1.
 * Rebase.

v6:
 * Rebase.
 * Fix warnings.

Cc: Vipin Anand <vipin.anand@intel.com>
Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c       | 18 +++++++
 drivers/gpu/drm/i915/gem/i915_gem_context.h       |  2 +
 drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 32 ++++++++++++
 drivers/gpu/drm/i915/gt/intel_lrc.c               | 42 ++++++++++++++-
 drivers/gpu/drm/i915/i915_drv.h                   |  5 ++
 drivers/gpu/drm/i915/intel_device_info.c          | 62 ++++++++++++++++++++++-
 6 files changed, 157 insertions(+), 4 deletions(-)

Comments

Tvrtko Ursulin Nov. 26, 2019, 10:41 a.m. UTC | #1
On 26/11/2019 04:51, Ankit Navik wrote:
> This patch will select optimum eu/slice/sub-slice configuration based on
> type of load (low, medium, high) as input.
> Based on our readings and experiments we have predefined set of optimum
> configuration for each platform(CHT, KBL).
> i915_gem_context_set_load_type will select optimum configuration from
> pre-defined optimum configuration table(opt_config).
> 
> It also introduce flag update_render_config which can set by any governor.
> 
> v2:
>   * Move static optimum_config to device init time.
>   * Rename function to appropriate name, fix data types and patch ordering.
>   * Rename prev_load_type to pending_load_type. (Tvrtko Ursulin)
> 
> v3:
>   * Add safe guard check in i915_gem_context_set_load_type.
>   * Rename struct from optimum_config to i915_sseu_optimum_config to
>     avoid namespace clashes.
>   * Reduces memcpy for space efficient.
>   * Rebase.
>   * Improved commit message. (Tvrtko Ursulin)
> 
> v4:
>   * Move optimum config table to file scope. (Tvrtko Ursulin)
> 
> v5:
>   * Adds optimal table of slice/sub-slice/EU for Gen 9 GT1.
>   * Rebase.
> 
> v6:
>   * Rebase.
>   * Fix warnings.
> 
> Cc: Vipin Anand <vipin.anand@intel.com>
> Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_context.c       | 18 +++++++
>   drivers/gpu/drm/i915/gem/i915_gem_context.h       |  2 +
>   drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 32 ++++++++++++
>   drivers/gpu/drm/i915/gt/intel_lrc.c               | 42 ++++++++++++++-
>   drivers/gpu/drm/i915/i915_drv.h                   |  5 ++
>   drivers/gpu/drm/i915/intel_device_info.c          | 62 ++++++++++++++++++++++-
>   6 files changed, 157 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 8288fb9..ac94f92 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -713,10 +713,28 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
>   
>   	trace_i915_context_create(ctx);
>   	atomic_set(&ctx->req_cnt, 0);
> +	ctx->slice_cnt = hweight8(RUNTIME_INFO(i915)->sseu.slice_mask);
> +	ctx->subslice_cnt = hweight8(RUNTIME_INFO(i915)->sseu.subslice_mask[0]);
> +	ctx->eu_cnt = RUNTIME_INFO(i915)->sseu.eu_per_subslice;

I wanted to say that you need to wrap these into a named structure which
makes it clear that these members belong to Dynamic EU, but let me finish
reading first; there might be a better way.

>   
>   	return ctx;
>   }
>   
> +void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
> +				    enum gem_load_type type)
> +{
> +	struct drm_i915_private *dev_priv = ctx->i915;
> +
> +	if (GEM_WARN_ON(type > LOAD_TYPE_LAST))
> +		return;

This should be >=, I think; otherwise type == LOAD_TYPE_LAST passes the
check and indexes one past the end of the opt_config table.

> +
> +	/* Call opt_config to get correct configuration for eu,slice,subslice */
> +	ctx->slice_cnt = dev_priv->opt_config[type].slice;
> +	ctx->subslice_cnt = dev_priv->opt_config[type].subslice;
> +	ctx->eu_cnt = dev_priv->opt_config[type].eu;
> +	ctx->pending_load_type = type;
> +}
> +
>   static void
>   destroy_kernel_context(struct i915_gem_context **ctxp)
>   {
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> index 18e50a7..8677427 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> @@ -177,6 +177,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   				    struct drm_file *file_priv);
>   int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
>   				       struct drm_file *file);
> +void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
> +				enum gem_load_type type);
>   
>   struct i915_gem_context *
>   i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> index 3931c06..6847d49 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> @@ -40,6 +40,19 @@ struct i915_gem_engines_iter {
>   	const struct i915_gem_engines *engines;
>   };
>   
> +enum gem_load_type {
> +	LOAD_TYPE_LOW,
> +	LOAD_TYPE_MEDIUM,
> +	LOAD_TYPE_HIGH,
> +	LOAD_TYPE_LAST
> +};
> +
> +struct i915_sseu_optimum_config {
> +	u8 slice;
> +	u8 subslice;
> +	u8 eu;
> +};
> +
>   /**
>    * struct i915_gem_context - client state
>    *
> @@ -173,6 +186,25 @@ struct i915_gem_context {
>   	 */
>   	atomic_t req_cnt;
>   
> +	/** slice_cnt: used to set the # of slices to be enabled. */
> +	u8 slice_cnt;
> +
> +	/** subslice_cnt: used to set the # of subslices to be enabled. */
> +	u8 subslice_cnt;
> +
> +	/** eu_cnt: used to set the # of eu to be enabled. */
> +	u8 eu_cnt;
> +
> +	/** load_type: The designated load_type (high/medium/low) for a given
> +	 * number of pending commands in the command queue.
> +	 */
> +	enum gem_load_type load_type;
> +
> +	/** pending_load_type: The earlier load type that the GPU was configured
> +	 * for (high/medium/low).
> +	 */
> +	enum gem_load_type pending_load_type;
> +
>   	/** jump_whitelist: Bit array for tracking cmds during cmdparsing
>   	 *  Guarded by struct_mutex
>   	 */
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 511d5a1..c3f279e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2436,6 +2436,36 @@ static void execlists_context_unpin(struct intel_context *ce)
>   	intel_ring_reset(ce->ring, ce->ring->tail);
>   }
>   
> +static u32
> +get_context_rpcs_config(struct i915_gem_context *ctx)
> +{
> +	u32 rpcs = 0;
> +	struct drm_i915_private *dev_priv = ctx->i915;
> +
> +	if (INTEL_GEN(dev_priv) < 8)
> +		return 0;
> +
> +	if (RUNTIME_INFO(dev_priv)->sseu.has_slice_pg) {
> +		rpcs |= GEN8_RPCS_S_CNT_ENABLE;
> +		rpcs |= ctx->slice_cnt << GEN8_RPCS_S_CNT_SHIFT;
> +		rpcs |= GEN8_RPCS_ENABLE;
> +	}
> +
> +	if (RUNTIME_INFO(dev_priv)->sseu.has_subslice_pg) {
> +		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
> +		rpcs |= ctx->subslice_cnt << GEN8_RPCS_SS_CNT_SHIFT;
> +		rpcs |= GEN8_RPCS_ENABLE;
> +	}
> +
> +	if (RUNTIME_INFO(dev_priv)->sseu.has_eu_pg) {
> +		rpcs |= ctx->eu_cnt << GEN8_RPCS_EU_MIN_SHIFT;
> +		rpcs |= ctx->eu_cnt << GEN8_RPCS_EU_MAX_SHIFT;
> +		rpcs |= GEN8_RPCS_ENABLE;
> +	}
> +
> +	return rpcs;
> +}
> +
>   static void
>   __execlists_update_reg_state(const struct intel_context *ce,
>   			     const struct intel_engine_cs *engine)
> @@ -2452,8 +2482,13 @@ __execlists_update_reg_state(const struct intel_context *ce,
>   
>   	/* RPCS */
>   	if (engine->class == RENDER_CLASS) {
> -		regs[CTX_R_PWR_CLK_STATE] =
> -			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
> +		if (engine->i915->predictive_load_enable) {
> +			regs[CTX_R_PWR_CLK_STATE] =
> +				get_context_rpcs_config(ce->gem_context);

You cannot do it like this because it breaks OA and the Gen11
user-configured SSEU.

You need to have intel_sseu_make_rpcs below be the central decision 
maker on what is the correct SSEU config to apply.

Order of precedence should be:

1. OA compatible configuration
2. User requested configuration (via context set param)
3. Dynamic SSEU suggested configuration
4. Defaults

Perhaps we need a marker on ce saying that a user configuration has been
set; then this code can remain as it was, and only when you are applying
Dynamic SSEU settings do you do something like:

   if (!ce->user_sseu_set)
	  ce->sseu = make_dynamic_sseu(your optimal config);

I'll explain in the next patch what I am thinking in more detail.

Regards,

Tvrtko

> +		} else {
> +			regs[CTX_R_PWR_CLK_STATE] =
> +				intel_sseu_make_rpcs(engine->i915, &ce->sseu);
> +		}
>   
>   		i915_oa_init_reg_state(ce, engine);
>   	}
> @@ -2485,6 +2520,9 @@ __execlists_context_pin(struct intel_context *ce,
>   	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
>   	__execlists_update_reg_state(ce, engine);
>   
> +	if (ce->gem_context->load_type != ce->gem_context->pending_load_type)
> +		ce->gem_context->load_type = ce->gem_context->pending_load_type;
> +
>   	return 0;
>   
>   unpin_active:
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index fdae5a9..3064ddf 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -999,6 +999,11 @@ struct drm_i915_private {
>   	/* protects panel power sequencer state */
>   	struct mutex pps_mutex;
>   
> +	/* optimal slice/subslice/EU configration state */
> +	struct i915_sseu_optimum_config *opt_config;
> +
> +	int predictive_load_enable;
> +
>   	unsigned int fsb_freq, mem_freq, is_ddr3;
>   	unsigned int skl_preferred_vco_freq;
>   	unsigned int max_cdclk_freq;
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index a5b5713..b3c2f92 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -873,6 +873,34 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915)
>   	RUNTIME_INFO(i915)->platform_mask[pi] |= mask;
>   }
>   
> +/* static table of slice/subslice/EU for Cherryview */
> +static const struct i915_sseu_optimum_config chv_config[LOAD_TYPE_LAST] = {
> +	{1, 1, 4},	/* Low */
> +	{1, 1, 6},	/* Medium */
> +	{1, 2, 6}	/* High */
> +};
> +
> +/* static table of slice/subslice/EU for GLK GT1 */
> +static const struct i915_sseu_optimum_config glk_gt1_config[LOAD_TYPE_LAST] = {
> +	{1, 2, 2},	/* Low */
> +	{1, 2, 3},	/* Medium */
> +	{1, 2, 6}	/* High */
> +};
> +
> +/* static table of slice/subslice/EU for KBL GT2 */
> +static const struct i915_sseu_optimum_config kbl_gt2_config[LOAD_TYPE_LAST] = {
> +	{1, 3, 2},	/* Low */
> +	{1, 3, 4},	/* Medium */
> +	{1, 3, 8}	/* High */
> +};
> +
> +/* static table of slice/subslice/EU for KBL GT3 */
> +static const struct i915_sseu_optimum_config kbl_gt3_config[LOAD_TYPE_LAST] = {
> +	{2, 3, 4},	/* Low */
> +	{2, 3, 6},	/* Medium */
> +	{2, 3, 8}	/* High */
> +};
> +
>   /**
>    * intel_device_info_runtime_init - initialize runtime info
>    * @dev_priv: the i915 device
> @@ -894,6 +922,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>   	struct intel_device_info *info = mkwrite_device_info(dev_priv);
>   	struct intel_runtime_info *runtime = RUNTIME_INFO(dev_priv);
>   	enum pipe pipe;
> +	struct i915_sseu_optimum_config *opt_config = NULL;
>   
>   	if (INTEL_GEN(dev_priv) >= 10) {
>   		for_each_pipe(dev_priv, pipe)
> @@ -999,12 +1028,38 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>   	/* Initialize slice/subslice/EU info */
>   	if (IS_HASWELL(dev_priv))
>   		haswell_sseu_info_init(dev_priv);
> -	else if (IS_CHERRYVIEW(dev_priv))
> +	else if (IS_CHERRYVIEW(dev_priv)) {
>   		cherryview_sseu_info_init(dev_priv);
> +		opt_config = (struct i915_sseu_optimum_config *)chv_config;
> +		BUILD_BUG_ON(ARRAY_SIZE(chv_config) != LOAD_TYPE_LAST);
> +	}
>   	else if (IS_BROADWELL(dev_priv))
>   		broadwell_sseu_info_init(dev_priv);
> -	else if (IS_GEN(dev_priv, 9))
> +	else if (IS_GEN(dev_priv, 9)) {
>   		gen9_sseu_info_init(dev_priv);
> +
> +		switch (info->gt) {
> +		default: /* fall through */
> +		case 1:
> +			opt_config = (struct i915_sseu_optimum_config *)
> +						glk_gt1_config;
> +			BUILD_BUG_ON(ARRAY_SIZE(glk_gt1_config)
> +						!= LOAD_TYPE_LAST);
> +		break;
> +		case 2:
> +			opt_config = (struct i915_sseu_optimum_config *)
> +						kbl_gt2_config;
> +			BUILD_BUG_ON(ARRAY_SIZE(kbl_gt2_config)
> +						!= LOAD_TYPE_LAST);
> +		break;
> +		case 3:
> +			opt_config = (struct i915_sseu_optimum_config *)
> +						kbl_gt3_config;
> +			BUILD_BUG_ON(ARRAY_SIZE(kbl_gt3_config)
> +						!= LOAD_TYPE_LAST);
> +		break;
> +		}
> +	}
>   	else if (IS_GEN(dev_priv, 10))
>   		gen10_sseu_info_init(dev_priv);
>   	else if (IS_GEN(dev_priv, 11))
> @@ -1017,6 +1072,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>   		info->ppgtt_type = INTEL_PPGTT_NONE;
>   	}
>   
> +	if (opt_config)
> +		dev_priv->opt_config = opt_config;
> +
>   	/* Initialize command stream timestamp frequency */
>   	runtime->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
>   }
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 8288fb9..ac94f92 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -713,10 +713,28 @@  i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
 
 	trace_i915_context_create(ctx);
 	atomic_set(&ctx->req_cnt, 0);
+	ctx->slice_cnt = hweight8(RUNTIME_INFO(i915)->sseu.slice_mask);
+	ctx->subslice_cnt = hweight8(RUNTIME_INFO(i915)->sseu.subslice_mask[0]);
+	ctx->eu_cnt = RUNTIME_INFO(i915)->sseu.eu_per_subslice;
 
 	return ctx;
 }
 
+void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
+				    enum gem_load_type type)
+{
+	struct drm_i915_private *dev_priv = ctx->i915;
+
+	if (GEM_WARN_ON(type > LOAD_TYPE_LAST))
+		return;
+
+	/* Call opt_config to get correct configuration for eu,slice,subslice */
+	ctx->slice_cnt = dev_priv->opt_config[type].slice;
+	ctx->subslice_cnt = dev_priv->opt_config[type].subslice;
+	ctx->eu_cnt = dev_priv->opt_config[type].eu;
+	ctx->pending_load_type = type;
+}
+
 static void
 destroy_kernel_context(struct i915_gem_context **ctxp)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 18e50a7..8677427 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -177,6 +177,8 @@  int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file_priv);
 int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
 				       struct drm_file *file);
+void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
+				enum gem_load_type type);
 
 struct i915_gem_context *
 i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 3931c06..6847d49 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -40,6 +40,19 @@  struct i915_gem_engines_iter {
 	const struct i915_gem_engines *engines;
 };
 
+enum gem_load_type {
+	LOAD_TYPE_LOW,
+	LOAD_TYPE_MEDIUM,
+	LOAD_TYPE_HIGH,
+	LOAD_TYPE_LAST
+};
+
+struct i915_sseu_optimum_config {
+	u8 slice;
+	u8 subslice;
+	u8 eu;
+};
+
 /**
  * struct i915_gem_context - client state
  *
@@ -173,6 +186,25 @@  struct i915_gem_context {
 	 */
 	atomic_t req_cnt;
 
+	/** slice_cnt: used to set the # of slices to be enabled. */
+	u8 slice_cnt;
+
+	/** subslice_cnt: used to set the # of subslices to be enabled. */
+	u8 subslice_cnt;
+
+	/** eu_cnt: used to set the # of eu to be enabled. */
+	u8 eu_cnt;
+
+	/** load_type: The designated load_type (high/medium/low) for a given
+	 * number of pending commands in the command queue.
+	 */
+	enum gem_load_type load_type;
+
+	/** pending_load_type: The earlier load type that the GPU was configured
+	 * for (high/medium/low).
+	 */
+	enum gem_load_type pending_load_type;
+
 	/** jump_whitelist: Bit array for tracking cmds during cmdparsing
 	 *  Guarded by struct_mutex
 	 */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 511d5a1..c3f279e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2436,6 +2436,36 @@  static void execlists_context_unpin(struct intel_context *ce)
 	intel_ring_reset(ce->ring, ce->ring->tail);
 }
 
+static u32
+get_context_rpcs_config(struct i915_gem_context *ctx)
+{
+	u32 rpcs = 0;
+	struct drm_i915_private *dev_priv = ctx->i915;
+
+	if (INTEL_GEN(dev_priv) < 8)
+		return 0;
+
+	if (RUNTIME_INFO(dev_priv)->sseu.has_slice_pg) {
+		rpcs |= GEN8_RPCS_S_CNT_ENABLE;
+		rpcs |= ctx->slice_cnt << GEN8_RPCS_S_CNT_SHIFT;
+		rpcs |= GEN8_RPCS_ENABLE;
+	}
+
+	if (RUNTIME_INFO(dev_priv)->sseu.has_subslice_pg) {
+		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
+		rpcs |= ctx->subslice_cnt << GEN8_RPCS_SS_CNT_SHIFT;
+		rpcs |= GEN8_RPCS_ENABLE;
+	}
+
+	if (RUNTIME_INFO(dev_priv)->sseu.has_eu_pg) {
+		rpcs |= ctx->eu_cnt << GEN8_RPCS_EU_MIN_SHIFT;
+		rpcs |= ctx->eu_cnt << GEN8_RPCS_EU_MAX_SHIFT;
+		rpcs |= GEN8_RPCS_ENABLE;
+	}
+
+	return rpcs;
+}
+
 static void
 __execlists_update_reg_state(const struct intel_context *ce,
 			     const struct intel_engine_cs *engine)
@@ -2452,8 +2482,13 @@  __execlists_update_reg_state(const struct intel_context *ce,
 
 	/* RPCS */
 	if (engine->class == RENDER_CLASS) {
-		regs[CTX_R_PWR_CLK_STATE] =
-			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
+		if (engine->i915->predictive_load_enable) {
+			regs[CTX_R_PWR_CLK_STATE] =
+				get_context_rpcs_config(ce->gem_context);
+		} else {
+			regs[CTX_R_PWR_CLK_STATE] =
+				intel_sseu_make_rpcs(engine->i915, &ce->sseu);
+		}
 
 		i915_oa_init_reg_state(ce, engine);
 	}
@@ -2485,6 +2520,9 @@  __execlists_context_pin(struct intel_context *ce,
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 	__execlists_update_reg_state(ce, engine);
 
+	if (ce->gem_context->load_type != ce->gem_context->pending_load_type)
+		ce->gem_context->load_type = ce->gem_context->pending_load_type;
+
 	return 0;
 
 unpin_active:
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fdae5a9..3064ddf 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -999,6 +999,11 @@  struct drm_i915_private {
 	/* protects panel power sequencer state */
 	struct mutex pps_mutex;
 
+	/* optimal slice/subslice/EU configuration state */
+	struct i915_sseu_optimum_config *opt_config;
+
+	int predictive_load_enable;
+
 	unsigned int fsb_freq, mem_freq, is_ddr3;
 	unsigned int skl_preferred_vco_freq;
 	unsigned int max_cdclk_freq;
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index a5b5713..b3c2f92 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -873,6 +873,34 @@  void intel_device_info_subplatform_init(struct drm_i915_private *i915)
 	RUNTIME_INFO(i915)->platform_mask[pi] |= mask;
 }
 
+/* static table of slice/subslice/EU for Cherryview */
+static const struct i915_sseu_optimum_config chv_config[LOAD_TYPE_LAST] = {
+	{1, 1, 4},	/* Low */
+	{1, 1, 6},	/* Medium */
+	{1, 2, 6}	/* High */
+};
+
+/* static table of slice/subslice/EU for GLK GT1 */
+static const struct i915_sseu_optimum_config glk_gt1_config[LOAD_TYPE_LAST] = {
+	{1, 2, 2},	/* Low */
+	{1, 2, 3},	/* Medium */
+	{1, 2, 6}	/* High */
+};
+
+/* static table of slice/subslice/EU for KBL GT2 */
+static const struct i915_sseu_optimum_config kbl_gt2_config[LOAD_TYPE_LAST] = {
+	{1, 3, 2},	/* Low */
+	{1, 3, 4},	/* Medium */
+	{1, 3, 8}	/* High */
+};
+
+/* static table of slice/subslice/EU for KBL GT3 */
+static const struct i915_sseu_optimum_config kbl_gt3_config[LOAD_TYPE_LAST] = {
+	{2, 3, 4},	/* Low */
+	{2, 3, 6},	/* Medium */
+	{2, 3, 8}	/* High */
+};
+
 /**
  * intel_device_info_runtime_init - initialize runtime info
  * @dev_priv: the i915 device
@@ -894,6 +922,7 @@  void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 	struct intel_device_info *info = mkwrite_device_info(dev_priv);
 	struct intel_runtime_info *runtime = RUNTIME_INFO(dev_priv);
 	enum pipe pipe;
+	struct i915_sseu_optimum_config *opt_config = NULL;
 
 	if (INTEL_GEN(dev_priv) >= 10) {
 		for_each_pipe(dev_priv, pipe)
@@ -999,12 +1028,38 @@  void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 	/* Initialize slice/subslice/EU info */
 	if (IS_HASWELL(dev_priv))
 		haswell_sseu_info_init(dev_priv);
-	else if (IS_CHERRYVIEW(dev_priv))
+	else if (IS_CHERRYVIEW(dev_priv)) {
 		cherryview_sseu_info_init(dev_priv);
+		opt_config = (struct i915_sseu_optimum_config *)chv_config;
+		BUILD_BUG_ON(ARRAY_SIZE(chv_config) != LOAD_TYPE_LAST);
+	}
 	else if (IS_BROADWELL(dev_priv))
 		broadwell_sseu_info_init(dev_priv);
-	else if (IS_GEN(dev_priv, 9))
+	else if (IS_GEN(dev_priv, 9)) {
 		gen9_sseu_info_init(dev_priv);
+
+		switch (info->gt) {
+		default: /* fall through */
+		case 1:
+			opt_config = (struct i915_sseu_optimum_config *)
+						glk_gt1_config;
+			BUILD_BUG_ON(ARRAY_SIZE(glk_gt1_config)
+						!= LOAD_TYPE_LAST);
+		break;
+		case 2:
+			opt_config = (struct i915_sseu_optimum_config *)
+						kbl_gt2_config;
+			BUILD_BUG_ON(ARRAY_SIZE(kbl_gt2_config)
+						!= LOAD_TYPE_LAST);
+		break;
+		case 3:
+			opt_config = (struct i915_sseu_optimum_config *)
+						kbl_gt3_config;
+			BUILD_BUG_ON(ARRAY_SIZE(kbl_gt3_config)
+						!= LOAD_TYPE_LAST);
+		break;
+		}
+	}
 	else if (IS_GEN(dev_priv, 10))
 		gen10_sseu_info_init(dev_priv);
 	else if (IS_GEN(dev_priv, 11))
@@ -1017,6 +1072,9 @@  void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 		info->ppgtt_type = INTEL_PPGTT_NONE;
 	}
 
+	if (opt_config)
+		dev_priv->opt_config = opt_config;
+
 	/* Initialize command stream timestamp frequency */
 	runtime->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
 }