diff mbox series

[v2,3/4] drm/i915: set optimum eu/slice/sub-slice configuration based on load type

Message ID 1541477601-10883-4-git-send-email-ankit.p.navik@intel.com (mailing list archive)
State New, archived
Headers show
Series Dynamic EU configuration of Slice/Subslice/EU. | expand

Commit Message

Ankit Navik Nov. 6, 2018, 4:13 a.m. UTC
From: Praveen Diwakar <praveen.diwakar@intel.com>

This patch will select optimum eu/slice/sub-slice configuration based on
type of load (low, medium, high) as input.
Based on our readings and experiments we have predefined set of optimum
configuration for each platform(CHT, KBL).
i915_gem_context_set_load_type will select optimum configuration from
pre-defined optimum configuration table(opt_config).

It also introduce flag update_render_config which can set by any governor.

Signed-off-by: Praveen Diwakar <praveen.diwakar@intel.com>
Signed-off-by: Yogesh Marathe <yogesh.marathe@intel.com>
Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar@intel.com>
Signed-off-by: Kedar J Karanje <kedar.j.karanje@intel.com>
Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |  2 ++
 drivers/gpu/drm/i915/i915_gem_context.c  | 21 +++++++++++++++
 drivers/gpu/drm/i915/i915_gem_context.h  | 30 ++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_device_info.c | 44 ++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_lrc.c         |  4 ++-
 5 files changed, 98 insertions(+), 3 deletions(-)

Comments

Tvrtko Ursulin Nov. 6, 2018, 10:34 a.m. UTC | #1
On 06/11/2018 04:13, Ankit Navik wrote:
> From: Praveen Diwakar <praveen.diwakar@intel.com>
> 
> This patch will select optimum eu/slice/sub-slice configuration based on
> type of load (low, medium, high) as input.
> Based on our readings and experiments we have predefined set of optimum
> configuration for each platform(CHT, KBL).
> i915_gem_context_set_load_type will select optimum configuration from
> pre-defined optimum configuration table(opt_config).
> 
> It also introduce flag update_render_config which can set by any governor.

Patch changelog is missing here and in the first patch.

> Signed-off-by: Praveen Diwakar <praveen.diwakar@intel.com>
> Signed-off-by: Yogesh Marathe <yogesh.marathe@intel.com>
> Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar@intel.com>
> Signed-off-by: Kedar J Karanje <kedar.j.karanje@intel.com>
> Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>   drivers/gpu/drm/i915/i915_gem_context.c  | 21 +++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_context.h  | 30 ++++++++++++++++++++++
>   drivers/gpu/drm/i915/intel_device_info.c | 44 ++++++++++++++++++++++++++++++--
>   drivers/gpu/drm/i915/intel_lrc.c         |  4 ++-
>   5 files changed, 98 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 137ec33..f1b16d0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1686,6 +1686,8 @@ struct drm_i915_private {
>   	struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
>   	int num_fence_regs; /* 8 on pre-965, 16 otherwise */
>   
> +	struct optimum_config opt_config[LOAD_TYPE_MAX];
> +
>   	unsigned int fsb_freq, mem_freq, is_ddr3;
>   	unsigned int skl_preferred_vco_freq;
>   	unsigned int max_cdclk_freq;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index d040858..28ff868 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -388,14 +388,35 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
>   
>   	trace_i915_context_create(ctx);
>   	atomic_set(&ctx->req_cnt, 0);
> +	ctx->update_render_config = 0;
>   	ctx->slice_cnt = hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask);
>   	ctx->subslice_cnt = hweight8(
>   			INTEL_INFO(dev_priv)->sseu.subslice_mask[0]);
>   	ctx->eu_cnt = INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
> +	ctx->load_type = 0;
> +	ctx->pending_load_type = 0;
>   
>   	return ctx;
>   }
>   
> +
> +void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
> +		enum gem_load_type type)
> +{
> +	struct drm_i915_private *dev_priv = ctx->i915;
> +

To be safe I suggest:

if (GEM_WARN_ON(type > LOAD_TYPE_MAX))
	return;

> +	/* Call opt_config to get correct configuration for eu,slice,subslice */
> +	ctx->slice_cnt = dev_priv->opt_config[type].slice;
> +	ctx->subslice_cnt = dev_priv->opt_config[type].subslice;
> +	ctx->eu_cnt = dev_priv->opt_config[type].eu;
> +
> +	/* Enabling this to update the rpcs */
> +	if (ctx->pending_load_type != type)
> +		ctx->update_render_config = 1;
> +
> +	ctx->pending_load_type = type;
> +}
> +
>   /**
>    * i915_gem_context_create_gvt - create a GVT GEM context
>    * @dev: drm device *
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 2b3bf09..9345aa3 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -53,6 +53,19 @@ struct intel_context_ops {
>   	void (*destroy)(struct intel_context *ce);
>   };
>   
> +enum gem_load_type {
> +	LOAD_TYPE_LOW,
> +	LOAD_TYPE_MEDIUM,
> +	LOAD_TYPE_HIGH,
> +	LOAD_TYPE_MAX

Max is actually max plus one. Maybe instead have it as:

	LOAD_TYPE_LAST = LOAD_TYPE_HIGH,

> +};
> +
> +struct optimum_config {

Struct name needs to be more specific to avoid namespace clashes. 
Perhaps i915_sseu_optimum_config?

> +	u8 slice;
> +	u8 subslice;
> +	u8 eu;
> +};
> +
>   /**
>    * struct i915_gem_context - client state
>    *
> @@ -209,6 +222,21 @@ struct i915_gem_context {
>   
>   	/** eu_cnt: used to set the # of eu to be enabled. */
>   	u8 eu_cnt;
> +
> +	/** update_render_config: to track the updates to the render
> +	 * configuration (S/SS/EU Configuration on the GPU)
> +	 */
> +	bool update_render_config;
> +
> +	/** load_type: The designated load_type (high/medium/low) for a given
> +	 * number of pending commands in the command queue.
> +	 */
> +	enum gem_load_type load_type;

load_type looks unused in this patch.

> +
> +	/** pending_load_type: The earlier load type that the GPU was configured
> +	 * for (high/medium/low).
> +	 */
> +	enum gem_load_type pending_load_type;
>   };
>   
>   static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
> @@ -337,6 +365,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   				    struct drm_file *file_priv);
>   int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
>   				       struct drm_file *file);
> +void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
> +		enum gem_load_type type);
>   
>   struct i915_gem_context *
>   i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index 0ef0c64..4c6224a 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -741,6 +741,27 @@ void intel_device_info_runtime_init(struct intel_device_info *info)
>   		container_of(info, struct drm_i915_private, info);
>   	enum pipe pipe;
>   
> +	/* static table of slice/subslice/EU for Cherrytrial */

Cherryview? At least a typo in trail.

> +	struct optimum_config chv_config[LOAD_TYPE_MAX] = {
> +		{1, 1, 4},	/* Low */
> +		{1, 1, 6},	/* Medium */
> +		{1, 2, 6}	/* High */
> +	};
> +
> +	/* static table of slice/subslice/EU for KBL GT2 */
> +	struct optimum_config kbl_gt2_config[LOAD_TYPE_MAX] = {
> +		{1, 3, 2},	/* Low */
> +		{1, 3, 4},	/* Medium */
> +		{1, 3, 8}	/* High */
> +	};
> +
> +	/* static table of slice/subslice/EU for KBL GT3 */
> +	struct optimum_config kbl_gt3_config[LOAD_TYPE_MAX] = {
> +		{2, 3, 4},	/* Low */
> +		{2, 3, 6},	/* Medium */
> +		{2, 3, 8}	/* High */
> +	};
> +
>   	if (INTEL_GEN(dev_priv) >= 10) {
>   		for_each_pipe(dev_priv, pipe)
>   			info->num_scalers[pipe] = 2;
> @@ -840,12 +861,31 @@ void intel_device_info_runtime_init(struct intel_device_info *info)
>   	/* Initialize slice/subslice/EU info */
>   	if (IS_HASWELL(dev_priv))
>   		haswell_sseu_info_init(dev_priv);
> -	else if (IS_CHERRYVIEW(dev_priv))
> +	else if (IS_CHERRYVIEW(dev_priv)) {
>   		cherryview_sseu_info_init(dev_priv);
> +		memcpy(dev_priv->opt_config, chv_config, sizeof(chv_config));
> +	}
>   	else if (IS_BROADWELL(dev_priv))
>   		broadwell_sseu_info_init(dev_priv);
> -	else if (INTEL_GEN(dev_priv) == 9)
> +	else if (INTEL_GEN(dev_priv) == 9) {
>   		gen9_sseu_info_init(dev_priv);
> +
> +		if (IS_KABYLAKE(dev_priv)) {
> +			switch (info->gt) {
> +			default:
> +				MISSING_CASE(info->gt);
> +				/* fall through */
> +			case 2:
> +				memcpy(dev_priv->opt_config, kbl_gt2_config,
> +						sizeof(kbl_gt2_config));
> +			break;
> +			case 3:
> +				memcpy(dev_priv->opt_config, kbl_gt3_config,
> +						sizeof(kbl_gt3_config));

As an alternative to many memcpy you could have:

	opt_config = NULL;
	...
	opt_config = kbl_gt2_config;
	...
	if (opt_config)
		memcpy(dev_priv->opt_config, opt_config, ...);

Or you could actually just store the pointer to rodata table in 
dev_priv, if you moved the tables to global scope as static const.

Both approaches are increasingly space efficient I think.

> +			break;
> +			}
> +		}
> +	}
>   	else if (INTEL_GEN(dev_priv) == 10)
>   		gen10_sseu_info_init(dev_priv);
>   	else if (INTEL_GEN(dev_priv) >= 11)
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index a8ab71a..4d000eb 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -427,11 +427,13 @@ static u64 execlists_update_context(struct i915_request *rq)
>   
>   	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
>   	/* FIXME: To avoid stale rpcs config, move it to context_pin */
> -	if (ctx->pid && ctx->name && (rq->engine->id == RCS)) {
> +	if (ctx->pid && ctx->name && (rq->engine->id == RCS) &&
> +			ctx->update_render_config) {

Could you do if ctx->load_type != ctx->pending_load_type here and lose 
the need for ctx->update_render_config?

>   		rpcs_config = make_rpcs(ctx->i915);
>   		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
>   		CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
>   				rpcs_config);
> +		ctx->update_render_config = 0;

In that case would need to do ctx->load_type = ctx->pending_load_type here.

Regards,

Tvrtko

>   	}
>   
>   	/* True 32b PPGTT with dynamic page allocation: update PDP
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 137ec33..f1b16d0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1686,6 +1686,8 @@  struct drm_i915_private {
 	struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
 	int num_fence_regs; /* 8 on pre-965, 16 otherwise */
 
+	struct optimum_config opt_config[LOAD_TYPE_MAX];
+
 	unsigned int fsb_freq, mem_freq, is_ddr3;
 	unsigned int skl_preferred_vco_freq;
 	unsigned int max_cdclk_freq;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index d040858..28ff868 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -388,14 +388,35 @@  i915_gem_create_context(struct drm_i915_private *dev_priv,
 
 	trace_i915_context_create(ctx);
 	atomic_set(&ctx->req_cnt, 0);
+	ctx->update_render_config = 0;
 	ctx->slice_cnt = hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask);
 	ctx->subslice_cnt = hweight8(
 			INTEL_INFO(dev_priv)->sseu.subslice_mask[0]);
 	ctx->eu_cnt = INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
+	ctx->load_type = 0;
+	ctx->pending_load_type = 0;
 
 	return ctx;
 }
 
+
+void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
+		enum gem_load_type type)
+{
+	struct drm_i915_private *dev_priv = ctx->i915;
+
+	/* Call opt_config to get correct configuration for eu,slice,subslice */
+	ctx->slice_cnt = dev_priv->opt_config[type].slice;
+	ctx->subslice_cnt = dev_priv->opt_config[type].subslice;
+	ctx->eu_cnt = dev_priv->opt_config[type].eu;
+
+	/* Enabling this to update the rpcs */
+	if (ctx->pending_load_type != type)
+		ctx->update_render_config = 1;
+
+	ctx->pending_load_type = type;
+}
+
 /**
  * i915_gem_context_create_gvt - create a GVT GEM context
  * @dev: drm device *
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 2b3bf09..9345aa3 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -53,6 +53,19 @@  struct intel_context_ops {
 	void (*destroy)(struct intel_context *ce);
 };
 
+enum gem_load_type {
+	LOAD_TYPE_LOW,
+	LOAD_TYPE_MEDIUM,
+	LOAD_TYPE_HIGH,
+	LOAD_TYPE_MAX
+};
+
+struct optimum_config {
+	u8 slice;
+	u8 subslice;
+	u8 eu;
+};
+
 /**
  * struct i915_gem_context - client state
  *
@@ -209,6 +222,21 @@  struct i915_gem_context {
 
 	/** eu_cnt: used to set the # of eu to be enabled. */
 	u8 eu_cnt;
+
+	/** update_render_config: to track the updates to the render
+	 * configuration (S/SS/EU Configuration on the GPU)
+	 */
+	bool update_render_config;
+
+	/** load_type: The designated load_type (high/medium/low) for a given
+	 * number of pending commands in the command queue.
+	 */
+	enum gem_load_type load_type;
+
+	/** pending_load_type: The earlier load type that the GPU was configured
+	 * for (high/medium/low).
+	 */
+	enum gem_load_type pending_load_type;
 };
 
 static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
@@ -337,6 +365,8 @@  int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file_priv);
 int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
 				       struct drm_file *file);
+void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
+		enum gem_load_type type);
 
 struct i915_gem_context *
 i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 0ef0c64..4c6224a 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -741,6 +741,27 @@  void intel_device_info_runtime_init(struct intel_device_info *info)
 		container_of(info, struct drm_i915_private, info);
 	enum pipe pipe;
 
+	/* static table of slice/subslice/EU for Cherrytrial */
+	struct optimum_config chv_config[LOAD_TYPE_MAX] = {
+		{1, 1, 4},	/* Low */
+		{1, 1, 6},	/* Medium */
+		{1, 2, 6}	/* High */
+	};
+
+	/* static table of slice/subslice/EU for KBL GT2 */
+	struct optimum_config kbl_gt2_config[LOAD_TYPE_MAX] = {
+		{1, 3, 2},	/* Low */
+		{1, 3, 4},	/* Medium */
+		{1, 3, 8}	/* High */
+	};
+
+	/* static table of slice/subslice/EU for KBL GT3 */
+	struct optimum_config kbl_gt3_config[LOAD_TYPE_MAX] = {
+		{2, 3, 4},	/* Low */
+		{2, 3, 6},	/* Medium */
+		{2, 3, 8}	/* High */
+	};
+
 	if (INTEL_GEN(dev_priv) >= 10) {
 		for_each_pipe(dev_priv, pipe)
 			info->num_scalers[pipe] = 2;
@@ -840,12 +861,31 @@  void intel_device_info_runtime_init(struct intel_device_info *info)
 	/* Initialize slice/subslice/EU info */
 	if (IS_HASWELL(dev_priv))
 		haswell_sseu_info_init(dev_priv);
-	else if (IS_CHERRYVIEW(dev_priv))
+	else if (IS_CHERRYVIEW(dev_priv)) {
 		cherryview_sseu_info_init(dev_priv);
+		memcpy(dev_priv->opt_config, chv_config, sizeof(chv_config));
+	}
 	else if (IS_BROADWELL(dev_priv))
 		broadwell_sseu_info_init(dev_priv);
-	else if (INTEL_GEN(dev_priv) == 9)
+	else if (INTEL_GEN(dev_priv) == 9) {
 		gen9_sseu_info_init(dev_priv);
+
+		if (IS_KABYLAKE(dev_priv)) {
+			switch (info->gt) {
+			default:
+				MISSING_CASE(info->gt);
+				/* fall through */
+			case 2:
+				memcpy(dev_priv->opt_config, kbl_gt2_config,
+						sizeof(kbl_gt2_config));
+			break;
+			case 3:
+				memcpy(dev_priv->opt_config, kbl_gt3_config,
+						sizeof(kbl_gt3_config));
+			break;
+			}
+		}
+	}
 	else if (INTEL_GEN(dev_priv) == 10)
 		gen10_sseu_info_init(dev_priv);
 	else if (INTEL_GEN(dev_priv) >= 11)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a8ab71a..4d000eb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -427,11 +427,13 @@  static u64 execlists_update_context(struct i915_request *rq)
 
 	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
 	/* FIXME: To avoid stale rpcs config, move it to context_pin */
-	if (ctx->pid && ctx->name && (rq->engine->id == RCS)) {
+	if (ctx->pid && ctx->name && (rq->engine->id == RCS) &&
+			ctx->update_render_config) {
 		rpcs_config = make_rpcs(ctx->i915);
 		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
 		CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
 				rpcs_config);
+		ctx->update_render_config = 0;
 	}
 
 	/* True 32b PPGTT with dynamic page allocation: update PDP