diff mbox series

[v3,3/4] drm/i915: set optimum eu/slice/sub-slice configuration based on load type

Message ID 1544523261-26905-4-git-send-email-ankit.p.navik@intel.com (mailing list archive)
State New, archived
Headers show
Series Dynamic EU configuration of Slice/Subslice/EU. | expand

Commit Message

Ankit Navik Dec. 11, 2018, 10:14 a.m. UTC
From: Praveen Diwakar <praveen.diwakar@intel.com>

This patch will select optimum eu/slice/sub-slice configuration based on
type of load (low, medium, high) as input.
Based on our readings and experiments we have predefined set of optimum
configuration for each platform(CHT, KBL).
i915_gem_context_set_load_type will select optimum configuration from
pre-defined optimum configuration table(opt_config).

It also introduce flag update_render_config which can set by any governor.

v2:
 * Move static optimum_config to device init time.
 * Rename function to appropriate name, fix data types and patch ordering.
 * Rename prev_load_type to pending_load_type. (Tvrtko Ursulin)

v3:
 * Add safe guard check in i915_gem_context_set_load_type.
 * Rename struct from optimum_config to i915_sseu_optimum_config to
   avoid namespace clashes.
 * Reduces memcpy for space efficient.
 * Rebase.
 * Improved commit message. (Tvrtko Ursulin)

Cc: Kedar J Karanje <kedar.j.karanje@intel.com>
Cc: Yogesh Marathe <yogesh.marathe@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Signed-off-by: Praveen Diwakar <praveen.diwakar@intel.com>
Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar@intel.com>
Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |  3 ++
 drivers/gpu/drm/i915/i915_gem_context.c  | 18 ++++++++++++
 drivers/gpu/drm/i915/i915_gem_context.h  | 25 +++++++++++++++++
 drivers/gpu/drm/i915/intel_device_info.c | 47 ++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_lrc.c         |  4 ++-
 5 files changed, 94 insertions(+), 3 deletions(-)

Comments

Tvrtko Ursulin Dec. 11, 2018, 12:47 p.m. UTC | #1
On 11/12/2018 10:14, Ankit Navik wrote:
> From: Praveen Diwakar <praveen.diwakar@intel.com>
> 
> This patch will select optimum eu/slice/sub-slice configuration based on
> type of load (low, medium, high) as input.
> Based on our readings and experiments we have predefined set of optimum
> configuration for each platform(CHT, KBL).
> i915_gem_context_set_load_type will select optimum configuration from
> pre-defined optimum configuration table(opt_config).
> 
> It also introduce flag update_render_config which can set by any governor.
> 
> v2:
>   * Move static optimum_config to device init time.
>   * Rename function to appropriate name, fix data types and patch ordering.
>   * Rename prev_load_type to pending_load_type. (Tvrtko Ursulin)
> 
> v3:
>   * Add safe guard check in i915_gem_context_set_load_type.
>   * Rename struct from optimum_config to i915_sseu_optimum_config to
>     avoid namespace clashes.
>   * Reduces memcpy for space efficient.
>   * Rebase.
>   * Improved commit message. (Tvrtko Ursulin)
> 
> Cc: Kedar J Karanje <kedar.j.karanje@intel.com>
> Cc: Yogesh Marathe <yogesh.marathe@intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>

Again for the record no, I did not r-b this.

> Signed-off-by: Praveen Diwakar <praveen.diwakar@intel.com>
> Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar@intel.com>
> Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h          |  3 ++
>   drivers/gpu/drm/i915/i915_gem_context.c  | 18 ++++++++++++
>   drivers/gpu/drm/i915/i915_gem_context.h  | 25 +++++++++++++++++
>   drivers/gpu/drm/i915/intel_device_info.c | 47 ++++++++++++++++++++++++++++++--
>   drivers/gpu/drm/i915/intel_lrc.c         |  4 ++-
>   5 files changed, 94 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4aca534..4b9a8c5 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1681,6 +1681,9 @@ struct drm_i915_private {
>   	struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
>   	int num_fence_regs; /* 8 on pre-965, 16 otherwise */
>   
> +	/* optimal slice/subslice/EU configration state */
> +	struct i915_sseu_optimum_config opt_config[LOAD_TYPE_LAST];

Make it a pointer to struct i915_sseu_optimum_config.

> +
>   	unsigned int fsb_freq, mem_freq, is_ddr3;
>   	unsigned int skl_preferred_vco_freq;
>   	unsigned int max_cdclk_freq;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index d040858..c0ced72 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -392,10 +392,28 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
>   	ctx->subslice_cnt = hweight8(
>   			INTEL_INFO(dev_priv)->sseu.subslice_mask[0]);
>   	ctx->eu_cnt = INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
> +	ctx->load_type = 0;
> +	ctx->pending_load_type = 0;

Not needed because we zero the allocation and probably depend on 
untouched fields being zero elsewhere.

>   
>   	return ctx;
>   }
>   
> +
> +void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
> +		enum gem_load_type type)
> +{
> +	struct drm_i915_private *dev_priv = ctx->i915;
> +
> +	if (GEM_WARN_ON(type > LOAD_TYPE_LAST))
> +		return;
> +
> +	/* Call opt_config to get correct configuration for eu,slice,subslice */
> +	ctx->slice_cnt = dev_priv->opt_config[type].slice;
> +	ctx->subslice_cnt = dev_priv->opt_config[type].subslice;
> +	ctx->eu_cnt = dev_priv->opt_config[type].eu;
> +	ctx->pending_load_type = type;
> +}
> +
>   /**
>    * i915_gem_context_create_gvt - create a GVT GEM context
>    * @dev: drm device *
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index e000530..a0db13c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -53,6 +53,19 @@ struct intel_context_ops {
>   	void (*destroy)(struct intel_context *ce);
>   };
>   
> +enum gem_load_type {
> +	LOAD_TYPE_LOW,
> +	LOAD_TYPE_MEDIUM,
> +	LOAD_TYPE_HIGH,
> +	LOAD_TYPE_LAST
> +};
> +
> +struct i915_sseu_optimum_config {
> +	u8 slice;
> +	u8 subslice;
> +	u8 eu;
> +};
> +
>   /**
>    * struct i915_gem_context - client state
>    *
> @@ -208,6 +221,16 @@ struct i915_gem_context {
>   
>   	/** eu_cnt: used to set the # of eu to be enabled. */
>   	u8 eu_cnt;
> +
> +	/** load_type: The designated load_type (high/medium/low) for a given
> +	 * number of pending commands in the command queue.
> +	 */
> +	enum gem_load_type load_type;
> +
> +	/** pending_load_type: The earlier load type that the GPU was configured
> +	 * for (high/medium/low).
> +	 */
> +	enum gem_load_type pending_load_type;
>   };
>   
>   static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
> @@ -336,6 +359,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   				    struct drm_file *file_priv);
>   int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
>   				       struct drm_file *file);
> +void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
> +		enum gem_load_type type);
>   
>   struct i915_gem_context *
>   i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index 0ef0c64..33c6310 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -741,6 +741,28 @@ void intel_device_info_runtime_init(struct intel_device_info *info)
>   		container_of(info, struct drm_i915_private, info);
>   	enum pipe pipe;
>   
> +	struct i915_sseu_optimum_config *opt_config = NULL;
> +	/* static table of slice/subslice/EU for Cherryview */
> +	struct i915_sseu_optimum_config chv_config[LOAD_TYPE_LAST] = {
> +		{1, 1, 4},	/* Low */
> +		{1, 1, 6},	/* Medium */
> +		{1, 2, 6}	/* High */
> +	};
> +
> +	/* static table of slice/subslice/EU for KBL GT2 */
> +	struct i915_sseu_optimum_config kbl_gt2_config[LOAD_TYPE_LAST] = {
> +		{1, 3, 2},	/* Low */
> +		{1, 3, 4},	/* Medium */
> +		{1, 3, 8}	/* High */
> +	};
> +
> +	/* static table of slice/subslice/EU for KBL GT3 */
> +	struct i915_sseu_optimum_config kbl_gt3_config[LOAD_TYPE_LAST] = {
> +		{2, 3, 4},	/* Low */
> +		{2, 3, 6},	/* Medium */
> +		{2, 3, 8}	/* High */
> +	};

Move these to file scope as static const.

> +
>   	if (INTEL_GEN(dev_priv) >= 10) {
>   		for_each_pipe(dev_priv, pipe)
>   			info->num_scalers[pipe] = 2;
> @@ -840,17 +862,38 @@ void intel_device_info_runtime_init(struct intel_device_info *info)
>   	/* Initialize slice/subslice/EU info */
>   	if (IS_HASWELL(dev_priv))
>   		haswell_sseu_info_init(dev_priv);
> -	else if (IS_CHERRYVIEW(dev_priv))
> +	else if (IS_CHERRYVIEW(dev_priv)) {
>   		cherryview_sseu_info_init(dev_priv);
> +		opt_config = chv_config;

For my idea to work here and below add:

BUILD_BUG_ON(ARRAY_SIZE(chv_config) != LOAD_TYPE_LAST);

> +	}
>   	else if (IS_BROADWELL(dev_priv))
>   		broadwell_sseu_info_init(dev_priv);
> -	else if (INTEL_GEN(dev_priv) == 9)
> +	else if (INTEL_GEN(dev_priv) == 9) {
>   		gen9_sseu_info_init(dev_priv);
> +
> +		if (IS_KABYLAKE(dev_priv)) {
> +			switch (info->gt) {
> +			default:
> +				MISSING_CASE(info->gt);

This should probably be silent since absence of sseu tuning is not an 
error on other platforms.

> +				/* fall through */
> +			case 2:
> +				opt_config = kbl_gt2_config;
> +			break;
> +			case 3:
> +				opt_config = kbl_gt3_config;

Would Kabylake table work for other Gen9 platforms?

> +			break;
> +			}
> +		}
> +	}
>   	else if (INTEL_GEN(dev_priv) == 10)
>   		gen10_sseu_info_init(dev_priv);
>   	else if (INTEL_GEN(dev_priv) >= 11)
>   		gen11_sseu_info_init(dev_priv);
>   
> +	if (opt_config)
> +		memcpy(dev_priv->opt_config, opt_config, LOAD_TYPE_LAST *
> +			sizeof(struct i915_sseu_optimum_config));

And if you agree with my other suggestions then you can replace a copy with:

	dev_priv->opt_config = opt_config;

Or in fact, even do it directly on assignment sites above.

> +
>   	/* Initialize command stream timestamp frequency */
>   	info->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
>   }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index a17f676..7fb9cd2 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -427,11 +427,13 @@ static u64 execlists_update_context(struct i915_request *rq)
>   
>   	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
>   	/* FIXME: To avoid stale rpcs config, move it to context_pin */
> -	if (ctx->pid && ctx->name && (rq->engine->id == RCS)) {
> +	if (ctx->pid && ctx->name && (rq->engine->id == RCS) &&
> +			(ctx->load_type != ctx->pending_load_type)) {
>   		rpcs_config = make_rpcs(ctx->i915);
>   		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
>   		CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
>   				rpcs_config);
> +		ctx->load_type = ctx->pending_load_type;
>   	}
>   
>   	/* True 32b PPGTT with dynamic page allocation: update PDP
> 

Regards,

Tvrtko
Ankit Navik Dec. 12, 2018, 7:36 a.m. UTC | #2
Hi Tvrtko, 

> On Tue, Dec 11, 2018 at 6:17 PM Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> wrote:
> 
> 
> On 11/12/2018 10:14, Ankit Navik wrote:
> > From: Praveen Diwakar <praveen.diwakar@intel.com>
> >
> > This patch will select optimum eu/slice/sub-slice configuration based
> > on type of load (low, medium, high) as input.
> > Based on our readings and experiments we have predefined set of
> > optimum configuration for each platform(CHT, KBL).
> > i915_gem_context_set_load_type will select optimum configuration from
> > pre-defined optimum configuration table(opt_config).
> >
> > It also introduce flag update_render_config which can set by any governor.
> >
> > v2:
> >   * Move static optimum_config to device init time.
> >   * Rename function to appropriate name, fix data types and patch ordering.
> >   * Rename prev_load_type to pending_load_type. (Tvrtko Ursulin)
> >
> > v3:
> >   * Add safe guard check in i915_gem_context_set_load_type.
> >   * Rename struct from optimum_config to i915_sseu_optimum_config to
> >     avoid namespace clashes.
> >   * Reduces memcpy for space efficient.
> >   * Rebase.
> >   * Improved commit message. (Tvrtko Ursulin)
> >
> > Cc: Kedar J Karanje <kedar.j.karanje@intel.com>
> > Cc: Yogesh Marathe <yogesh.marathe@intel.com>
> > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> 
> Again for the record no, I did not r-b this.

Sorry, I wasn't aware that r-b has to be added by reviewer. Will take care in next patch sets.
> 
> > Signed-off-by: Praveen Diwakar <praveen.diwakar@intel.com>
> > Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar@intel.com>
> > Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_drv.h          |  3 ++
> >   drivers/gpu/drm/i915/i915_gem_context.c  | 18 ++++++++++++
> >   drivers/gpu/drm/i915/i915_gem_context.h  | 25 +++++++++++++++++
> >   drivers/gpu/drm/i915/intel_device_info.c | 47
> ++++++++++++++++++++++++++++++--
> >   drivers/gpu/drm/i915/intel_lrc.c         |  4 ++-
> >   5 files changed, 94 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > b/drivers/gpu/drm/i915/i915_drv.h index 4aca534..4b9a8c5 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1681,6 +1681,9 @@ struct drm_i915_private {
> >   	struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /*
> assume 965 */
> >   	int num_fence_regs; /* 8 on pre-965, 16 otherwise */
> >
> > +	/* optimal slice/subslice/EU configration state */
> > +	struct i915_sseu_optimum_config opt_config[LOAD_TYPE_LAST];
> 
> Make it a pointer to struct i915_sseu_optimum_config.

Will incorporate this and other reviews in next patch.
> 
> > +
> >   	unsigned int fsb_freq, mem_freq, is_ddr3;
> >   	unsigned int skl_preferred_vco_freq;
> >   	unsigned int max_cdclk_freq;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c
> > b/drivers/gpu/drm/i915/i915_gem_context.c
> > index d040858..c0ced72 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -392,10 +392,28 @@ i915_gem_create_context(struct drm_i915_private
> *dev_priv,
> >   	ctx->subslice_cnt = hweight8(
> >   			INTEL_INFO(dev_priv)->sseu.subslice_mask[0]);
> >   	ctx->eu_cnt = INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
> > +	ctx->load_type = 0;
> > +	ctx->pending_load_type = 0;
> 
> Not needed because we zero the allocation and probably depend on untouched
> fields being zero elsewhere.
> 
> >
> >   	return ctx;
> >   }
> >
> > +
> > +void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
> > +		enum gem_load_type type)
> > +{
> > +	struct drm_i915_private *dev_priv = ctx->i915;
> > +
> > +	if (GEM_WARN_ON(type > LOAD_TYPE_LAST))
> > +		return;
> > +
> > +	/* Call opt_config to get correct configuration for eu,slice,subslice */
> > +	ctx->slice_cnt = dev_priv->opt_config[type].slice;
> > +	ctx->subslice_cnt = dev_priv->opt_config[type].subslice;
> > +	ctx->eu_cnt = dev_priv->opt_config[type].eu;
> > +	ctx->pending_load_type = type;
> > +}
> > +
> >   /**
> >    * i915_gem_context_create_gvt - create a GVT GEM context
> >    * @dev: drm device *
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.h
> > b/drivers/gpu/drm/i915/i915_gem_context.h
> > index e000530..a0db13c 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> > @@ -53,6 +53,19 @@ struct intel_context_ops {
> >   	void (*destroy)(struct intel_context *ce);
> >   };
> >
> > +enum gem_load_type {
> > +	LOAD_TYPE_LOW,
> > +	LOAD_TYPE_MEDIUM,
> > +	LOAD_TYPE_HIGH,
> > +	LOAD_TYPE_LAST
> > +};
> > +
> > +struct i915_sseu_optimum_config {
> > +	u8 slice;
> > +	u8 subslice;
> > +	u8 eu;
> > +};
> > +
> >   /**
> >    * struct i915_gem_context - client state
> >    *
> > @@ -208,6 +221,16 @@ struct i915_gem_context {
> >
> >   	/** eu_cnt: used to set the # of eu to be enabled. */
> >   	u8 eu_cnt;
> > +
> > +	/** load_type: The designated load_type (high/medium/low) for a given
> > +	 * number of pending commands in the command queue.
> > +	 */
> > +	enum gem_load_type load_type;
> > +
> > +	/** pending_load_type: The earlier load type that the GPU was
> configured
> > +	 * for (high/medium/low).
> > +	 */
> > +	enum gem_load_type pending_load_type;
> >   };
> >
> >   static inline bool i915_gem_context_is_closed(const struct
> > i915_gem_context *ctx) @@ -336,6 +359,8 @@ int
> i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> >   				    struct drm_file *file_priv);
> >   int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
> >   				       struct drm_file *file);
> > +void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
> > +		enum gem_load_type type);
> >
> >   struct i915_gem_context *
> >   i915_gem_context_create_kernel(struct drm_i915_private *i915, int
> > prio); diff --git a/drivers/gpu/drm/i915/intel_device_info.c
> > b/drivers/gpu/drm/i915/intel_device_info.c
> > index 0ef0c64..33c6310 100644
> > --- a/drivers/gpu/drm/i915/intel_device_info.c
> > +++ b/drivers/gpu/drm/i915/intel_device_info.c
> > @@ -741,6 +741,28 @@ void intel_device_info_runtime_init(struct
> intel_device_info *info)
> >   		container_of(info, struct drm_i915_private, info);
> >   	enum pipe pipe;
> >
> > +	struct i915_sseu_optimum_config *opt_config = NULL;
> > +	/* static table of slice/subslice/EU for Cherryview */
> > +	struct i915_sseu_optimum_config chv_config[LOAD_TYPE_LAST] = {
> > +		{1, 1, 4},	/* Low */
> > +		{1, 1, 6},	/* Medium */
> > +		{1, 2, 6}	/* High */
> > +	};
> > +
> > +	/* static table of slice/subslice/EU for KBL GT2 */
> > +	struct i915_sseu_optimum_config kbl_gt2_config[LOAD_TYPE_LAST] = {
> > +		{1, 3, 2},	/* Low */
> > +		{1, 3, 4},	/* Medium */
> > +		{1, 3, 8}	/* High */
> > +	};
> > +
> > +	/* static table of slice/subslice/EU for KBL GT3 */
> > +	struct i915_sseu_optimum_config kbl_gt3_config[LOAD_TYPE_LAST] = {
> > +		{2, 3, 4},	/* Low */
> > +		{2, 3, 6},	/* Medium */
> > +		{2, 3, 8}	/* High */
> > +	};
> 
> Move these to file scope as static const.
> 
> > +
> >   	if (INTEL_GEN(dev_priv) >= 10) {
> >   		for_each_pipe(dev_priv, pipe)
> >   			info->num_scalers[pipe] = 2;
> > @@ -840,17 +862,38 @@ void intel_device_info_runtime_init(struct
> intel_device_info *info)
> >   	/* Initialize slice/subslice/EU info */
> >   	if (IS_HASWELL(dev_priv))
> >   		haswell_sseu_info_init(dev_priv);
> > -	else if (IS_CHERRYVIEW(dev_priv))
> > +	else if (IS_CHERRYVIEW(dev_priv)) {
> >   		cherryview_sseu_info_init(dev_priv);
> > +		opt_config = chv_config;
> 
> For my idea to work here and below add:
> 
> BUILD_BUG_ON(ARRAY_SIZE(chv_config) != LOAD_TYPE_LAST);
> 
> > +	}
> >   	else if (IS_BROADWELL(dev_priv))
> >   		broadwell_sseu_info_init(dev_priv);
> > -	else if (INTEL_GEN(dev_priv) == 9)
> > +	else if (INTEL_GEN(dev_priv) == 9) {
> >   		gen9_sseu_info_init(dev_priv);
> > +
> > +		if (IS_KABYLAKE(dev_priv)) {
> > +			switch (info->gt) {
> > +			default:
> > +				MISSING_CASE(info->gt);
> 
> This should probably be silent since absence of sseu tuning is not an error on
> other platforms.
> 
> > +				/* fall through */
> > +			case 2:
> > +				opt_config = kbl_gt2_config;
> > +			break;
> > +			case 3:
> > +				opt_config = kbl_gt3_config;
> 
> Would Kabylake table work for other Gen9 platforms?

The table is designed from heuristic data (collected data from gen9 GT3 based).
It should work ideally for other platform which is having 48 EUs
(2-slices, 3-sub-slices, 8-EUs);  but I haven't verified on other gen9 config.
I will check and update further in next patch set.

Thank you for your valuable feedback!

Regards,  Ankit
> 
> > +			break;
> > +			}
> > +		}
> > +	}
> >   	else if (INTEL_GEN(dev_priv) == 10)
> >   		gen10_sseu_info_init(dev_priv);
> >   	else if (INTEL_GEN(dev_priv) >= 11)
> >   		gen11_sseu_info_init(dev_priv);
> >
> > +	if (opt_config)
> > +		memcpy(dev_priv->opt_config, opt_config, LOAD_TYPE_LAST *
> > +			sizeof(struct i915_sseu_optimum_config));
> 
> And if you agree with my other suggestions then you can replace a copy with:
> 
> 	dev_priv->opt_config = opt_config;
> 
> Or in fact, even do it directly on assignment sites above.
> 
> > +
> >   	/* Initialize command stream timestamp frequency */
> >   	info->cs_timestamp_frequency_khz =
> read_timestamp_frequency(dev_priv);
> >   }
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c
> > b/drivers/gpu/drm/i915/intel_lrc.c
> > index a17f676..7fb9cd2 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -427,11 +427,13 @@ static u64 execlists_update_context(struct
> > i915_request *rq)
> >
> >   	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
> >   	/* FIXME: To avoid stale rpcs config, move it to context_pin */
> > -	if (ctx->pid && ctx->name && (rq->engine->id == RCS)) {
> > +	if (ctx->pid && ctx->name && (rq->engine->id == RCS) &&
> > +			(ctx->load_type != ctx->pending_load_type)) {
> >   		rpcs_config = make_rpcs(ctx->i915);
> >   		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
> >   		CTX_REG(reg_state, CTX_R_PWR_CLK_STATE,
> GEN8_R_PWR_CLK_STATE,
> >   				rpcs_config);
> > +		ctx->load_type = ctx->pending_load_type;
> >   	}
> >
> >   	/* True 32b PPGTT with dynamic page allocation: update PDP
> >
> 
> Regards,
> 
> Tvrtko
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4aca534..4b9a8c5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1681,6 +1681,9 @@  struct drm_i915_private {
 	struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
 	int num_fence_regs; /* 8 on pre-965, 16 otherwise */
 
+	/* optimal slice/subslice/EU configration state */
+	struct i915_sseu_optimum_config opt_config[LOAD_TYPE_LAST];
+
 	unsigned int fsb_freq, mem_freq, is_ddr3;
 	unsigned int skl_preferred_vco_freq;
 	unsigned int max_cdclk_freq;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index d040858..c0ced72 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -392,10 +392,28 @@  i915_gem_create_context(struct drm_i915_private *dev_priv,
 	ctx->subslice_cnt = hweight8(
 			INTEL_INFO(dev_priv)->sseu.subslice_mask[0]);
 	ctx->eu_cnt = INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
+	ctx->load_type = 0;
+	ctx->pending_load_type = 0;
 
 	return ctx;
 }
 
+
+void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
+		enum gem_load_type type)
+{
+	struct drm_i915_private *dev_priv = ctx->i915;
+
+	if (GEM_WARN_ON(type > LOAD_TYPE_LAST))
+		return;
+
+	/* Call opt_config to get correct configuration for eu,slice,subslice */
+	ctx->slice_cnt = dev_priv->opt_config[type].slice;
+	ctx->subslice_cnt = dev_priv->opt_config[type].subslice;
+	ctx->eu_cnt = dev_priv->opt_config[type].eu;
+	ctx->pending_load_type = type;
+}
+
 /**
  * i915_gem_context_create_gvt - create a GVT GEM context
  * @dev: drm device *
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index e000530..a0db13c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -53,6 +53,19 @@  struct intel_context_ops {
 	void (*destroy)(struct intel_context *ce);
 };
 
+enum gem_load_type {
+	LOAD_TYPE_LOW,
+	LOAD_TYPE_MEDIUM,
+	LOAD_TYPE_HIGH,
+	LOAD_TYPE_LAST
+};
+
+struct i915_sseu_optimum_config {
+	u8 slice;
+	u8 subslice;
+	u8 eu;
+};
+
 /**
  * struct i915_gem_context - client state
  *
@@ -208,6 +221,16 @@  struct i915_gem_context {
 
 	/** eu_cnt: used to set the # of eu to be enabled. */
 	u8 eu_cnt;
+
+	/** load_type: The designated load_type (high/medium/low) for a given
+	 * number of pending commands in the command queue.
+	 */
+	enum gem_load_type load_type;
+
+	/** pending_load_type: The earlier load type that the GPU was configured
+	 * for (high/medium/low).
+	 */
+	enum gem_load_type pending_load_type;
 };
 
 static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
@@ -336,6 +359,8 @@  int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file_priv);
 int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
 				       struct drm_file *file);
+void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
+		enum gem_load_type type);
 
 struct i915_gem_context *
 i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 0ef0c64..33c6310 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -741,6 +741,28 @@  void intel_device_info_runtime_init(struct intel_device_info *info)
 		container_of(info, struct drm_i915_private, info);
 	enum pipe pipe;
 
+	struct i915_sseu_optimum_config *opt_config = NULL;
+	/* static table of slice/subslice/EU for Cherryview */
+	struct i915_sseu_optimum_config chv_config[LOAD_TYPE_LAST] = {
+		{1, 1, 4},	/* Low */
+		{1, 1, 6},	/* Medium */
+		{1, 2, 6}	/* High */
+	};
+
+	/* static table of slice/subslice/EU for KBL GT2 */
+	struct i915_sseu_optimum_config kbl_gt2_config[LOAD_TYPE_LAST] = {
+		{1, 3, 2},	/* Low */
+		{1, 3, 4},	/* Medium */
+		{1, 3, 8}	/* High */
+	};
+
+	/* static table of slice/subslice/EU for KBL GT3 */
+	struct i915_sseu_optimum_config kbl_gt3_config[LOAD_TYPE_LAST] = {
+		{2, 3, 4},	/* Low */
+		{2, 3, 6},	/* Medium */
+		{2, 3, 8}	/* High */
+	};
+
 	if (INTEL_GEN(dev_priv) >= 10) {
 		for_each_pipe(dev_priv, pipe)
 			info->num_scalers[pipe] = 2;
@@ -840,17 +862,38 @@  void intel_device_info_runtime_init(struct intel_device_info *info)
 	/* Initialize slice/subslice/EU info */
 	if (IS_HASWELL(dev_priv))
 		haswell_sseu_info_init(dev_priv);
-	else if (IS_CHERRYVIEW(dev_priv))
+	else if (IS_CHERRYVIEW(dev_priv)) {
 		cherryview_sseu_info_init(dev_priv);
+		opt_config = chv_config;
+	}
 	else if (IS_BROADWELL(dev_priv))
 		broadwell_sseu_info_init(dev_priv);
-	else if (INTEL_GEN(dev_priv) == 9)
+	else if (INTEL_GEN(dev_priv) == 9) {
 		gen9_sseu_info_init(dev_priv);
+
+		if (IS_KABYLAKE(dev_priv)) {
+			switch (info->gt) {
+			default:
+				MISSING_CASE(info->gt);
+				/* fall through */
+			case 2:
+				opt_config = kbl_gt2_config;
+			break;
+			case 3:
+				opt_config = kbl_gt3_config;
+			break;
+			}
+		}
+	}
 	else if (INTEL_GEN(dev_priv) == 10)
 		gen10_sseu_info_init(dev_priv);
 	else if (INTEL_GEN(dev_priv) >= 11)
 		gen11_sseu_info_init(dev_priv);
 
+	if (opt_config)
+		memcpy(dev_priv->opt_config, opt_config, LOAD_TYPE_LAST *
+			sizeof(struct i915_sseu_optimum_config));
+
 	/* Initialize command stream timestamp frequency */
 	info->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a17f676..7fb9cd2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -427,11 +427,13 @@  static u64 execlists_update_context(struct i915_request *rq)
 
 	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
 	/* FIXME: To avoid stale rpcs config, move it to context_pin */
-	if (ctx->pid && ctx->name && (rq->engine->id == RCS)) {
+	if (ctx->pid && ctx->name && (rq->engine->id == RCS) &&
+			(ctx->load_type != ctx->pending_load_type)) {
 		rpcs_config = make_rpcs(ctx->i915);
 		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
 		CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
 				rpcs_config);
+		ctx->load_type = ctx->pending_load_type;
 	}
 
 	/* True 32b PPGTT with dynamic page allocation: update PDP