Message ID | 1541477601-10883-4-git-send-email-ankit.p.navik@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Dynamic EU configuration of Slice/Subslice/EU. | expand |
On 06/11/2018 04:13, Ankit Navik wrote: > From: Praveen Diwakar <praveen.diwakar@intel.com> > > This patch will select optimum eu/slice/sub-slice configuration based on > type of load (low, medium, high) as input. > Based on our readings and experiments we have predefined set of optimum > configuration for each platform(CHT, KBL). > i915_gem_context_set_load_type will select optimum configuration from > pre-defined optimum configuration table(opt_config). > > It also introduce flag update_render_config which can set by any governor. Patch changelog is missing here and in the first patch. > Signed-off-by: Praveen Diwakar <praveen.diwakar@intel.com> > Signed-off-by: Yogesh Marathe <yogesh.marathe@intel.com> > Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar@intel.com> > Signed-off-by: Kedar J Karanje <kedar.j.karanje@intel.com> > Signed-off-by: Ankit Navik <ankit.p.navik@intel.com> > Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 2 ++ > drivers/gpu/drm/i915/i915_gem_context.c | 21 +++++++++++++++ > drivers/gpu/drm/i915/i915_gem_context.h | 30 ++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_device_info.c | 44 ++++++++++++++++++++++++++++++-- > drivers/gpu/drm/i915/intel_lrc.c | 4 ++- > 5 files changed, 98 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 137ec33..f1b16d0 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1686,6 +1686,8 @@ struct drm_i915_private { > struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */ > int num_fence_regs; /* 8 on pre-965, 16 otherwise */ > > + struct optimum_config opt_config[LOAD_TYPE_MAX]; > + > unsigned int fsb_freq, mem_freq, is_ddr3; > unsigned int skl_preferred_vco_freq; > unsigned int max_cdclk_freq; > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index d040858..28ff868 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -388,14 +388,35 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, > > trace_i915_context_create(ctx); > atomic_set(&ctx->req_cnt, 0); > + ctx->update_render_config = 0; > ctx->slice_cnt = hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask); > ctx->subslice_cnt = hweight8( > INTEL_INFO(dev_priv)->sseu.subslice_mask[0]); > ctx->eu_cnt = INTEL_INFO(dev_priv)->sseu.eu_per_subslice; > + ctx->load_type = 0; > + ctx->pending_load_type = 0; > > return ctx; > } > > + > +void i915_gem_context_set_load_type(struct i915_gem_context *ctx, > + enum gem_load_type type) > +{ > + struct drm_i915_private *dev_priv = ctx->i915; > + To be safe I suggest: if (GEM_WARN_ON(type > LOAD_TYPE_MAX)) return; > + /* Call opt_config to get correct configuration for eu,slice,subslice */ > + ctx->slice_cnt = dev_priv->opt_config[type].slice; > + ctx->subslice_cnt = dev_priv->opt_config[type].subslice; > + ctx->eu_cnt = dev_priv->opt_config[type].eu; > + > + /* Enabling this to update the rpcs */ > + if (ctx->pending_load_type != type) > + ctx->update_render_config = 1; > + > + ctx->pending_load_type = type; > +} > + > /** > * i915_gem_context_create_gvt - create a GVT GEM context > * @dev: drm device * > diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h > index 2b3bf09..9345aa3 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.h > +++ b/drivers/gpu/drm/i915/i915_gem_context.h > @@ -53,6 +53,19 @@ struct intel_context_ops { > void (*destroy)(struct intel_context *ce); > }; > > +enum gem_load_type { > + LOAD_TYPE_LOW, > + LOAD_TYPE_MEDIUM, > + LOAD_TYPE_HIGH, > + LOAD_TYPE_MAX Max is actually max plus one. Maybe instead have it as: LOAD_TYPE_LAST = LOAD_TYPE_HIGH, > +}; > + > +struct optimum_config { Struct name needs to be more specific to avoid namespace clashes. Perhaps i915_sseu_optimum_config? > + u8 slice; > + u8 subslice; > + u8 eu; > +}; > + > /** > * struct i915_gem_context - client state > * > @@ -209,6 +222,21 @@ struct i915_gem_context { > > /** eu_cnt: used to set the # of eu to be enabled. */ > u8 eu_cnt; > + > + /** update_render_config: to track the updates to the render > + * configuration (S/SS/EU Configuration on the GPU) > + */ > + bool update_render_config; > + > + /** load_type: The designated load_type (high/medium/low) for a given > + * number of pending commands in the command queue. > + */ > + enum gem_load_type load_type; load_type looks unused in this patch. > + > + /** pending_load_type: The earlier load type that the GPU was configured > + * for (high/medium/low). > + */ > + enum gem_load_type pending_load_type; > }; > > static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx) > @@ -337,6 +365,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_priv); > int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, > struct drm_file *file); > +void i915_gem_context_set_load_type(struct i915_gem_context *ctx, > + enum gem_load_type type); > > struct i915_gem_context * > i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio); > diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c > index 0ef0c64..4c6224a 100644 > --- a/drivers/gpu/drm/i915/intel_device_info.c > +++ b/drivers/gpu/drm/i915/intel_device_info.c > @@ -741,6 +741,27 @@ void intel_device_info_runtime_init(struct intel_device_info *info) > container_of(info, struct drm_i915_private, info); > enum pipe pipe; > > + /* static table of slice/subslice/EU for Cherrytrial */ Cherryview? At least a typo in trail. > + struct optimum_config chv_config[LOAD_TYPE_MAX] = { > + {1, 1, 4}, /* Low */ > + {1, 1, 6}, /* Medium */ > + {1, 2, 6} /* High */ > + }; > + > + /* static table of slice/subslice/EU for KBL GT2 */ > + struct optimum_config kbl_gt2_config[LOAD_TYPE_MAX] = { > + {1, 3, 2}, /* Low */ > + {1, 3, 4}, /* Medium */ > + {1, 3, 8} /* High */ > + }; > + > + /* static table of slice/subslice/EU for KBL GT3 */ > + struct optimum_config kbl_gt3_config[LOAD_TYPE_MAX] = { > + {2, 3, 4}, /* Low */ > + {2, 3, 6}, /* Medium */ > + {2, 3, 8} /* High */ > + }; > + > if (INTEL_GEN(dev_priv) >= 10) { > for_each_pipe(dev_priv, pipe) > info->num_scalers[pipe] = 2; > @@ -840,12 +861,31 @@ void intel_device_info_runtime_init(struct intel_device_info *info) > /* Initialize slice/subslice/EU info */ > if (IS_HASWELL(dev_priv)) > haswell_sseu_info_init(dev_priv); > - else if (IS_CHERRYVIEW(dev_priv)) > + else if (IS_CHERRYVIEW(dev_priv)) { > cherryview_sseu_info_init(dev_priv); > + memcpy(dev_priv->opt_config, chv_config, sizeof(chv_config)); > + } > else if (IS_BROADWELL(dev_priv)) > broadwell_sseu_info_init(dev_priv); > - else if (INTEL_GEN(dev_priv) == 9) > + else if (INTEL_GEN(dev_priv) == 9) { > gen9_sseu_info_init(dev_priv); > + > + if (IS_KABYLAKE(dev_priv)) { > + switch (info->gt) { > + default: > + MISSING_CASE(info->gt); > + /* fall through */ > + case 2: > + memcpy(dev_priv->opt_config, kbl_gt2_config, > + sizeof(kbl_gt2_config)); > + break; > + case 3: > + memcpy(dev_priv->opt_config, kbl_gt3_config, > + sizeof(kbl_gt3_config)); As an alternative to many memcpy you could have: opt_config = NULL; ... opt_config = kbl_gt2_config; ... if (opt_config) memcpy(dev_priv->opt_config, opt_config, ...); Or you could actually just store the pointer to rodata table in dev_priv, if you moved the tables to global scope as static const. Both approaches are increasingly space efficient I think. > + break; > + } > + } > + } > else if (INTEL_GEN(dev_priv) == 10) > gen10_sseu_info_init(dev_priv); > else if (INTEL_GEN(dev_priv) >= 11) > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index a8ab71a..4d000eb 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -427,11 +427,13 @@ static u64 execlists_update_context(struct i915_request *rq) > > reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail); > /* FIXME: To avoid stale rpcs config, move it to context_pin */ > - if (ctx->pid && ctx->name && (rq->engine->id == RCS)) { > + if (ctx->pid && ctx->name && (rq->engine->id == RCS) && > + ctx->update_render_config) { Could you do if ctx->load_type != ctx->pending_load_type here and lose the need for ctx->update_render_config? > rpcs_config = make_rpcs(ctx->i915); > reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); > CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, > rpcs_config); > + ctx->update_render_config = 0; In that case would need to do ctx->load_type = ctx->pending_load_type here. Regards, Tvrtko > } > > /* True 32b PPGTT with dynamic page allocation: update PDP >
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 137ec33..f1b16d0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1686,6 +1686,8 @@ struct drm_i915_private { struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */ int num_fence_regs; /* 8 on pre-965, 16 otherwise */ + struct optimum_config opt_config[LOAD_TYPE_MAX]; + unsigned int fsb_freq, mem_freq, is_ddr3; unsigned int skl_preferred_vco_freq; unsigned int max_cdclk_freq; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index d040858..28ff868 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -388,14 +388,35 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, trace_i915_context_create(ctx); atomic_set(&ctx->req_cnt, 0); + ctx->update_render_config = 0; ctx->slice_cnt = hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask); ctx->subslice_cnt = hweight8( INTEL_INFO(dev_priv)->sseu.subslice_mask[0]); ctx->eu_cnt = INTEL_INFO(dev_priv)->sseu.eu_per_subslice; + ctx->load_type = 0; + ctx->pending_load_type = 0; return ctx; } + +void i915_gem_context_set_load_type(struct i915_gem_context *ctx, + enum gem_load_type type) +{ + struct drm_i915_private *dev_priv = ctx->i915; + + /* Call opt_config to get correct configuration for eu,slice,subslice */ + ctx->slice_cnt = dev_priv->opt_config[type].slice; + ctx->subslice_cnt = dev_priv->opt_config[type].subslice; + ctx->eu_cnt = dev_priv->opt_config[type].eu; + + /* Enabling this to update the rpcs */ + if (ctx->pending_load_type != type) + ctx->update_render_config = 1; + + ctx->pending_load_type = type; +} + /** * i915_gem_context_create_gvt - create a GVT GEM context * @dev: drm device * diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 2b3bf09..9345aa3 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -53,6 +53,19 @@ struct intel_context_ops { void (*destroy)(struct intel_context *ce); }; +enum gem_load_type { + LOAD_TYPE_LOW, + LOAD_TYPE_MEDIUM, + LOAD_TYPE_HIGH, + LOAD_TYPE_MAX +}; + +struct optimum_config { + u8 slice; + u8 subslice; + u8 eu; +}; + /** * struct i915_gem_context - client state * @@ -209,6 +222,21 @@ struct i915_gem_context { /** eu_cnt: used to set the # of eu to be enabled. */ u8 eu_cnt; + + /** update_render_config: to track the updates to the render + * configuration (S/SS/EU Configuration on the GPU) + */ + bool update_render_config; + + /** load_type: The designated load_type (high/medium/low) for a given + * number of pending commands in the command queue. + */ + enum gem_load_type load_type; + + /** pending_load_type: The earlier load type that the GPU was configured + * for (high/medium/low). + */ + enum gem_load_type pending_load_type; }; static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx) @@ -337,6 +365,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +void i915_gem_context_set_load_type(struct i915_gem_context *ctx, + enum gem_load_type type); struct i915_gem_context * i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio); diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 0ef0c64..4c6224a 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -741,6 +741,27 @@ void intel_device_info_runtime_init(struct intel_device_info *info) container_of(info, struct drm_i915_private, info); enum pipe pipe; + /* static table of slice/subslice/EU for Cherrytrial */ + struct optimum_config chv_config[LOAD_TYPE_MAX] = { + {1, 1, 4}, /* Low */ + {1, 1, 6}, /* Medium */ + {1, 2, 6} /* High */ + }; + + /* static table of slice/subslice/EU for KBL GT2 */ + struct optimum_config kbl_gt2_config[LOAD_TYPE_MAX] = { + {1, 3, 2}, /* Low */ + {1, 3, 4}, /* Medium */ + {1, 3, 8} /* High */ + }; + + /* static table of slice/subslice/EU for KBL GT3 */ + struct optimum_config kbl_gt3_config[LOAD_TYPE_MAX] = { + {2, 3, 4}, /* Low */ + {2, 3, 6}, /* Medium */ + {2, 3, 8} /* High */ + }; + if (INTEL_GEN(dev_priv) >= 10) { for_each_pipe(dev_priv, pipe) info->num_scalers[pipe] = 2; @@ -840,12 +861,31 @@ void intel_device_info_runtime_init(struct intel_device_info *info) /* Initialize slice/subslice/EU info */ if (IS_HASWELL(dev_priv)) haswell_sseu_info_init(dev_priv); - else if (IS_CHERRYVIEW(dev_priv)) + else if (IS_CHERRYVIEW(dev_priv)) { cherryview_sseu_info_init(dev_priv); + memcpy(dev_priv->opt_config, chv_config, sizeof(chv_config)); + } else if (IS_BROADWELL(dev_priv)) broadwell_sseu_info_init(dev_priv); - else if (INTEL_GEN(dev_priv) == 9) + else if (INTEL_GEN(dev_priv) == 9) { gen9_sseu_info_init(dev_priv); + + if (IS_KABYLAKE(dev_priv)) { + switch (info->gt) { + default: + MISSING_CASE(info->gt); + /* fall through */ + case 2: + memcpy(dev_priv->opt_config, kbl_gt2_config, + sizeof(kbl_gt2_config)); + break; + case 3: + memcpy(dev_priv->opt_config, kbl_gt3_config, + sizeof(kbl_gt3_config)); + break; + } + } + } else if (INTEL_GEN(dev_priv) == 10) gen10_sseu_info_init(dev_priv); else if (INTEL_GEN(dev_priv) >= 11) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a8ab71a..4d000eb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -427,11 +427,13 @@ static u64 execlists_update_context(struct i915_request *rq) reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail); /* FIXME: To avoid stale rpcs config, move it to context_pin */ - if (ctx->pid && ctx->name && (rq->engine->id == RCS)) { + if (ctx->pid && ctx->name && (rq->engine->id == RCS) && + ctx->update_render_config) { rpcs_config = make_rpcs(ctx->i915); reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, rpcs_config); + ctx->update_render_config = 0; } /* True 32b PPGTT with dynamic page allocation: update PDP