Message ID | 20180522180002.11522-3-lionel.g.landwerlin@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 22/05/2018 18:59, Lionel Landwerlin wrote: > From: Chris Wilson <chris@chris-wilson.co.uk> > > We want to expose the ability to reconfigure the slices, subslice and > eu per context and per engine. To facilitate that, store the current > configuration on the context for each engine, which is initially set > to the device default upon creation. > > v2: record sseu configuration per context & engine (Chris) > > v3: introduce the i915_gem_context_sseu to store powergating > programming, sseu_dev_info has grown quite a bit (Lionel) > > v4: rename i915_gem_sseu into intel_sseu (Chris) > use to_intel_context() (Chris) > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > --- > drivers/gpu/drm/i915/i915_gem_context.c | 9 +++++++++ > drivers/gpu/drm/i915/i915_gem_context.h | 17 +++++++++++++++++ > drivers/gpu/drm/i915/i915_request.h | 13 +++++++++++++ > drivers/gpu/drm/i915/intel_lrc.c | 22 +++++++++++----------- > 4 files changed, 50 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index b69b18ef8120..ea9ae1046827 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -260,6 +260,8 @@ __create_hw_context(struct drm_i915_private *dev_priv, > struct drm_i915_file_private *file_priv) > { > struct i915_gem_context *ctx; > + struct intel_engine_cs *engine; > + enum intel_engine_id id; > unsigned int n; > int ret; > > @@ -315,6 +317,13 @@ __create_hw_context(struct drm_i915_private *dev_priv, > * is no remap info, it will be a NOP. */ > ctx->remap_slice = ALL_L3_SLICES(dev_priv); > > + /* On all engines, use the whole device by default */ > + for_each_engine(engine, dev_priv, id) { > + struct intel_context *ce = to_intel_context(ctx, engine); > + > + ce->sseu = intel_sseu_from_device_sseu(&INTEL_INFO(dev_priv)->sseu); > + } > + > i915_gem_context_set_bannable(ctx); > ctx->ring_size = 4 * PAGE_SIZE; > ctx->desc_template = > diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h > index c3262b4dd2ee..b18d870c4932 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.h > +++ b/drivers/gpu/drm/i915/i915_gem_context.h > @@ -30,6 +30,7 @@ > #include <linux/radix-tree.h> > > #include "i915_gem.h" > +#include "intel_device_info.h" > > struct pid; > > @@ -157,6 +158,9 @@ struct i915_gem_context { > int pin_count; > > const struct intel_context_ops *ops; > + > + /** sseu: Control eu/slice partitioning */ > + union intel_sseu sseu; > } __engine[I915_NUM_ENGINES]; > > /** ring_size: size for allocating the per-engine ring buffer */ > @@ -335,4 +339,17 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) > kref_put(&ctx->ref, i915_gem_context_release); > } > > +static inline union intel_sseu > +intel_sseu_from_device_sseu(const struct sseu_dev_info *sseu) > +{ > + union intel_sseu value = { > + .slice_mask = sseu->slice_mask, > + .subslice_mask = sseu->subslice_mask[0], > + .min_eus_per_subslice = sseu->max_eus_per_subslice, > + .max_eus_per_subslice = sseu->max_eus_per_subslice, > + }; > + Why do we need two internal representations for SSEU configuration? (Comes back later.) Ok, the INTEL_INFO one is much larger and intel_sseu presumably is designed to only contain data which is dynamically changeable? > + return value; > +} > + > #endif /* !__I915_GEM_CONTEXT_H__ */ > diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h > index 1bbbb7a9fa03..aca60895582b 100644 > --- a/drivers/gpu/drm/i915/i915_request.h > +++ b/drivers/gpu/drm/i915/i915_request.h > @@ -39,6 +39,19 @@ struct drm_i915_gem_object; > struct i915_request; > struct i915_timeline; > > +/* > + * Powergating configuration for a particular (context,engine). > + */ > +union intel_sseu { > + struct { > + u8 slice_mask; > + u8 subslice_mask; > + u8 min_eus_per_subslice; > + u8 max_eus_per_subslice; > + }; > + u64 value; Why is the union needed? > +}; > + > struct intel_wait { > struct rb_node node; > struct task_struct *tsk; > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index c2500c209c63..f875be03eadb 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -2481,8 +2481,8 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine) > return logical_ring_init(engine); > } > > -static u32 > -make_rpcs(struct drm_i915_private *dev_priv) > +static u32 make_rpcs(const struct sseu_dev_info *sseu, > + union intel_sseu ctx_sseu) > { > u32 rpcs = 0; > > @@ -2492,24 +2492,23 @@ make_rpcs(struct drm_i915_private *dev_priv) > * must make an explicit request through RPCS for full > * enablement. > */ > - if (INTEL_INFO(dev_priv)->sseu.has_slice_pg) { > + if (sseu->has_slice_pg) { > rpcs |= GEN8_RPCS_S_CNT_ENABLE; > - rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask) << > - GEN8_RPCS_S_CNT_SHIFT; > + rpcs |= hweight8(ctx_sseu.slice_mask) << GEN8_RPCS_S_CNT_SHIFT; > rpcs |= GEN8_RPCS_ENABLE; > } > > - if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { > + if (sseu->has_subslice_pg) { > rpcs |= GEN8_RPCS_SS_CNT_ENABLE; > - rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) << > + rpcs |= hweight8(ctx_sseu.subslice_mask) << > GEN8_RPCS_SS_CNT_SHIFT; > rpcs |= GEN8_RPCS_ENABLE; > } > > - if (INTEL_INFO(dev_priv)->sseu.has_eu_pg) { > - rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice << > + if (sseu->has_eu_pg) { > + rpcs |= ctx_sseu.min_eus_per_subslice << > GEN8_RPCS_EU_MIN_SHIFT; > - rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice << > + rpcs |= ctx_sseu.max_eus_per_subslice << > GEN8_RPCS_EU_MAX_SHIFT; > rpcs |= GEN8_RPCS_ENABLE; > } > @@ -2633,7 +2632,8 @@ static void execlists_init_reg_state(u32 *regs, > if (rcs) { > regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); > CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, > - make_rpcs(dev_priv)); > + make_rpcs(&INTEL_INFO(dev_priv)->sseu, > + ctx->__engine[engine->id].sseu)); to_intel_context(ctx, engine)->sseu ? > > i915_oa_init_reg_state(engine, ctx, regs); > } > Regards, Tvrtko
Quoting Tvrtko Ursulin (2018-05-23 15:54:04) > > On 22/05/2018 18:59, Lionel Landwerlin wrote: > > @@ -315,6 +317,13 @@ __create_hw_context(struct drm_i915_private *dev_priv, > > * is no remap info, it will be a NOP. */ > > ctx->remap_slice = ALL_L3_SLICES(dev_priv); > > > > + /* On all engines, use the whole device by default */ > > + for_each_engine(engine, dev_priv, id) { > > + struct intel_context *ce = to_intel_context(ctx, engine); > > + > > + ce->sseu = intel_sseu_from_device_sseu(&INTEL_INFO(dev_priv)->sseu); > > + } See a few lines above where we iterate to setup all ce. > > @@ -2633,7 +2632,8 @@ static void execlists_init_reg_state(u32 *regs, > > if (rcs) { > > regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); > > CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, > > - make_rpcs(dev_priv)); > > + make_rpcs(&INTEL_INFO(dev_priv)->sseu, > > + ctx->__engine[engine->id].sseu)); > > to_intel_context(ctx, engine)->sseu ? ce->sseu. Is it not passed in yet? Or is that part of the virtual engine patch? -Chris
On 23/05/18 15:54, Tvrtko Ursulin wrote: > > On 22/05/2018 18:59, Lionel Landwerlin wrote: >> From: Chris Wilson <chris@chris-wilson.co.uk> >> >> We want to expose the ability to reconfigure the slices, subslice and >> eu per context and per engine. To facilitate that, store the current >> configuration on the context for each engine, which is initially set >> to the device default upon creation. >> >> v2: record sseu configuration per context & engine (Chris) >> >> v3: introduce the i915_gem_context_sseu to store powergating >> programming, sseu_dev_info has grown quite a bit (Lionel) >> >> v4: rename i915_gem_sseu into intel_sseu (Chris) >> use to_intel_context() (Chris) >> >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >> --- >> drivers/gpu/drm/i915/i915_gem_context.c | 9 +++++++++ >> drivers/gpu/drm/i915/i915_gem_context.h | 17 +++++++++++++++++ >> drivers/gpu/drm/i915/i915_request.h | 13 +++++++++++++ >> drivers/gpu/drm/i915/intel_lrc.c | 22 +++++++++++----------- >> 4 files changed, 50 insertions(+), 11 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c >> b/drivers/gpu/drm/i915/i915_gem_context.c >> index b69b18ef8120..ea9ae1046827 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_context.c >> +++ b/drivers/gpu/drm/i915/i915_gem_context.c >> @@ -260,6 +260,8 @@ __create_hw_context(struct drm_i915_private >> *dev_priv, >> struct drm_i915_file_private *file_priv) >> { >> struct i915_gem_context *ctx; >> + struct intel_engine_cs *engine; >> + enum intel_engine_id id; >> unsigned int n; >> int ret; >> @@ -315,6 +317,13 @@ __create_hw_context(struct drm_i915_private >> *dev_priv, >> * is no remap info, it will be a NOP. */ >> ctx->remap_slice = ALL_L3_SLICES(dev_priv); >> + /* On all engines, use the whole device by default */ >> + for_each_engine(engine, dev_priv, id) { >> + struct intel_context *ce = to_intel_context(ctx, engine); >> + >> + ce->sseu = >> intel_sseu_from_device_sseu(&INTEL_INFO(dev_priv)->sseu); >> + } >> + >> i915_gem_context_set_bannable(ctx); >> ctx->ring_size = 4 * PAGE_SIZE; >> ctx->desc_template = >> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h >> b/drivers/gpu/drm/i915/i915_gem_context.h >> index c3262b4dd2ee..b18d870c4932 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_context.h >> +++ b/drivers/gpu/drm/i915/i915_gem_context.h >> @@ -30,6 +30,7 @@ >> #include <linux/radix-tree.h> >> #include "i915_gem.h" >> +#include "intel_device_info.h" >> struct pid; >> @@ -157,6 +158,9 @@ struct i915_gem_context { >> int pin_count; >> const struct intel_context_ops *ops; >> + >> + /** sseu: Control eu/slice partitioning */ >> + union intel_sseu sseu; >> } __engine[I915_NUM_ENGINES]; >> /** ring_size: size for allocating the per-engine ring buffer */ >> @@ -335,4 +339,17 @@ static inline void i915_gem_context_put(struct >> i915_gem_context *ctx) >> kref_put(&ctx->ref, i915_gem_context_release); >> } >> +static inline union intel_sseu >> +intel_sseu_from_device_sseu(const struct sseu_dev_info *sseu) >> +{ >> + union intel_sseu value = { >> + .slice_mask = sseu->slice_mask, >> + .subslice_mask = sseu->subslice_mask[0], >> + .min_eus_per_subslice = sseu->max_eus_per_subslice, >> + .max_eus_per_subslice = sseu->max_eus_per_subslice, >> + }; >> + > > Why do we need two internal representations for SSEU configuration? > > (Comes back later.) Ok, the INTEL_INFO one is much larger and > intel_sseu presumably is designed to only contain data which is > dynamically changeable? Correct. Copied in each context. > >> + return value; >> +} >> + >> #endif /* !__I915_GEM_CONTEXT_H__ */ >> diff --git a/drivers/gpu/drm/i915/i915_request.h >> b/drivers/gpu/drm/i915/i915_request.h >> index 1bbbb7a9fa03..aca60895582b 100644 >> --- a/drivers/gpu/drm/i915/i915_request.h >> +++ b/drivers/gpu/drm/i915/i915_request.h >> @@ -39,6 +39,19 @@ struct drm_i915_gem_object; >> struct i915_request; >> struct i915_timeline; >> +/* >> + * Powergating configuration for a particular (context,engine). >> + */ >> +union intel_sseu { >> + struct { >> + u8 slice_mask; >> + u8 subslice_mask; >> + u8 min_eus_per_subslice; >> + u8 max_eus_per_subslice; >> + }; >> + u64 value; > > Why is the union needed? Oh yeah, I should throw it out. > >> +}; >> + >> struct intel_wait { >> struct rb_node node; >> struct task_struct *tsk; >> diff --git a/drivers/gpu/drm/i915/intel_lrc.c >> b/drivers/gpu/drm/i915/intel_lrc.c >> index c2500c209c63..f875be03eadb 100644 >> --- a/drivers/gpu/drm/i915/intel_lrc.c >> +++ b/drivers/gpu/drm/i915/intel_lrc.c >> @@ -2481,8 +2481,8 @@ int logical_xcs_ring_init(struct >> intel_engine_cs *engine) >> return logical_ring_init(engine); >> } >> -static u32 >> -make_rpcs(struct drm_i915_private *dev_priv) >> +static u32 make_rpcs(const struct sseu_dev_info *sseu, >> + union intel_sseu ctx_sseu) >> { >> u32 rpcs = 0; >> @@ -2492,24 +2492,23 @@ make_rpcs(struct drm_i915_private *dev_priv) >> * must make an explicit request through RPCS for full >> * enablement. >> */ >> - if (INTEL_INFO(dev_priv)->sseu.has_slice_pg) { >> + if (sseu->has_slice_pg) { >> rpcs |= GEN8_RPCS_S_CNT_ENABLE; >> - rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask) << >> - GEN8_RPCS_S_CNT_SHIFT; >> + rpcs |= hweight8(ctx_sseu.slice_mask) << GEN8_RPCS_S_CNT_SHIFT; >> rpcs |= GEN8_RPCS_ENABLE; >> } >> - if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { >> + if (sseu->has_subslice_pg) { >> rpcs |= GEN8_RPCS_SS_CNT_ENABLE; >> - rpcs |= >> hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) << >> + rpcs |= hweight8(ctx_sseu.subslice_mask) << >> GEN8_RPCS_SS_CNT_SHIFT; >> rpcs |= GEN8_RPCS_ENABLE; >> } >> - if (INTEL_INFO(dev_priv)->sseu.has_eu_pg) { >> - rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice << >> + if (sseu->has_eu_pg) { >> + rpcs |= ctx_sseu.min_eus_per_subslice << >> GEN8_RPCS_EU_MIN_SHIFT; >> - rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice << >> + rpcs |= ctx_sseu.max_eus_per_subslice << >> GEN8_RPCS_EU_MAX_SHIFT; >> rpcs |= GEN8_RPCS_ENABLE; >> } >> @@ -2633,7 +2632,8 @@ static void execlists_init_reg_state(u32 *regs, >> if (rcs) { >> regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); >> CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, >> - make_rpcs(dev_priv)); >> + make_rpcs(&INTEL_INFO(dev_priv)->sseu, >> + ctx->__engine[engine->id].sseu)); > > to_intel_context(ctx, engine)->sseu ? Thanks. > >> i915_oa_init_reg_state(engine, ctx, regs); >> } >> > > Regards, > > Tvrtko >
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index b69b18ef8120..ea9ae1046827 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -260,6 +260,8 @@ __create_hw_context(struct drm_i915_private *dev_priv, struct drm_i915_file_private *file_priv) { struct i915_gem_context *ctx; + struct intel_engine_cs *engine; + enum intel_engine_id id; unsigned int n; int ret; @@ -315,6 +317,13 @@ __create_hw_context(struct drm_i915_private *dev_priv, * is no remap info, it will be a NOP. */ ctx->remap_slice = ALL_L3_SLICES(dev_priv); + /* On all engines, use the whole device by default */ + for_each_engine(engine, dev_priv, id) { + struct intel_context *ce = to_intel_context(ctx, engine); + + ce->sseu = intel_sseu_from_device_sseu(&INTEL_INFO(dev_priv)->sseu); + } + i915_gem_context_set_bannable(ctx); ctx->ring_size = 4 * PAGE_SIZE; ctx->desc_template = diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index c3262b4dd2ee..b18d870c4932 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -30,6 +30,7 @@ #include <linux/radix-tree.h> #include "i915_gem.h" +#include "intel_device_info.h" struct pid; @@ -157,6 +158,9 @@ struct i915_gem_context { int pin_count; const struct intel_context_ops *ops; + + /** sseu: Control eu/slice partitioning */ + union intel_sseu sseu; } __engine[I915_NUM_ENGINES]; /** ring_size: size for allocating the per-engine ring buffer */ @@ -335,4 +339,17 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_release); } +static inline union intel_sseu +intel_sseu_from_device_sseu(const struct sseu_dev_info *sseu) +{ + union intel_sseu value = { + .slice_mask = sseu->slice_mask, + .subslice_mask = sseu->subslice_mask[0], + .min_eus_per_subslice = sseu->max_eus_per_subslice, + .max_eus_per_subslice = sseu->max_eus_per_subslice, + }; + + return value; +} + #endif /* !__I915_GEM_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 1bbbb7a9fa03..aca60895582b 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -39,6 +39,19 @@ struct drm_i915_gem_object; struct i915_request; struct i915_timeline; +/* + * Powergating configuration for a particular (context,engine). + */ +union intel_sseu { + struct { + u8 slice_mask; + u8 subslice_mask; + u8 min_eus_per_subslice; + u8 max_eus_per_subslice; + }; + u64 value; +}; + struct intel_wait { struct rb_node node; struct task_struct *tsk; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c2500c209c63..f875be03eadb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2481,8 +2481,8 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine) return logical_ring_init(engine); } -static u32 -make_rpcs(struct drm_i915_private *dev_priv) +static u32 make_rpcs(const struct sseu_dev_info *sseu, + union intel_sseu ctx_sseu) { u32 rpcs = 0; @@ -2492,24 +2492,23 @@ make_rpcs(struct drm_i915_private *dev_priv) * must make an explicit request through RPCS for full * enablement. */ - if (INTEL_INFO(dev_priv)->sseu.has_slice_pg) { + if (sseu->has_slice_pg) { rpcs |= GEN8_RPCS_S_CNT_ENABLE; - rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask) << - GEN8_RPCS_S_CNT_SHIFT; + rpcs |= hweight8(ctx_sseu.slice_mask) << GEN8_RPCS_S_CNT_SHIFT; rpcs |= GEN8_RPCS_ENABLE; } - if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { + if (sseu->has_subslice_pg) { rpcs |= GEN8_RPCS_SS_CNT_ENABLE; - rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) << + rpcs |= hweight8(ctx_sseu.subslice_mask) << GEN8_RPCS_SS_CNT_SHIFT; rpcs |= GEN8_RPCS_ENABLE; } - if (INTEL_INFO(dev_priv)->sseu.has_eu_pg) { - rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice << + if (sseu->has_eu_pg) { + rpcs |= ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; - rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice << + rpcs |= ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; rpcs |= GEN8_RPCS_ENABLE; } @@ -2633,7 +2632,8 @@ static void execlists_init_reg_state(u32 *regs, if (rcs) { regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - make_rpcs(dev_priv)); + make_rpcs(&INTEL_INFO(dev_priv)->sseu, + ctx->__engine[engine->id].sseu)); i915_oa_init_reg_state(engine, ctx, regs); }