Message ID | 20190923235152.35979-3-John.C.Harrison@Intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915: Engine relative MMIO | expand |
On 24/09/2019 00:51, John.C.Harrison@Intel.com wrote: > From: John Harrison <John.C.Harrison@Intel.com> > > Gen12 introduces a completely new and different scheme for > implementing engine relative MMIO accesses - MI_LRI_MMIO_REMAP. This > requires using the base address of instance zero of the relevant > engine class. And then, it is only valid if the register in > question falls within a certain range as specified by a table. > > v2: Add support for fused parts where instance zero of a given engine > class may be missing. Make aux_tables presence a device flag rather > than just hard coded. Pre-calculate the correct LRI base address > rather than using a per-instance base and then adding on a delta to > the correct base later. Make all the table range checking a debug only > feature - the theory is that all driver access should be within the > remap ranges. [Daniele] > > v3: Re-base on Mika's changes. This removes all the abstraction layer. > Which also means there is no common code path that all LRI accesses go > via. Thus it is not possible to implement the range check. However, as > noted in v2, the range check should not be necessary anyway. > > Signed-off-by: John Harrison <John.C.Harrison@Intel.com> > CC: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 27 +++++++++++++++++--- > drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + > drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 9 ++++--- > drivers/gpu/drm/i915/gt/intel_mocs.c | 6 ++--- > drivers/gpu/drm/i915/i915_perf.c | 3 ++- > drivers/gpu/drm/i915/intel_device_info.c | 14 ++++++++++ > drivers/gpu/drm/i915/intel_device_info.h | 1 + > 7 files changed, 49 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > index 5aa58e069806..09fbbffce419 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > @@ -247,14 +247,32 @@ static bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine) > return true; > } > > -static void lri_init(struct intel_engine_cs *engine) > +static void lri_init(struct intel_engine_cs *engine, u32 first_instance) > { > if (i915_engine_has_relative_lri(engine)) { > - engine->lri_cmd_mode = MI_LRI_ADD_CS_MMIO_START_GEN11; > - engine->lri_mmio_base = 0; > + if (INTEL_GEN(engine->i915) < 12) { > + engine->lri_cmd_mode = MI_LRI_ADD_CS_MMIO_START_GEN11; > + engine->lri_mmio_base = 0;//engine->mmio_base; No C++ style comments please. > + engine->lri_engine = engine; > + } else { > + engine->lri_cmd_mode = MI_LRI_MMIO_REMAP_GEN12; > + > + engine->lri_engine = engine->gt->engine_class > + [engine->class][first_instance]; > + GEM_BUG_ON(!engine->lri_engine); > + engine->lri_mmio_base = engine->lri_engine->mmio_base; > + > + /* According to the bspec, accesses should be compared Preferred style: /* * Blah */ > + * against the remap table and remapping only enabled > + * if found. In practice, all driver accesses should be > + * to remap registers. So, no need for the complication > + * of checking against any device specific tables. > + */ > + } > } else { > engine->lri_cmd_mode = 0; > engine->lri_mmio_base = engine->mmio_base; > + engine->lri_engine = engine; > } If it helps at all you could save one indentation level by doing: if (!supports_lri) { } else if (GEN == 11) { } else { } > } > > @@ -342,7 +360,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) > /* Nothing to do here, execute in order of dependencies */ > engine->schedule = NULL; > > - lri_init(engine); > + lri_init(engine, > + INTEL_INFO(engine->i915)->first_instance[engine->class]); > > seqlock_init(&engine->stats.lock); > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h > index 38f486f7f7e3..62caa74e8558 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h > @@ -308,6 +308,7 @@ struct intel_engine_cs { > > u32 lri_mmio_base; > u32 lri_cmd_mode; > + struct intel_engine_cs *lri_engine; > > u32 uabi_capabilities; > > diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h > index bfb51d8d7ce2..9c5be384026f 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h > +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h > @@ -133,15 +133,16 @@ > * simply ignores the register load under certain conditions. > * - One can actually load arbitrary many arbitrary registers: Simply issue x > * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! > - * - Newer hardware supports engine relative addressing but older hardware does > - * not. This is required for hw engine load balancing. Hence the MI_LRI > - * instruction itself is prefixed with '__' and should only be used on > - * legacy hardware code paths. Generic code must always use the MI_LRI > + * - Newer hardware supports engine relative addressing but using multiple > + * incompatible schemes. This is required for hw engine load balancing. Hence > + * the MI_LRI instruction itself is prefixed with '__' and should only be Is the double underscore reference out of date now? > + * used on legacy hardware code paths. Generic code must always use the MI_LRI > * and i915_get_lri_reg() helper functions instead. > */ > #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) > /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */ > #define MI_LRI_FORCE_POSTED (1<<12) > +#define MI_LRI_MMIO_REMAP_GEN12 BIT(17) > #define MI_LRI_ADD_CS_MMIO_START_GEN11 BIT(19) > #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) > #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c > index 8e8fe3deb95c..e121fea5b33c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c > @@ -438,7 +438,7 @@ static void intel_mocs_init_global(struct intel_gt *gt) > static int emit_mocs_control_table(struct i915_request *rq, > const struct drm_i915_mocs_table *table) > { > - enum intel_engine_id engine = rq->engine->id; > + enum intel_engine_id engine_id = rq->engine->lri_engine->id; Observation: If you don't rename the local diff is smaller. > unsigned int index; > u32 unused_value; > u32 *cs; > @@ -459,13 +459,13 @@ static int emit_mocs_control_table(struct i915_request *rq, > for (index = 0; index < table->size; index++) { > u32 value = get_entry_control(table, index); > > - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); > + *cs++ = i915_mmio_reg_offset(mocs_register(engine_id, index)); > *cs++ = value; > } > > /* All remaining entries are also unused */ > for (; index < table->n_entries; index++) { > - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); > + *cs++ = i915_mmio_reg_offset(mocs_register(engine_id, index)); > *cs++ = unused_value; > } > > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 8b85cdfada21..fd628ae12343 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -1695,7 +1695,8 @@ gen8_update_reg_state_unlocked(struct i915_perf_stream *stream, > > /* > * NB: The LRI instruction is generated by the hardware. > - * Should we read it in and assert that the offset flag is set? > + * Should we read it in and assert that the appropriate > + * offset flag is set? > */ > > CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, > diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c > index 85e480bdc673..dece012d3ad4 100644 > --- a/drivers/gpu/drm/i915/intel_device_info.c > +++ b/drivers/gpu/drm/i915/intel_device_info.c > @@ -1030,6 +1030,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) > u32 media_fuse; > u16 vdbox_mask; > u16 vebox_mask; > + bool found; > > if (INTEL_GEN(dev_priv) < 11) > return; > @@ -1040,6 +1041,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) > vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> > GEN11_GT_VEBOX_DISABLE_SHIFT; > > + found = false; > for (i = 0; i < I915_MAX_VCS; i++) { > if (!HAS_ENGINE(dev_priv, _VCS(i))) { > vdbox_mask &= ~BIT(i); > @@ -1059,11 +1061,17 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) > */ > if (IS_TIGERLAKE(dev_priv) || logical_vdbox++ % 2 == 0) > RUNTIME_INFO(dev_priv)->vdbox_sfc_access |= BIT(i); > + > + if (!found) { > + found = true; > + info->first_instance[VIDEO_DECODE_CLASS] = i; > + } > } > DRM_DEBUG_DRIVER("vdbox enable: %04x, instances: %04lx\n", > vdbox_mask, VDBOX_MASK(dev_priv)); > GEM_BUG_ON(vdbox_mask != VDBOX_MASK(dev_priv)); > > + found = false; > for (i = 0; i < I915_MAX_VECS; i++) { > if (!HAS_ENGINE(dev_priv, _VECS(i))) { > vebox_mask &= ~BIT(i); > @@ -1073,6 +1081,12 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) > if (!(BIT(i) & vebox_mask)) { > info->engine_mask &= ~BIT(_VECS(i)); > DRM_DEBUG_DRIVER("vecs%u fused off\n", i); > + continue; > + } > + > + if (!found) { > + found = true; > + info->first_instance[VIDEO_ENHANCEMENT_CLASS] = i; > } > } > DRM_DEBUG_DRIVER("vebox enable: %04x, instances: %04lx\n", > diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h > index 0cdc2465534b..75a9950969fb 100644 > --- a/drivers/gpu/drm/i915/intel_device_info.h > +++ b/drivers/gpu/drm/i915/intel_device_info.h > @@ -152,6 +152,7 @@ struct intel_device_info { > u8 gen; > u8 gt; /* GT number, 0 if undefined */ > intel_engine_mask_t engine_mask; /* Engines supported by the HW */ > + u32 first_instance[MAX_ENGINE_CLASS]; /* instance 0 might be fused */ u32 is overkill however do you even need to store this persistently? Looks like all that is needed at runtime is engine->lri_engine? So you could lookup the first instance from intel_engine_setup. Or if I missed something and you do need it at runtime, then should go to runtime info since we want to avoid dependencies on mkwrite_device_info where possible. > > enum intel_platform platform; > > Regards, Tvrtko P.S. Just a superficial scan, not a normal review.
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 5aa58e069806..09fbbffce419 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -247,14 +247,32 @@ static bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine) return true; } -static void lri_init(struct intel_engine_cs *engine) +static void lri_init(struct intel_engine_cs *engine, u32 first_instance) { if (i915_engine_has_relative_lri(engine)) { - engine->lri_cmd_mode = MI_LRI_ADD_CS_MMIO_START_GEN11; - engine->lri_mmio_base = 0; + if (INTEL_GEN(engine->i915) < 12) { + engine->lri_cmd_mode = MI_LRI_ADD_CS_MMIO_START_GEN11; + engine->lri_mmio_base = 0;//engine->mmio_base; + engine->lri_engine = engine; + } else { + engine->lri_cmd_mode = MI_LRI_MMIO_REMAP_GEN12; + + engine->lri_engine = engine->gt->engine_class + [engine->class][first_instance]; + GEM_BUG_ON(!engine->lri_engine); + engine->lri_mmio_base = engine->lri_engine->mmio_base; + + /* According to the bspec, accesses should be compared + * against the remap table and remapping only enabled + * if found. In practice, all driver accesses should be + * to remap registers. So, no need for the complication + * of checking against any device specific tables. + */ + } } else { engine->lri_cmd_mode = 0; engine->lri_mmio_base = engine->mmio_base; + engine->lri_engine = engine; } } @@ -342,7 +360,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; - lri_init(engine); + lri_init(engine, + INTEL_INFO(engine->i915)->first_instance[engine->class]); seqlock_init(&engine->stats.lock); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 38f486f7f7e3..62caa74e8558 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -308,6 +308,7 @@ struct intel_engine_cs { u32 lri_mmio_base; u32 lri_cmd_mode; + struct intel_engine_cs *lri_engine; u32 uabi_capabilities; diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index bfb51d8d7ce2..9c5be384026f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -133,15 +133,16 @@ * simply ignores the register load under certain conditions. * - One can actually load arbitrary many arbitrary registers: Simply issue x * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! - * - Newer hardware supports engine relative addressing but older hardware does - * not. This is required for hw engine load balancing. Hence the MI_LRI - * instruction itself is prefixed with '__' and should only be used on - * legacy hardware code paths. Generic code must always use the MI_LRI + * - Newer hardware supports engine relative addressing but using multiple + * incompatible schemes. This is required for hw engine load balancing. Hence + * the MI_LRI instruction itself is prefixed with '__' and should only be + * used on legacy hardware code paths. Generic code must always use the MI_LRI * and i915_get_lri_reg() helper functions instead. */ #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */ #define MI_LRI_FORCE_POSTED (1<<12) +#define MI_LRI_MMIO_REMAP_GEN12 BIT(17) #define MI_LRI_ADD_CS_MMIO_START_GEN11 BIT(19) #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 8e8fe3deb95c..e121fea5b33c 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -438,7 +438,7 @@ static void intel_mocs_init_global(struct intel_gt *gt) static int emit_mocs_control_table(struct i915_request *rq, const struct drm_i915_mocs_table *table) { - enum intel_engine_id engine = rq->engine->id; + enum intel_engine_id engine_id = rq->engine->lri_engine->id; unsigned int index; u32 unused_value; u32 *cs; @@ -459,13 +459,13 @@ static int emit_mocs_control_table(struct i915_request *rq, for (index = 0; index < table->size; index++) { u32 value = get_entry_control(table, index); - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); + *cs++ = i915_mmio_reg_offset(mocs_register(engine_id, index)); *cs++ = value; } /* All remaining entries are also unused */ for (; index < table->n_entries; index++) { - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); + *cs++ = i915_mmio_reg_offset(mocs_register(engine_id, index)); *cs++ = unused_value; } diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 8b85cdfada21..fd628ae12343 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1695,7 +1695,8 @@ gen8_update_reg_state_unlocked(struct i915_perf_stream *stream, /* * NB: The LRI instruction is generated by the hardware. - * Should we read it in and assert that the offset flag is set? + * Should we read it in and assert that the appropriate + * offset flag is set? */ CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 85e480bdc673..dece012d3ad4 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -1030,6 +1030,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) u32 media_fuse; u16 vdbox_mask; u16 vebox_mask; + bool found; if (INTEL_GEN(dev_priv) < 11) return; @@ -1040,6 +1041,7 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> GEN11_GT_VEBOX_DISABLE_SHIFT; + found = false; for (i = 0; i < I915_MAX_VCS; i++) { if (!HAS_ENGINE(dev_priv, _VCS(i))) { vdbox_mask &= ~BIT(i); @@ -1059,11 +1061,17 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) */ if (IS_TIGERLAKE(dev_priv) || logical_vdbox++ % 2 == 0) RUNTIME_INFO(dev_priv)->vdbox_sfc_access |= BIT(i); + + if (!found) { + found = true; + info->first_instance[VIDEO_DECODE_CLASS] = i; + } } DRM_DEBUG_DRIVER("vdbox enable: %04x, instances: %04lx\n", vdbox_mask, VDBOX_MASK(dev_priv)); GEM_BUG_ON(vdbox_mask != VDBOX_MASK(dev_priv)); + found = false; for (i = 0; i < I915_MAX_VECS; i++) { if (!HAS_ENGINE(dev_priv, _VECS(i))) { vebox_mask &= ~BIT(i); @@ -1073,6 +1081,12 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) if (!(BIT(i) & vebox_mask)) { info->engine_mask &= ~BIT(_VECS(i)); DRM_DEBUG_DRIVER("vecs%u fused off\n", i); + continue; + } + + if (!found) { + found = true; + info->first_instance[VIDEO_ENHANCEMENT_CLASS] = i; } } DRM_DEBUG_DRIVER("vebox enable: %04x, instances: %04lx\n", diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 0cdc2465534b..75a9950969fb 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -152,6 +152,7 @@ struct intel_device_info { u8 gen; u8 gt; /* GT number, 0 if undefined */ intel_engine_mask_t engine_mask; /* Engines supported by the HW */ + u32 first_instance[MAX_ENGINE_CLASS]; /* instance 0 might be fused */ enum intel_platform platform;