Message ID | 20220916014648.1310346-4-radhakrishna.sripada@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Initial Meteorlake Support | expand |
On Thu, Sep 15, 2022 at 06:46:48PM -0700, Radhakrishna Sripada wrote: >From: Matt Roper <matthew.d.roper@intel.com> > >The part of the media and blitter engine contexts that we care about for >setting up an initial state are the same on MTL as they were on DG2 >(and PVC), so we need to update the driver conditions to re-use the DG2 >context table. this paragraph doesn't match what you are doing in the patch. Which one is correct? From "v2" below it looks like the original intention was to just reuse and now they are changed. > >For render/compute engines, the part of the context images are nearly >the same, although the layout had a very slight change --- one POSH >register was removed and the placement of some LRI/noops adjusted >slightly to compensate. > >v2: > - Dg2, mtl xcs offsets slightly vary. Use a separate offsets array(Bala) > - Add missing nop in xcs offsets(Bala) >v3: > - Fix the spacing for nop in xcs offset(MattR) >v4: > - Fix rcs register offset(MattR) > >Bspec: 46261, 46260, 45585 >Cc: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com> >Signed-off-by: Matt Roper <matthew.d.roper@intel.com> >Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com> >--- > drivers/gpu/drm/i915/gt/intel_lrc.c | 84 ++++++++++++++++++++++++++++- > 1 file changed, 82 insertions(+), 2 deletions(-) > >diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c >index 3955292483a6..c7937d8d120a 100644 >--- a/drivers/gpu/drm/i915/gt/intel_lrc.c >+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c >@@ -264,6 +264,39 @@ static const u8 dg2_xcs_offsets[] = { > END > }; > >+static const u8 mtl_xcs_offsets[] = { >+ NOP(1), >+ LRI(13, POSTED), >+ REG16(0x244), >+ REG(0x034), >+ REG(0x030), >+ REG(0x038), >+ REG(0x03c), >+ REG(0x168), >+ REG(0x140), >+ REG(0x110), >+ REG(0x1c0), >+ REG(0x1c4), >+ REG(0x1c8), >+ REG(0x180), >+ REG16(0x2b4), are we missing 0x120 here? >+ NOP(4), NOP(2), but it seems this is here to overcome the missing 0x120? >+ >+ NOP(1), >+ LRI(9, POSTED), >+ REG16(0x3a8), >+ REG16(0x28c), >+ REG16(0x288), >+ REG16(0x284), >+ REG16(0x280), >+ REG16(0x27c), >+ REG16(0x278), >+ REG16(0x274), >+ REG16(0x270), >+ >+ END >+}; >+ > static const u8 gen8_rcs_offsets[] = { > NOP(1), > LRI(14, POSTED), >@@ -606,6 +639,49 @@ static const u8 dg2_rcs_offsets[] = { > END > }; > >+static const u8 mtl_rcs_offsets[] = { >+ NOP(1), >+ LRI(15, POSTED), >+ REG16(0x244), >+ REG(0x034), >+ REG(0x030), >+ REG(0x038), >+ REG(0x03c), >+ REG(0x168), >+ REG(0x140), >+ REG(0x110), >+ REG(0x1c0), >+ REG(0x1c4), >+ REG(0x1c8), >+ REG(0x180), >+ REG16(0x2b4), >+ REG(0x120), >+ REG(0x124), >+ >+ NOP(1), >+ LRI(9, POSTED), humn... set_offsets() is forcing MI_LRI_LRM_CS_MMIO for ver >= 11, although here bspec shows this MI_LOAD_REGISTER_IMM doesn't have it set. +Mika, +Chris Lucas De Marchi >+ REG16(0x3a8), >+ REG16(0x28c), >+ REG16(0x288), >+ REG16(0x284), >+ REG16(0x280), >+ REG16(0x27c), >+ REG16(0x278), >+ REG16(0x274), >+ REG16(0x270), >+ >+ NOP(2), >+ LRI(2, POSTED), >+ REG16(0x5a8), >+ REG16(0x5ac), >+ >+ NOP(6), >+ LRI(1, 0), >+ REG(0x0c8), >+ >+ END >+}; >+ > #undef END > #undef REG16 > #undef REG >@@ -624,7 +700,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) > !intel_engine_has_relative_mmio(engine)); > > if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) { >- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) >+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) >+ return mtl_rcs_offsets; >+ else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) > return dg2_rcs_offsets; > else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) > return xehp_rcs_offsets; >@@ -637,7 +715,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) > else > return gen8_rcs_offsets; > } else { >- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) >+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) >+ return mtl_xcs_offsets; >+ else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) > return dg2_xcs_offsets; > else if (GRAPHICS_VER(engine->i915) >= 12) > return gen12_xcs_offsets; >-- >2.34.1 >
On Fri, Sep 23, 2022 at 03:48:51PM -0700, Lucas De Marchi wrote: > On Thu, Sep 15, 2022 at 06:46:48PM -0700, Radhakrishna Sripada wrote: > > From: Matt Roper <matthew.d.roper@intel.com> > > > > The part of the media and blitter engine contexts that we care about for > > setting up an initial state are the same on MTL as they were on DG2 > > (and PVC), so we need to update the driver conditions to re-use the DG2 > > context table. > > this paragraph doesn't match what you are doing in the patch. Which one > is correct? From "v2" below it looks like the original intention was to > just reuse and now they are changed. > > > > > For render/compute engines, the part of the context images are nearly > > the same, although the layout had a very slight change --- one POSH > > register was removed and the placement of some LRI/noops adjusted > > slightly to compensate. > > > > v2: > > - Dg2, mtl xcs offsets slightly vary. Use a separate offsets array(Bala) > > - Add missing nop in xcs offsets(Bala) > > v3: > > - Fix the spacing for nop in xcs offset(MattR) > > v4: > > - Fix rcs register offset(MattR) > > > > Bspec: 46261, 46260, 45585 > > Cc: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com> > > Signed-off-by: Matt Roper <matthew.d.roper@intel.com> > > Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com> > > --- > > drivers/gpu/drm/i915/gt/intel_lrc.c | 84 ++++++++++++++++++++++++++++- > > 1 file changed, 82 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > > index 3955292483a6..c7937d8d120a 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > > @@ -264,6 +264,39 @@ static const u8 dg2_xcs_offsets[] = { > > END > > }; > > > > +static const u8 mtl_xcs_offsets[] = { > > + NOP(1), > > + LRI(13, POSTED), > > + REG16(0x244), > > + REG(0x034), > > + REG(0x030), > > + REG(0x038), > > + REG(0x03c), > > + REG(0x168), > > + REG(0x140), > > + REG(0x110), > > + REG(0x1c0), > > + REG(0x1c4), > > + REG(0x1c8), > > + REG(0x180), > > + REG16(0x2b4), > > are we missing 0x120 here? > > > + NOP(4), > > NOP(2), but it seems this is here to > overcome the missing 0x120? If 0x120 were included, it's a 64-bit register so it would fill both of the NOP slots (i.e., it would be listed as 0x120 and 0x124 in the code here). The bspec tagging/filtering doesn't seem to be working quite right here. The project column of bspec 45585 indicates it should only be included here on PVC (and all other platforms should have a 4-dword NOP instead). But the filter option doesn't actually drop out the row of the table as you'd expect. Also inspecting the tags directly leaves some ambiguity about whether the final platform list is correct or not. I suspect it doesn't actually really matter whether we include that register here or not; the overall "shape" of the context image is correct either way. The hardware should be able to do its initial context switch with load suppressed regardless. After that the next time a context switch happens the full context will get written out to memory in the correct form. And 0x120 isn't a register we're actively doing anything with in the driver that would require special care. If need be, we could always look at a post-context switch context in memory on actual hardware and see whether it includes 0x120 in this spot or not. > > > + > > + NOP(1), > > + LRI(9, POSTED), > > + REG16(0x3a8), > > + REG16(0x28c), > > + REG16(0x288), > > + REG16(0x284), > > + REG16(0x280), > > + REG16(0x27c), > > + REG16(0x278), > > + REG16(0x274), > > + REG16(0x270), > > + > > + END > > +}; > > + > > static const u8 gen8_rcs_offsets[] = { > > NOP(1), > > LRI(14, POSTED), > > @@ -606,6 +639,49 @@ static const u8 dg2_rcs_offsets[] = { > > END > > }; > > > > +static const u8 mtl_rcs_offsets[] = { > > + NOP(1), > > + LRI(15, POSTED), > > + REG16(0x244), > > + REG(0x034), > > + REG(0x030), > > + REG(0x038), > > + REG(0x03c), > > + REG(0x168), > > + REG(0x140), > > + REG(0x110), > > + REG(0x1c0), > > + REG(0x1c4), > > + REG(0x1c8), > > + REG(0x180), > > + REG16(0x2b4), > > + REG(0x120), > > + REG(0x124), > > + > > + NOP(1), > > + LRI(9, POSTED), > > humn... set_offsets() is forcing MI_LRI_LRM_CS_MMIO > for ver >= 11, although here bspec shows this MI_LOAD_REGISTER_IMM > doesn't have it set. Since all of the offsets here are engine-relative offsets, that sounds like a spec bug. Otherwise you'd be loading into absolute register offsets 0x3a8, 0x28c, etc. rather than the proper registers. It's probably a copy/paste from the render engine's page where they document everything with the absolute 0x2XXX offsets. Matt > > +Mika, +Chris > > > Lucas De Marchi > > > + REG16(0x3a8), > > + REG16(0x28c), > > + REG16(0x288), > > + REG16(0x284), > > + REG16(0x280), > > + REG16(0x27c), > > + REG16(0x278), > > + REG16(0x274), > > + REG16(0x270), > > + > > + NOP(2), > > + LRI(2, POSTED), > > + REG16(0x5a8), > > + REG16(0x5ac), > > + > > + NOP(6), > > + LRI(1, 0), > > + REG(0x0c8), > > + > > + END > > +}; > > + > > #undef END > > #undef REG16 > > #undef REG > > @@ -624,7 +700,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) > > !intel_engine_has_relative_mmio(engine)); > > > > if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) { > > - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) > > + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) > > + return mtl_rcs_offsets; > > + else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) > > return dg2_rcs_offsets; > > else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) > > return xehp_rcs_offsets; > > @@ -637,7 +715,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) > > else > > return gen8_rcs_offsets; > > } else { > > - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) > > + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) > > + return mtl_xcs_offsets; > > + else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) > > return dg2_xcs_offsets; > > else if (GRAPHICS_VER(engine->i915) >= 12) > > return gen12_xcs_offsets; > > -- > > 2.34.1 > >
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 3955292483a6..c7937d8d120a 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -264,6 +264,39 @@ static const u8 dg2_xcs_offsets[] = { END }; +static const u8 mtl_xcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + NOP(4), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END +}; + static const u8 gen8_rcs_offsets[] = { NOP(1), LRI(14, POSTED), @@ -606,6 +639,49 @@ static const u8 dg2_rcs_offsets[] = { END }; +static const u8 mtl_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(2), + LRI(2, POSTED), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + #undef END #undef REG16 #undef REG @@ -624,7 +700,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) !intel_engine_has_relative_mmio(engine)); if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) + return mtl_rcs_offsets; + else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) return dg2_rcs_offsets; else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) return xehp_rcs_offsets; @@ -637,7 +715,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) else return gen8_rcs_offsets; } else { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) + return mtl_xcs_offsets; + else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) return dg2_xcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 12) return gen12_xcs_offsets;