diff mbox series

[v4,6/6] drm/i915/gt: Support aux invalidation on all engines

Message ID 20230717173059.422892-7-andi.shyti@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Update AUX invalidation sequence | expand

Commit Message

Andi Shyti July 17, 2023, 5:30 p.m. UTC
Perform some refactoring with the purpose of keeping in one
single place all the operations around the aux table
invalidation.

With this refactoring add more engines where the invalidation
should be performed.

Fixes: 972282c4cf24 ("drm/i915/gen12: Add aux table invalidate for all engines")
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Cc: <stable@vger.kernel.org> # v5.8+
---
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 63 +++++++++++++++---------
 drivers/gpu/drm/i915/gt/gen8_engine_cs.h |  3 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c      | 17 +------
 3 files changed, 44 insertions(+), 39 deletions(-)

Comments

Andrzej Hajda July 17, 2023, 7:11 p.m. UTC | #1
On 17.07.2023 19:30, Andi Shyti wrote:
> Perform some refactoring with the purpose of keeping in one
> single place all the operations around the aux table
> invalidation.
> 
> With this refactoring add more engines where the invalidation
> should be performed.
> 
> Fixes: 972282c4cf24 ("drm/i915/gen12: Add aux table invalidate for all engines")
> Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
> Cc: <stable@vger.kernel.org> # v5.8+
> ---
>   drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 63 +++++++++++++++---------
>   drivers/gpu/drm/i915/gt/gen8_engine_cs.h |  3 +-
>   drivers/gpu/drm/i915/gt/intel_lrc.c      | 17 +------
>   3 files changed, 44 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> index fbc70f3b7f2fd..6d21a1ac06e73 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> @@ -165,7 +165,8 @@ static u32 preparser_disable(bool state)
>   	return MI_ARB_CHECK | 1 << 8 | state;
>   }
>   
> -u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg)
> +static u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs,
> +				     const i915_reg_t inv_reg)
>   {
>   	u32 gsi_offset = gt->uncore->gsi_offset;
>   
> @@ -187,6 +188,40 @@ u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv
>   	return cs;
>   }
>   
> +static i915_reg_t intel_get_aux_inv_reg(struct intel_engine_cs *engine)
> +{
> +	if (HAS_FLAT_CCS(engine->i915))
> +		return _MMIO(0);

Why not INVALID_MMIO_REG ? Here and below.

> +
> +	switch (engine->id) {
> +	case RCS0:
> +		return GEN12_CCS_AUX_INV;
> +	case VCS0:
> +		return GEN12_VD0_AUX_INV;
> +	case VCS2:
> +		return GEN12_VD2_AUX_INV;
> +	case VECS0:
> +		return GEN12_VE0_AUX_INV;
> +	default:
> +		return _MMIO(0);
> +	}
> +}
> +
> +static bool intel_engine_has_aux_inv(struct intel_engine_cs *engine)
> +{
> +	i915_reg_t reg = intel_get_aux_inv_reg(engine);
> +
> +	return !!reg.reg;

  return i915_mmio_reg_valid(intel_get_aux_inv_reg(engine));

> +}
> +
> +u32 *intel_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
> +{
> +	i915_reg_t reg = intel_get_aux_inv_reg(engine);
> +	struct intel_gt *gt = engine->gt;
> +
> +	return reg.reg ? gen12_emit_aux_table_inv(gt, cs, reg) : cs;
> +}
> +

I am not sure about prefixes, IMHO gen12_ instead of intel_ is more 
adequate as this is only gen12 feature, works only on gen12, and is 
called from gen12 context, up to you. In any case we can squash 
intel_emit_aux_table_inv and gen12_emit_aux_table_inv into one function, 
am I right?

Regards
Andrzej

>   static int mtl_dummy_pipe_control(struct i915_request *rq)
>   {
>   	/* Wa_14016712196 */
> @@ -311,11 +346,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
>   
>   		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
>   
> -		if (!HAS_FLAT_CCS(rq->engine->i915)) {
> -			/* hsdes: 1809175790 */
> -			cs = gen12_emit_aux_table_inv(rq->engine->gt, cs,
> -						      GEN12_CCS_AUX_INV);
> -		}
> +		cs = intel_emit_aux_table_inv(engine, cs);
>   
>   		*cs++ = preparser_disable(false);
>   		intel_ring_advance(rq, cs);
> @@ -326,21 +357,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
>   
>   int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
>   {
> -	intel_engine_mask_t aux_inv = 0;
>   	u32 cmd, *cs;
>   
>   	cmd = 4;
>   	if (mode & EMIT_INVALIDATE) {
>   		cmd += 2;
>   
> -		if (!HAS_FLAT_CCS(rq->engine->i915) &&
> -		    (rq->engine->class == VIDEO_DECODE_CLASS ||
> -		     rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
> -			aux_inv = rq->engine->mask &
> -				~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
> -			if (aux_inv)
> -				cmd += 10;
> -		}
> +		if (intel_engine_has_aux_inv(rq->engine))
> +			cmd += 10;
>   	}
>   
>   	cs = intel_ring_begin(rq, cmd);
> @@ -371,14 +395,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
>   	*cs++ = 0; /* upper addr */
>   	*cs++ = 0; /* value */
>   
> -	if (aux_inv) { /* hsdes: 1809175790 */
> -		if (rq->engine->class == VIDEO_DECODE_CLASS)
> -			cs = gen12_emit_aux_table_inv(rq->engine->gt,
> -						      cs, GEN12_VD0_AUX_INV);
> -		else
> -			cs = gen12_emit_aux_table_inv(rq->engine->gt,
> -						      cs, GEN12_VE0_AUX_INV);
> -	}
> +	cs = intel_emit_aux_table_inv(rq->engine, cs);
>   
>   	if (mode & EMIT_INVALIDATE)
>   		*cs++ = preparser_disable(false);
> diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
> index 655e5c00ddc27..d938c94524510 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
> +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
> @@ -13,6 +13,7 @@
>   #include "intel_gt_regs.h"
>   #include "intel_gpu_commands.h"
>   
> +struct intel_engine_cs;
>   struct intel_gt;
>   struct i915_request;
>   
> @@ -46,7 +47,7 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
>   u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
>   u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
>   
> -u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg);
> +u32 *intel_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs);
>   
>   static inline u32 *
>   __gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 235f3fab60a98..70054767c88c3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1371,10 +1371,7 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
>   	    IS_DG2_G11(ce->engine->i915))
>   		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
>   
> -	/* hsdes: 1809175790 */
> -	if (!HAS_FLAT_CCS(ce->engine->i915))
> -		cs = gen12_emit_aux_table_inv(ce->engine->gt,
> -					      cs, GEN12_CCS_AUX_INV);
> +	cs = intel_emit_aux_table_inv(ce->engine, cs);
>   
>   	/* Wa_16014892111 */
>   	if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) ||
> @@ -1399,17 +1396,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
>   						    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
>   						    0);
>   
> -	/* hsdes: 1809175790 */
> -	if (!HAS_FLAT_CCS(ce->engine->i915)) {
> -		if (ce->engine->class == VIDEO_DECODE_CLASS)
> -			cs = gen12_emit_aux_table_inv(ce->engine->gt,
> -						      cs, GEN12_VD0_AUX_INV);
> -		else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
> -			cs = gen12_emit_aux_table_inv(ce->engine->gt,
> -						      cs, GEN12_VE0_AUX_INV);
> -	}
> -
> -	return cs;
> +	return intel_emit_aux_table_inv(ce->engine, cs);
>   }
>   
>   static void
Matt Roper July 17, 2023, 8:27 p.m. UTC | #2
On Mon, Jul 17, 2023 at 07:30:59PM +0200, Andi Shyti wrote:
> Perform some refactoring with the purpose of keeping in one
> single place all the operations around the aux table
> invalidation.
> 
> With this refactoring add more engines where the invalidation
> should be performed.
> 
> Fixes: 972282c4cf24 ("drm/i915/gen12: Add aux table invalidate for all engines")
> Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
> Cc: <stable@vger.kernel.org> # v5.8+
> ---
>  drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 63 +++++++++++++++---------
>  drivers/gpu/drm/i915/gt/gen8_engine_cs.h |  3 +-
>  drivers/gpu/drm/i915/gt/intel_lrc.c      | 17 +------
>  3 files changed, 44 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> index fbc70f3b7f2fd..6d21a1ac06e73 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> @@ -165,7 +165,8 @@ static u32 preparser_disable(bool state)
>  	return MI_ARB_CHECK | 1 << 8 | state;
>  }
>  
> -u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg)
> +static u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs,
> +				     const i915_reg_t inv_reg)
>  {
>  	u32 gsi_offset = gt->uncore->gsi_offset;
>  
> @@ -187,6 +188,40 @@ u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv
>  	return cs;
>  }
>  
> +static i915_reg_t intel_get_aux_inv_reg(struct intel_engine_cs *engine)

Generally we try to avoid putting "intel_" and "i915_" prefixes on
static functions.

> +{
> +	if (HAS_FLAT_CCS(engine->i915))
> +		return _MMIO(0);
> +
> +	switch (engine->id) {
> +	case RCS0:
> +		return GEN12_CCS_AUX_INV;
> +	case VCS0:
> +		return GEN12_VD0_AUX_INV;
> +	case VCS2:
> +		return GEN12_VD2_AUX_INV;
> +	case VECS0:
> +		return GEN12_VE0_AUX_INV;

We need CCS0 here (0x42c0).  And on graphics versions 12.70 and beyond
we also need BCS0 too (0x4248) since the blitter gained the ability to
interpret CCS compression.

> +	default:
> +		return _MMIO(0);

It might be cleaner to use INVALID_MMIO_REG here (and then check for
i915_mmio_reg_valid() below).

> +	}
> +}
> +
> +static bool intel_engine_has_aux_inv(struct intel_engine_cs *engine)
> +{
> +	i915_reg_t reg = intel_get_aux_inv_reg(engine);
> +
> +	return !!reg.reg;
> +}
> +
> +u32 *intel_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
> +{
> +	i915_reg_t reg = intel_get_aux_inv_reg(engine);
> +	struct intel_gt *gt = engine->gt;
> +
> +	return reg.reg ? gen12_emit_aux_table_inv(gt, cs, reg) : cs;
> +}

Rather than adding this new wrapper function, can we just do the
register lookup at the top of gen12_emit_aux_table_inv() (and bail out
of that function early if there isn't a valid register)?

Keeping the non-static function as the one with "gen12" in the name also
helps reduce confusion about whether this is something that older
platforms should have been calling as well.


Matt

> +
>  static int mtl_dummy_pipe_control(struct i915_request *rq)
>  {
>  	/* Wa_14016712196 */
> @@ -311,11 +346,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
>  
>  		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
>  
> -		if (!HAS_FLAT_CCS(rq->engine->i915)) {
> -			/* hsdes: 1809175790 */
> -			cs = gen12_emit_aux_table_inv(rq->engine->gt, cs,
> -						      GEN12_CCS_AUX_INV);
> -		}
> +		cs = intel_emit_aux_table_inv(engine, cs);
>  
>  		*cs++ = preparser_disable(false);
>  		intel_ring_advance(rq, cs);
> @@ -326,21 +357,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
>  
>  int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
>  {
> -	intel_engine_mask_t aux_inv = 0;
>  	u32 cmd, *cs;
>  
>  	cmd = 4;
>  	if (mode & EMIT_INVALIDATE) {
>  		cmd += 2;
>  
> -		if (!HAS_FLAT_CCS(rq->engine->i915) &&
> -		    (rq->engine->class == VIDEO_DECODE_CLASS ||
> -		     rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
> -			aux_inv = rq->engine->mask &
> -				~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
> -			if (aux_inv)
> -				cmd += 10;
> -		}
> +		if (intel_engine_has_aux_inv(rq->engine))
> +			cmd += 10;
>  	}
>  
>  	cs = intel_ring_begin(rq, cmd);
> @@ -371,14 +395,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
>  	*cs++ = 0; /* upper addr */
>  	*cs++ = 0; /* value */
>  
> -	if (aux_inv) { /* hsdes: 1809175790 */
> -		if (rq->engine->class == VIDEO_DECODE_CLASS)
> -			cs = gen12_emit_aux_table_inv(rq->engine->gt,
> -						      cs, GEN12_VD0_AUX_INV);
> -		else
> -			cs = gen12_emit_aux_table_inv(rq->engine->gt,
> -						      cs, GEN12_VE0_AUX_INV);
> -	}
> +	cs = intel_emit_aux_table_inv(rq->engine, cs);
>  
>  	if (mode & EMIT_INVALIDATE)
>  		*cs++ = preparser_disable(false);
> diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
> index 655e5c00ddc27..d938c94524510 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
> +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
> @@ -13,6 +13,7 @@
>  #include "intel_gt_regs.h"
>  #include "intel_gpu_commands.h"
>  
> +struct intel_engine_cs;
>  struct intel_gt;
>  struct i915_request;
>  
> @@ -46,7 +47,7 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
>  u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
>  u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
>  
> -u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg);
> +u32 *intel_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs);
>  
>  static inline u32 *
>  __gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 235f3fab60a98..70054767c88c3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1371,10 +1371,7 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
>  	    IS_DG2_G11(ce->engine->i915))
>  		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
>  
> -	/* hsdes: 1809175790 */
> -	if (!HAS_FLAT_CCS(ce->engine->i915))
> -		cs = gen12_emit_aux_table_inv(ce->engine->gt,
> -					      cs, GEN12_CCS_AUX_INV);
> +	cs = intel_emit_aux_table_inv(ce->engine, cs);
>  
>  	/* Wa_16014892111 */
>  	if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) ||
> @@ -1399,17 +1396,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
>  						    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
>  						    0);
>  
> -	/* hsdes: 1809175790 */
> -	if (!HAS_FLAT_CCS(ce->engine->i915)) {
> -		if (ce->engine->class == VIDEO_DECODE_CLASS)
> -			cs = gen12_emit_aux_table_inv(ce->engine->gt,
> -						      cs, GEN12_VD0_AUX_INV);
> -		else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
> -			cs = gen12_emit_aux_table_inv(ce->engine->gt,
> -						      cs, GEN12_VE0_AUX_INV);
> -	}
> -
> -	return cs;
> +	return intel_emit_aux_table_inv(ce->engine, cs);
>  }
>  
>  static void
> -- 
> 2.40.1
>
Andi Shyti July 17, 2023, 10 p.m. UTC | #3
Hi Andrzej,

On Mon, Jul 17, 2023 at 09:11:26PM +0200, Andrzej Hajda wrote:
> On 17.07.2023 19:30, Andi Shyti wrote:
> > Perform some refactoring with the purpose of keeping in one
> > single place all the operations around the aux table
> > invalidation.
> > 
> > With this refactoring add more engines where the invalidation
> > should be performed.
> > 
> > Fixes: 972282c4cf24 ("drm/i915/gen12: Add aux table invalidate for all engines")
> > Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
> > Cc: <stable@vger.kernel.org> # v5.8+
> > ---
> >   drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 63 +++++++++++++++---------
> >   drivers/gpu/drm/i915/gt/gen8_engine_cs.h |  3 +-
> >   drivers/gpu/drm/i915/gt/intel_lrc.c      | 17 +------
> >   3 files changed, 44 insertions(+), 39 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> > index fbc70f3b7f2fd..6d21a1ac06e73 100644
> > --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> > @@ -165,7 +165,8 @@ static u32 preparser_disable(bool state)
> >   	return MI_ARB_CHECK | 1 << 8 | state;
> >   }
> > -u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg)
> > +static u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs,
> > +				     const i915_reg_t inv_reg)
> >   {
> >   	u32 gsi_offset = gt->uncore->gsi_offset;
> > @@ -187,6 +188,40 @@ u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv
> >   	return cs;
> >   }
> > +static i915_reg_t intel_get_aux_inv_reg(struct intel_engine_cs *engine)
> > +{
> > +	if (HAS_FLAT_CCS(engine->i915))
> > +		return _MMIO(0);
> 
> Why not INVALID_MMIO_REG ? Here and below.
> 
> > +
> > +	switch (engine->id) {
> > +	case RCS0:
> > +		return GEN12_CCS_AUX_INV;
> > +	case VCS0:
> > +		return GEN12_VD0_AUX_INV;
> > +	case VCS2:
> > +		return GEN12_VD2_AUX_INV;
> > +	case VECS0:
> > +		return GEN12_VE0_AUX_INV;
> > +	default:
> > +		return _MMIO(0);
> > +	}
> > +}
> > +
> > +static bool intel_engine_has_aux_inv(struct intel_engine_cs *engine)
> > +{
> > +	i915_reg_t reg = intel_get_aux_inv_reg(engine);
> > +
> > +	return !!reg.reg;
> 
>  return i915_mmio_reg_valid(intel_get_aux_inv_reg(engine));
> 
> > +}
> > +
> > +u32 *intel_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
> > +{
> > +	i915_reg_t reg = intel_get_aux_inv_reg(engine);
> > +	struct intel_gt *gt = engine->gt;
> > +
> > +	return reg.reg ? gen12_emit_aux_table_inv(gt, cs, reg) : cs;
> > +}
> > +
> 
> I am not sure about prefixes, IMHO gen12_ instead of intel_ is more adequate
> as this is only gen12 feature, works only on gen12, and is called from gen12
> context, up to you. In any case we can squash intel_emit_aux_table_inv and
> gen12_emit_aux_table_inv into one function, am I right?

you and Matt have made exactly the same comments... will fix
them, Thank you!

Andi
Andi Shyti July 17, 2023, 10:02 p.m. UTC | #4
Hi Matt,

On Mon, Jul 17, 2023 at 01:27:09PM -0700, Matt Roper wrote:
> On Mon, Jul 17, 2023 at 07:30:59PM +0200, Andi Shyti wrote:
> > Perform some refactoring with the purpose of keeping in one
> > single place all the operations around the aux table
> > invalidation.
> > 
> > With this refactoring add more engines where the invalidation
> > should be performed.
> > 
> > Fixes: 972282c4cf24 ("drm/i915/gen12: Add aux table invalidate for all engines")
> > Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
> > Cc: <stable@vger.kernel.org> # v5.8+
> > ---
> >  drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 63 +++++++++++++++---------
> >  drivers/gpu/drm/i915/gt/gen8_engine_cs.h |  3 +-
> >  drivers/gpu/drm/i915/gt/intel_lrc.c      | 17 +------
> >  3 files changed, 44 insertions(+), 39 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> > index fbc70f3b7f2fd..6d21a1ac06e73 100644
> > --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> > @@ -165,7 +165,8 @@ static u32 preparser_disable(bool state)
> >  	return MI_ARB_CHECK | 1 << 8 | state;
> >  }
> >  
> > -u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg)
> > +static u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs,
> > +				     const i915_reg_t inv_reg)
> >  {
> >  	u32 gsi_offset = gt->uncore->gsi_offset;
> >  
> > @@ -187,6 +188,40 @@ u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv
> >  	return cs;
> >  }
> >  
> > +static i915_reg_t intel_get_aux_inv_reg(struct intel_engine_cs *engine)
> 
> Generally we try to avoid putting "intel_" and "i915_" prefixes on
> static functions.
> 
> > +{
> > +	if (HAS_FLAT_CCS(engine->i915))
> > +		return _MMIO(0);
> > +
> > +	switch (engine->id) {
> > +	case RCS0:
> > +		return GEN12_CCS_AUX_INV;
> > +	case VCS0:
> > +		return GEN12_VD0_AUX_INV;
> > +	case VCS2:
> > +		return GEN12_VD2_AUX_INV;
> > +	case VECS0:
> > +		return GEN12_VE0_AUX_INV;
> 
> We need CCS0 here (0x42c0).  And on graphics versions 12.70 and beyond
> we also need BCS0 too (0x4248) since the blitter gained the ability to
> interpret CCS compression.

Thanks, will add.

> > +	default:
> > +		return _MMIO(0);
> 
> It might be cleaner to use INVALID_MMIO_REG here (and then check for
> i915_mmio_reg_valid() below).
> 
> > +	}
> > +}
> > +
> > +static bool intel_engine_has_aux_inv(struct intel_engine_cs *engine)
> > +{
> > +	i915_reg_t reg = intel_get_aux_inv_reg(engine);
> > +
> > +	return !!reg.reg;
> > +}
> > +
> > +u32 *intel_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
> > +{
> > +	i915_reg_t reg = intel_get_aux_inv_reg(engine);
> > +	struct intel_gt *gt = engine->gt;
> > +
> > +	return reg.reg ? gen12_emit_aux_table_inv(gt, cs, reg) : cs;
> > +}
> 
> Rather than adding this new wrapper function, can we just do the
> register lookup at the top of gen12_emit_aux_table_inv() (and bail out
> of that function early if there isn't a valid register)?
> 
> Keeping the non-static function as the one with "gen12" in the name also
> helps reduce confusion about whether this is something that older
> platforms should have been calling as well.

You and Andrzej have made the same comments, will fix them.

BTW, this set of functions are doing more or less a similar thing
to what you have done here[*]. I will add the PVC flag.

Thank you!
Andi

[*] https://patchwork.freedesktop.org/patch/539304/?series=118334&rev=1
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index fbc70f3b7f2fd..6d21a1ac06e73 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -165,7 +165,8 @@  static u32 preparser_disable(bool state)
 	return MI_ARB_CHECK | 1 << 8 | state;
 }
 
-u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg)
+static u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs,
+				     const i915_reg_t inv_reg)
 {
 	u32 gsi_offset = gt->uncore->gsi_offset;
 
@@ -187,6 +188,40 @@  u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv
 	return cs;
 }
 
+static i915_reg_t intel_get_aux_inv_reg(struct intel_engine_cs *engine)
+{
+	if (HAS_FLAT_CCS(engine->i915))
+		return _MMIO(0);
+
+	switch (engine->id) {
+	case RCS0:
+		return GEN12_CCS_AUX_INV;
+	case VCS0:
+		return GEN12_VD0_AUX_INV;
+	case VCS2:
+		return GEN12_VD2_AUX_INV;
+	case VECS0:
+		return GEN12_VE0_AUX_INV;
+	default:
+		return _MMIO(0);
+	}
+}
+
+static bool intel_engine_has_aux_inv(struct intel_engine_cs *engine)
+{
+	i915_reg_t reg = intel_get_aux_inv_reg(engine);
+
+	return !!reg.reg;
+}
+
+u32 *intel_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
+{
+	i915_reg_t reg = intel_get_aux_inv_reg(engine);
+	struct intel_gt *gt = engine->gt;
+
+	return reg.reg ? gen12_emit_aux_table_inv(gt, cs, reg) : cs;
+}
+
 static int mtl_dummy_pipe_control(struct i915_request *rq)
 {
 	/* Wa_14016712196 */
@@ -311,11 +346,7 @@  int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 
 		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
 
-		if (!HAS_FLAT_CCS(rq->engine->i915)) {
-			/* hsdes: 1809175790 */
-			cs = gen12_emit_aux_table_inv(rq->engine->gt, cs,
-						      GEN12_CCS_AUX_INV);
-		}
+		cs = intel_emit_aux_table_inv(engine, cs);
 
 		*cs++ = preparser_disable(false);
 		intel_ring_advance(rq, cs);
@@ -326,21 +357,14 @@  int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 
 int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
 {
-	intel_engine_mask_t aux_inv = 0;
 	u32 cmd, *cs;
 
 	cmd = 4;
 	if (mode & EMIT_INVALIDATE) {
 		cmd += 2;
 
-		if (!HAS_FLAT_CCS(rq->engine->i915) &&
-		    (rq->engine->class == VIDEO_DECODE_CLASS ||
-		     rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
-			aux_inv = rq->engine->mask &
-				~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
-			if (aux_inv)
-				cmd += 10;
-		}
+		if (intel_engine_has_aux_inv(rq->engine))
+			cmd += 10;
 	}
 
 	cs = intel_ring_begin(rq, cmd);
@@ -371,14 +395,7 @@  int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
 	*cs++ = 0; /* upper addr */
 	*cs++ = 0; /* value */
 
-	if (aux_inv) { /* hsdes: 1809175790 */
-		if (rq->engine->class == VIDEO_DECODE_CLASS)
-			cs = gen12_emit_aux_table_inv(rq->engine->gt,
-						      cs, GEN12_VD0_AUX_INV);
-		else
-			cs = gen12_emit_aux_table_inv(rq->engine->gt,
-						      cs, GEN12_VE0_AUX_INV);
-	}
+	cs = intel_emit_aux_table_inv(rq->engine, cs);
 
 	if (mode & EMIT_INVALIDATE)
 		*cs++ = preparser_disable(false);
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
index 655e5c00ddc27..d938c94524510 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
@@ -13,6 +13,7 @@ 
 #include "intel_gt_regs.h"
 #include "intel_gpu_commands.h"
 
+struct intel_engine_cs;
 struct intel_gt;
 struct i915_request;
 
@@ -46,7 +47,7 @@  u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
 u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
 u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
 
-u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg);
+u32 *intel_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs);
 
 static inline u32 *
 __gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 235f3fab60a98..70054767c88c3 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1371,10 +1371,7 @@  gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
 	    IS_DG2_G11(ce->engine->i915))
 		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
 
-	/* hsdes: 1809175790 */
-	if (!HAS_FLAT_CCS(ce->engine->i915))
-		cs = gen12_emit_aux_table_inv(ce->engine->gt,
-					      cs, GEN12_CCS_AUX_INV);
+	cs = intel_emit_aux_table_inv(ce->engine, cs);
 
 	/* Wa_16014892111 */
 	if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) ||
@@ -1399,17 +1396,7 @@  gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
 						    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
 						    0);
 
-	/* hsdes: 1809175790 */
-	if (!HAS_FLAT_CCS(ce->engine->i915)) {
-		if (ce->engine->class == VIDEO_DECODE_CLASS)
-			cs = gen12_emit_aux_table_inv(ce->engine->gt,
-						      cs, GEN12_VD0_AUX_INV);
-		else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
-			cs = gen12_emit_aux_table_inv(ce->engine->gt,
-						      cs, GEN12_VE0_AUX_INV);
-	}
-
-	return cs;
+	return intel_emit_aux_table_inv(ce->engine, cs);
 }
 
 static void