diff mbox series

[v5,7/9] drm/i915/gt: Ensure memory quiesced before invalidation for all engines

Message ID 20230718133836.574781-8-andi.shyti@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Update AUX invalidation sequence | expand

Commit Message

Andi Shyti July 18, 2023, 1:38 p.m. UTC
Commit af9e423a8aae ("drm/i915/gt: Ensure memory quiesced before
invalidation") has made sure that the memory is quiesced before
invalidating the AUX CCS table. Do it for all the other engines
and not just RCS.

Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Cc: Jonathan Cavitt <jonathan.cavitt@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
---
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c     | 71 +++++++++++++++++---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  1 +
 2 files changed, 62 insertions(+), 10 deletions(-)

Comments

Nirmoy Das July 18, 2023, 2:12 p.m. UTC | #1
Hi Andi,

On 7/18/2023 3:38 PM, Andi Shyti wrote:
> Commit af9e423a8aae ("drm/i915/gt: Ensure memory quiesced before
> invalidation") has made sure that the memory is quiesced before
> invalidating the AUX CCS table. Do it for all the other engines
> and not just RCS.
>
> Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
> Cc: Jonathan Cavitt <jonathan.cavitt@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/gen8_engine_cs.c     | 71 +++++++++++++++++---
>   drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  1 +
>   2 files changed, 62 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> index 3275e55b18d90..2f40cd515cc78 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> @@ -225,6 +225,13 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
>   
>   		bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
>   
> +		/*
> +		 * When required, in MTL+ platforms we need to
> +		 * set the CCS_FLUSH bit in the pipe control
> +		 */
> +		if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
> +			bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
> +
>   		bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
>   		bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
>   		bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
> @@ -309,20 +316,64 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
>   int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
>   {
>   	intel_engine_mask_t aux_inv = 0;
> -	u32 cmd, *cs;
> +	u32 cmd = 4;
> +	u32 *cs;
>   
> -	cmd = 4;
> -	if (mode & EMIT_INVALIDATE) {
> +	if (mode & EMIT_INVALIDATE)
>   		cmd += 2;
>   
> -		if (HAS_AUX_CCS(rq->engine->i915) &&
> -		    (rq->engine->class == VIDEO_DECODE_CLASS ||
> -		     rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
> -			aux_inv = rq->engine->mask &
> -				~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
> -			if (aux_inv)
> -				cmd += 4;
> +	if (HAS_AUX_CCS(rq->engine->i915))
> +		aux_inv = rq->engine->mask &
> +			  ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
> +
> +	/*
> +	 * Aux invalidations on Aux CCS platforms require
> +	 * memory traffic is quiesced prior.
> +	 */
> +	if (aux_inv) {
> +		u32 bit_group_0 = 0;
> +		u32 bit_group_1 = 0;
> +
> +		cmd += 4;
> +
> +		bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
> +
> +		switch (rq->engine->class) {
> +		case VIDEO_DECODE_CLASS:
> +			bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
> +			bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
> +			bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
> +			bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
> +			bit_group_1 |= PIPE_CONTROL_CS_STALL;
> +
> +			/*
> +			 * When required, in MTL+ platforms we need to
> +			 * set the CCS_FLUSH bit in the pipe control
> +			 */
> +			if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
> +				bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
> +
> +			break;
> +
> +		case VIDEO_ENHANCEMENT_CLASS:
> +		case COMPUTE_CLASS:
> +			bit_group_1 |= MI_FLUSH_DW;
> +
> +			break;
> +
> +		case COPY_ENGINE_CLASS:
> +			/*
> +			 * When required, in MTL+ platforms we need to
> +			 * set the CCS_FLUSH bit in the pipe control
> +			 */
> +			if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
> +				bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
> +			break;
>   		}
> +
> +		if (bit_group_1 || bit_group_0)
> +			intel_emit_pipe_control_cs(rq, bit_group_0, bit_group_1,
> +						   LRC_PPHWSP_SCRATCH_ADDR);


The pipe control is limited to render and compute engines only, and
gen12_emit_flush_xcs() gets called only for the other engines (BCS, VE, VD), AFAIU. So I imagine the changes for this patch as:

gen12_emit_flush_rcs()
	pipe_control with CCS_FLUSH
	AUX CCS inval
gen12_emit_flush_xcs()
	MI_FLUSH_DW (with CCS flush for BCS)
	AUX CCS inval

(Note that the CCS flush bit for MI_FLUSH_DW is bit 16.)

Regards,
Nirmoy

>   	}
>   
>   	cs = intel_ring_begin(rq, cmd);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index 5d143e2a8db03..5df7cce23197c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -299,6 +299,7 @@
>   #define   PIPE_CONTROL_QW_WRITE				(1<<14)
>   #define   PIPE_CONTROL_POST_SYNC_OP_MASK                (3<<14)
>   #define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
> +#define   PIPE_CONTROL_CCS_FLUSH			(1<<13) /* MTL+ */
>   #define   PIPE_CONTROL_WRITE_FLUSH			(1<<12)
>   #define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12) /* gen6+ */
>   #define   PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE	(1<<11) /* MBZ on ILK */
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 3275e55b18d90..2f40cd515cc78 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -225,6 +225,13 @@  int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 
 		bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
 
+		/*
+		 * When required, in MTL+ platforms we need to
+		 * set the CCS_FLUSH bit in the pipe control
+		 */
+		if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
+			bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
+
 		bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
 		bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
 		bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
@@ -309,20 +316,64 @@  int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
 {
 	intel_engine_mask_t aux_inv = 0;
-	u32 cmd, *cs;
+	u32 cmd = 4;
+	u32 *cs;
 
-	cmd = 4;
-	if (mode & EMIT_INVALIDATE) {
+	if (mode & EMIT_INVALIDATE)
 		cmd += 2;
 
-		if (HAS_AUX_CCS(rq->engine->i915) &&
-		    (rq->engine->class == VIDEO_DECODE_CLASS ||
-		     rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
-			aux_inv = rq->engine->mask &
-				~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
-			if (aux_inv)
-				cmd += 4;
+	if (HAS_AUX_CCS(rq->engine->i915))
+		aux_inv = rq->engine->mask &
+			  ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
+
+	/*
+	 * Aux invalidations on Aux CCS platforms require
+	 * memory traffic is quiesced prior.
+	 */
+	if (aux_inv) {
+		u32 bit_group_0 = 0;
+		u32 bit_group_1 = 0;
+
+		cmd += 4;
+
+		bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
+
+		switch (rq->engine->class) {
+		case VIDEO_DECODE_CLASS:
+			bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+			bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+			bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+			bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
+			bit_group_1 |= PIPE_CONTROL_CS_STALL;
+
+			/*
+			 * When required, in MTL+ platforms we need to
+			 * set the CCS_FLUSH bit in the pipe control
+			 */
+			if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
+				bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
+
+			break;
+
+		case VIDEO_ENHANCEMENT_CLASS:
+		case COMPUTE_CLASS:
+			bit_group_1 |= MI_FLUSH_DW;
+
+			break;
+
+		case COPY_ENGINE_CLASS:
+			/*
+			 * When required, in MTL+ platforms we need to
+			 * set the CCS_FLUSH bit in the pipe control
+			 */
+			if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
+				bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
+			break;
 		}
+
+		if (bit_group_1 || bit_group_0)
+			intel_emit_pipe_control_cs(rq, bit_group_0, bit_group_1,
+						   LRC_PPHWSP_SCRATCH_ADDR);
 	}
 
 	cs = intel_ring_begin(rq, cmd);
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 5d143e2a8db03..5df7cce23197c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -299,6 +299,7 @@ 
 #define   PIPE_CONTROL_QW_WRITE				(1<<14)
 #define   PIPE_CONTROL_POST_SYNC_OP_MASK                (3<<14)
 #define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
+#define   PIPE_CONTROL_CCS_FLUSH			(1<<13) /* MTL+ */
 #define   PIPE_CONTROL_WRITE_FLUSH			(1<<12)
 #define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12) /* gen6+ */
 #define   PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE	(1<<11) /* MBZ on ILK */