drm/i915/tgl: Extend MI_SEMAPHORE_WAIT
diff mbox series

Message ID 20190917123055.28965-1-chris@chris-wilson.co.uk
State New
Headers show
Series
  • drm/i915/tgl: Extend MI_SEMAPHORE_WAIT
Related show

Commit Message

Chris Wilson Sept. 17, 2019, 12:30 p.m. UTC
On Tigerlake, MI_SEMAPHORE_WAIT grew an extra dword, so be sure to
update the length field and emit that extra parameter and any padding
noop as required.

v2: Define the token shift while we are adding the updated MI_SEMAPHORE_WAIT
v3: Use int instead of bool in the addition so that readers are not left
wondering about the intricacies of the C spec. Now they just have to
worry what the integer value of a boolean operation is...

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  3 +
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 71 ++++++++++++++++++--
 drivers/gpu/drm/i915/i915_pci.c              |  1 -
 drivers/gpu/drm/i915/i915_request.c          | 21 ++++--
 4 files changed, 84 insertions(+), 12 deletions(-)

Comments

Mika Kuoppala Sept. 17, 2019, 1:17 p.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> On Tigerlake, MI_SEMAPHORE_WAIT grew an extra dword, so be sure to
> update the length field and emit that extra parameter and any padding
> noop as required.
>
> v2: Define the token shift while we are adding the updated MI_SEMAPHORE_WAIT
> v3: Use int instead of bool in the addition so that readers are not left
> wondering about the intricacies of the C spec. Now they just have to
> worry what the integer value of a boolean operation is...
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Michal Winiarski <michal.winiarski@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  3 +
>  drivers/gpu/drm/i915/gt/intel_lrc.c          | 71 ++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_pci.c              |  1 -
>  drivers/gpu/drm/i915/i915_request.c          | 21 ++++--
>  4 files changed, 84 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index fbad403ab7ac..f78b13d74e17 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -112,6 +112,7 @@
>  #define MI_SEMAPHORE_SIGNAL	MI_INSTR(0x1b, 0) /* GEN8+ */
>  #define   MI_SEMAPHORE_TARGET(engine)	((engine)<<15)
>  #define MI_SEMAPHORE_WAIT	MI_INSTR(0x1c, 2) /* GEN8+ */
> +#define MI_SEMAPHORE_WAIT_TOKEN	MI_INSTR(0x1c, 3) /* GEN12+ */
>  #define   MI_SEMAPHORE_POLL		(1 << 15)
>  #define   MI_SEMAPHORE_SAD_GT_SDD	(0 << 12)
>  #define   MI_SEMAPHORE_SAD_GTE_SDD	(1 << 12)
> @@ -119,6 +120,8 @@
>  #define   MI_SEMAPHORE_SAD_LTE_SDD	(3 << 12)
>  #define   MI_SEMAPHORE_SAD_EQ_SDD	(4 << 12)
>  #define   MI_SEMAPHORE_SAD_NEQ_SDD	(5 << 12)
> +#define   MI_SEMAPHORE_TOKEN_MASK	REG_GENMASK(9, 5)
> +#define   MI_SEMAPHORE_TOKEN_SHIFT	5
>  #define MI_STORE_DWORD_IMM	MI_INSTR(0x20, 1)
>  #define MI_STORE_DWORD_IMM_GEN4	MI_INSTR(0x20, 2)
>  #define   MI_MEM_VIRTUAL	(1 << 22) /* 945,g33,965 */
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index a3f0e4999744..a99166a2d2eb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2879,6 +2879,22 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>  	return gen8_emit_fini_breadcrumb_footer(request, cs);
>  }
>  
> +static u32 *
> +gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> +{
> +	cs = gen8_emit_ggtt_write_rcs(cs,
> +				      request->fence.seqno,
> +				      request->timeline->hwsp_offset,
> +				      PIPE_CONTROL_CS_STALL |
> +				      PIPE_CONTROL_TILE_CACHE_FLUSH |
> +				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> +				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> +				      PIPE_CONTROL_DC_FLUSH_ENABLE |
> +				      PIPE_CONTROL_FLUSH_ENABLE);
> +
> +	return gen8_emit_fini_breadcrumb_footer(request, cs);
> +}
> +
>  /*
>   * Note that the CS instruction pre-parser will not stall on the breadcrumb
>   * flush and will continue pre-fetching the instructions after it before the
> @@ -2897,8 +2913,49 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>   * All the above applies only to the instructions themselves. Non-inline data
>   * used by the instructions is not pre-fetched.
>   */
> -static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
> -					   u32 *cs)
> +
> +static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
> +{
> +	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
> +		MI_SEMAPHORE_GLOBAL_GTT |
> +		MI_SEMAPHORE_POLL |
> +		MI_SEMAPHORE_SAD_EQ_SDD;
> +	*cs++ = 0;
> +	*cs++ = intel_hws_preempt_address(request->engine);
> +	*cs++ = 0;
> +	*cs++ = 0;
> +	*cs++ = MI_NOOP;
> +
> +	return cs;
> +}
> +
> +static __always_inline u32*
> +gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
> +{
> +	*cs++ = MI_USER_INTERRUPT;
> +
> +	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> +	if (intel_engine_has_semaphores(request->engine))
> +		cs = gen12_emit_preempt_busywait(request, cs);
> +
> +	request->tail = intel_ring_offset(request, cs);
> +	assert_ring_tail_valid(request->ring, request->tail);
> +
> +	return gen8_emit_wa_tail(request, cs);
> +}
> +
> +static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
> +{
> +	cs = gen8_emit_ggtt_write(cs,
> +				  request->fence.seqno,
> +				  request->timeline->hwsp_offset,
> +				  0);
> +
> +	return gen12_emit_fini_breadcrumb_footer(request, cs);
> +}
> +
> +static u32 *
> +gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>  {
>  	cs = gen8_emit_ggtt_write_rcs(cs,
>  				      request->fence.seqno,
> @@ -2910,7 +2967,7 @@ static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
>  				      PIPE_CONTROL_DC_FLUSH_ENABLE |
>  				      PIPE_CONTROL_FLUSH_ENABLE);
>  
> -	return gen8_emit_fini_breadcrumb_footer(request, cs);
> +	return gen12_emit_fini_breadcrumb_footer(request, cs);
>  }
>  
>  static void execlists_park(struct intel_engine_cs *engine)
> @@ -2939,9 +2996,6 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
>  			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>  	}
>  
> -	if (INTEL_GEN(engine->i915) >= 12) /* XXX disabled for debugging */
> -		engine->flags &= ~I915_ENGINE_HAS_SEMAPHORES;
> -
>  	if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12)
>  		engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
>  }
> @@ -2971,6 +3025,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>  	engine->emit_flush = gen8_emit_flush;
>  	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
>  	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
> +	if (INTEL_GEN(engine->i915) >= 12)
> +		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
>  
>  	engine->set_default_submission = intel_execlists_set_default_submission;
>  
> @@ -3016,6 +3072,9 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
>  {
>  	switch (INTEL_GEN(engine->i915)) {
>  	case 12:
> +		engine->emit_flush = gen11_emit_flush_render;
> +		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
> +		break;
>  	case 11:
>  		engine->emit_flush = gen11_emit_flush_render;
>  		engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index e4a26bbd8788..fe6941c8fc99 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -797,7 +797,6 @@ static const struct intel_device_info intel_tigerlake_12_info = {
>  	.display.has_modular_fia = 1,
>  	.engine_mask =
>  		BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
> -	.has_logical_ring_preemption = false, /* XXX disabled for debugging */
>  	.engine_mask = BIT(RCS0), /* XXX reduced for debugging */
>  };
>  
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 754a78364a63..3ecf92aa5fc1 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -783,7 +783,9 @@ emit_semaphore_wait(struct i915_request *to,
>  		    struct i915_request *from,
>  		    gfp_t gfp)
>  {
> +	const int has_token = INTEL_GEN(to->i915) >= 12;
>  	u32 hwsp_offset;
> +	int len;
>  	u32 *cs;
>  	int err;
>  
> @@ -810,7 +812,11 @@ emit_semaphore_wait(struct i915_request *to,
>  	if (err)
>  		return err;
>  
> -	cs = intel_ring_begin(to, 4);
> +	len = 4;
> +	if (has_token)
> +		len += 2;
> +
> +	cs = intel_ring_begin(to, len);
>  	if (IS_ERR(cs))
>  		return PTR_ERR(cs);
>  
> @@ -822,13 +828,18 @@ emit_semaphore_wait(struct i915_request *to,
>  	 * (post-wrap) values than they were expecting (and so wait
>  	 * forever).
>  	 */
> -	*cs++ = MI_SEMAPHORE_WAIT |
> -		MI_SEMAPHORE_GLOBAL_GTT |
> -		MI_SEMAPHORE_POLL |
> -		MI_SEMAPHORE_SAD_GTE_SDD;
> +	*cs++ = (MI_SEMAPHORE_WAIT |
> +		 MI_SEMAPHORE_GLOBAL_GTT |
> +		 MI_SEMAPHORE_POLL |
> +		 MI_SEMAPHORE_SAD_GTE_SDD) +
> +		has_token;
>  	*cs++ = from->fence.seqno;
>  	*cs++ = hwsp_offset;
>  	*cs++ = 0;
> +	if (has_token) {
> +		*cs++ = 0;
> +		*cs++ = MI_NOOP;
> +	}
>  
>  	intel_ring_advance(to, cs);
>  	to->sched.semaphores |= from->engine->mask;
> -- 
> 2.23.0

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index fbad403ab7ac..f78b13d74e17 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -112,6 +112,7 @@ 
 #define MI_SEMAPHORE_SIGNAL	MI_INSTR(0x1b, 0) /* GEN8+ */
 #define   MI_SEMAPHORE_TARGET(engine)	((engine)<<15)
 #define MI_SEMAPHORE_WAIT	MI_INSTR(0x1c, 2) /* GEN8+ */
+#define MI_SEMAPHORE_WAIT_TOKEN	MI_INSTR(0x1c, 3) /* GEN12+ */
 #define   MI_SEMAPHORE_POLL		(1 << 15)
 #define   MI_SEMAPHORE_SAD_GT_SDD	(0 << 12)
 #define   MI_SEMAPHORE_SAD_GTE_SDD	(1 << 12)
@@ -119,6 +120,8 @@ 
 #define   MI_SEMAPHORE_SAD_LTE_SDD	(3 << 12)
 #define   MI_SEMAPHORE_SAD_EQ_SDD	(4 << 12)
 #define   MI_SEMAPHORE_SAD_NEQ_SDD	(5 << 12)
+#define   MI_SEMAPHORE_TOKEN_MASK	REG_GENMASK(9, 5)
+#define   MI_SEMAPHORE_TOKEN_SHIFT	5
 #define MI_STORE_DWORD_IMM	MI_INSTR(0x20, 1)
 #define MI_STORE_DWORD_IMM_GEN4	MI_INSTR(0x20, 2)
 #define   MI_MEM_VIRTUAL	(1 << 22) /* 945,g33,965 */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index a3f0e4999744..a99166a2d2eb 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2879,6 +2879,22 @@  static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 	return gen8_emit_fini_breadcrumb_footer(request, cs);
 }
 
+static u32 *
+gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
+{
+	cs = gen8_emit_ggtt_write_rcs(cs,
+				      request->fence.seqno,
+				      request->timeline->hwsp_offset,
+				      PIPE_CONTROL_CS_STALL |
+				      PIPE_CONTROL_TILE_CACHE_FLUSH |
+				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+				      PIPE_CONTROL_DC_FLUSH_ENABLE |
+				      PIPE_CONTROL_FLUSH_ENABLE);
+
+	return gen8_emit_fini_breadcrumb_footer(request, cs);
+}
+
 /*
  * Note that the CS instruction pre-parser will not stall on the breadcrumb
  * flush and will continue pre-fetching the instructions after it before the
@@ -2897,8 +2913,49 @@  static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
  * All the above applies only to the instructions themselves. Non-inline data
  * used by the instructions is not pre-fetched.
  */
-static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
-					   u32 *cs)
+
+static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
+{
+	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
+		MI_SEMAPHORE_GLOBAL_GTT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_EQ_SDD;
+	*cs++ = 0;
+	*cs++ = intel_hws_preempt_address(request->engine);
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = MI_NOOP;
+
+	return cs;
+}
+
+static __always_inline u32*
+gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
+{
+	*cs++ = MI_USER_INTERRUPT;
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	if (intel_engine_has_semaphores(request->engine))
+		cs = gen12_emit_preempt_busywait(request, cs);
+
+	request->tail = intel_ring_offset(request, cs);
+	assert_ring_tail_valid(request->ring, request->tail);
+
+	return gen8_emit_wa_tail(request, cs);
+}
+
+static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+{
+	cs = gen8_emit_ggtt_write(cs,
+				  request->fence.seqno,
+				  request->timeline->hwsp_offset,
+				  0);
+
+	return gen12_emit_fini_breadcrumb_footer(request, cs);
+}
+
+static u32 *
+gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
 	cs = gen8_emit_ggtt_write_rcs(cs,
 				      request->fence.seqno,
@@ -2910,7 +2967,7 @@  static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
 				      PIPE_CONTROL_DC_FLUSH_ENABLE |
 				      PIPE_CONTROL_FLUSH_ENABLE);
 
-	return gen8_emit_fini_breadcrumb_footer(request, cs);
+	return gen12_emit_fini_breadcrumb_footer(request, cs);
 }
 
 static void execlists_park(struct intel_engine_cs *engine)
@@ -2939,9 +2996,6 @@  void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 	}
 
-	if (INTEL_GEN(engine->i915) >= 12) /* XXX disabled for debugging */
-		engine->flags &= ~I915_ENGINE_HAS_SEMAPHORES;
-
 	if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12)
 		engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
 }
@@ -2971,6 +3025,8 @@  logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	engine->emit_flush = gen8_emit_flush;
 	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
 	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
+	if (INTEL_GEN(engine->i915) >= 12)
+		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
 
 	engine->set_default_submission = intel_execlists_set_default_submission;
 
@@ -3016,6 +3072,9 @@  static void rcs_submission_override(struct intel_engine_cs *engine)
 {
 	switch (INTEL_GEN(engine->i915)) {
 	case 12:
+		engine->emit_flush = gen11_emit_flush_render;
+		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
+		break;
 	case 11:
 		engine->emit_flush = gen11_emit_flush_render;
 		engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index e4a26bbd8788..fe6941c8fc99 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -797,7 +797,6 @@  static const struct intel_device_info intel_tigerlake_12_info = {
 	.display.has_modular_fia = 1,
 	.engine_mask =
 		BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
-	.has_logical_ring_preemption = false, /* XXX disabled for debugging */
 	.engine_mask = BIT(RCS0), /* XXX reduced for debugging */
 };
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 754a78364a63..3ecf92aa5fc1 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -783,7 +783,9 @@  emit_semaphore_wait(struct i915_request *to,
 		    struct i915_request *from,
 		    gfp_t gfp)
 {
+	const int has_token = INTEL_GEN(to->i915) >= 12;
 	u32 hwsp_offset;
+	int len;
 	u32 *cs;
 	int err;
 
@@ -810,7 +812,11 @@  emit_semaphore_wait(struct i915_request *to,
 	if (err)
 		return err;
 
-	cs = intel_ring_begin(to, 4);
+	len = 4;
+	if (has_token)
+		len += 2;
+
+	cs = intel_ring_begin(to, len);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
@@ -822,13 +828,18 @@  emit_semaphore_wait(struct i915_request *to,
 	 * (post-wrap) values than they were expecting (and so wait
 	 * forever).
 	 */
-	*cs++ = MI_SEMAPHORE_WAIT |
-		MI_SEMAPHORE_GLOBAL_GTT |
-		MI_SEMAPHORE_POLL |
-		MI_SEMAPHORE_SAD_GTE_SDD;
+	*cs++ = (MI_SEMAPHORE_WAIT |
+		 MI_SEMAPHORE_GLOBAL_GTT |
+		 MI_SEMAPHORE_POLL |
+		 MI_SEMAPHORE_SAD_GTE_SDD) +
+		has_token;
 	*cs++ = from->fence.seqno;
 	*cs++ = hwsp_offset;
 	*cs++ = 0;
+	if (has_token) {
+		*cs++ = 0;
+		*cs++ = MI_NOOP;
+	}
 
 	intel_ring_advance(to, cs);
 	to->sched.semaphores |= from->engine->mask;