diff mbox series

[2/3] drm/i915: Handle catastrophic error on engine reset

Message ID 20190412153723.31931-2-mika.kuoppala@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series [1/3] drm/i915: Shortcut readiness to reset check | expand

Commit Message

Mika Kuoppala April 12, 2019, 3:37 p.m. UTC
If cat error is set, we need to clear it by acking it. Further,
if it is set, we must not do a normal request for reset.

Bspec: 12567
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h   |  6 +++--
 drivers/gpu/drm/i915/i915_reset.c | 39 +++++++++++++++++++++----------
 2 files changed, 31 insertions(+), 14 deletions(-)

Comments

Chris Wilson April 12, 2019, 3:49 p.m. UTC | #1
Quoting Mika Kuoppala (2019-04-12 16:37:22)
> If cat error is set, we need to clear it by acking it. Further,
> if it is set, we must not do a normal request for reset.
> 
> Bspec: 12567
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_reg.h   |  6 +++--
>  drivers/gpu/drm/i915/i915_reset.c | 39 +++++++++++++++++++++----------
>  2 files changed, 31 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 8ad2f0a03f28..c1c0f7ab03e9 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -2446,8 +2446,10 @@ enum i915_power_well_id {
>  #define RING_HWS_PGA(base)     _MMIO((base) + 0x80)
>  #define RING_HWS_PGA_GEN6(base)        _MMIO((base) + 0x2080)
>  #define RING_RESET_CTL(base)   _MMIO((base) + 0xd0)
> -#define   RESET_CTL_REQUEST_RESET  (1 << 0)
> -#define   RESET_CTL_READY_TO_RESET (1 << 1)
> +#define   RESET_CTL_CAT_ERROR     REG_BIT(2)
> +#define   RESET_CTL_READY_TO_RESET REG_BIT(1)
> +#define   RESET_CTL_REQUEST_RESET  REG_BIT(0)
> +
>  #define RING_SEMA_WAIT_POLL(base) _MMIO((base) + 0x24c)
>  
>  #define HSW_GTT_CACHE_EN       _MMIO(0x4024)
> diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
> index cde1a5309336..06310ee5a68a 100644
> --- a/drivers/gpu/drm/i915/i915_reset.c
> +++ b/drivers/gpu/drm/i915/i915_reset.c
> @@ -490,25 +490,40 @@ static int gen11_reset_engines(struct drm_i915_private *i915,
>  static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
>  {
>         struct intel_uncore *uncore = engine->uncore;
> -       u32 ctl;
> +       const i915_reg_t reg = RING_RESET_CTL(engine->mmio_base);
> +       u32 ctl, ack = 0, mask = 0, request = 0;
>         int ret;
>  
> -       ctl = intel_uncore_read_fw(uncore, RING_RESET_CTL(engine->mmio_base));
> -       if (ctl & RESET_CTL_READY_TO_RESET)
> +       ctl = intel_uncore_read_fw(uncore, reg);
> +
> +       if (INTEL_GEN(engine->i915) > 9 && (ctl & RESET_CTL_CAT_ERROR)) {
> +               request |= RESET_CTL_CAT_ERROR;
> +               mask |= RESET_CTL_CAT_ERROR;
> +
> +               /* HAS#396813: Avoid reset request if cat error */
> +               goto skip_ready_req;
> +       }

Doesn't look like you need a goto here.

if (ctl & CAT_ERROR) { /* CAT_ERROR shouldn't be raised on gen8-9 */
	request = RESET_CTL_CAT_ERROR;
	mask = RESET_CTL_CAT_ERROR;
} else if (!(ctl & RESET_CTL_READY_TO_RESET)) {
	request = RESET_CTL_REQUEST_RESET;
	mask = RESET_CTL_READY_TO_RESET;
	ack = RESET_CTL_READY_TO_RESET;
} else {
	return 0;
}

Right?
	
> +
> +       if (!(ctl & RESET_CTL_READY_TO_RESET)) {
> +               request |= RESET_CTL_REQUEST_RESET;
> +
> +               mask |= RESET_CTL_READY_TO_RESET;
> +               ack |= RESET_CTL_READY_TO_RESET;
> +       }
> +
> +       if (!request)
>                 return 0;
>  
> -       intel_uncore_write_fw(uncore,
> -                             RING_RESET_CTL(engine->mmio_base),
> -                             _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
> +skip_ready_req:
> +       intel_uncore_write_fw(uncore, reg, _MASKED_BIT_ENABLE(request));
>  
>         ret = __intel_wait_for_register_fw(uncore,
> -                                          RING_RESET_CTL(engine->mmio_base),
> -                                          RESET_CTL_READY_TO_RESET,
> -                                          RESET_CTL_READY_TO_RESET,
> -                                          700, 0,
> -                                          NULL);
> +                                          reg, mask, ack,
> +                                          700, 0, NULL);
>         if (ret)
> -               DRM_ERROR("%s: reset request timeout\n", engine->name);
> +               DRM_ERROR("%s: reset request 0x%08x timeout 0x%08x\n",
> +                         engine->name, request,
> +                         intel_uncore_read_fw(uncore, reg));

Interesting, the only quibble I have is with "request". But it works well
enough in context.
-Chris
Mika Kuoppala April 12, 2019, 3:58 p.m. UTC | #2
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-04-12 16:37:22)
>> If cat error is set, we need to clear it by acking it. Further,
>> if it is set, we must not do a normal request for reset.
>> 
>> Bspec: 12567
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_reg.h   |  6 +++--
>>  drivers/gpu/drm/i915/i915_reset.c | 39 +++++++++++++++++++++----------
>>  2 files changed, 31 insertions(+), 14 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index 8ad2f0a03f28..c1c0f7ab03e9 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -2446,8 +2446,10 @@ enum i915_power_well_id {
>>  #define RING_HWS_PGA(base)     _MMIO((base) + 0x80)
>>  #define RING_HWS_PGA_GEN6(base)        _MMIO((base) + 0x2080)
>>  #define RING_RESET_CTL(base)   _MMIO((base) + 0xd0)
>> -#define   RESET_CTL_REQUEST_RESET  (1 << 0)
>> -#define   RESET_CTL_READY_TO_RESET (1 << 1)
>> +#define   RESET_CTL_CAT_ERROR     REG_BIT(2)
>> +#define   RESET_CTL_READY_TO_RESET REG_BIT(1)
>> +#define   RESET_CTL_REQUEST_RESET  REG_BIT(0)
>> +
>>  #define RING_SEMA_WAIT_POLL(base) _MMIO((base) + 0x24c)
>>  
>>  #define HSW_GTT_CACHE_EN       _MMIO(0x4024)
>> diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
>> index cde1a5309336..06310ee5a68a 100644
>> --- a/drivers/gpu/drm/i915/i915_reset.c
>> +++ b/drivers/gpu/drm/i915/i915_reset.c
>> @@ -490,25 +490,40 @@ static int gen11_reset_engines(struct drm_i915_private *i915,
>>  static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
>>  {
>>         struct intel_uncore *uncore = engine->uncore;
>> -       u32 ctl;
>> +       const i915_reg_t reg = RING_RESET_CTL(engine->mmio_base);
>> +       u32 ctl, ack = 0, mask = 0, request = 0;
>>         int ret;
>>  
>> -       ctl = intel_uncore_read_fw(uncore, RING_RESET_CTL(engine->mmio_base));
>> -       if (ctl & RESET_CTL_READY_TO_RESET)
>> +       ctl = intel_uncore_read_fw(uncore, reg);
>> +
>> +       if (INTEL_GEN(engine->i915) > 9 && (ctl & RESET_CTL_CAT_ERROR)) {
>> +               request |= RESET_CTL_CAT_ERROR;
>> +               mask |= RESET_CTL_CAT_ERROR;
>> +
>> +               /* HAS#396813: Avoid reset request if cat error */
>> +               goto skip_ready_req;
>> +       }
>
> Doesn't look like you need a goto here.
>
> if (ctl & CAT_ERROR) { /* CAT_ERROR shouldn't be raised on gen8-9 */
> 	request = RESET_CTL_CAT_ERROR;
> 	mask = RESET_CTL_CAT_ERROR;
> } else if (!(ctl & RESET_CTL_READY_TO_RESET)) {
> 	request = RESET_CTL_REQUEST_RESET;
> 	mask = RESET_CTL_READY_TO_RESET;
> 	ack = RESET_CTL_READY_TO_RESET;
> } else {
> 	return 0;
> }
>
> Right?

Right no goto needed and ta for writing it out above.

The bit was 'reserved' on previous gens, but we can safely assume
it is zero and stays that way.

-Mika

> 	
>> +
>> +       if (!(ctl & RESET_CTL_READY_TO_RESET)) {
>> +               request |= RESET_CTL_REQUEST_RESET;
>> +
>> +               mask |= RESET_CTL_READY_TO_RESET;
>> +               ack |= RESET_CTL_READY_TO_RESET;
>> +       }
>> +
>> +       if (!request)
>>                 return 0;
>>  
>> -       intel_uncore_write_fw(uncore,
>> -                             RING_RESET_CTL(engine->mmio_base),
>> -                             _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
>> +skip_ready_req:
>> +       intel_uncore_write_fw(uncore, reg, _MASKED_BIT_ENABLE(request));
>>  
>>         ret = __intel_wait_for_register_fw(uncore,
>> -                                          RING_RESET_CTL(engine->mmio_base),
>> -                                          RESET_CTL_READY_TO_RESET,
>> -                                          RESET_CTL_READY_TO_RESET,
>> -                                          700, 0,
>> -                                          NULL);
>> +                                          reg, mask, ack,
>> +                                          700, 0, NULL);
>>         if (ret)
>> -               DRM_ERROR("%s: reset request timeout\n", engine->name);
>> +               DRM_ERROR("%s: reset request 0x%08x timeout 0x%08x\n",
>> +                         engine->name, request,
>> +                         intel_uncore_read_fw(uncore, reg));
>
> Interesting, the only quibble I have is with "request". But it works well
> enough in context.
> -Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8ad2f0a03f28..c1c0f7ab03e9 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2446,8 +2446,10 @@  enum i915_power_well_id {
 #define RING_HWS_PGA(base)	_MMIO((base) + 0x80)
 #define RING_HWS_PGA_GEN6(base)	_MMIO((base) + 0x2080)
 #define RING_RESET_CTL(base)	_MMIO((base) + 0xd0)
-#define   RESET_CTL_REQUEST_RESET  (1 << 0)
-#define   RESET_CTL_READY_TO_RESET (1 << 1)
+#define   RESET_CTL_CAT_ERROR	   REG_BIT(2)
+#define   RESET_CTL_READY_TO_RESET REG_BIT(1)
+#define   RESET_CTL_REQUEST_RESET  REG_BIT(0)
+
 #define RING_SEMA_WAIT_POLL(base) _MMIO((base) + 0x24c)
 
 #define HSW_GTT_CACHE_EN	_MMIO(0x4024)
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index cde1a5309336..06310ee5a68a 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -490,25 +490,40 @@  static int gen11_reset_engines(struct drm_i915_private *i915,
 static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
 {
 	struct intel_uncore *uncore = engine->uncore;
-	u32 ctl;
+	const i915_reg_t reg = RING_RESET_CTL(engine->mmio_base);
+	u32 ctl, ack = 0, mask = 0, request = 0;
 	int ret;
 
-	ctl = intel_uncore_read_fw(uncore, RING_RESET_CTL(engine->mmio_base));
-	if (ctl & RESET_CTL_READY_TO_RESET)
+	ctl = intel_uncore_read_fw(uncore, reg);
+
+	if (INTEL_GEN(engine->i915) > 9 && (ctl & RESET_CTL_CAT_ERROR)) {
+		request |= RESET_CTL_CAT_ERROR;
+		mask |= RESET_CTL_CAT_ERROR;
+
+		/* HAS#396813: Avoid reset request if cat error */
+		goto skip_ready_req;
+	}
+
+	if (!(ctl & RESET_CTL_READY_TO_RESET)) {
+		request |= RESET_CTL_REQUEST_RESET;
+
+		mask |= RESET_CTL_READY_TO_RESET;
+		ack |= RESET_CTL_READY_TO_RESET;
+	}
+
+	if (!request)
 		return 0;
 
-	intel_uncore_write_fw(uncore,
-			      RING_RESET_CTL(engine->mmio_base),
-			      _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
+skip_ready_req:
+	intel_uncore_write_fw(uncore, reg, _MASKED_BIT_ENABLE(request));
 
 	ret = __intel_wait_for_register_fw(uncore,
-					   RING_RESET_CTL(engine->mmio_base),
-					   RESET_CTL_READY_TO_RESET,
-					   RESET_CTL_READY_TO_RESET,
-					   700, 0,
-					   NULL);
+					   reg, mask, ack,
+					   700, 0, NULL);
 	if (ret)
-		DRM_ERROR("%s: reset request timeout\n", engine->name);
+		DRM_ERROR("%s: reset request 0x%08x timeout 0x%08x\n",
+			  engine->name, request,
+			  intel_uncore_read_fw(uncore, reg));
 
 	return ret;
 }