diff mbox

drm/i915: Stop reporting error details in dmesg as well as the error-state

Message ID 20161019125203.28851-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Oct. 19, 2016, 12:52 p.m. UTC
As we already capture all the information from the registers into the
error-state, also dumping that to dmesg just generates noise that upsets
CI and users alike (and doesn't provide us with any more information).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c | 86 ++++++-----------------------------------
 1 file changed, 11 insertions(+), 75 deletions(-)

Comments

Mika Kuoppala Oct. 20, 2016, 10:47 a.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> As we already capture all the information from the registers into the
> error-state, also dumping that to dmesg just generates noise that upsets
> CI and users alike (and doesn't provide us with any more information).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 86 ++++++-----------------------------------
>  1 file changed, 11 insertions(+), 75 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 4c40a13d26e4..1d4bc67d27d4 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2578,90 +2578,26 @@ i915_err_print_instdone(struct drm_i915_private *dev_priv,
>  		       slice, subslice, instdone->row[slice][subslice]);
>  }
>  
> -static void i915_report_and_clear_eir(struct drm_i915_private *dev_priv)
> +static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
>  {
> -	struct intel_instdone instdone;
> -	u32 eir = I915_READ(EIR);
> -	int pipe;
> -
> -	if (!eir)
> -		return;
> -
> -	pr_err("render error detected, EIR: 0x%08x\n", eir);
> -
> -	intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
> -
> -	if (IS_G4X(dev_priv)) {
> -		if (eir & (GM45_ERROR_MEM_PRIV | GM45_ERROR_CP_PRIV)) {
> -			u32 ipeir = I915_READ(IPEIR_I965);
> -
> -			pr_err("  IPEIR: 0x%08x\n", I915_READ(IPEIR_I965));
> -			pr_err("  IPEHR: 0x%08x\n", I915_READ(IPEHR_I965));
> -			i915_err_print_instdone(dev_priv, &instdone);
> -			pr_err("  INSTPS: 0x%08x\n", I915_READ(INSTPS));
> -			pr_err("  ACTHD: 0x%08x\n",
> I915_READ(ACTHD_I965));

But these are what we lose. They are not in the error state.

-Mika


> -			I915_WRITE(IPEIR_I965, ipeir);
> -			POSTING_READ(IPEIR_I965);
> -		}
> -		if (eir & GM45_ERROR_PAGE_TABLE) {
> -			u32 pgtbl_err = I915_READ(PGTBL_ER);
> -			pr_err("page table error\n");
> -			pr_err("  PGTBL_ER: 0x%08x\n", pgtbl_err);
> -			I915_WRITE(PGTBL_ER, pgtbl_err);
> -			POSTING_READ(PGTBL_ER);
> -		}
> -	}
> +	u32 eir;
>  
> -	if (!IS_GEN2(dev_priv)) {
> -		if (eir & I915_ERROR_PAGE_TABLE) {
> -			u32 pgtbl_err = I915_READ(PGTBL_ER);
> -			pr_err("page table error\n");
> -			pr_err("  PGTBL_ER: 0x%08x\n", pgtbl_err);
> -			I915_WRITE(PGTBL_ER, pgtbl_err);
> -			POSTING_READ(PGTBL_ER);
> -		}
> -	}
> +	if (!IS_GEN2(dev_priv))
> +		I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER));
>  
> -	if (eir & I915_ERROR_MEMORY_REFRESH) {
> -		pr_err("memory refresh error:\n");
> -		for_each_pipe(dev_priv, pipe)
> -			pr_err("pipe %c stat: 0x%08x\n",
> -			       pipe_name(pipe), I915_READ(PIPESTAT(pipe)));
> -		/* pipestat has already been acked */
> -	}
> -	if (eir & I915_ERROR_INSTRUCTION) {
> -		pr_err("instruction error\n");
> -		pr_err("  INSTPM: 0x%08x\n", I915_READ(INSTPM));
> -		i915_err_print_instdone(dev_priv, &instdone);
> -		if (INTEL_GEN(dev_priv) < 4) {
> -			u32 ipeir = I915_READ(IPEIR);
> -
> -			pr_err("  IPEIR: 0x%08x\n", I915_READ(IPEIR));
> -			pr_err("  IPEHR: 0x%08x\n", I915_READ(IPEHR));
> -			pr_err("  ACTHD: 0x%08x\n", I915_READ(ACTHD));
> -			I915_WRITE(IPEIR, ipeir);
> -			POSTING_READ(IPEIR);
> -		} else {
> -			u32 ipeir = I915_READ(IPEIR_I965);
> -
> -			pr_err("  IPEIR: 0x%08x\n", I915_READ(IPEIR_I965));
> -			pr_err("  IPEHR: 0x%08x\n", I915_READ(IPEHR_I965));
> -			pr_err("  INSTPS: 0x%08x\n", I915_READ(INSTPS));
> -			pr_err("  ACTHD: 0x%08x\n", I915_READ(ACTHD_I965));
> -			I915_WRITE(IPEIR_I965, ipeir);
> -			POSTING_READ(IPEIR_I965);
> -		}
> -	}
> +	if (INTEL_GEN(dev_priv) < 4)
> +		I915_WRITE(IPEIR, I915_READ(IPEIR));
> +	else
> +		I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965));
>  
> -	I915_WRITE(EIR, eir);
> -	POSTING_READ(EIR);
> +	I915_WRITE(EIR, I915_READ(EIR));
>  	eir = I915_READ(EIR);
>  	if (eir) {
>  		/*
>  		 * some errors might have become stuck,
>  		 * mask them.
>  		 */
> -		DRM_ERROR("EIR stuck: 0x%08x, masking\n", eir);
> +		DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
>  		I915_WRITE(EMR, I915_READ(EMR) | eir);
>  		I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
>  	}
> @@ -2690,7 +2626,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
>  	va_end(args);
>  
>  	i915_capture_error_state(dev_priv, engine_mask, error_msg);
> -	i915_report_and_clear_eir(dev_priv);
> +	i915_clear_error_registers(dev_priv);
>  
>  	if (!engine_mask)
>  		return;
> -- 
> 2.9.3
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Oct. 20, 2016, 10:56 a.m. UTC | #2
On Thu, Oct 20, 2016 at 01:47:28PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > As we already capture all the information from the registers into the
> > error-state, also dumping that to dmesg just generates noise that upsets
> > CI and users alike (and doesn't provide us with any more information).
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_irq.c | 86 ++++++-----------------------------------
> >  1 file changed, 11 insertions(+), 75 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index 4c40a13d26e4..1d4bc67d27d4 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -2578,90 +2578,26 @@ i915_err_print_instdone(struct drm_i915_private *dev_priv,
> >  		       slice, subslice, instdone->row[slice][subslice]);
> >  }
> >  
> > -static void i915_report_and_clear_eir(struct drm_i915_private *dev_priv)
> > +static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
> >  {
> > -	struct intel_instdone instdone;
> > -	u32 eir = I915_READ(EIR);
> > -	int pipe;
> > -
> > -	if (!eir)
> > -		return;
> > -
> > -	pr_err("render error detected, EIR: 0x%08x\n", eir);
> > -
> > -	intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
> > -
> > -	if (IS_G4X(dev_priv)) {
> > -		if (eir & (GM45_ERROR_MEM_PRIV | GM45_ERROR_CP_PRIV)) {
> > -			u32 ipeir = I915_READ(IPEIR_I965);
> > -
> > -			pr_err("  IPEIR: 0x%08x\n", I915_READ(IPEIR_I965));
> > -			pr_err("  IPEHR: 0x%08x\n", I915_READ(IPEHR_I965));
> > -			i915_err_print_instdone(dev_priv, &instdone);
> > -			pr_err("  INSTPS: 0x%08x\n", I915_READ(INSTPS));
> > -			pr_err("  ACTHD: 0x%08x\n",
> > I915_READ(ACTHD_I965));
> 
> But these are what we lose. They are not in the error state.

EIR, IPEIR, IPEHR, INSTPS, and ACTHD (note these are per-ring) are all in
the error state. What am I missing?
-Chris
Mika Kuoppala Oct. 20, 2016, 11:45 a.m. UTC | #3
Chris Wilson <chris@chris-wilson.co.uk> writes:

> On Thu, Oct 20, 2016 at 01:47:28PM +0300, Mika Kuoppala wrote:
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > As we already capture all the information from the registers into the
>> > error-state, also dumping that to dmesg just generates noise that upsets
>> > CI and users alike (and doesn't provide us with any more information).
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > ---
>> >  drivers/gpu/drm/i915/i915_irq.c | 86 ++++++-----------------------------------
>> >  1 file changed, 11 insertions(+), 75 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
>> > index 4c40a13d26e4..1d4bc67d27d4 100644
>> > --- a/drivers/gpu/drm/i915/i915_irq.c
>> > +++ b/drivers/gpu/drm/i915/i915_irq.c
>> > @@ -2578,90 +2578,26 @@ i915_err_print_instdone(struct drm_i915_private *dev_priv,
>> >  		       slice, subslice, instdone->row[slice][subslice]);
>> >  }
>> >  
>> > -static void i915_report_and_clear_eir(struct drm_i915_private *dev_priv)
>> > +static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
>> >  {
>> > -	struct intel_instdone instdone;
>> > -	u32 eir = I915_READ(EIR);
>> > -	int pipe;
>> > -
>> > -	if (!eir)
>> > -		return;
>> > -
>> > -	pr_err("render error detected, EIR: 0x%08x\n", eir);
>> > -
>> > -	intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
>> > -
>> > -	if (IS_G4X(dev_priv)) {
>> > -		if (eir & (GM45_ERROR_MEM_PRIV | GM45_ERROR_CP_PRIV)) {
>> > -			u32 ipeir = I915_READ(IPEIR_I965);
>> > -
>> > -			pr_err("  IPEIR: 0x%08x\n", I915_READ(IPEIR_I965));
>> > -			pr_err("  IPEHR: 0x%08x\n", I915_READ(IPEHR_I965));
>> > -			i915_err_print_instdone(dev_priv, &instdone);
>> > -			pr_err("  INSTPS: 0x%08x\n", I915_READ(INSTPS));
>> > -			pr_err("  ACTHD: 0x%08x\n",
>> > I915_READ(ACTHD_I965));
>> 
>> But these are what we lose. They are not in the error state.
>
> EIR, IPEIR, IPEHR, INSTPS, and ACTHD (note these are per-ring) are all in
> the error state. What am I missing?

Grep picked up stuff from i810_regs.h and I was fooled. My bad.

Indeed they are already in the error state so the commit message
is correct about the information we already have.

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> -Chris
>
> -- 
> Chris Wilson, Intel Open Source Technology Centre
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 4c40a13d26e4..1d4bc67d27d4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2578,90 +2578,26 @@  i915_err_print_instdone(struct drm_i915_private *dev_priv,
 		       slice, subslice, instdone->row[slice][subslice]);
 }
 
-static void i915_report_and_clear_eir(struct drm_i915_private *dev_priv)
+static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
 {
-	struct intel_instdone instdone;
-	u32 eir = I915_READ(EIR);
-	int pipe;
-
-	if (!eir)
-		return;
-
-	pr_err("render error detected, EIR: 0x%08x\n", eir);
-
-	intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
-
-	if (IS_G4X(dev_priv)) {
-		if (eir & (GM45_ERROR_MEM_PRIV | GM45_ERROR_CP_PRIV)) {
-			u32 ipeir = I915_READ(IPEIR_I965);
-
-			pr_err("  IPEIR: 0x%08x\n", I915_READ(IPEIR_I965));
-			pr_err("  IPEHR: 0x%08x\n", I915_READ(IPEHR_I965));
-			i915_err_print_instdone(dev_priv, &instdone);
-			pr_err("  INSTPS: 0x%08x\n", I915_READ(INSTPS));
-			pr_err("  ACTHD: 0x%08x\n", I915_READ(ACTHD_I965));
-			I915_WRITE(IPEIR_I965, ipeir);
-			POSTING_READ(IPEIR_I965);
-		}
-		if (eir & GM45_ERROR_PAGE_TABLE) {
-			u32 pgtbl_err = I915_READ(PGTBL_ER);
-			pr_err("page table error\n");
-			pr_err("  PGTBL_ER: 0x%08x\n", pgtbl_err);
-			I915_WRITE(PGTBL_ER, pgtbl_err);
-			POSTING_READ(PGTBL_ER);
-		}
-	}
+	u32 eir;
 
-	if (!IS_GEN2(dev_priv)) {
-		if (eir & I915_ERROR_PAGE_TABLE) {
-			u32 pgtbl_err = I915_READ(PGTBL_ER);
-			pr_err("page table error\n");
-			pr_err("  PGTBL_ER: 0x%08x\n", pgtbl_err);
-			I915_WRITE(PGTBL_ER, pgtbl_err);
-			POSTING_READ(PGTBL_ER);
-		}
-	}
+	if (!IS_GEN2(dev_priv))
+		I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER));
 
-	if (eir & I915_ERROR_MEMORY_REFRESH) {
-		pr_err("memory refresh error:\n");
-		for_each_pipe(dev_priv, pipe)
-			pr_err("pipe %c stat: 0x%08x\n",
-			       pipe_name(pipe), I915_READ(PIPESTAT(pipe)));
-		/* pipestat has already been acked */
-	}
-	if (eir & I915_ERROR_INSTRUCTION) {
-		pr_err("instruction error\n");
-		pr_err("  INSTPM: 0x%08x\n", I915_READ(INSTPM));
-		i915_err_print_instdone(dev_priv, &instdone);
-		if (INTEL_GEN(dev_priv) < 4) {
-			u32 ipeir = I915_READ(IPEIR);
-
-			pr_err("  IPEIR: 0x%08x\n", I915_READ(IPEIR));
-			pr_err("  IPEHR: 0x%08x\n", I915_READ(IPEHR));
-			pr_err("  ACTHD: 0x%08x\n", I915_READ(ACTHD));
-			I915_WRITE(IPEIR, ipeir);
-			POSTING_READ(IPEIR);
-		} else {
-			u32 ipeir = I915_READ(IPEIR_I965);
-
-			pr_err("  IPEIR: 0x%08x\n", I915_READ(IPEIR_I965));
-			pr_err("  IPEHR: 0x%08x\n", I915_READ(IPEHR_I965));
-			pr_err("  INSTPS: 0x%08x\n", I915_READ(INSTPS));
-			pr_err("  ACTHD: 0x%08x\n", I915_READ(ACTHD_I965));
-			I915_WRITE(IPEIR_I965, ipeir);
-			POSTING_READ(IPEIR_I965);
-		}
-	}
+	if (INTEL_GEN(dev_priv) < 4)
+		I915_WRITE(IPEIR, I915_READ(IPEIR));
+	else
+		I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965));
 
-	I915_WRITE(EIR, eir);
-	POSTING_READ(EIR);
+	I915_WRITE(EIR, I915_READ(EIR));
 	eir = I915_READ(EIR);
 	if (eir) {
 		/*
 		 * some errors might have become stuck,
 		 * mask them.
 		 */
-		DRM_ERROR("EIR stuck: 0x%08x, masking\n", eir);
+		DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
 		I915_WRITE(EMR, I915_READ(EMR) | eir);
 		I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
 	}
@@ -2690,7 +2626,7 @@  void i915_handle_error(struct drm_i915_private *dev_priv,
 	va_end(args);
 
 	i915_capture_error_state(dev_priv, engine_mask, error_msg);
-	i915_report_and_clear_eir(dev_priv);
+	i915_clear_error_registers(dev_priv);
 
 	if (!engine_mask)
 		return;