diff mbox

[7/7] drm/i915: print Gen 7 error interrupts

Message ID 1359147462-3902-8-git-send-email-przanoni@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Paulo Zanoni Jan. 25, 2013, 8:57 p.m. UTC
From: Paulo Zanoni <paulo.r.zanoni@intel.com>

If we get one of these messages it means we did something wrong, and
the first step to fix wrong things is to detect them and recognize
they exist.

For now, leave most messages as DRM_DEBUG_KMS. After we conclude that
a certain message should not be print since our code is correct, then
we can promote that specific message to DRM_ERROR.

Also notice that on Haswell the only interrupt I ever get is for
"unclaimed registers", so it looks like at least on Haswell we're in a
good shape.

Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c |   28 +++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_reg.h |    2 +-
 2 files changed, 28 insertions(+), 2 deletions(-)

Comments

Ben Widawsky Jan. 26, 2013, 1:24 a.m. UTC | #1
On Fri, 25 Jan 2013 18:57:42 -0200
Paulo Zanoni <przanoni@gmail.com> wrote:

> From: Paulo Zanoni <paulo.r.zanoni@intel.com>
> 
> If we get one of these messages it means we did something wrong, and
> the first step to fix wrong things is to detect them and recognize
> they exist.
> 
> For now, leave most messages as DRM_DEBUG_KMS. After we conclude that
> a certain message should not be print since our code is correct, then
> we can promote that specific message to DRM_ERROR.
> 
> Also notice that on Haswell the only interrupt I ever get is for
> "unclaimed registers", so it looks like at least on Haswell we're in a
> good shape.
Please take some of my cynicism, you are way too optimistic :p

I have a lot of concerns with this patch touch FPGA_DEBUG and races.

Third, I think to be super paranoid you might want to disable the ERR_INT while
handling interrupts.

> 
> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_irq.c |   28 +++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_reg.h |    2 +-
>  2 files changed, 28 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 943db10..c2136cd 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -697,6 +697,17 @@ static void cpt_irq_handler(struct drm_device *dev, u32 pch_iir)
>  					 I915_READ(FDI_RX_IIR(pipe)));
>  }
>  
> +static void ivb_err_int_handler(struct drm_device *dev)
> +{
> +	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
> +	u32 err_int = I915_READ(GEN7_ERR_INT);
> +
> +	if (err_int)
> +		DRM_DEBUG_KMS("Error interrupt: 0x%08x\n", err_int);
> +
> +	I915_WRITE(GEN7_ERR_INT, err_int);
> +}
> +
>  static irqreturn_t ivybridge_irq_handler(int irq, void *arg)
>  {
>  	struct drm_device *dev = (struct drm_device *) arg;
> @@ -707,6 +718,14 @@ static irqreturn_t ivybridge_irq_handler(int irq, void *arg)
>  
>  	atomic_inc(&dev_priv->irq_received);
>  
> +	/* We get interrupts on unclaimed registers, so check this before we do
> +	 * any I915_{READ,WRITE}. */
> +	if (drm_debug && IS_HASWELL(dev) &&
> +	    (I915_READ_NOTRACE(FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) {
> +		DRM_ERROR("Unclaimed register before interrupt\n");
> +		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
> +	}
> +
>  	/* disable master interrupt before clearing iir  */
>  	de_ier = I915_READ(DEIER);
>  	I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL);

I don't really understand what you're trying to do with this, and I
think this is quite racy since you're usually protecting FPGA_DBG with
struct_mutex, but not here. Since it's mostly for debug, maybe you can
convince me why it should be here, and I'll drop my complaint.

> @@ -720,6 +739,9 @@ static irqreturn_t ivybridge_irq_handler(int irq, void *arg)
>  
>  	de_iir = I915_READ(DEIIR);
>  	if (de_iir) {
> +		if (de_iir & DE_ERR_INT_IVB)
> +			ivb_err_int_handler(dev);
> +
>  		if (de_iir & DE_AUX_CHANNEL_A_IVB)
>  			dp_aux_irq_handler(dev);
>  
> @@ -2006,7 +2028,8 @@ static int ivybridge_irq_postinstall(struct drm_device *dev)
>  		DE_PLANEC_FLIP_DONE_IVB |
>  		DE_PLANEB_FLIP_DONE_IVB |
>  		DE_PLANEA_FLIP_DONE_IVB |
> -		DE_AUX_CHANNEL_A_IVB;
> +		DE_AUX_CHANNEL_A_IVB |
> +		DE_ERR_INT_IVB;
>  	u32 render_irqs;
>  	u32 hotplug_mask;
>  	u32 pch_irq_mask;
> @@ -2014,6 +2037,7 @@ static int ivybridge_irq_postinstall(struct drm_device *dev)
>  	dev_priv->irq_mask = ~display_mask;
>  
>  	/* should always can generate irq */
> +	I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
>  	I915_WRITE(DEIIR, I915_READ(DEIIR));
>  	I915_WRITE(DEIMR, dev_priv->irq_mask);
>  	I915_WRITE(DEIER,
> @@ -2177,6 +2201,8 @@ static void ironlake_irq_uninstall(struct drm_device *dev)
>  	I915_WRITE(DEIMR, 0xffffffff);
>  	I915_WRITE(DEIER, 0x0);
>  	I915_WRITE(DEIIR, I915_READ(DEIIR));
> +	if (IS_GEN7(dev))
> +		I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
>  
>  	I915_WRITE(GTIMR, 0xffffffff);
>  	I915_WRITE(GTIER, 0x0);
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index ee30fb9..86cace1 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -3379,7 +3379,7 @@
>  #define DE_PIPEA_FIFO_UNDERRUN  (1 << 0)
>  
>  /* More Ivybridge lolz */
> -#define DE_ERR_DEBUG_IVB		(1<<30)
> +#define DE_ERR_INT_IVB			(1<<30)
>  #define DE_GSE_IVB			(1<<29)
>  #define DE_PCH_EVENT_IVB		(1<<28)
>  #define DE_DP_A_HOTPLUG_IVB		(1<<27)
Paulo Zanoni Feb. 8, 2013, 6:22 p.m. UTC | #2
Hi

2013/1/25 Ben Widawsky <ben@bwidawsk.net>:
> On Fri, 25 Jan 2013 18:57:42 -0200
> Paulo Zanoni <przanoni@gmail.com> wrote:
>
>> From: Paulo Zanoni <paulo.r.zanoni@intel.com>
>>
>> If we get one of these messages it means we did something wrong, and
>> the first step to fix wrong things is to detect them and recognize
>> they exist.
>>
>> For now, leave most messages as DRM_DEBUG_KMS. After we conclude that
>> a certain message should not be print since our code is correct, then
>> we can promote that specific message to DRM_ERROR.
>>
>> Also notice that on Haswell the only interrupt I ever get is for
>> "unclaimed registers", so it looks like at least on Haswell we're in a
>> good shape.
> Please take some of my cynicism, you are way too optimistic :p

The new patch won't just print everything.

>
> I have a lot of concerns with this patch touch FPGA_DEBUG and races.

Please explain.

>
> Third, I think to be super paranoid you might want to disable the ERR_INT while
> handling interrupts.

Yes, we need to mask ERR_INT inside ivybridge_irq_handler, otherwise
we may get an infinite loop in case we write to an unclaimed register
inside the irq handler. This will be fixed in the next version. The
good thing is that even if ERR_INT is masked, we can still detect
unclaimed registers inside the irq handler because we still check
FPGA_DBG (which doesn't give us interrputs) :)

>
>>
>> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_irq.c |   28 +++++++++++++++++++++++++++-
>>  drivers/gpu/drm/i915/i915_reg.h |    2 +-
>>  2 files changed, 28 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
>> index 943db10..c2136cd 100644
>> --- a/drivers/gpu/drm/i915/i915_irq.c
>> +++ b/drivers/gpu/drm/i915/i915_irq.c
>> @@ -697,6 +697,17 @@ static void cpt_irq_handler(struct drm_device *dev, u32 pch_iir)
>>                                        I915_READ(FDI_RX_IIR(pipe)));
>>  }
>>
>> +static void ivb_err_int_handler(struct drm_device *dev)
>> +{
>> +     drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
>> +     u32 err_int = I915_READ(GEN7_ERR_INT);
>> +
>> +     if (err_int)
>> +             DRM_DEBUG_KMS("Error interrupt: 0x%08x\n", err_int);
>> +
>> +     I915_WRITE(GEN7_ERR_INT, err_int);
>> +}
>> +
>>  static irqreturn_t ivybridge_irq_handler(int irq, void *arg)
>>  {
>>       struct drm_device *dev = (struct drm_device *) arg;
>> @@ -707,6 +718,14 @@ static irqreturn_t ivybridge_irq_handler(int irq, void *arg)
>>
>>       atomic_inc(&dev_priv->irq_received);
>>
>> +     /* We get interrupts on unclaimed registers, so check this before we do
>> +      * any I915_{READ,WRITE}. */
>> +     if (drm_debug && IS_HASWELL(dev) &&
>> +         (I915_READ_NOTRACE(FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) {
>> +             DRM_ERROR("Unclaimed register before interrupt\n");
>> +             I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
>> +     }
>> +
>>       /* disable master interrupt before clearing iir  */
>>       de_ier = I915_READ(DEIER);
>>       I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL);
>
> I don't really understand what you're trying to do with this, and I
> think this is quite racy since you're usually protecting FPGA_DBG with
> struct_mutex, but not here. Since it's mostly for debug, maybe you can
> convince me why it should be here, and I'll drop my complaint.

On i915_write##x we do:
1 - writel(reg_that_doesnt_exist)
2 - read FPGA_DBG
3 - print error message
4 - clear FPGA_DBG

The problem is that sometimes we get the interrupt between steps 1 and
2, or 2 and 3, or 3 and 4. And since the interrupt handler also does
I915_WRITE, we may do the "FPGA_DBG" check inside the IRQ handler
before the "original" i915_WRITE check because the interrupt arrived
too fast.

>
>> @@ -720,6 +739,9 @@ static irqreturn_t ivybridge_irq_handler(int irq, void *arg)
>>
>>       de_iir = I915_READ(DEIIR);
>>       if (de_iir) {
>> +             if (de_iir & DE_ERR_INT_IVB)
>> +                     ivb_err_int_handler(dev);
>> +
>>               if (de_iir & DE_AUX_CHANNEL_A_IVB)
>>                       dp_aux_irq_handler(dev);
>>
>> @@ -2006,7 +2028,8 @@ static int ivybridge_irq_postinstall(struct drm_device *dev)
>>               DE_PLANEC_FLIP_DONE_IVB |
>>               DE_PLANEB_FLIP_DONE_IVB |
>>               DE_PLANEA_FLIP_DONE_IVB |
>> -             DE_AUX_CHANNEL_A_IVB;
>> +             DE_AUX_CHANNEL_A_IVB |
>> +             DE_ERR_INT_IVB;
>>       u32 render_irqs;
>>       u32 hotplug_mask;
>>       u32 pch_irq_mask;
>> @@ -2014,6 +2037,7 @@ static int ivybridge_irq_postinstall(struct drm_device *dev)
>>       dev_priv->irq_mask = ~display_mask;
>>
>>       /* should always can generate irq */
>> +     I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
>>       I915_WRITE(DEIIR, I915_READ(DEIIR));
>>       I915_WRITE(DEIMR, dev_priv->irq_mask);
>>       I915_WRITE(DEIER,
>> @@ -2177,6 +2201,8 @@ static void ironlake_irq_uninstall(struct drm_device *dev)
>>       I915_WRITE(DEIMR, 0xffffffff);
>>       I915_WRITE(DEIER, 0x0);
>>       I915_WRITE(DEIIR, I915_READ(DEIIR));
>> +     if (IS_GEN7(dev))
>> +             I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
>>
>>       I915_WRITE(GTIMR, 0xffffffff);
>>       I915_WRITE(GTIER, 0x0);
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index ee30fb9..86cace1 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -3379,7 +3379,7 @@
>>  #define DE_PIPEA_FIFO_UNDERRUN  (1 << 0)
>>
>>  /* More Ivybridge lolz */
>> -#define DE_ERR_DEBUG_IVB             (1<<30)
>> +#define DE_ERR_INT_IVB                       (1<<30)
>>  #define DE_GSE_IVB                   (1<<29)
>>  #define DE_PCH_EVENT_IVB             (1<<28)
>>  #define DE_DP_A_HOTPLUG_IVB          (1<<27)
>
>
>
> --
> Ben Widawsky, Intel Open Source Technology Center
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 943db10..c2136cd 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -697,6 +697,17 @@  static void cpt_irq_handler(struct drm_device *dev, u32 pch_iir)
 					 I915_READ(FDI_RX_IIR(pipe)));
 }
 
+static void ivb_err_int_handler(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	u32 err_int = I915_READ(GEN7_ERR_INT);
+
+	if (err_int)
+		DRM_DEBUG_KMS("Error interrupt: 0x%08x\n", err_int);
+
+	I915_WRITE(GEN7_ERR_INT, err_int);
+}
+
 static irqreturn_t ivybridge_irq_handler(int irq, void *arg)
 {
 	struct drm_device *dev = (struct drm_device *) arg;
@@ -707,6 +718,14 @@  static irqreturn_t ivybridge_irq_handler(int irq, void *arg)
 
 	atomic_inc(&dev_priv->irq_received);
 
+	/* We get interrupts on unclaimed registers, so check this before we do
+	 * any I915_{READ,WRITE}. */
+	if (drm_debug && IS_HASWELL(dev) &&
+	    (I915_READ_NOTRACE(FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) {
+		DRM_ERROR("Unclaimed register before interrupt\n");
+		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
+	}
+
 	/* disable master interrupt before clearing iir  */
 	de_ier = I915_READ(DEIER);
 	I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL);
@@ -720,6 +739,9 @@  static irqreturn_t ivybridge_irq_handler(int irq, void *arg)
 
 	de_iir = I915_READ(DEIIR);
 	if (de_iir) {
+		if (de_iir & DE_ERR_INT_IVB)
+			ivb_err_int_handler(dev);
+
 		if (de_iir & DE_AUX_CHANNEL_A_IVB)
 			dp_aux_irq_handler(dev);
 
@@ -2006,7 +2028,8 @@  static int ivybridge_irq_postinstall(struct drm_device *dev)
 		DE_PLANEC_FLIP_DONE_IVB |
 		DE_PLANEB_FLIP_DONE_IVB |
 		DE_PLANEA_FLIP_DONE_IVB |
-		DE_AUX_CHANNEL_A_IVB;
+		DE_AUX_CHANNEL_A_IVB |
+		DE_ERR_INT_IVB;
 	u32 render_irqs;
 	u32 hotplug_mask;
 	u32 pch_irq_mask;
@@ -2014,6 +2037,7 @@  static int ivybridge_irq_postinstall(struct drm_device *dev)
 	dev_priv->irq_mask = ~display_mask;
 
 	/* should always can generate irq */
+	I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
 	I915_WRITE(DEIIR, I915_READ(DEIIR));
 	I915_WRITE(DEIMR, dev_priv->irq_mask);
 	I915_WRITE(DEIER,
@@ -2177,6 +2201,8 @@  static void ironlake_irq_uninstall(struct drm_device *dev)
 	I915_WRITE(DEIMR, 0xffffffff);
 	I915_WRITE(DEIER, 0x0);
 	I915_WRITE(DEIIR, I915_READ(DEIIR));
+	if (IS_GEN7(dev))
+		I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
 
 	I915_WRITE(GTIMR, 0xffffffff);
 	I915_WRITE(GTIER, 0x0);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ee30fb9..86cace1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3379,7 +3379,7 @@ 
 #define DE_PIPEA_FIFO_UNDERRUN  (1 << 0)
 
 /* More Ivybridge lolz */
-#define DE_ERR_DEBUG_IVB		(1<<30)
+#define DE_ERR_INT_IVB			(1<<30)
 #define DE_GSE_IVB			(1<<29)
 #define DE_PCH_EVENT_IVB		(1<<28)
 #define DE_DP_A_HOTPLUG_IVB		(1<<27)