diff mbox

drm/i915/icl: Correctly clear lost ctx-switch interrupts across reset for Gen11

Message ID 1524605995-22324-1-git-send-email-oscar.mateo@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

oscar.mateo@intel.com April 24, 2018, 9:39 p.m. UTC
Interrupt handling in Gen11 is quite different from previous platforms.

v2: Rebased (Michel)
v3: Rebased with wiggle
v4: Rebased, remove TODO warning correctly (Daniele)
v5: Rebased, made gen11_gtiir const while at it (Michel)
v6: Rebased
v7: Adapt to the style currently in upstream

Suggested-by: Michel Thierry <michel.thierry@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c  |  6 ++--
 drivers/gpu/drm/i915/intel_drv.h |  3 ++
 drivers/gpu/drm/i915/intel_lrc.c | 60 ++++++++++++++++++++++++++++------------
 3 files changed, 48 insertions(+), 21 deletions(-)

Comments

Michel Thierry April 24, 2018, 10:27 p.m. UTC | #1
On 4/24/2018 2:39 PM, Oscar Mateo wrote:
> Interrupt handling in Gen11 is quite different from previous platforms.
> 
> v2: Rebased (Michel)
> v3: Rebased with wiggle
> v4: Rebased, remove TODO warning correctly (Daniele)
> v5: Rebased, made gen11_gtiir const while at it (Michel)
> v6: Rebased
> v7: Adapt to the style currently in upstream
> 
> Suggested-by: Michel Thierry <michel.thierry@intel.com>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_irq.c  |  6 ++--
>   drivers/gpu/drm/i915/intel_drv.h |  3 ++
>   drivers/gpu/drm/i915/intel_lrc.c | 60 ++++++++++++++++++++++++++++------------
>   3 files changed, 48 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 96547e0..f9bc3aa 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -247,9 +247,9 @@ void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv,
>   gen11_gt_engine_identity(struct drm_i915_private * const i915,
>   			 const unsigned int bank, const unsigned int bit);
>   
> -static bool gen11_reset_one_iir(struct drm_i915_private * const i915,
> -				const unsigned int bank,
> -				const unsigned int bit)
> +bool gen11_reset_one_iir(struct drm_i915_private * const i915,
> +			 const unsigned int bank,
> +			 const unsigned int bit)
>   {
>   	void __iomem * const regs = i915->regs;
>   	u32 dw;
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 58868b9..9bba035 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1333,6 +1333,9 @@ void intel_pch_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv,
>   void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv);
>   
>   /* i915_irq.c */
> +bool gen11_reset_one_iir(struct drm_i915_private * const i915,
> +			 const unsigned int bank,
> +			 const unsigned int bit);
>   void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
>   void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
>   void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 2d6572a..7ea5f36 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -789,22 +789,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   
>   static void clear_gtiir(struct intel_engine_cs *engine)
>   {
> -	static const u8 gtiir[] = {
> -		[RCS]  = 0,
> -		[BCS]  = 0,
> -		[VCS]  = 1,
> -		[VCS2] = 1,
> -		[VECS] = 3,
> -	};
>   	struct drm_i915_private *dev_priv = engine->i915;
>   	int i;
>   
> -	/* TODO: correctly reset irqs for gen11 */
> -	if (WARN_ON_ONCE(INTEL_GEN(engine->i915) >= 11))
> -		return;
> -
> -	GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
> -
>   	/*
>   	 * Clear any pending interrupt state.
>   	 *
> @@ -812,13 +799,50 @@ static void clear_gtiir(struct intel_engine_cs *engine)
>   	 * double buffered, and so if we only reset it once there may
>   	 * still be an interrupt pending.
>   	 */
> -	for (i = 0; i < 2; i++) {
> -		I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
> +	if (INTEL_GEN(dev_priv) >= 11) {
> +		static const struct {
> +			u8 bank;
> +			u8 bit;
> +		} gen11_gtiir[] = {
> +			[RCS] = {0, GEN11_RCS0},
> +			[BCS] = {0, GEN11_BCS},
> +			[_VCS(0)] = {1, GEN11_VCS(0)},
> +			[_VCS(1)] = {1, GEN11_VCS(1)},
> +			[_VCS(2)] = {1, GEN11_VCS(2)},
> +			[_VCS(3)] = {1, GEN11_VCS(3)},
> +			[_VECS(0)] = {1, GEN11_VECS(0)},
> +			[_VECS(1)] = {1, GEN11_VECS(1)},
> +		};
bit,bank values are correct so

Reviewed-by: Michel Thierry <michel.thierry@intel.com>

> +		unsigned long irqflags;
> +
> +		GEM_BUG_ON(engine->id >= ARRAY_SIZE(gen11_gtiir));
> +
> +		spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
> +		for (i = 0; i < 2; i++) {
> +			gen11_reset_one_iir(dev_priv,
> +					    gen11_gtiir[engine->id].bank,
> +					    gen11_gtiir[engine->id].bit);
> +		}
> +		spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
> +	} else {
> +		static const u8 gtiir[] = {
> +			[RCS]  = 0,
> +			[BCS]  = 0,
> +			[VCS]  = 1,
> +			[VCS2] = 1,
> +			[VECS] = 3,
> +		};
> +
> +		GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
> +
> +		for (i = 0; i < 2; i++) {
> +			I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
> +				   engine->irq_keep_mask);
> +			POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
> +		}
> +		GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
>   			   engine->irq_keep_mask);
> -		POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
>   	}
> -	GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
> -		   engine->irq_keep_mask);
>   }
>   
>   static void reset_irq(struct intel_engine_cs *engine)
>
Chris Wilson April 25, 2018, 3:12 p.m. UTC | #2
Quoting Michel Thierry (2018-04-24 23:27:41)
> On 4/24/2018 2:39 PM, Oscar Mateo wrote:
> > Interrupt handling in Gen11 is quite different from previous platforms.
> > 
> > v2: Rebased (Michel)
> > v3: Rebased with wiggle
> > v4: Rebased, remove TODO warning correctly (Daniele)
> > v5: Rebased, made gen11_gtiir const while at it (Michel)
> > v6: Rebased
> > v7: Adapt to the style currently in upstream
> > 
> > Suggested-by: Michel Thierry <michel.thierry@intel.com>
> > Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_irq.c  |  6 ++--
> >   drivers/gpu/drm/i915/intel_drv.h |  3 ++
> >   drivers/gpu/drm/i915/intel_lrc.c | 60 ++++++++++++++++++++++++++++------------
> >   3 files changed, 48 insertions(+), 21 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index 96547e0..f9bc3aa 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -247,9 +247,9 @@ void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv,
> >   gen11_gt_engine_identity(struct drm_i915_private * const i915,
> >                        const unsigned int bank, const unsigned int bit);
> >   
> > -static bool gen11_reset_one_iir(struct drm_i915_private * const i915,
> > -                             const unsigned int bank,
> > -                             const unsigned int bit)
> > +bool gen11_reset_one_iir(struct drm_i915_private * const i915,
> > +                      const unsigned int bank,
> > +                      const unsigned int bit)
> >   {
> >       void __iomem * const regs = i915->regs;
> >       u32 dw;
> > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> > index 58868b9..9bba035 100644
> > --- a/drivers/gpu/drm/i915/intel_drv.h
> > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > @@ -1333,6 +1333,9 @@ void intel_pch_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv,
> >   void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv);
> >   
> >   /* i915_irq.c */
> > +bool gen11_reset_one_iir(struct drm_i915_private * const i915,
> > +                      const unsigned int bank,
> > +                      const unsigned int bit);
> >   void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
> >   void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
> >   void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index 2d6572a..7ea5f36 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -789,22 +789,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >   
> >   static void clear_gtiir(struct intel_engine_cs *engine)
> >   {
> > -     static const u8 gtiir[] = {
> > -             [RCS]  = 0,
> > -             [BCS]  = 0,
> > -             [VCS]  = 1,
> > -             [VCS2] = 1,
> > -             [VECS] = 3,
> > -     };
> >       struct drm_i915_private *dev_priv = engine->i915;
> >       int i;
> >   
> > -     /* TODO: correctly reset irqs for gen11 */
> > -     if (WARN_ON_ONCE(INTEL_GEN(engine->i915) >= 11))
> > -             return;
> > -
> > -     GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
> > -
> >       /*
> >        * Clear any pending interrupt state.
> >        *
> > @@ -812,13 +799,50 @@ static void clear_gtiir(struct intel_engine_cs *engine)
> >        * double buffered, and so if we only reset it once there may
> >        * still be an interrupt pending.
> >        */
> > -     for (i = 0; i < 2; i++) {
> > -             I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
> > +     if (INTEL_GEN(dev_priv) >= 11) {
> > +             static const struct {
> > +                     u8 bank;
> > +                     u8 bit;
> > +             } gen11_gtiir[] = {
> > +                     [RCS] = {0, GEN11_RCS0},
> > +                     [BCS] = {0, GEN11_BCS},
> > +                     [_VCS(0)] = {1, GEN11_VCS(0)},
> > +                     [_VCS(1)] = {1, GEN11_VCS(1)},
> > +                     [_VCS(2)] = {1, GEN11_VCS(2)},
> > +                     [_VCS(3)] = {1, GEN11_VCS(3)},
> > +                     [_VECS(0)] = {1, GEN11_VECS(0)},
> > +                     [_VECS(1)] = {1, GEN11_VECS(1)},
> > +             };
> bit,bank values are correct so
> 
> Reviewed-by: Michel Thierry <michel.thierry@intel.com>
> 
> > +             unsigned long irqflags;
> > +
> > +             GEM_BUG_ON(engine->id >= ARRAY_SIZE(gen11_gtiir));
> > +
> > +             spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
> > +             for (i = 0; i < 2; i++) {
> > +                     gen11_reset_one_iir(dev_priv,
> > +                                         gen11_gtiir[engine->id].bank,
> > +                                         gen11_gtiir[engine->id].bit);
> > +             }
> > +             spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);

Strictly speaking we are already inside an irq-safe section.

Applied as is.
-Chris
Chris Wilson July 13, 2018, 8:28 a.m. UTC | #3
Quoting Oscar Mateo (2018-04-24 22:39:55)
> Interrupt handling in Gen11 is quite different from previous platforms.
> 
> v2: Rebased (Michel)
> v3: Rebased with wiggle
> v4: Rebased, remove TODO warning correctly (Daniele)
> v5: Rebased, made gen11_gtiir const while at it (Michel)
> v6: Rebased
> v7: Adapt to the style currently in upstream
> 
> Suggested-by: Michel Thierry <michel.thierry@intel.com>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_irq.c  |  6 ++--
>  drivers/gpu/drm/i915/intel_drv.h |  3 ++
>  drivers/gpu/drm/i915/intel_lrc.c | 60 ++++++++++++++++++++++++++++------------
>  3 files changed, 48 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 96547e0..f9bc3aa 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -247,9 +247,9 @@ void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv,
>  gen11_gt_engine_identity(struct drm_i915_private * const i915,
>                          const unsigned int bank, const unsigned int bit);
>  
> -static bool gen11_reset_one_iir(struct drm_i915_private * const i915,
> -                               const unsigned int bank,
> -                               const unsigned int bit)
> +bool gen11_reset_one_iir(struct drm_i915_private * const i915,
> +                        const unsigned int bank,
> +                        const unsigned int bit)
>  {
>         void __iomem * const regs = i915->regs;
>         u32 dw;
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 58868b9..9bba035 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1333,6 +1333,9 @@ void intel_pch_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv,
>  void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv);
>  
>  /* i915_irq.c */
> +bool gen11_reset_one_iir(struct drm_i915_private * const i915,
> +                        const unsigned int bank,
> +                        const unsigned int bit);
>  void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
>  void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
>  void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 2d6572a..7ea5f36 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -789,22 +789,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  
>  static void clear_gtiir(struct intel_engine_cs *engine)
>  {
> -       static const u8 gtiir[] = {
> -               [RCS]  = 0,
> -               [BCS]  = 0,
> -               [VCS]  = 1,
> -               [VCS2] = 1,
> -               [VECS] = 3,
> -       };
>         struct drm_i915_private *dev_priv = engine->i915;
>         int i;
>  
> -       /* TODO: correctly reset irqs for gen11 */
> -       if (WARN_ON_ONCE(INTEL_GEN(engine->i915) >= 11))
> -               return;
> -
> -       GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
> -
>         /*
>          * Clear any pending interrupt state.
>          *
> @@ -812,13 +799,50 @@ static void clear_gtiir(struct intel_engine_cs *engine)
>          * double buffered, and so if we only reset it once there may
>          * still be an interrupt pending.
>          */
> -       for (i = 0; i < 2; i++) {
> -               I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
> +       if (INTEL_GEN(dev_priv) >= 11) {
> +               static const struct {
> +                       u8 bank;
> +                       u8 bit;
> +               } gen11_gtiir[] = {
> +                       [RCS] = {0, GEN11_RCS0},
> +                       [BCS] = {0, GEN11_BCS},
> +                       [_VCS(0)] = {1, GEN11_VCS(0)},
> +                       [_VCS(1)] = {1, GEN11_VCS(1)},
> +                       [_VCS(2)] = {1, GEN11_VCS(2)},
> +                       [_VCS(3)] = {1, GEN11_VCS(3)},
> +                       [_VECS(0)] = {1, GEN11_VECS(0)},
> +                       [_VECS(1)] = {1, GEN11_VECS(1)},
> +               };
> +               unsigned long irqflags;
> +
> +               GEM_BUG_ON(engine->id >= ARRAY_SIZE(gen11_gtiir));
> +
> +               spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
> +               for (i = 0; i < 2; i++) {
> +                       gen11_reset_one_iir(dev_priv,
> +                                           gen11_gtiir[engine->id].bank,
> +                                           gen11_gtiir[engine->id].bit);
> +               }
> +               spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);

This is sunk by

[   57.409776] ======================================================
[   57.409779] WARNING: possible circular locking dependency detected
[   57.409783] 4.18.0-rc4-CI-CI_DII_1137+ #1 Tainted: G     U  W        
[   57.409785] ------------------------------------------------------
[   57.409788] swapper/6/0 is trying to acquire lock:
[   57.409790] 000000004f304ee5 (&engine->timeline.lock/1){-.-.}, at: execlists_submit_request+0x2b/0x1a0 [i915]
[   57.409841] 
               but task is already holding lock:
[   57.409844] 00000000aad89594 (&(&rq->lock)->rlock#2){-.-.}, at: notify_ring+0x2b2/0x480 [i915]
[   57.409869] 
               which lock already depends on the new lock.

[   57.409872] 
               the existing dependency chain (in reverse order) is:
[   57.409876] 
               -> #2 (&(&rq->lock)->rlock#2){-.-.}:
[   57.409900]        notify_ring+0x2b2/0x480 [i915]
[   57.409922]        gen8_cs_irq_handler+0x39/0xa0 [i915]
[   57.409943]        gen11_irq_handler+0x2f0/0x420 [i915]
[   57.409949]        __handle_irq_event_percpu+0x42/0x370
[   57.409952]        handle_irq_event_percpu+0x2b/0x70
[   57.409956]        handle_irq_event+0x2f/0x50
[   57.409959]        handle_edge_irq+0xe7/0x190
[   57.409964]        handle_irq+0x67/0x160
[   57.409967]        do_IRQ+0x5e/0x120
[   57.409971]        ret_from_intr+0x0/0x1d
[   57.409974]        _raw_spin_unlock_irqrestore+0x4e/0x60
[   57.409979]        tasklet_action_common.isra.5+0x47/0xb0
[   57.409982]        __do_softirq+0xd9/0x505
[   57.409985]        irq_exit+0xa9/0xc0
[   57.409988]        do_IRQ+0x9a/0x120
[   57.409991]        ret_from_intr+0x0/0x1d
[   57.409995]        cpuidle_enter_state+0xac/0x360
[   57.409999]        do_idle+0x1f3/0x250
[   57.410004]        cpu_startup_entry+0x6a/0x70
[   57.410010]        start_secondary+0x19d/0x1f0
[   57.410015]        secondary_startup_64+0xa5/0xb0
[   57.410018] 
               -> #1 (&(&dev_priv->irq_lock)->rlock){-.-.}:
[   57.410081]        clear_gtiir+0x30/0x200 [i915]
[   57.410116]        execlists_reset+0x6e/0x2b0 [i915]
[   57.410140]        i915_reset_engine+0x111/0x190 [i915]
[   57.410165]        i915_handle_error+0x11a/0x4a0 [i915]
[   57.410198]        i915_hangcheck_elapsed+0x378/0x530 [i915]
[   57.410204]        process_one_work+0x248/0x6c0
[   57.410207]        worker_thread+0x37/0x380
[   57.410211]        kthread+0x119/0x130
[   57.410215]        ret_from_fork+0x3a/0x50
[   57.410217] 
               -> #0 (&engine->timeline.lock/1){-.-.}:
[   57.410224]        _raw_spin_lock_irqsave+0x33/0x50
[   57.410256]        execlists_submit_request+0x2b/0x1a0 [i915]
[   57.410289]        submit_notify+0x8d/0x124 [i915]
[   57.410314]        __i915_sw_fence_complete+0x81/0x250 [i915]
[   57.410339]        dma_i915_sw_fence_wake+0xd/0x20 [i915]
[   57.410344]        dma_fence_signal_locked+0x79/0x200
[   57.410368]        notify_ring+0x2ba/0x480 [i915]
[   57.410392]        gen8_cs_irq_handler+0x39/0xa0 [i915]
[   57.410416]        gen11_irq_handler+0x2f0/0x420 [i915]
[   57.410421]        __handle_irq_event_percpu+0x42/0x370
[   57.410425]        handle_irq_event_percpu+0x2b/0x70
[   57.410428]        handle_irq_event+0x2f/0x50
[   57.410432]        handle_edge_irq+0xe7/0x190
[   57.410436]        handle_irq+0x67/0x160
[   57.410439]        do_IRQ+0x5e/0x120
[   57.410445]        ret_from_intr+0x0/0x1d
[   57.410449]        cpuidle_enter_state+0xac/0x360
[   57.410453]        do_idle+0x1f3/0x250
[   57.410456]        cpu_startup_entry+0x6a/0x70
[   57.410460]        start_secondary+0x19d/0x1f0
[   57.410464]        secondary_startup_64+0xa5/0xb0
[   57.410466] 
               other info that might help us debug this:

[   57.410471] Chain exists of:
                 &engine->timeline.lock/1 --> &(&dev_priv->irq_lock)->rlock --> &(&rq->lock)->rlock#2

[   57.410481]  Possible unsafe locking scenario:

[   57.410485]        CPU0                    CPU1
[   57.410487]        ----                    ----
[   57.410490]   lock(&(&rq->lock)->rlock#2);
[   57.410494]                                lock(&(&dev_priv->irq_lock)->rlock);
[   57.410498]                                lock(&(&rq->lock)->rlock#2);
[   57.410503]   lock(&engine->timeline.lock/1);
[   57.410506] 
                *** DEADLOCK ***

[   57.410511] 4 locks held by swapper/6/0:
[   57.410514]  #0: 0000000074575789 (&(&dev_priv->irq_lock)->rlock){-.-.}, at: gen11_irq_handler+0x8a/0x420 [i915]
[   57.410542]  #1: 000000009b29b30e (rcu_read_lock){....}, at: notify_ring+0x1a/0x480 [i915]
[   57.410573]  #2: 00000000aad89594 (&(&rq->lock)->rlock#2){-.-.}, at: notify_ring+0x2b2/0x480 [i915]
[   57.410601]  #3: 000000009b29b30e (rcu_read_lock){....}, at: submit_notify+0x35/0x124 [i915]
[   57.410635] 
               stack backtrace:
[   57.410640] CPU: 6 PID: 0 Comm: swapper/6 Tainted: G     U  W         4.18.0-rc4-CI-CI_DII_1137+ #1
[   57.410644] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake U DDR4 SODIMM PD RVP, BIOS ICLSFWR1.R00.2222.A01.1805300339 05/30/2018
[   57.410650] Call Trace:
[   57.410652]  <IRQ>
[   57.410657]  dump_stack+0x67/0x9b
[   57.410662]  print_circular_bug.isra.16+0x1c8/0x2b0
[   57.410666]  __lock_acquire+0x1897/0x1b50
[   57.410671]  ? lock_acquire+0xa6/0x210
[   57.410674]  lock_acquire+0xa6/0x210
[   57.410706]  ? execlists_submit_request+0x2b/0x1a0 [i915]
[   57.410711]  _raw_spin_lock_irqsave+0x33/0x50
[   57.410741]  ? execlists_submit_request+0x2b/0x1a0 [i915]
[   57.410769]  execlists_submit_request+0x2b/0x1a0 [i915]
[   57.410774]  ? _raw_spin_unlock_irqrestore+0x39/0x60
[   57.410804]  submit_notify+0x8d/0x124 [i915]
[   57.410828]  __i915_sw_fence_complete+0x81/0x250 [i915]
[   57.410854]  dma_i915_sw_fence_wake+0xd/0x20 [i915]
[   57.410858]  dma_fence_signal_locked+0x79/0x200
[   57.410882]  notify_ring+0x2ba/0x480 [i915]
[   57.410907]  gen8_cs_irq_handler+0x39/0xa0 [i915]
[   57.410933]  gen11_irq_handler+0x2f0/0x420 [i915]
[   57.410938]  __handle_irq_event_percpu+0x42/0x370
[   57.410943]  handle_irq_event_percpu+0x2b/0x70
[   57.410947]  handle_irq_event+0x2f/0x50
[   57.410951]  handle_edge_irq+0xe7/0x190
[   57.410955]  handle_irq+0x67/0x160
[   57.410958]  do_IRQ+0x5e/0x120
[   57.410962]  common_interrupt+0xf/0xf
[   57.410965]  </IRQ>
[   57.410969] RIP: 0010:cpuidle_enter_state+0xac/0x360
[   57.410972] Code: 44 00 00 31 ff e8 84 93 91 ff 45 84 f6 74 12 9c 58 f6 c4 02 0f 85 31 02 00 00 31 ff e8 7d 30 98 ff e8 e8 0e 94 ff fb 4c 29 fb <48> ba cf f7 53 e3 a5 9b c4 20 48 89 d8 48 c1 fb 3f 48 f7 ea b8 ff 
[   57.411015] RSP: 0018:ffffc90000133e90 EFLAGS: 00000216 ORIG_RAX: ffffffffffffffdd
[   57.411023] RAX: ffff8804ae748040 RBX: 000000000002a97d RCX: 0000000000000000
[   57.411029] RDX: 0000000000000046 RSI: ffffffff82141263 RDI: ffffffff820f05a7
[   57.411035] RBP: 0000000000000001 R08: 0000000000000001 R09: 0000000000000000
[   57.411041] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff8229f078
[   57.411045] R13: ffff8804ab2adfa8 R14: 0000000000000000 R15: 0000000d5de092e3
[   57.411052]  do_idle+0x1f3/0x250
[   57.411055]  cpu_startup_entry+0x6a/0x70
[   57.411059]  start_secondary+0x19d/0x1f0
[   57.411064]  secondary_startup_64+0xa5/0xb0

And that looks quite genuine, and I can't immediately see a way around
it. It's a global iir bank, and interrupts are only disabled on the
local cpu.

Fortunately in the current state of the CSB processing we should now
avoid the need to clear_gtiir.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 96547e0..f9bc3aa 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -247,9 +247,9 @@  void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv,
 gen11_gt_engine_identity(struct drm_i915_private * const i915,
 			 const unsigned int bank, const unsigned int bit);
 
-static bool gen11_reset_one_iir(struct drm_i915_private * const i915,
-				const unsigned int bank,
-				const unsigned int bit)
+bool gen11_reset_one_iir(struct drm_i915_private * const i915,
+			 const unsigned int bank,
+			 const unsigned int bit)
 {
 	void __iomem * const regs = i915->regs;
 	u32 dw;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 58868b9..9bba035 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1333,6 +1333,9 @@  void intel_pch_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv,
 void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv);
 
 /* i915_irq.c */
+bool gen11_reset_one_iir(struct drm_i915_private * const i915,
+			 const unsigned int bank,
+			 const unsigned int bit);
 void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
 void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
 void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 2d6572a..7ea5f36 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -789,22 +789,9 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 
 static void clear_gtiir(struct intel_engine_cs *engine)
 {
-	static const u8 gtiir[] = {
-		[RCS]  = 0,
-		[BCS]  = 0,
-		[VCS]  = 1,
-		[VCS2] = 1,
-		[VECS] = 3,
-	};
 	struct drm_i915_private *dev_priv = engine->i915;
 	int i;
 
-	/* TODO: correctly reset irqs for gen11 */
-	if (WARN_ON_ONCE(INTEL_GEN(engine->i915) >= 11))
-		return;
-
-	GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
-
 	/*
 	 * Clear any pending interrupt state.
 	 *
@@ -812,13 +799,50 @@  static void clear_gtiir(struct intel_engine_cs *engine)
 	 * double buffered, and so if we only reset it once there may
 	 * still be an interrupt pending.
 	 */
-	for (i = 0; i < 2; i++) {
-		I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
+	if (INTEL_GEN(dev_priv) >= 11) {
+		static const struct {
+			u8 bank;
+			u8 bit;
+		} gen11_gtiir[] = {
+			[RCS] = {0, GEN11_RCS0},
+			[BCS] = {0, GEN11_BCS},
+			[_VCS(0)] = {1, GEN11_VCS(0)},
+			[_VCS(1)] = {1, GEN11_VCS(1)},
+			[_VCS(2)] = {1, GEN11_VCS(2)},
+			[_VCS(3)] = {1, GEN11_VCS(3)},
+			[_VECS(0)] = {1, GEN11_VECS(0)},
+			[_VECS(1)] = {1, GEN11_VECS(1)},
+		};
+		unsigned long irqflags;
+
+		GEM_BUG_ON(engine->id >= ARRAY_SIZE(gen11_gtiir));
+
+		spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+		for (i = 0; i < 2; i++) {
+			gen11_reset_one_iir(dev_priv,
+					    gen11_gtiir[engine->id].bank,
+					    gen11_gtiir[engine->id].bit);
+		}
+		spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+	} else {
+		static const u8 gtiir[] = {
+			[RCS]  = 0,
+			[BCS]  = 0,
+			[VCS]  = 1,
+			[VCS2] = 1,
+			[VECS] = 3,
+		};
+
+		GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
+
+		for (i = 0; i < 2; i++) {
+			I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
+				   engine->irq_keep_mask);
+			POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
+		}
+		GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
 			   engine->irq_keep_mask);
-		POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
 	}
-	GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
-		   engine->irq_keep_mask);
 }
 
 static void reset_irq(struct intel_engine_cs *engine)