diff mbox series

[RFC,2/2] drm/i915/gt: Use spin_lock_irq() instead of local_irq_disable() + spin_lock()

Message ID 20210908185703.2989414-3-bigeasy@linutronix.de (mailing list archive)
State New, archived
Headers show
Series drm/i915/gt: Locking splats PREEMPT_RT | expand

Commit Message

Sebastian Andrzej Siewior Sept. 8, 2021, 6:57 p.m. UTC
execlists_dequeue() is invoked from a function which uses
local_irq_disable() to disable interrupts so the spin_lock() behaves
like spin_lock_irq().
This breaks PREEMPT_RT because local_irq_disable() + spin_lock() is not
the same as spin_lock_irq().

execlists_dequeue_irq() and execlists_dequeue() has each one caller
only. If intel_engine_cs::active::lock is acquired and released with the
_irq suffix then it behaves almost as if execlists_dequeue() would be
invoked with disabled interrupts. The difference is the last part of the
function which is then invoked with enabled interrupts.
I can't tell if this makes a difference. From looking at it, it might
work to move the last unlock at the end of the function as I didn't find
anything that would acquire the lock again.

Reported-by: Clark Williams <williams@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 .../drm/i915/gt/intel_execlists_submission.c    | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

Comments

Maarten Lankhorst Sept. 16, 2021, 9:38 a.m. UTC | #1
Op 08-09-2021 om 20:57 schreef Sebastian Andrzej Siewior:
> execlists_dequeue() is invoked from a function which uses
> local_irq_disable() to disable interrupts so the spin_lock() behaves
> like spin_lock_irq().
> This breaks PREEMPT_RT because local_irq_disable() + spin_lock() is not
> the same as spin_lock_irq().
>
> execlists_dequeue_irq() and execlists_dequeue() has each one caller
> only. If intel_engine_cs::active::lock is acquired and released with the
> _irq suffix then it behaves almost as if execlists_dequeue() would be
> invoked with disabled interrupts. The difference is the last part of the
> function which is then invoked with enabled interrupts.
> I can't tell if this makes a difference. From looking at it, it might
> work to move the last unlock at the end of the function as I didn't find
> anything that would acquire the lock again.
>
> Reported-by: Clark Williams <williams@redhat.com>
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
>  .../drm/i915/gt/intel_execlists_submission.c    | 17 +++++------------
>  1 file changed, 5 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index fc77592d88a96..2ec1dd352960b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -1265,7 +1265,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  	 * and context switches) submission.
>  	 */
>  
> -	spin_lock(&engine->active.lock);
> +	spin_lock_irq(&engine->active.lock);
>  
>  	/*
>  	 * If the queue is higher priority than the last
> @@ -1365,7 +1365,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  				 * Even if ELSP[1] is occupied and not worthy
>  				 * of timeslices, our queue might be.
>  				 */
> -				spin_unlock(&engine->active.lock);
> +				spin_unlock_irq(&engine->active.lock);
>  				return;
>  			}
>  		}
> @@ -1391,7 +1391,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  
>  		if (last && !can_merge_rq(last, rq)) {
>  			spin_unlock(&ve->base.active.lock);
> -			spin_unlock(&engine->active.lock);
> +			spin_unlock_irq(&engine->active.lock);
>  			return; /* leave this for another sibling */
>  		}
>  
> @@ -1552,7 +1552,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  	 * interrupt for secondary ports).
>  	 */
>  	execlists->queue_priority_hint = queue_prio(execlists);
> -	spin_unlock(&engine->active.lock);
> +	spin_unlock_irq(&engine->active.lock);
>  
>  	/*
>  	 * We can skip poking the HW if we ended up with exactly the same set
> @@ -1578,13 +1578,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  	}
>  }
>  
> -static void execlists_dequeue_irq(struct intel_engine_cs *engine)
> -{
> -	local_irq_disable(); /* Suspend interrupts across request submission */
> -	execlists_dequeue(engine);
> -	local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
> -}
> -
>  static void clear_ports(struct i915_request **ports, int count)
>  {
>  	memset_p((void **)ports, NULL, count);
> @@ -2377,7 +2370,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
>  	}
>  
>  	if (!engine->execlists.pending[0]) {
> -		execlists_dequeue_irq(engine);
> +		execlists_dequeue(engine);
>  		start_timeslice(engine);
>  	}
>  

Patches look good.

For both patches:

Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>

I've been looking at running i915 with the -rt patch series, and noticed i915_request_submit fails with GEM_BUG_ON(!irqs_disabled()); presumably same failure exists for i915_request_unsubmit().

Might be worth removing those checks as well? Seems double with lockdep_assert_held on an irq lock anyway.

I've also noticed the local_irq_disable/enable is removed from intel_pipe_update_(start/end) in the rt series. It might make sense from a -rt point of view, but that code needs to run without interruptions, or i915 may show visual glitches or even locks up the system.

It should just be a set of registers hammered in, but the code might needs to be fixed to take the mmio lock as outer lock, and become a strict set of register read/writes only.

~Maarten
Sebastian Andrzej Siewior Oct. 1, 2021, 9:29 a.m. UTC | #2
On 2021-09-16 11:38:55 [+0200], Maarten Lankhorst wrote:
> Patches look good.
Thank you for looking.

> For both patches:
> 
> Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> 
> I've been looking at running i915 with the -rt patch series, and
> noticed i915_request_submit fails with GEM_BUG_ON(!irqs_disabled());
> presumably same failure exists for i915_request_unsubmit().
> 
> Might be worth removing those checks as well? Seems double with
> lockdep_assert_held on an irq lock anyway.

yes, let me prepare something in a few.

> I've also noticed the local_irq_disable/enable is removed from
> intel_pipe_update_(start/end) in the rt series. It might make sense
> from a -rt point of view, but that code needs to run without
> interruptions, or i915 may show visual glitches or even locks up the
> system.
>
> It should just be a set of registers hammered in, but the code might
> needs to be fixed to take the mmio lock as outer lock, and become a
> strict set of register read/writes only.

Let me see. So Anton Lundin (Cc:) reported glitches due to _this_ patch
on -RT. I have just a Sandybridge around with a i915 and it does not get
near that code here. 

> ~Maarten

Sebastian
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index fc77592d88a96..2ec1dd352960b 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1265,7 +1265,7 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
-	spin_lock(&engine->active.lock);
+	spin_lock_irq(&engine->active.lock);
 
 	/*
 	 * If the queue is higher priority than the last
@@ -1365,7 +1365,7 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * Even if ELSP[1] is occupied and not worthy
 				 * of timeslices, our queue might be.
 				 */
-				spin_unlock(&engine->active.lock);
+				spin_unlock_irq(&engine->active.lock);
 				return;
 			}
 		}
@@ -1391,7 +1391,7 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 
 		if (last && !can_merge_rq(last, rq)) {
 			spin_unlock(&ve->base.active.lock);
-			spin_unlock(&engine->active.lock);
+			spin_unlock_irq(&engine->active.lock);
 			return; /* leave this for another sibling */
 		}
 
@@ -1552,7 +1552,7 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * interrupt for secondary ports).
 	 */
 	execlists->queue_priority_hint = queue_prio(execlists);
-	spin_unlock(&engine->active.lock);
+	spin_unlock_irq(&engine->active.lock);
 
 	/*
 	 * We can skip poking the HW if we ended up with exactly the same set
@@ -1578,13 +1578,6 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 	}
 }
 
-static void execlists_dequeue_irq(struct intel_engine_cs *engine)
-{
-	local_irq_disable(); /* Suspend interrupts across request submission */
-	execlists_dequeue(engine);
-	local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
-}
-
 static void clear_ports(struct i915_request **ports, int count)
 {
 	memset_p((void **)ports, NULL, count);
@@ -2377,7 +2370,7 @@  static void execlists_submission_tasklet(struct tasklet_struct *t)
 	}
 
 	if (!engine->execlists.pending[0]) {
-		execlists_dequeue_irq(engine);
+		execlists_dequeue(engine);
 		start_timeslice(engine);
 	}