diff mbox

[1/7] drm/i915: Use seqlock in engine stats

Message ID 20180417122736.11603-2-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tvrtko Ursulin April 17, 2018, 12:27 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We can convert engine stats from a spinlock to seqlock to ensure interrupt
processing is never even a tiny bit delayed by parallel readers.

There is a smidgen bit more cost on the write lock side, and an extremely
unlikely chance that readers will have to retry a few times in face of
heavy interrupt load.Bbut it should be extremely unlikely given how
lightweight read side section is compared to the interrupt processing side,
and also compared to the rest of the code paths which can lead into it.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c  | 19 ++++++++++---------
 drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++++++-----
 2 files changed, 16 insertions(+), 14 deletions(-)

Comments

Chris Wilson April 17, 2018, 12:47 p.m. UTC | #1
Quoting Tvrtko Ursulin (2018-04-17 13:27:30)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> We can convert engine stats from a spinlock to seqlock to ensure interrupt
> processing is never even a tiny bit delayed by parallel readers.
> 
> There is a smidgen bit more cost on the write lock side, and an extremely
> unlikely chance that readers will have to retry a few times in face of
> heavy interrupt load.Bbut it should be extremely unlikely given how
> lightweight read side section is compared to the interrupt processing side,
> and also compared to the rest of the code paths which can lead into it.

Also mention that readers are informative only, its the writers that are
doing the work/submission.

> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_engine_cs.c  | 19 ++++++++++---------
>  drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++++++-----
>  2 files changed, 16 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index e4992c2e23a4..de61d3d1653d 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -306,7 +306,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>         /* Nothing to do here, execute in order of dependencies */
>         engine->schedule = NULL;
>  
> -       spin_lock_init(&engine->stats.lock);
> +       seqlock_init(&engine->stats.lock);
>  
>         ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
>  
> @@ -2065,7 +2065,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
>                 return -ENODEV;
>  
>         tasklet_disable(&execlists->tasklet);
> -       spin_lock_irqsave(&engine->stats.lock, flags);
> +       write_seqlock_irqsave(&engine->stats.lock, flags);
>  
>         if (unlikely(engine->stats.enabled == ~0)) {
>                 err = -EBUSY;
> @@ -2089,7 +2089,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
>         }
>  
>  unlock:
> -       spin_unlock_irqrestore(&engine->stats.lock, flags);
> +       write_sequnlock_irqrestore(&engine->stats.lock, flags);
>         tasklet_enable(&execlists->tasklet);
>  
>         return err;
> @@ -2118,12 +2118,13 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
>   */
>  ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
>  {
> +       unsigned int seq;
>         ktime_t total;
> -       unsigned long flags;
>  
> -       spin_lock_irqsave(&engine->stats.lock, flags);
> -       total = __intel_engine_get_busy_time(engine);
> -       spin_unlock_irqrestore(&engine->stats.lock, flags);
> +       do {
> +               seq = read_seqbegin(&engine->stats.lock);
> +               total = __intel_engine_get_busy_time(engine);
> +       } while (read_seqretry(&engine->stats.lock, seq));
>  
>         return total;
>  }
> @@ -2141,13 +2142,13 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine)
>         if (!intel_engine_supports_stats(engine))
>                 return;
>  
> -       spin_lock_irqsave(&engine->stats.lock, flags);
> +       write_seqlock_irqsave(&engine->stats.lock, flags);
>         WARN_ON_ONCE(engine->stats.enabled == 0);
>         if (--engine->stats.enabled == 0) {
>                 engine->stats.total = __intel_engine_get_busy_time(engine);
>                 engine->stats.active = 0;
>         }
> -       spin_unlock_irqrestore(&engine->stats.lock, flags);
> +       write_sequnlock_irqrestore(&engine->stats.lock, flags);
>  }
>  
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index d50b31eb43a5..f24ea9826037 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -3,6 +3,7 @@
>  #define _INTEL_RINGBUFFER_H_
>  
>  #include <linux/hashtable.h>
> +#include <linux/seqlock.h>
>  
>  #include "i915_gem_batch_pool.h"
>  #include "i915_gem_timeline.h"
> @@ -610,7 +611,7 @@ struct intel_engine_cs {
>                 /**
>                  * @lock: Lock protecting the below fields.
>                  */
> -               spinlock_t lock;
> +               seqlock_t lock;
>                 /**
>                  * @enabled: Reference count indicating number of listeners.
>                  */
> @@ -1082,7 +1083,7 @@ static inline void intel_engine_context_in(struct intel_engine_cs *engine)
>         if (READ_ONCE(engine->stats.enabled) == 0)
>                 return;
>  
> -       spin_lock_irqsave(&engine->stats.lock, flags);
> +       write_seqlock_irqsave(&engine->stats.lock, flags);
>  
>         if (engine->stats.enabled > 0) {
>                 if (engine->stats.active++ == 0)
> @@ -1090,7 +1091,7 @@ static inline void intel_engine_context_in(struct intel_engine_cs *engine)
>                 GEM_BUG_ON(engine->stats.active == 0);
>         }
>  
> -       spin_unlock_irqrestore(&engine->stats.lock, flags);
> +       write_sequnlock_irqrestore(&engine->stats.lock, flags);
>  }
>  
>  static inline void intel_engine_context_out(struct intel_engine_cs *engine)
> @@ -1100,7 +1101,7 @@ static inline void intel_engine_context_out(struct intel_engine_cs *engine)
>         if (READ_ONCE(engine->stats.enabled) == 0)
>                 return;
>  
> -       spin_lock_irqsave(&engine->stats.lock, flags);
> +       write_seqlock_irqsave(&engine->stats.lock, flags);
>  
>         if (engine->stats.enabled > 0) {
>                 ktime_t last;
> @@ -1127,7 +1128,7 @@ static inline void intel_engine_context_out(struct intel_engine_cs *engine)
>                 }
>         }
>  
> -       spin_unlock_irqrestore(&engine->stats.lock, flags);
> +       write_sequnlock_irqrestore(&engine->stats.lock, flags);
>  }

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

I think that's good enough as a stand-alone patch, even though we were
unable to realise the no irq_disable gains.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index e4992c2e23a4..de61d3d1653d 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -306,7 +306,7 @@  intel_engine_setup(struct drm_i915_private *dev_priv,
 	/* Nothing to do here, execute in order of dependencies */
 	engine->schedule = NULL;
 
-	spin_lock_init(&engine->stats.lock);
+	seqlock_init(&engine->stats.lock);
 
 	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
 
@@ -2065,7 +2065,7 @@  int intel_enable_engine_stats(struct intel_engine_cs *engine)
 		return -ENODEV;
 
 	tasklet_disable(&execlists->tasklet);
-	spin_lock_irqsave(&engine->stats.lock, flags);
+	write_seqlock_irqsave(&engine->stats.lock, flags);
 
 	if (unlikely(engine->stats.enabled == ~0)) {
 		err = -EBUSY;
@@ -2089,7 +2089,7 @@  int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	}
 
 unlock:
-	spin_unlock_irqrestore(&engine->stats.lock, flags);
+	write_sequnlock_irqrestore(&engine->stats.lock, flags);
 	tasklet_enable(&execlists->tasklet);
 
 	return err;
@@ -2118,12 +2118,13 @@  static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
  */
 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
 {
+	unsigned int seq;
 	ktime_t total;
-	unsigned long flags;
 
-	spin_lock_irqsave(&engine->stats.lock, flags);
-	total = __intel_engine_get_busy_time(engine);
-	spin_unlock_irqrestore(&engine->stats.lock, flags);
+	do {
+		seq = read_seqbegin(&engine->stats.lock);
+		total = __intel_engine_get_busy_time(engine);
+	} while (read_seqretry(&engine->stats.lock, seq));
 
 	return total;
 }
@@ -2141,13 +2142,13 @@  void intel_disable_engine_stats(struct intel_engine_cs *engine)
 	if (!intel_engine_supports_stats(engine))
 		return;
 
-	spin_lock_irqsave(&engine->stats.lock, flags);
+	write_seqlock_irqsave(&engine->stats.lock, flags);
 	WARN_ON_ONCE(engine->stats.enabled == 0);
 	if (--engine->stats.enabled == 0) {
 		engine->stats.total = __intel_engine_get_busy_time(engine);
 		engine->stats.active = 0;
 	}
-	spin_unlock_irqrestore(&engine->stats.lock, flags);
+	write_sequnlock_irqrestore(&engine->stats.lock, flags);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index d50b31eb43a5..f24ea9826037 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -3,6 +3,7 @@ 
 #define _INTEL_RINGBUFFER_H_
 
 #include <linux/hashtable.h>
+#include <linux/seqlock.h>
 
 #include "i915_gem_batch_pool.h"
 #include "i915_gem_timeline.h"
@@ -610,7 +611,7 @@  struct intel_engine_cs {
 		/**
 		 * @lock: Lock protecting the below fields.
 		 */
-		spinlock_t lock;
+		seqlock_t lock;
 		/**
 		 * @enabled: Reference count indicating number of listeners.
 		 */
@@ -1082,7 +1083,7 @@  static inline void intel_engine_context_in(struct intel_engine_cs *engine)
 	if (READ_ONCE(engine->stats.enabled) == 0)
 		return;
 
-	spin_lock_irqsave(&engine->stats.lock, flags);
+	write_seqlock_irqsave(&engine->stats.lock, flags);
 
 	if (engine->stats.enabled > 0) {
 		if (engine->stats.active++ == 0)
@@ -1090,7 +1091,7 @@  static inline void intel_engine_context_in(struct intel_engine_cs *engine)
 		GEM_BUG_ON(engine->stats.active == 0);
 	}
 
-	spin_unlock_irqrestore(&engine->stats.lock, flags);
+	write_sequnlock_irqrestore(&engine->stats.lock, flags);
 }
 
 static inline void intel_engine_context_out(struct intel_engine_cs *engine)
@@ -1100,7 +1101,7 @@  static inline void intel_engine_context_out(struct intel_engine_cs *engine)
 	if (READ_ONCE(engine->stats.enabled) == 0)
 		return;
 
-	spin_lock_irqsave(&engine->stats.lock, flags);
+	write_seqlock_irqsave(&engine->stats.lock, flags);
 
 	if (engine->stats.enabled > 0) {
 		ktime_t last;
@@ -1127,7 +1128,7 @@  static inline void intel_engine_context_out(struct intel_engine_cs *engine)
 		}
 	}
 
-	spin_unlock_irqrestore(&engine->stats.lock, flags);
+	write_sequnlock_irqrestore(&engine->stats.lock, flags);
 }
 
 int intel_enable_engine_stats(struct intel_engine_cs *engine);