
[1/2] kernel/latencytop: Add non-scheduler interface for latency reporting

Message ID 1448983768-22324-1-git-send-email-daniel.vetter@ffwll.ch (mailing list archive)
State New, archived

Commit Message

Daniel Vetter Dec. 1, 2015, 3:29 p.m. UTC
Some sources of significant latency aren't simple sleeps but
busy-loops, or a series of hundreds of small sleeps, simply because
the hardware can't do better. Unfortunately latencytop doesn't
register these, so they slip under the radar. Hence expose a
simplified interface to report additional latencies and export the
underlying function so that modules can use it.

The example I have in mind is EDID reads. The drm subsystem exposes
both an interface to do a full probe and one to just get at the
cached state from the last probe, and userspace developers often
don't know about the difference and incur unnecessarily big
latencies. Usually the i2c transfer is done by busy-looping, or, if
there is a hw engine, it might only be able to transfer a few bytes
per sleep/irq cycle. EDID reads take at least 12ms and with crappy hw
can easily be a few hundred ms.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
---
 include/linux/latencytop.h | 15 +++++++++++++++
 kernel/latencytop.c        |  2 ++
 2 files changed, 17 insertions(+)
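
To illustrate the intended use, here is a minimal sketch of a caller
reporting such a latency (hypothetical driver code, not part of this
patch; struct example_device and example_do_i2c_transfer() are
stand-ins):

#include <linux/ktime.h>
#include <linux/latencytop.h>

/*
 * Hypothetical caller: time a slow, busy-looping hardware transaction
 * and report the elapsed time so it shows up in latencytop.
 */
static int example_read_edid(struct example_device *dev, void *buf, size_t len)
{
	ktime_t start = ktime_get();
	int ret;

	/* May busy-loop or sleep hundreds of times; >= 12ms at best. */
	ret = example_do_i2c_transfer(dev, buf, len);

	/* Elapsed time in microseconds; a no-op unless latencytop is enabled. */
	account_latency(ktime_us_delta(ktime_get(), start));

	return ret;
}

Since account_latency() compiles to an empty stub when CONFIG_LATENCYTOP
is disabled, callers don't need ifdefs of their own.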

Comments

Chris Wilson Dec. 1, 2015, 3:46 p.m. UTC | #1
On Tue, Dec 01, 2015 at 04:29:27PM +0100, Daniel Vetter wrote:
> [...]
> 
> diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h
> index e23121f9d82a..46b69bc35f02 100644
> --- a/include/linux/latencytop.h
> +++ b/include/linux/latencytop.h
> @@ -10,6 +10,9 @@
>  #define _INCLUDE_GUARD_LATENCYTOP_H_
>  
>  #include <linux/compiler.h>
> +
> +#include <asm/current.h>
> +
>  struct task_struct;
>  
>  #ifdef CONFIG_LATENCYTOP
> @@ -35,6 +38,13 @@ account_scheduler_latency(struct task_struct *task, int usecs, int inter)
>  		__account_scheduler_latency(task, usecs, inter);
>  }
>  
> +static inline void
> +account_latency(int usecs)
> +{
> +	if (unlikely(latencytop_enabled))
> +		__account_scheduler_latency(current, usecs, 0);

Just

	account_scheduler_latency(current, usecs, 0);
> +}

And then that can be used for both ifdef paths, i.e. move account_latency() to after the #endif.
-Chris
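
For reference, Chris's suggestion amounts to roughly the following
layout of the header (a sketch of what the suggested restructuring
would look like, not committed code):

/* The per-config account_scheduler_latency() definitions stay as-is ... */
#ifdef CONFIG_LATENCYTOP
static inline void
account_scheduler_latency(struct task_struct *task, int usecs, int inter)
{
	if (unlikely(latencytop_enabled))
		__account_scheduler_latency(task, usecs, inter);
}
#else
static inline void
account_scheduler_latency(struct task_struct *task, int usecs, int inter)
{
}
#endif

/* ... and a single definition after the #endif covers both config paths. */
static inline void
account_latency(int usecs)
{
	account_scheduler_latency(current, usecs, 0);
}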

Patch

diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h
index e23121f9d82a..46b69bc35f02 100644
--- a/include/linux/latencytop.h
+++ b/include/linux/latencytop.h
@@ -10,6 +10,9 @@ 
 #define _INCLUDE_GUARD_LATENCYTOP_H_
 
 #include <linux/compiler.h>
+
+#include <asm/current.h>
+
 struct task_struct;
 
 #ifdef CONFIG_LATENCYTOP
@@ -35,6 +38,13 @@  account_scheduler_latency(struct task_struct *task, int usecs, int inter)
 		__account_scheduler_latency(task, usecs, inter);
 }
 
+static inline void
+account_latency(int usecs)
+{
+	if (unlikely(latencytop_enabled))
+		__account_scheduler_latency(current, usecs, 0);
+}
+
 void clear_all_latency_tracing(struct task_struct *p);
 
 #else
@@ -44,6 +54,11 @@  account_scheduler_latency(struct task_struct *task, int usecs, int inter)
 {
 }
 
+static inline void
+account_latency(int usecs)
+{
+}
+
 static inline void clear_all_latency_tracing(struct task_struct *p)
 {
 }
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index a02812743a7e..b066a19fc52a 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -64,6 +64,7 @@  static DEFINE_RAW_SPINLOCK(latency_lock);
 static struct latency_record latency_record[MAXLR];
 
 int latencytop_enabled;
+EXPORT_SYMBOL_GPL(latencytop_enabled);
 
 void clear_all_latency_tracing(struct task_struct *p)
 {
@@ -234,6 +235,7 @@  __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
 out_unlock:
 	raw_spin_unlock_irqrestore(&latency_lock, flags);
 }
+EXPORT_SYMBOL_GPL(__account_scheduler_latency);
 
 static int lstats_show(struct seq_file *m, void *v)
 {