diff mbox series

[v8,3/3] binder: add transaction latency tracer

Message ID 1599480055-25781-4-git-send-email-Frankie.Chang@mediatek.com (mailing list archive)
State New, archived
Headers show
Series [v8,1/3] binder: move structs from core file to header file | expand

Commit Message

Frankie Chang Sept. 7, 2020, noon UTC
From: "Frankie.Chang" <Frankie.Chang@mediatek.com>

Record start/end timestamp for binder transaction.
When transaction is completed or transaction is free,
it would be checked if transaction latency over threshold
(default 2 sec), if yes, printing related information for tracing.

/* Implement details */
- Add latency tracer module to monitor transaction
  by attaching to new tracepoints introduced
  when transactions are allocated and freed.
  The trace_binder_txn_latency_free would not be enabled
  by default. Monitoring which transaction is too slow to
  cause some of exceptions is important. So we hook the
  tracepoint to call the monitor function.

- Since some of modules would trigger timeout NE
  if their binder transaction don't finish in time,
  such as audio timeout (5 sec), even BT command
  timeout (2 sec), etc.
  Therefore, setting the timeout threshold as default
  2 seconds could be helpful to debug.
  But this timeout threshold is configurable, to let
  all users determine the more suitable threshold.

- The reason why printing the related information to
  kernel information log but not trace buffer is that
  some abnormal transactions may be pending for a long
  time ago, they could not be recorded due to buffer
  limited.

Signed-off-by: Frankie.Chang <Frankie.Chang@mediatek.com>
Acked-by: Todd Kjos <tkjos@google.com>
---
 drivers/android/Kconfig                 |    8 +++
 drivers/android/Makefile                |    1 +
 drivers/android/binder.c                |    6 ++
 drivers/android/binder_internal.h       |   13 ++++
 drivers/android/binder_latency_tracer.c |  112 +++++++++++++++++++++++++++++++
 drivers/android/binder_trace.h          |   26 ++++++-
 6 files changed, 163 insertions(+), 3 deletions(-)
 create mode 100644 drivers/android/binder_latency_tracer.c

Comments

Greg Kroah-Hartman Sept. 7, 2020, 12:25 p.m. UTC | #1
On Mon, Sep 07, 2020 at 08:00:55PM +0800, Frankie Chang wrote:
> From: "Frankie.Chang" <Frankie.Chang@mediatek.com>
> 
> Record start/end timestamp for binder transaction.
> When transaction is completed or transaction is free,
> it would be checked if transaction latency over threshold
> (default 2 sec), if yes, printing related information for tracing.
> 
> /* Implement details */
> - Add latency tracer module to monitor transaction
>   by attaching to new tracepoints introduced
>   when transactions are allocated and freed.
>   The trace_binder_txn_latency_free would not be enabled
>   by default. Monitoring which transaction is too slow to
>   cause some of exceptions is important. So we hook the
>   tracepoint to call the monitor function.
> 
> - Since some of modules would trigger timeout NE
>   if their binder transaction don't finish in time,
>   such as audio timeout (5 sec), even BT command
>   timeout (2 sec), etc.
>   Therefore, setting the timeout threshold as default
>   2 seconds could be helpful to debug.
>   But this timeout threshold is configurable, to let
>   all users determine the more suitable threshold.
> 
> - The reason why printing the related information to
>   kernel information log but not trace buffer is that
>   some abnormal transactions may be pending for a long
>   time ago, they could not be recorded due to buffer
>   limited.
> 
> Signed-off-by: Frankie.Chang <Frankie.Chang@mediatek.com>
> Acked-by: Todd Kjos <tkjos@google.com>
> ---
>  drivers/android/Kconfig                 |    8 +++
>  drivers/android/Makefile                |    1 +
>  drivers/android/binder.c                |    6 ++
>  drivers/android/binder_internal.h       |   13 ++++
>  drivers/android/binder_latency_tracer.c |  112 +++++++++++++++++++++++++++++++
>  drivers/android/binder_trace.h          |   26 ++++++-
>  6 files changed, 163 insertions(+), 3 deletions(-)
>  create mode 100644 drivers/android/binder_latency_tracer.c
> 
> diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
> index 53b22e2..8aadaf4 100644
> --- a/drivers/android/Kconfig
> +++ b/drivers/android/Kconfig
> @@ -54,6 +54,14 @@ config ANDROID_BINDER_IPC_SELFTEST
>  	  exhaustively with combinations of various buffer sizes and
>  	  alignments.
>  
> +config BINDER_TRANSACTION_LATENCY_TRACKING
> +	tristate "Android Binder transaction tracking"
> +	help
> +	  Used for track abnormal binder transaction which is over threshold,
> +	  when the transaction is done or be free, this transaction would be
> +	  checked whether it executed overtime.
> +	  If yes, printing out the detailed info.
> +
>  endif # if ANDROID
>  
>  endmenu
> diff --git a/drivers/android/Makefile b/drivers/android/Makefile
> index c9d3d0c9..c2ffdb6 100644
> --- a/drivers/android/Makefile
> +++ b/drivers/android/Makefile
> @@ -4,3 +4,4 @@ ccflags-y += -I$(src)			# needed for trace events
>  obj-$(CONFIG_ANDROID_BINDERFS)		+= binderfs.o
>  obj-$(CONFIG_ANDROID_BINDER_IPC)	+= binder.o binder_alloc.o
>  obj-$(CONFIG_ANDROID_BINDER_IPC_SELFTEST) += binder_alloc_selftest.o
> +obj-$(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)	+= binder_latency_tracer.o
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index 0cab900..65ad38c 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -2674,6 +2674,7 @@ static void binder_transaction(struct binder_proc *proc,
>  		return_error_line = __LINE__;
>  		goto err_alloc_t_failed;
>  	}
> +	trace_binder_txn_latency_alloc(t);
>  	INIT_LIST_HEAD(&t->fd_fixups);
>  	binder_stats_created(BINDER_STAT_TRANSACTION);
>  	spin_lock_init(&t->lock);
> @@ -5177,6 +5178,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m,
>  		   to_proc ? to_proc->pid : 0,
>  		   t->to_thread ? t->to_thread->pid : 0,
>  		   t->code, t->flags, t->priority, t->need_reply);
> +	trace_binder_txn_latency_info(m, t);
>  	spin_unlock(&t->lock);
>  
>  	if (proc != to_proc) {
> @@ -5818,4 +5820,8 @@ static int __init binder_init(void)
>  #define CREATE_TRACE_POINTS
>  #include "binder_trace.h"
>  
> +EXPORT_TRACEPOINT_SYMBOL(binder_txn_latency_alloc);
> +EXPORT_TRACEPOINT_SYMBOL(binder_txn_latency_info);
> +EXPORT_TRACEPOINT_SYMBOL(binder_txn_latency_free);
> +
>  MODULE_LICENSE("GPL v2");
> diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
> index 872232f..309d1b10 100644
> --- a/drivers/android/binder_internal.h
> +++ b/drivers/android/binder_internal.h
> @@ -15,6 +15,11 @@
>  #include <uapi/linux/android/binderfs.h>
>  #include "binder_alloc.h"
>  
> +#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
> +#include <uapi/linux/time_types.h>
> +#include <linux/rtc.h>
> +#endif
> +
>  struct binder_context {
>  	struct binder_node *binder_context_mgr_node;
>  	struct mutex context_mgr_node_lock;
> @@ -524,6 +529,14 @@ struct binder_transaction {
>  	 * during thread teardown
>  	 */
>  	spinlock_t lock;
> +	/**
> +	 * @timestamp and @tv are used to record the time
> +	 * that the binder transaction startup
> +	 */
> +#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
> +	struct timespec64 timestamp;
> +	struct __kernel_old_timeval tv;

As I stated in the 0/3 email, please use the correct in-kernel structure
for this.  That structure is _ONLY_ for old userspace api compatibility
issues.  Which is not the case here at all.

thanks,

greg k-h
Frankie Chang Sept. 7, 2020, 1:51 p.m. UTC | #2
On Mon, 2020-09-07 at 14:25 +0200, Greg Kroah-Hartman wrote:
> On Mon, Sep 07, 2020 at 08:00:55PM +0800, Frankie Chang wrote:
> > From: "Frankie.Chang" <Frankie.Chang@mediatek.com>
> > 
> > Record start/end timestamp for binder transaction.
> > When transaction is completed or transaction is free,
> > it would be checked if transaction latency over threshold
> > (default 2 sec), if yes, printing related information for tracing.
> > 
> > /* Implement details */
> > - Add latency tracer module to monitor transaction
> >   by attaching to new tracepoints introduced
> >   when transactions are allocated and freed.
> >   The trace_binder_txn_latency_free would not be enabled
> >   by default. Monitoring which transaction is too slow to
> >   cause some of exceptions is important. So we hook the
> >   tracepoint to call the monitor function.
> > 
> > - Since some of modules would trigger timeout NE
> >   if their binder transaction don't finish in time,
> >   such as audio timeout (5 sec), even BT command
> >   timeout (2 sec), etc.
> >   Therefore, setting the timeout threshold as default
> >   2 seconds could be helpful to debug.
> >   But this timeout threshold is configurable, to let
> >   all users determine the more suitable threshold.
> > 
> > - The reason why printing the related information to
> >   kernel information log but not trace buffer is that
> >   some abnormal transactions may be pending for a long
> >   time ago, they could not be recorded due to buffer
> >   limited.
> > 
> > Signed-off-by: Frankie.Chang <Frankie.Chang@mediatek.com>
> > Acked-by: Todd Kjos <tkjos@google.com>
> > ---
> >  drivers/android/Kconfig                 |    8 +++
> >  drivers/android/Makefile                |    1 +
> >  drivers/android/binder.c                |    6 ++
> >  drivers/android/binder_internal.h       |   13 ++++
> >  drivers/android/binder_latency_tracer.c |  112 +++++++++++++++++++++++++++++++
> >  drivers/android/binder_trace.h          |   26 ++++++-
> >  6 files changed, 163 insertions(+), 3 deletions(-)
> >  create mode 100644 drivers/android/binder_latency_tracer.c
> > 
> > diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
> > index 53b22e2..8aadaf4 100644
> > --- a/drivers/android/Kconfig
> > +++ b/drivers/android/Kconfig
> > @@ -54,6 +54,14 @@ config ANDROID_BINDER_IPC_SELFTEST
> >  	  exhaustively with combinations of various buffer sizes and
> >  	  alignments.
> >  
> > +config BINDER_TRANSACTION_LATENCY_TRACKING
> > +	tristate "Android Binder transaction tracking"
> > +	help
> > +	  Used for track abnormal binder transaction which is over threshold,
> > +	  when the transaction is done or be free, this transaction would be
> > +	  checked whether it executed overtime.
> > +	  If yes, printing out the detailed info.
> > +
> >  endif # if ANDROID
> >  
> >  endmenu
> > diff --git a/drivers/android/Makefile b/drivers/android/Makefile
> > index c9d3d0c9..c2ffdb6 100644
> > --- a/drivers/android/Makefile
> > +++ b/drivers/android/Makefile
> > @@ -4,3 +4,4 @@ ccflags-y += -I$(src)			# needed for trace events
> >  obj-$(CONFIG_ANDROID_BINDERFS)		+= binderfs.o
> >  obj-$(CONFIG_ANDROID_BINDER_IPC)	+= binder.o binder_alloc.o
> >  obj-$(CONFIG_ANDROID_BINDER_IPC_SELFTEST) += binder_alloc_selftest.o
> > +obj-$(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)	+= binder_latency_tracer.o
> > diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> > index 0cab900..65ad38c 100644
> > --- a/drivers/android/binder.c
> > +++ b/drivers/android/binder.c
> > @@ -2674,6 +2674,7 @@ static void binder_transaction(struct binder_proc *proc,
> >  		return_error_line = __LINE__;
> >  		goto err_alloc_t_failed;
> >  	}
> > +	trace_binder_txn_latency_alloc(t);
> >  	INIT_LIST_HEAD(&t->fd_fixups);
> >  	binder_stats_created(BINDER_STAT_TRANSACTION);
> >  	spin_lock_init(&t->lock);
> > @@ -5177,6 +5178,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m,
> >  		   to_proc ? to_proc->pid : 0,
> >  		   t->to_thread ? t->to_thread->pid : 0,
> >  		   t->code, t->flags, t->priority, t->need_reply);
> > +	trace_binder_txn_latency_info(m, t);
> >  	spin_unlock(&t->lock);
> >  
> >  	if (proc != to_proc) {
> > @@ -5818,4 +5820,8 @@ static int __init binder_init(void)
> >  #define CREATE_TRACE_POINTS
> >  #include "binder_trace.h"
> >  
> > +EXPORT_TRACEPOINT_SYMBOL(binder_txn_latency_alloc);
> > +EXPORT_TRACEPOINT_SYMBOL(binder_txn_latency_info);
> > +EXPORT_TRACEPOINT_SYMBOL(binder_txn_latency_free);
> > +
> >  MODULE_LICENSE("GPL v2");
> > diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
> > index 872232f..309d1b10 100644
> > --- a/drivers/android/binder_internal.h
> > +++ b/drivers/android/binder_internal.h
> > @@ -15,6 +15,11 @@
> >  #include <uapi/linux/android/binderfs.h>
> >  #include "binder_alloc.h"
> >  
> > +#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
> > +#include <uapi/linux/time_types.h>
> > +#include <linux/rtc.h>
> > +#endif
> > +
> >  struct binder_context {
> >  	struct binder_node *binder_context_mgr_node;
> >  	struct mutex context_mgr_node_lock;
> > @@ -524,6 +529,14 @@ struct binder_transaction {
> >  	 * during thread teardown
> >  	 */
> >  	spinlock_t lock;
> > +	/**
> > +	 * @timestamp and @tv are used to record the time
> > +	 * that the binder transaction startup
> > +	 */
> > +#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
> > +	struct timespec64 timestamp;
> > +	struct __kernel_old_timeval tv;
> 
> As I stated in the 0/3 email, please use the correct in-kernel structure
> for this.  That structure is _ONLY_ for old userspace api compatibility
> issues.  Which is not the case here at all.
> 
> thanks,
> greg k-h

Since it's not proper to use timeval & __kernel_old_timeval.
The remaining structure with tv_usec that I found is
__kernel_timex_timeval, but I'm not sure if this is proper one or not.
All I can see is in
https://lwn.net/ml/linux-kernel/20180705213604.18883-6-deepa.kernel@gmail.com/

I'm afraid of using wrong structure again.
Could you give me some advices about this?

Or is there another suitable way to replace timeval

Many thanks,
Frankie
Greg Kroah-Hartman Sept. 7, 2020, 2:09 p.m. UTC | #3
On Mon, Sep 07, 2020 at 09:51:15PM +0800, Frankie Chang wrote:
> On Mon, 2020-09-07 at 14:25 +0200, Greg Kroah-Hartman wrote:
> > On Mon, Sep 07, 2020 at 08:00:55PM +0800, Frankie Chang wrote:
> > > From: "Frankie.Chang" <Frankie.Chang@mediatek.com>
> > > 
> > > Record start/end timestamp for binder transaction.
> > > When transaction is completed or transaction is free,
> > > it would be checked if transaction latency over threshold
> > > (default 2 sec), if yes, printing related information for tracing.
> > > 
> > > /* Implement details */
> > > - Add latency tracer module to monitor transaction
> > >   by attaching to new tracepoints introduced
> > >   when transactions are allocated and freed.
> > >   The trace_binder_txn_latency_free would not be enabled
> > >   by default. Monitoring which transaction is too slow to
> > >   cause some of exceptions is important. So we hook the
> > >   tracepoint to call the monitor function.
> > > 
> > > - Since some of modules would trigger timeout NE
> > >   if their binder transaction don't finish in time,
> > >   such as audio timeout (5 sec), even BT command
> > >   timeout (2 sec), etc.
> > >   Therefore, setting the timeout threshold as default
> > >   2 seconds could be helpful to debug.
> > >   But this timeout threshold is configurable, to let
> > >   all users determine the more suitable threshold.
> > > 
> > > - The reason why printing the related information to
> > >   kernel information log but not trace buffer is that
> > >   some abnormal transactions may be pending for a long
> > >   time ago, they could not be recorded due to buffer
> > >   limited.
> > > 
> > > Signed-off-by: Frankie.Chang <Frankie.Chang@mediatek.com>
> > > Acked-by: Todd Kjos <tkjos@google.com>
> > > ---
> > >  drivers/android/Kconfig                 |    8 +++
> > >  drivers/android/Makefile                |    1 +
> > >  drivers/android/binder.c                |    6 ++
> > >  drivers/android/binder_internal.h       |   13 ++++
> > >  drivers/android/binder_latency_tracer.c |  112 +++++++++++++++++++++++++++++++
> > >  drivers/android/binder_trace.h          |   26 ++++++-
> > >  6 files changed, 163 insertions(+), 3 deletions(-)
> > >  create mode 100644 drivers/android/binder_latency_tracer.c
> > > 
> > > diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
> > > index 53b22e2..8aadaf4 100644
> > > --- a/drivers/android/Kconfig
> > > +++ b/drivers/android/Kconfig
> > > @@ -54,6 +54,14 @@ config ANDROID_BINDER_IPC_SELFTEST
> > >  	  exhaustively with combinations of various buffer sizes and
> > >  	  alignments.
> > >  
> > > +config BINDER_TRANSACTION_LATENCY_TRACKING
> > > +	tristate "Android Binder transaction tracking"
> > > +	help
> > > +	  Used for track abnormal binder transaction which is over threshold,
> > > +	  when the transaction is done or be free, this transaction would be
> > > +	  checked whether it executed overtime.
> > > +	  If yes, printing out the detailed info.
> > > +
> > >  endif # if ANDROID
> > >  
> > >  endmenu
> > > diff --git a/drivers/android/Makefile b/drivers/android/Makefile
> > > index c9d3d0c9..c2ffdb6 100644
> > > --- a/drivers/android/Makefile
> > > +++ b/drivers/android/Makefile
> > > @@ -4,3 +4,4 @@ ccflags-y += -I$(src)			# needed for trace events
> > >  obj-$(CONFIG_ANDROID_BINDERFS)		+= binderfs.o
> > >  obj-$(CONFIG_ANDROID_BINDER_IPC)	+= binder.o binder_alloc.o
> > >  obj-$(CONFIG_ANDROID_BINDER_IPC_SELFTEST) += binder_alloc_selftest.o
> > > +obj-$(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)	+= binder_latency_tracer.o
> > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> > > index 0cab900..65ad38c 100644
> > > --- a/drivers/android/binder.c
> > > +++ b/drivers/android/binder.c
> > > @@ -2674,6 +2674,7 @@ static void binder_transaction(struct binder_proc *proc,
> > >  		return_error_line = __LINE__;
> > >  		goto err_alloc_t_failed;
> > >  	}
> > > +	trace_binder_txn_latency_alloc(t);
> > >  	INIT_LIST_HEAD(&t->fd_fixups);
> > >  	binder_stats_created(BINDER_STAT_TRANSACTION);
> > >  	spin_lock_init(&t->lock);
> > > @@ -5177,6 +5178,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m,
> > >  		   to_proc ? to_proc->pid : 0,
> > >  		   t->to_thread ? t->to_thread->pid : 0,
> > >  		   t->code, t->flags, t->priority, t->need_reply);
> > > +	trace_binder_txn_latency_info(m, t);
> > >  	spin_unlock(&t->lock);
> > >  
> > >  	if (proc != to_proc) {
> > > @@ -5818,4 +5820,8 @@ static int __init binder_init(void)
> > >  #define CREATE_TRACE_POINTS
> > >  #include "binder_trace.h"
> > >  
> > > +EXPORT_TRACEPOINT_SYMBOL(binder_txn_latency_alloc);
> > > +EXPORT_TRACEPOINT_SYMBOL(binder_txn_latency_info);
> > > +EXPORT_TRACEPOINT_SYMBOL(binder_txn_latency_free);
> > > +
> > >  MODULE_LICENSE("GPL v2");
> > > diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
> > > index 872232f..309d1b10 100644
> > > --- a/drivers/android/binder_internal.h
> > > +++ b/drivers/android/binder_internal.h
> > > @@ -15,6 +15,11 @@
> > >  #include <uapi/linux/android/binderfs.h>
> > >  #include "binder_alloc.h"
> > >  
> > > +#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
> > > +#include <uapi/linux/time_types.h>
> > > +#include <linux/rtc.h>
> > > +#endif
> > > +
> > >  struct binder_context {
> > >  	struct binder_node *binder_context_mgr_node;
> > >  	struct mutex context_mgr_node_lock;
> > > @@ -524,6 +529,14 @@ struct binder_transaction {
> > >  	 * during thread teardown
> > >  	 */
> > >  	spinlock_t lock;
> > > +	/**
> > > +	 * @timestamp and @tv are used to record the time
> > > +	 * that the binder transaction startup
> > > +	 */
> > > +#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
> > > +	struct timespec64 timestamp;
> > > +	struct __kernel_old_timeval tv;
> > 
> > As I stated in the 0/3 email, please use the correct in-kernel structure
> > for this.  That structure is _ONLY_ for old userspace api compatibility
> > issues.  Which is not the case here at all.
> > 
> > thanks,
> > greg k-h
> 
> Since it's not proper to use timeval & __kernel_old_timeval.
> The remaining structure with tv_usec that I found is
> __kernel_timex_timeval, but I'm not sure if this is proper one or not.
> All I can see is in
> https://lwn.net/ml/linux-kernel/20180705213604.18883-6-deepa.kernel@gmail.com/
> 
> I'm afraid of using wrong structure again.
> Could you give me some advices about this?
> 
> Or is there another suitable way to replace timeval

What exactly are you trying to measure such that you feel you need a
timeval structure?

thanks,

greg k-h
Frankie Chang Sept. 8, 2020, 5:38 a.m. UTC | #4
On Mon, 2020-09-07 at 16:09 +0200, Greg Kroah-Hartman wrote:
> On Mon, Sep 07, 2020 at 09:51:15PM +0800, Frankie Chang wrote:
> > On Mon, 2020-09-07 at 14:25 +0200, Greg Kroah-Hartman wrote:
> > > On Mon, Sep 07, 2020 at 08:00:55PM +0800, Frankie Chang wrote:
> > > > From: "Frankie.Chang" <Frankie.Chang@mediatek.com>
> > > > 
> > > > Record start/end timestamp for binder transaction.
> > > > When transaction is completed or transaction is free,
> > > > it would be checked if transaction latency over threshold
> > > > (default 2 sec), if yes, printing related information for tracing.
> > > > 
> > > > /* Implement details */
> > > > - Add latency tracer module to monitor transaction
> > > >   by attaching to new tracepoints introduced
> > > >   when transactions are allocated and freed.
> > > >   The trace_binder_txn_latency_free would not be enabled
> > > >   by default. Monitoring which transaction is too slow to
> > > >   cause some of exceptions is important. So we hook the
> > > >   tracepoint to call the monitor function.
> > > > 
> > > > - Since some of modules would trigger timeout NE
> > > >   if their binder transaction don't finish in time,
> > > >   such as audio timeout (5 sec), even BT command
> > > >   timeout (2 sec), etc.
> > > >   Therefore, setting the timeout threshold as default
> > > >   2 seconds could be helpful to debug.
> > > >   But this timeout threshold is configurable, to let
> > > >   all users determine the more suitable threshold.
> > > > 
> > > > - The reason why printing the related information to
> > > >   kernel information log but not trace buffer is that
> > > >   some abnormal transactions may be pending for a long
> > > >   time ago, they could not be recorded due to buffer
> > > >   limited.
> > > > 
> > > > Signed-off-by: Frankie.Chang <Frankie.Chang@mediatek.com>
> > > > Acked-by: Todd Kjos <tkjos@google.com>
> > > > ---
> > > >  drivers/android/Kconfig                 |    8 +++
> > > >  drivers/android/Makefile                |    1 +
> > > >  drivers/android/binder.c                |    6 ++
> > > >  drivers/android/binder_internal.h       |   13 ++++
> > > >  drivers/android/binder_latency_tracer.c |  112 +++++++++++++++++++++++++++++++
> > > >  drivers/android/binder_trace.h          |   26 ++++++-
> > > >  6 files changed, 163 insertions(+), 3 deletions(-)
> > > >  create mode 100644 drivers/android/binder_latency_tracer.c
> > > > 
> > > > diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
> > > > index 53b22e2..8aadaf4 100644
> > > > --- a/drivers/android/Kconfig
> > > > +++ b/drivers/android/Kconfig
> > > > @@ -54,6 +54,14 @@ config ANDROID_BINDER_IPC_SELFTEST
> > > >  	  exhaustively with combinations of various buffer sizes and
> > > >  	  alignments.
> > > >  
> > > > +config BINDER_TRANSACTION_LATENCY_TRACKING
> > > > +	tristate "Android Binder transaction tracking"
> > > > +	help
> > > > +	  Used for track abnormal binder transaction which is over threshold,
> > > > +	  when the transaction is done or be free, this transaction would be
> > > > +	  checked whether it executed overtime.
> > > > +	  If yes, printing out the detailed info.
> > > > +
> > > >  endif # if ANDROID
> > > >  
> > > >  endmenu
> > > > diff --git a/drivers/android/Makefile b/drivers/android/Makefile
> > > > index c9d3d0c9..c2ffdb6 100644
> > > > --- a/drivers/android/Makefile
> > > > +++ b/drivers/android/Makefile
> > > > @@ -4,3 +4,4 @@ ccflags-y += -I$(src)			# needed for trace events
> > > >  obj-$(CONFIG_ANDROID_BINDERFS)		+= binderfs.o
> > > >  obj-$(CONFIG_ANDROID_BINDER_IPC)	+= binder.o binder_alloc.o
> > > >  obj-$(CONFIG_ANDROID_BINDER_IPC_SELFTEST) += binder_alloc_selftest.o
> > > > +obj-$(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)	+= binder_latency_tracer.o
> > > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> > > > index 0cab900..65ad38c 100644
> > > > --- a/drivers/android/binder.c
> > > > +++ b/drivers/android/binder.c
> > > > @@ -2674,6 +2674,7 @@ static void binder_transaction(struct binder_proc *proc,
> > > >  		return_error_line = __LINE__;
> > > >  		goto err_alloc_t_failed;
> > > >  	}
> > > > +	trace_binder_txn_latency_alloc(t);
> > > >  	INIT_LIST_HEAD(&t->fd_fixups);
> > > >  	binder_stats_created(BINDER_STAT_TRANSACTION);
> > > >  	spin_lock_init(&t->lock);
> > > > @@ -5177,6 +5178,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m,
> > > >  		   to_proc ? to_proc->pid : 0,
> > > >  		   t->to_thread ? t->to_thread->pid : 0,
> > > >  		   t->code, t->flags, t->priority, t->need_reply);
> > > > +	trace_binder_txn_latency_info(m, t);
> > > >  	spin_unlock(&t->lock);
> > > >  
> > > >  	if (proc != to_proc) {
> > > > @@ -5818,4 +5820,8 @@ static int __init binder_init(void)
> > > >  #define CREATE_TRACE_POINTS
> > > >  #include "binder_trace.h"
> > > >  
> > > > +EXPORT_TRACEPOINT_SYMBOL(binder_txn_latency_alloc);
> > > > +EXPORT_TRACEPOINT_SYMBOL(binder_txn_latency_info);
> > > > +EXPORT_TRACEPOINT_SYMBOL(binder_txn_latency_free);
> > > > +
> > > >  MODULE_LICENSE("GPL v2");
> > > > diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
> > > > index 872232f..309d1b10 100644
> > > > --- a/drivers/android/binder_internal.h
> > > > +++ b/drivers/android/binder_internal.h
> > > > @@ -15,6 +15,11 @@
> > > >  #include <uapi/linux/android/binderfs.h>
> > > >  #include "binder_alloc.h"
> > > >  
> > > > +#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
> > > > +#include <uapi/linux/time_types.h>
> > > > +#include <linux/rtc.h>
> > > > +#endif
> > > > +
> > > >  struct binder_context {
> > > >  	struct binder_node *binder_context_mgr_node;
> > > >  	struct mutex context_mgr_node_lock;
> > > > @@ -524,6 +529,14 @@ struct binder_transaction {
> > > >  	 * during thread teardown
> > > >  	 */
> > > >  	spinlock_t lock;
> > > > +	/**
> > > > +	 * @timestamp and @tv are used to record the time
> > > > +	 * that the binder transaction startup
> > > > +	 */
> > > > +#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
> > > > +	struct timespec64 timestamp;
> > > > +	struct __kernel_old_timeval tv;
> > > 
> > > As I stated in the 0/3 email, please use the correct in-kernel structure
> > > for this.  That structure is _ONLY_ for old userspace api compatibility
> > > issues.  Which is not the case here at all.
> > > 
> > > thanks,
> > > greg k-h
> > 
> > Since it's not proper to use timeval & __kernel_old_timeval.
> > The remaining structure with tv_usec that I found is
> > __kernel_timex_timeval, but I'm not sure if this is proper one or not.
> > All I can see is in
> > https://lwn.net/ml/linux-kernel/20180705213604.18883-6-deepa.kernel@gmail.com/
> > 
> > I'm afraid of using wrong structure again.
> > Could you give me some advices about this?
> > 
> > Or is there another suitable way to replace timeval
> 
> What exactly are you trying to measure such that you feel you need a
> timeval structure?
> 
Actually I'd like to record timestamp with standard time structure, so I
used 'struct timeval' in the past patch. However it's deprecated, so I
am looking for the alternatives.

Additionally, I have already looked some patches that replacing timeval
with timespec64. I'll do the same way in next version patch.

Many thanks,
Frankie
Frankie Chang Sept. 8, 2020, 2:06 p.m. UTC | #5
Change from v9:
  - rename timestamp to ts in binder_internal.h for conciseness.
  - change 'struct timeval' to 'struct timespec64' in binder_internal.h.

Change from v8:
  - change rtc_time_to_tm to rtc_time64_to_tm.
  - change timeval to __kernel_old_timeval due to 
    https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c766d1472c70d25ad475cf56042af1652e792b23
  - export tracepoint symbol for binder_txn_latency_* which binder_transaction_latency_tracer to be ko needed.

Change from v7:
  - Use the passed-in values instead of accessing via t->from/to_proc/to_thread
    for trace_binder_txn_latency_free, when trace_binder_txn_latency_free_enable() return true.
  - make a helper function to do the above.

Change from v6:
  - change CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING type from bool to tristate
  - add comments to @timestamp and @tv under struct binder_transaction
  - make binder_txn_latency threshold configurable
  - enhance lock protection

Change from v5:
  - change config name to the proper one, CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING.
  - change tracepoint name to more descriptive one, trace_binder_txn_latency_(alloc|info|free)
  - enhance some lock protection.

Change from v4:
  - split up into patch series.

Change from v3:
  - use tracepoints for binder_update_info and print_binder_transaction_ext,
    instead of custom registration functions.

Change from v2:
  - create transaction latency module to monitor slow transaction.

Change from v1:
  - first patchset.


Frankie.Chang (3):
  binder: move structs from core file to header file
  binder: add trace at free transaction.
  binder: add transaction latency tracer

 drivers/android/Kconfig                 |   8 +
 drivers/android/Makefile                |   1 +
 drivers/android/binder.c                | 430 ++----------------------
 drivers/android/binder_internal.h       | 418 +++++++++++++++++++++++
 drivers/android/binder_latency_tracer.c | 108 ++++++
 drivers/android/binder_trace.h          |  49 +++
 6 files changed, 608 insertions(+), 406 deletions(-)
 create mode 100644 drivers/android/binder_latency_tracer.c
Greg Kroah-Hartman Sept. 16, 2020, 3:29 p.m. UTC | #6
On Tue, Sep 08, 2020 at 10:06:45PM +0800, Frankie Chang wrote:
> Change from v9:
>   - rename timestamp to ts in binder_internal.h for conciseness.
>   - change 'struct timeval' to 'struct timespec64' in binder_internal.h.

These just blew up in the 0-day testing, again, so I'm dropping them
from my tree.

Can you please fix them up and resend when you have them properly
building?

thanks,

greg k-h
diff mbox series

Patch

diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
index 53b22e2..8aadaf4 100644
--- a/drivers/android/Kconfig
+++ b/drivers/android/Kconfig
@@ -54,6 +54,14 @@  config ANDROID_BINDER_IPC_SELFTEST
 	  exhaustively with combinations of various buffer sizes and
 	  alignments.
 
+config BINDER_TRANSACTION_LATENCY_TRACKING
+	tristate "Android Binder transaction tracking"
+	help
+	  Used for track abnormal binder transaction which is over threshold,
+	  when the transaction is done or be free, this transaction would be
+	  checked whether it executed overtime.
+	  If yes, printing out the detailed info.
+
 endif # if ANDROID
 
 endmenu
diff --git a/drivers/android/Makefile b/drivers/android/Makefile
index c9d3d0c9..c2ffdb6 100644
--- a/drivers/android/Makefile
+++ b/drivers/android/Makefile
@@ -4,3 +4,4 @@  ccflags-y += -I$(src)			# needed for trace events
 obj-$(CONFIG_ANDROID_BINDERFS)		+= binderfs.o
 obj-$(CONFIG_ANDROID_BINDER_IPC)	+= binder.o binder_alloc.o
 obj-$(CONFIG_ANDROID_BINDER_IPC_SELFTEST) += binder_alloc_selftest.o
+obj-$(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)	+= binder_latency_tracer.o
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 0cab900..65ad38c 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2674,6 +2674,7 @@  static void binder_transaction(struct binder_proc *proc,
 		return_error_line = __LINE__;
 		goto err_alloc_t_failed;
 	}
+	trace_binder_txn_latency_alloc(t);
 	INIT_LIST_HEAD(&t->fd_fixups);
 	binder_stats_created(BINDER_STAT_TRANSACTION);
 	spin_lock_init(&t->lock);
@@ -5177,6 +5178,7 @@  static void print_binder_transaction_ilocked(struct seq_file *m,
 		   to_proc ? to_proc->pid : 0,
 		   t->to_thread ? t->to_thread->pid : 0,
 		   t->code, t->flags, t->priority, t->need_reply);
+	trace_binder_txn_latency_info(m, t);
 	spin_unlock(&t->lock);
 
 	if (proc != to_proc) {
@@ -5818,4 +5820,8 @@  static int __init binder_init(void)
 #define CREATE_TRACE_POINTS
 #include "binder_trace.h"
 
+EXPORT_TRACEPOINT_SYMBOL(binder_txn_latency_alloc);
+EXPORT_TRACEPOINT_SYMBOL(binder_txn_latency_info);
+EXPORT_TRACEPOINT_SYMBOL(binder_txn_latency_free);
+
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index 872232f..309d1b10 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -15,6 +15,11 @@ 
 #include <uapi/linux/android/binderfs.h>
 #include "binder_alloc.h"
 
+#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
+#include <uapi/linux/time_types.h>
+#include <linux/rtc.h>
+#endif
+
 struct binder_context {
 	struct binder_node *binder_context_mgr_node;
 	struct mutex context_mgr_node_lock;
@@ -524,6 +529,14 @@  struct binder_transaction {
 	 * during thread teardown
 	 */
 	spinlock_t lock;
+	/**
+	 * @timestamp and @tv are used to record the time
+	 * that the binder transaction startup
+	 */
+#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
+	struct timespec64 timestamp;
+	struct __kernel_old_timeval tv;
+#endif
 };
 
 /**
diff --git a/drivers/android/binder_latency_tracer.c b/drivers/android/binder_latency_tracer.c
new file mode 100644
index 0000000..b29b26f
--- /dev/null
+++ b/drivers/android/binder_latency_tracer.c
@@ -0,0 +1,112 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 MediaTek Inc.
+ */
+
+#include <linux/module.h>
+#include <uapi/linux/android/binder.h>
+#include "binder_internal.h"
+#include "binder_trace.h"
+
+/**
+ * The reason setting the binder_txn_latency_threshold to 2 sec
+ * is that most of timeout abort is greater or equal to 2 sec.
+ * Making it configurable to let all users determine which
+ * threshold is more suitable.
+ */
+static uint32_t binder_txn_latency_threshold = 2;
+module_param_named(threshold, binder_txn_latency_threshold,
+			uint, 0644);
+
+/*
+ * probe_binder_txn_latency_free - Output info of a delay transaction
+ * @t:          pointer to the over-time transaction
+ */
+void probe_binder_txn_latency_free(void *ignore, struct binder_transaction *t,
+					int from_proc, int from_thread,
+					int to_proc, int to_thread)
+{
+	struct rtc_time tm;
+	struct timespec64 *startime;
+	struct timespec64 cur, sub_t;
+
+	ktime_get_ts64(&cur);
+	startime = &t->timestamp;
+	sub_t = timespec64_sub(cur, *startime);
+
+	/* if transaction time is over than binder_txn_latency_threshold (sec),
+	 * show timeout warning log.
+	 */
+	if (sub_t.tv_sec < binder_txn_latency_threshold)
+		return;
+
+	rtc_time64_to_tm(t->tv.tv_sec, &tm);
+
+	pr_info_ratelimited("%d: from %d:%d to %d:%d",
+			t->debug_id, from_proc, from_thread,
+			to_proc, to_thread);
+
+	pr_info_ratelimited(" total %u.%03ld s code %u start %lu.%03ld android %d-%02d-%02d %02d:%02d:%02d.%03lu\n",
+			(unsigned int)sub_t.tv_sec,
+			(sub_t.tv_nsec / NSEC_PER_MSEC),
+			t->code,
+			(unsigned long)startime->tv_sec,
+			(startime->tv_nsec / NSEC_PER_MSEC),
+			(tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday,
+			tm.tm_hour, tm.tm_min, tm.tm_sec,
+			(unsigned long)(t->tv.tv_usec / USEC_PER_MSEC));
+}
+
+static void probe_binder_txn_latency_alloc(void *ignore,
+					struct binder_transaction *t)
+{
+	struct timespec64 now;
+
+	ktime_get_ts64(&t->timestamp);
+	ktime_get_real_ts64(&now);
+	t->tv.tv_sec = now.tv_sec;
+	t->tv.tv_sec -= (sys_tz.tz_minuteswest * 60);
+	t->tv.tv_usec = now.tv_nsec/1000;
+}
+
+static void probe_binder_txn_latency_info(void *ignore, struct seq_file *m,
+					struct binder_transaction *t)
+{
+	struct rtc_time tm;
+
+	rtc_time64_to_tm(t->tv.tv_sec, &tm);
+	seq_printf(m,
+		   " start %lu.%06lu android %d-%02d-%02d %02d:%02d:%02d.%03lu",
+		   (unsigned long)t->timestamp.tv_sec,
+		   (t->timestamp.tv_nsec / NSEC_PER_USEC),
+		   (tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday,
+		   tm.tm_hour, tm.tm_min, tm.tm_sec,
+		   (unsigned long)(t->tv.tv_usec / USEC_PER_MSEC));
+}
+
+static int __init init_binder_latency_tracer(void)
+{
+	register_trace_binder_txn_latency_free(
+			probe_binder_txn_latency_free, NULL);
+	register_trace_binder_txn_latency_alloc(
+			probe_binder_txn_latency_alloc, NULL);
+	register_trace_binder_txn_latency_info(
+			probe_binder_txn_latency_info, NULL);
+
+	return 0;
+}
+
+static void exit_binder_latency_tracer(void)
+{
+	unregister_trace_binder_txn_latency_free(
+			probe_binder_txn_latency_free, NULL);
+	unregister_trace_binder_txn_latency_alloc(
+			probe_binder_txn_latency_alloc, NULL);
+	unregister_trace_binder_txn_latency_info(
+			probe_binder_txn_latency_info, NULL);
+}
+
+module_init(init_binder_latency_tracer);
+module_exit(exit_binder_latency_tracer);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/android/binder_trace.h b/drivers/android/binder_trace.h
index 8eeccdc..f1bbff4 100644
--- a/drivers/android/binder_trace.h
+++ b/drivers/android/binder_trace.h
@@ -95,6 +95,17 @@ 
 		  __entry->thread_todo)
 );
 
+DECLARE_TRACE(binder_txn_latency_alloc,
+	TP_PROTO(struct binder_transaction *t),
+	TP_ARGS(t)
+);
+
+DECLARE_TRACE(binder_txn_latency_info,
+	TP_PROTO(struct seq_file *m,
+		 struct binder_transaction *t),
+	TP_ARGS(m, t)
+);
+
 TRACE_EVENT(binder_txn_latency_free,
 	TP_PROTO(struct binder_transaction *t,
 		 int from_proc, int from_thread,
@@ -108,6 +119,8 @@ 
 		__field(int, to_thread)
 		__field(unsigned int, code)
 		__field(unsigned int, flags)
+		__field(unsigned long, start_sec)
+		__field(unsigned long, start_nsec)
 	),
 	TP_fast_assign(
 		__entry->debug_id = t->debug_id;
@@ -117,11 +130,18 @@ 
 		__entry->to_thread = to_thread;
 		__entry->code = t->code;
 		__entry->flags = t->flags;
-	),
-	TP_printk("transaction=%d from %d:%d to %d:%d flags=0x%x code=0x%x",
+#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
+		__entry->start_sec = t->timestamp.tv_sec;
+		__entry->start_nsec = t->timestamp.tv_nsec / NSEC_PER_MSEC;
+#else
+		__entry->start_sec = 0;
+		__entry->start_nsec = 0;
+#endif
+	),
+	TP_printk("transaction=%d from %d:%d to %d:%d flags=0x%x code=0x%x start %lu.%03ld",
 		  __entry->debug_id, __entry->from_proc, __entry->from_thread,
 		  __entry->to_proc, __entry->to_thread, __entry->code,
-		  __entry->flags)
+		  __entry->flags, __entry->start_sec, __entry->start_nsec)
 );
 
 TRACE_EVENT(binder_transaction,