[v1,1/1] binder: transaction latency tracking for user build
diff mbox series

Message ID 1580885572-14272-1-git-send-email-Frankie.Chang@mediatek.com
State New
Headers show
Series
  • [v1,1/1] binder: transaction latency tracking for user build
Related show

Commit Message

Frankie Chang Feb. 5, 2020, 6:52 a.m. UTC
Record start/end timestamp to binder transaction.
When transaction is completed or transaction is free,
it would be checked if transaction latency over threshold (2 sec),
if yes, printing related information for tracing.

Signed-off-by: Frankie Chang <Frankie.Chang@mediatek.com>
---
 drivers/android/Kconfig           |    8 +++
 drivers/android/binder.c          |  107 +++++++++++++++++++++++++++++++++++++
 drivers/android/binder_internal.h |    4 ++
 3 files changed, 119 insertions(+)

Comments

Greg KH Feb. 5, 2020, 9:36 a.m. UTC | #1
On Wed, Feb 05, 2020 at 02:52:52PM +0800, Frankie Chang wrote:
> Record start/end timestamp to binder transaction.
> When transaction is completed or transaction is free,
> it would be checked if transaction latency over threshold (2 sec),
> if yes, printing related information for tracing.
> 
> Signed-off-by: Frankie Chang <Frankie.Chang@mediatek.com>
> ---
>  drivers/android/Kconfig           |    8 +++
>  drivers/android/binder.c          |  107 +++++++++++++++++++++++++++++++++++++
>  drivers/android/binder_internal.h |    4 ++
>  3 files changed, 119 insertions(+)
> 
> diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
> index 6fdf2ab..7ba80eb 100644
> --- a/drivers/android/Kconfig
> +++ b/drivers/android/Kconfig
> @@ -54,6 +54,14 @@ config ANDROID_BINDER_IPC_SELFTEST
>  	  exhaustively with combinations of various buffer sizes and
>  	  alignments.
>  
> +config BINDER_USER_TRACKING
> +	bool "Android Binder transaction tracking"
> +	help
> +	  Used for track abnormal binder transaction which is over 2 seconds,
> +	  when the transaction is done or be free, this transaction would be
> +	  checked whether it executed overtime.
> +	  If yes, printing out the detail info about it.
> +
>  endif # if ANDROID
>  
>  endmenu
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index e9bc9fc..5a352ee 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -76,6 +76,11 @@
>  #include "binder_internal.h"
>  #include "binder_trace.h"
>  
> +#ifdef CONFIG_BINDER_USER_TRACKING
> +#include <linux/rtc.h>
> +#include <linux/time.h>
> +#endif
> +
>  static HLIST_HEAD(binder_deferred_list);
>  static DEFINE_MUTEX(binder_deferred_lock);
>  
> @@ -591,8 +596,104 @@ struct binder_transaction {
>  	 * during thread teardown
>  	 */
>  	spinlock_t lock;
> +#ifdef CONFIG_BINDER_USER_TRACKING
> +	struct timespec timestamp;
> +	struct timeval tv;
> +#endif
>  };
>  
> +#ifdef CONFIG_BINDER_USER_TRACKING
> +
> +/*
> + * binder_print_delay - Output info of a delay transaction
> + * @t:          pointer to the over-time transaction
> + */
> +static void binder_print_delay(struct binder_transaction *t)
> +{
> +	struct rtc_time tm;
> +	struct timespec *startime;
> +	struct timespec cur, sub_t;
> +
> +	ktime_get_ts(&cur);
> +	startime = &t->timestamp;
> +	sub_t = timespec_sub(cur, *startime);
> +
> +	/* if transaction time is over than 2 sec,
> +	 * show timeout warning log.
> +	 */
> +	if (sub_t.tv_sec < 2)
> +		return;
> +
> +	rtc_time_to_tm(t->tv.tv_sec, &tm);
> +
> +	spin_lock(&t->lock);
> +	pr_info_ratelimited("%d: from %d:%d to %d:%d",
> +			    t->debug_id,
> +			    t->from ? t->from->proc->pid : 0,
> +			    t->from ? t->from->pid : 0,
> +			    t->to_proc ? t->to_proc->pid : 0,
> +			    t->to_thread ? t->to_thread->pid : 0);
> +	spin_unlock(&t->lock);
> +
> +	pr_info_ratelimited(" total %u.%03ld s code %u start %lu.%03ld android %d-%02d-%02d %02d:%02d:%02d.%03lu\n",
> +			    (unsigned int)sub_t.tv_sec,
> +			    (sub_t.tv_nsec / NSEC_PER_MSEC),
> +			    t->code,
> +			    (unsigned long)startime->tv_sec,
> +			    (startime->tv_nsec / NSEC_PER_MSEC),
> +			    (tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday,
> +			    tm.tm_hour, tm.tm_min, tm.tm_sec,
> +			    (unsigned long)(t->tv.tv_usec / USEC_PER_MSEC));
> +}

Ick, why not use a tracepoint for this instead?

And what is userspace supposed to do with this if they see it?

thanks,

greg k-h
Joel Fernandes Feb. 5, 2020, 3:49 p.m. UTC | #2
On Wed, Feb 05, 2020 at 09:36:12AM +0000, Greg Kroah-Hartman wrote:
> On Wed, Feb 05, 2020 at 02:52:52PM +0800, Frankie Chang wrote:
> > Record start/end timestamp to binder transaction.
> > When transaction is completed or transaction is free,
> > it would be checked if transaction latency over threshold (2 sec),
> > if yes, printing related information for tracing.
> > 
> > Signed-off-by: Frankie Chang <Frankie.Chang@mediatek.com>
> > ---
> >  drivers/android/Kconfig           |    8 +++
> >  drivers/android/binder.c          |  107 +++++++++++++++++++++++++++++++++++++
> >  drivers/android/binder_internal.h |    4 ++
> >  3 files changed, 119 insertions(+)
> > 
> > diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
> > index 6fdf2ab..7ba80eb 100644
> > --- a/drivers/android/Kconfig
> > +++ b/drivers/android/Kconfig
> > @@ -54,6 +54,14 @@ config ANDROID_BINDER_IPC_SELFTEST
> >  	  exhaustively with combinations of various buffer sizes and
> >  	  alignments.
> >  
> > +config BINDER_USER_TRACKING
> > +	bool "Android Binder transaction tracking"
> > +	help
> > +	  Used for track abnormal binder transaction which is over 2 seconds,
> > +	  when the transaction is done or be free, this transaction would be
> > +	  checked whether it executed overtime.
> > +	  If yes, printing out the detail info about it.
> > +
> >  endif # if ANDROID
> >  
> >  endmenu
> > diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> > index e9bc9fc..5a352ee 100644
> > --- a/drivers/android/binder.c
> > +++ b/drivers/android/binder.c
> > @@ -76,6 +76,11 @@
> >  #include "binder_internal.h"
> >  #include "binder_trace.h"
> >  
> > +#ifdef CONFIG_BINDER_USER_TRACKING
> > +#include <linux/rtc.h>
> > +#include <linux/time.h>
> > +#endif
> > +
> >  static HLIST_HEAD(binder_deferred_list);
> >  static DEFINE_MUTEX(binder_deferred_lock);
> >  
> > @@ -591,8 +596,104 @@ struct binder_transaction {
> >  	 * during thread teardown
> >  	 */
> >  	spinlock_t lock;
> > +#ifdef CONFIG_BINDER_USER_TRACKING
> > +	struct timespec timestamp;
> > +	struct timeval tv;
> > +#endif
> >  };
> >  
> > +#ifdef CONFIG_BINDER_USER_TRACKING
> > +
> > +/*
> > + * binder_print_delay - Output info of a delay transaction
> > + * @t:          pointer to the over-time transaction
> > + */
> > +static void binder_print_delay(struct binder_transaction *t)
> > +{
> > +	struct rtc_time tm;
> > +	struct timespec *startime;
> > +	struct timespec cur, sub_t;
> > +
> > +	ktime_get_ts(&cur);
> > +	startime = &t->timestamp;
> > +	sub_t = timespec_sub(cur, *startime);
> > +
> > +	/* if transaction time is over than 2 sec,
> > +	 * show timeout warning log.
> > +	 */
> > +	if (sub_t.tv_sec < 2)
> > +		return;
> > +
> > +	rtc_time_to_tm(t->tv.tv_sec, &tm);
> > +
> > +	spin_lock(&t->lock);
> > +	pr_info_ratelimited("%d: from %d:%d to %d:%d",
> > +			    t->debug_id,
> > +			    t->from ? t->from->proc->pid : 0,
> > +			    t->from ? t->from->pid : 0,
> > +			    t->to_proc ? t->to_proc->pid : 0,
> > +			    t->to_thread ? t->to_thread->pid : 0);
> > +	spin_unlock(&t->lock);
> > +
> > +	pr_info_ratelimited(" total %u.%03ld s code %u start %lu.%03ld android %d-%02d-%02d %02d:%02d:%02d.%03lu\n",
> > +			    (unsigned int)sub_t.tv_sec,
> > +			    (sub_t.tv_nsec / NSEC_PER_MSEC),
> > +			    t->code,
> > +			    (unsigned long)startime->tv_sec,
> > +			    (startime->tv_nsec / NSEC_PER_MSEC),
> > +			    (tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday,
> > +			    tm.tm_hour, tm.tm_min, tm.tm_sec,
> > +			    (unsigned long)(t->tv.tv_usec / USEC_PER_MSEC));
> > +}
> 
> Ick, why not use a tracepoint for this instead?
> 
> And what is userspace supposed to do with this if they see it?

Or another option is to implement this separately outside of binder.c using
register_trace_* on the existing binder tracepoints, similar to what say the
block tracer or preempt-off tracers do. Call it, say, "binder-latency tracer".

That way all of this tracing code is in-kernel but outside of binder.c.

thanks,

 - Joel
Frankie Chang Feb. 7, 2020, 3:10 a.m. UTC | #3
On Wed, 2020-02-05 at 10:49 -0500, Joel Fernandes wrote:
> On Wed, Feb 05, 2020 at 09:36:12AM +0000, Greg Kroah-Hartman wrote:
> > On Wed, Feb 05, 2020 at 02:52:52PM +0800, Frankie Chang wrote:
> > > Record start/end timestamp to binder transaction.
> > > When transaction is completed or transaction is free,
> > > it would be checked if transaction latency over threshold (2 sec),
> > > if yes, printing related information for tracing.
> > > 
> > > Signed-off-by: Frankie Chang <Frankie.Chang@mediatek.com>
> > > ---
> > >  drivers/android/Kconfig           |    8 +++
> > >  drivers/android/binder.c          |  107 +++++++++++++++++++++++++++++++++++++
> > >  drivers/android/binder_internal.h |    4 ++
> > >  3 files changed, 119 insertions(+)
> > > 
> > > diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
> > > index 6fdf2ab..7ba80eb 100644
> > > --- a/drivers/android/Kconfig
> > > +++ b/drivers/android/Kconfig
> > > @@ -54,6 +54,14 @@ config ANDROID_BINDER_IPC_SELFTEST
> > >  	  exhaustively with combinations of various buffer sizes and
> > >  	  alignments.
> > >  
> > > +config BINDER_USER_TRACKING
> > > +	bool "Android Binder transaction tracking"
> > > +	help
> > > +	  Used for track abnormal binder transaction which is over 2 seconds,
> > > +	  when the transaction is done or be free, this transaction would be
> > > +	  checked whether it executed overtime.
> > > +	  If yes, printing out the detail info about it.
> > > +
> > >  endif # if ANDROID
> > >  
> > >  endmenu
> > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> > > index e9bc9fc..5a352ee 100644
> > > --- a/drivers/android/binder.c
> > > +++ b/drivers/android/binder.c
> > > @@ -76,6 +76,11 @@
> > >  #include "binder_internal.h"
> > >  #include "binder_trace.h"
> > >  
> > > +#ifdef CONFIG_BINDER_USER_TRACKING
> > > +#include <linux/rtc.h>
> > > +#include <linux/time.h>
> > > +#endif
> > > +
> > >  static HLIST_HEAD(binder_deferred_list);
> > >  static DEFINE_MUTEX(binder_deferred_lock);
> > >  
> > > @@ -591,8 +596,104 @@ struct binder_transaction {
> > >  	 * during thread teardown
> > >  	 */
> > >  	spinlock_t lock;
> > > +#ifdef CONFIG_BINDER_USER_TRACKING
> > > +	struct timespec timestamp;
> > > +	struct timeval tv;
> > > +#endif
> > >  };
> > >  
> > > +#ifdef CONFIG_BINDER_USER_TRACKING
> > > +
> > > +/*
> > > + * binder_print_delay - Output info of a delay transaction
> > > + * @t:          pointer to the over-time transaction
> > > + */
> > > +static void binder_print_delay(struct binder_transaction *t)
> > > +{
> > > +	struct rtc_time tm;
> > > +	struct timespec *startime;
> > > +	struct timespec cur, sub_t;
> > > +
> > > +	ktime_get_ts(&cur);
> > > +	startime = &t->timestamp;
> > > +	sub_t = timespec_sub(cur, *startime);
> > > +
> > > +	/* if transaction time is over than 2 sec,
> > > +	 * show timeout warning log.
> > > +	 */
> > > +	if (sub_t.tv_sec < 2)
> > > +		return;
> > > +
> > > +	rtc_time_to_tm(t->tv.tv_sec, &tm);
> > > +
> > > +	spin_lock(&t->lock);
> > > +	pr_info_ratelimited("%d: from %d:%d to %d:%d",
> > > +			    t->debug_id,
> > > +			    t->from ? t->from->proc->pid : 0,
> > > +			    t->from ? t->from->pid : 0,
> > > +			    t->to_proc ? t->to_proc->pid : 0,
> > > +			    t->to_thread ? t->to_thread->pid : 0);
> > > +	spin_unlock(&t->lock);
> > > +
> > > +	pr_info_ratelimited(" total %u.%03ld s code %u start %lu.%03ld android %d-%02d-%02d %02d:%02d:%02d.%03lu\n",
> > > +			    (unsigned int)sub_t.tv_sec,
> > > +			    (sub_t.tv_nsec / NSEC_PER_MSEC),
> > > +			    t->code,
> > > +			    (unsigned long)startime->tv_sec,
> > > +			    (startime->tv_nsec / NSEC_PER_MSEC),
> > > +			    (tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday,
> > > +			    tm.tm_hour, tm.tm_min, tm.tm_sec,
> > > +			    (unsigned long)(t->tv.tv_usec / USEC_PER_MSEC));
> > > +}
> > 
> > Ick, why not use a tracepoint for this instead?
> > 
> > And what is userspace supposed to do with this if they see it?
> 
> Or another option is to implement this separately outside of binder.c using
> register_trace_* on the existing binder tracepoints, similar to what say the
> block tracer or preempt-off tracers do. Call it, say, "binder-latency tracer".
> 
> That way all of this tracing code is in-kernel but outside of binder.c.
> 
> thanks,
> 
>  - Joel
> 
Time limitation of recording is the reason why we don't use tracepoint.
In some situations, the exception is caused by a series of transactions
interaction.
Some abnormal transactions may be pending for a long time ago, they
could not be recorded due to buffer limited.
Therefore, it is difficult to dig out the root causes which caused by
the earlier transactions occurred.

Another point is that we'd just like to record the abnormal
transactions.
But most of transactions are less than 2 seconds, these are not the key
point we need to focus on.

thanks,

Frankie
Joel Fernandes Feb. 7, 2020, 3:17 a.m. UTC | #4
On Fri, Feb 07, 2020 at 11:10:23AM +0800, Frankie Chang wrote:
> On Wed, 2020-02-05 at 10:49 -0500, Joel Fernandes wrote:
> > On Wed, Feb 05, 2020 at 09:36:12AM +0000, Greg Kroah-Hartman wrote:
> > > On Wed, Feb 05, 2020 at 02:52:52PM +0800, Frankie Chang wrote:
> > > > Record start/end timestamp to binder transaction.
> > > > When transaction is completed or transaction is free,
> > > > it would be checked if transaction latency over threshold (2 sec),
> > > > if yes, printing related information for tracing.
> > > > 
> > > > Signed-off-by: Frankie Chang <Frankie.Chang@mediatek.com>
> > > > ---
> > > >  drivers/android/Kconfig           |    8 +++
> > > >  drivers/android/binder.c          |  107 +++++++++++++++++++++++++++++++++++++
> > > >  drivers/android/binder_internal.h |    4 ++
> > > >  3 files changed, 119 insertions(+)
> > > > 
> > > > diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
> > > > index 6fdf2ab..7ba80eb 100644
> > > > --- a/drivers/android/Kconfig
> > > > +++ b/drivers/android/Kconfig
> > > > @@ -54,6 +54,14 @@ config ANDROID_BINDER_IPC_SELFTEST
> > > >  	  exhaustively with combinations of various buffer sizes and
> > > >  	  alignments.
> > > >  
> > > > +config BINDER_USER_TRACKING
> > > > +	bool "Android Binder transaction tracking"
> > > > +	help
> > > > +	  Used for track abnormal binder transaction which is over 2 seconds,
> > > > +	  when the transaction is done or be free, this transaction would be
> > > > +	  checked whether it executed overtime.
> > > > +	  If yes, printing out the detail info about it.
> > > > +
> > > >  endif # if ANDROID
> > > >  
> > > >  endmenu
> > > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> > > > index e9bc9fc..5a352ee 100644
> > > > --- a/drivers/android/binder.c
> > > > +++ b/drivers/android/binder.c
> > > > @@ -76,6 +76,11 @@
> > > >  #include "binder_internal.h"
> > > >  #include "binder_trace.h"
> > > >  
> > > > +#ifdef CONFIG_BINDER_USER_TRACKING
> > > > +#include <linux/rtc.h>
> > > > +#include <linux/time.h>
> > > > +#endif
> > > > +
> > > >  static HLIST_HEAD(binder_deferred_list);
> > > >  static DEFINE_MUTEX(binder_deferred_lock);
> > > >  
> > > > @@ -591,8 +596,104 @@ struct binder_transaction {
> > > >  	 * during thread teardown
> > > >  	 */
> > > >  	spinlock_t lock;
> > > > +#ifdef CONFIG_BINDER_USER_TRACKING
> > > > +	struct timespec timestamp;
> > > > +	struct timeval tv;
> > > > +#endif
> > > >  };
> > > >  
> > > > +#ifdef CONFIG_BINDER_USER_TRACKING
> > > > +
> > > > +/*
> > > > + * binder_print_delay - Output info of a delay transaction
> > > > + * @t:          pointer to the over-time transaction
> > > > + */
> > > > +static void binder_print_delay(struct binder_transaction *t)
> > > > +{
> > > > +	struct rtc_time tm;
> > > > +	struct timespec *startime;
> > > > +	struct timespec cur, sub_t;
> > > > +
> > > > +	ktime_get_ts(&cur);
> > > > +	startime = &t->timestamp;
> > > > +	sub_t = timespec_sub(cur, *startime);
> > > > +
> > > > +	/* if transaction time is over than 2 sec,
> > > > +	 * show timeout warning log.
> > > > +	 */
> > > > +	if (sub_t.tv_sec < 2)
> > > > +		return;
> > > > +
> > > > +	rtc_time_to_tm(t->tv.tv_sec, &tm);
> > > > +
> > > > +	spin_lock(&t->lock);
> > > > +	pr_info_ratelimited("%d: from %d:%d to %d:%d",
> > > > +			    t->debug_id,
> > > > +			    t->from ? t->from->proc->pid : 0,
> > > > +			    t->from ? t->from->pid : 0,
> > > > +			    t->to_proc ? t->to_proc->pid : 0,
> > > > +			    t->to_thread ? t->to_thread->pid : 0);
> > > > +	spin_unlock(&t->lock);
> > > > +
> > > > +	pr_info_ratelimited(" total %u.%03ld s code %u start %lu.%03ld android %d-%02d-%02d %02d:%02d:%02d.%03lu\n",
> > > > +			    (unsigned int)sub_t.tv_sec,
> > > > +			    (sub_t.tv_nsec / NSEC_PER_MSEC),
> > > > +			    t->code,
> > > > +			    (unsigned long)startime->tv_sec,
> > > > +			    (startime->tv_nsec / NSEC_PER_MSEC),
> > > > +			    (tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday,
> > > > +			    tm.tm_hour, tm.tm_min, tm.tm_sec,
> > > > +			    (unsigned long)(t->tv.tv_usec / USEC_PER_MSEC));
> > > > +}
> > > 
> > > Ick, why not use a tracepoint for this instead?
> > > 
> > > And what is userspace supposed to do with this if they see it?
> > 
> > Or another option is to implement this separately outside of binder.c using
> > register_trace_* on the existing binder tracepoints, similar to what say the
> > block tracer or preempt-off tracers do. Call it, say, "binder-latency tracer".
> > 
> > That way all of this tracing code is in-kernel but outside of binder.c.
> > 
> > thanks,
> > 
> >  - Joel
> > 
> Time limitation of recording is the reason why we don't use tracepoint.
> In some situations, the exception is caused by a series of transactions
> interaction.
> Some abnormal transactions may be pending for a long time ago, they
> could not be recorded due to buffer limited.

register_trace_* does not use the trace buffer so I am not sure what you
mean. I am asking you to use tracepoints, not ftrace events.

thanks,

 - Joel
Frankie Chang Feb. 7, 2020, 6:28 a.m. UTC | #5
On Thu, 2020-02-06 at 22:17 -0500, Joel Fernandes wrote:
> On Fri, Feb 07, 2020 at 11:10:23AM +0800, Frankie Chang wrote:
> > On Wed, 2020-02-05 at 10:49 -0500, Joel Fernandes wrote:
> > > On Wed, Feb 05, 2020 at 09:36:12AM +0000, Greg Kroah-Hartman wrote:
> > > > On Wed, Feb 05, 2020 at 02:52:52PM +0800, Frankie Chang wrote:
> > > > > Record start/end timestamp to binder transaction.
> > > > > When transaction is completed or transaction is free,
> > > > > it would be checked if transaction latency over threshold (2 sec),
> > > > > if yes, printing related information for tracing.
> > > > > 
> > > > > Signed-off-by: Frankie Chang <Frankie.Chang@mediatek.com>
> > > > > ---
> > > > >  drivers/android/Kconfig           |    8 +++
> > > > >  drivers/android/binder.c          |  107 +++++++++++++++++++++++++++++++++++++
> > > > >  drivers/android/binder_internal.h |    4 ++
> > > > >  3 files changed, 119 insertions(+)
> > > > > 
> > > > > diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
> > > > > index 6fdf2ab..7ba80eb 100644
> > > > > --- a/drivers/android/Kconfig
> > > > > +++ b/drivers/android/Kconfig
> > > > > @@ -54,6 +54,14 @@ config ANDROID_BINDER_IPC_SELFTEST
> > > > >  	  exhaustively with combinations of various buffer sizes and
> > > > >  	  alignments.
> > > > >  
> > > > > +config BINDER_USER_TRACKING
> > > > > +	bool "Android Binder transaction tracking"
> > > > > +	help
> > > > > +	  Used for track abnormal binder transaction which is over 2 seconds,
> > > > > +	  when the transaction is done or be free, this transaction would be
> > > > > +	  checked whether it executed overtime.
> > > > > +	  If yes, printing out the detail info about it.
> > > > > +
> > > > >  endif # if ANDROID
> > > > >  
> > > > >  endmenu
> > > > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> > > > > index e9bc9fc..5a352ee 100644
> > > > > --- a/drivers/android/binder.c
> > > > > +++ b/drivers/android/binder.c
> > > > > @@ -76,6 +76,11 @@
> > > > >  #include "binder_internal.h"
> > > > >  #include "binder_trace.h"
> > > > >  
> > > > > +#ifdef CONFIG_BINDER_USER_TRACKING
> > > > > +#include <linux/rtc.h>
> > > > > +#include <linux/time.h>
> > > > > +#endif
> > > > > +
> > > > >  static HLIST_HEAD(binder_deferred_list);
> > > > >  static DEFINE_MUTEX(binder_deferred_lock);
> > > > >  
> > > > > @@ -591,8 +596,104 @@ struct binder_transaction {
> > > > >  	 * during thread teardown
> > > > >  	 */
> > > > >  	spinlock_t lock;
> > > > > +#ifdef CONFIG_BINDER_USER_TRACKING
> > > > > +	struct timespec timestamp;
> > > > > +	struct timeval tv;
> > > > > +#endif
> > > > >  };
> > > > >  
> > > > > +#ifdef CONFIG_BINDER_USER_TRACKING
> > > > > +
> > > > > +/*
> > > > > + * binder_print_delay - Output info of a delay transaction
> > > > > + * @t:          pointer to the over-time transaction
> > > > > + */
> > > > > +static void binder_print_delay(struct binder_transaction *t)
> > > > > +{
> > > > > +	struct rtc_time tm;
> > > > > +	struct timespec *startime;
> > > > > +	struct timespec cur, sub_t;
> > > > > +
> > > > > +	ktime_get_ts(&cur);
> > > > > +	startime = &t->timestamp;
> > > > > +	sub_t = timespec_sub(cur, *startime);
> > > > > +
> > > > > +	/* if transaction time is over than 2 sec,
> > > > > +	 * show timeout warning log.
> > > > > +	 */
> > > > > +	if (sub_t.tv_sec < 2)
> > > > > +		return;
> > > > > +
> > > > > +	rtc_time_to_tm(t->tv.tv_sec, &tm);
> > > > > +
> > > > > +	spin_lock(&t->lock);
> > > > > +	pr_info_ratelimited("%d: from %d:%d to %d:%d",
> > > > > +			    t->debug_id,
> > > > > +			    t->from ? t->from->proc->pid : 0,
> > > > > +			    t->from ? t->from->pid : 0,
> > > > > +			    t->to_proc ? t->to_proc->pid : 0,
> > > > > +			    t->to_thread ? t->to_thread->pid : 0);
> > > > > +	spin_unlock(&t->lock);
> > > > > +
> > > > > +	pr_info_ratelimited(" total %u.%03ld s code %u start %lu.%03ld android %d-%02d-%02d %02d:%02d:%02d.%03lu\n",
> > > > > +			    (unsigned int)sub_t.tv_sec,
> > > > > +			    (sub_t.tv_nsec / NSEC_PER_MSEC),
> > > > > +			    t->code,
> > > > > +			    (unsigned long)startime->tv_sec,
> > > > > +			    (startime->tv_nsec / NSEC_PER_MSEC),
> > > > > +			    (tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday,
> > > > > +			    tm.tm_hour, tm.tm_min, tm.tm_sec,
> > > > > +			    (unsigned long)(t->tv.tv_usec / USEC_PER_MSEC));
> > > > > +}
> > > > 
> > > > Ick, why not use a tracepoint for this instead?
> > > > 
> > > > And what is userspace supposed to do with this if they see it?
> > > 
> > > Or another option is to implement this separately outside of binder.c using
> > > register_trace_* on the existing binder tracepoints, similar to what say the
> > > block tracer or preempt-off tracers do. Call it, say, "binder-latency tracer".
> > > 
> > > That way all of this tracing code is in-kernel but outside of binder.c.
> > > 
> > > thanks,
> > > 
> > >  - Joel
> > > 
> > Time limitation of recording is the reason why we don't use tracepoint.
> > In some situations, the exception is caused by a series of transactions
> > interaction.
> > Some abnormal transactions may be pending for a long time ago, they
> > could not be recorded due to buffer limited.
> 
> register_trace_* does not use the trace buffer so I am not sure what you
> mean. I am asking you to use tracepoints, not ftrace events.
> 
> thanks,
> 
>  - Joel
> 
The existing binder tracepoint may not be precise enough because there
is no proper tracepoint which can represent the real finished time of
transaction.

The reason why we don't put the code outside binder.c is that structure
of binder_transaction didn't put in header file.
If it could be moved to binder_internal.h, then we can add
"binder-latency tracer" as you mentioned earlier.
Joel Fernandes Feb. 7, 2020, 1:26 p.m. UTC | #6
On Fri, Feb 07, 2020 at 02:28:59PM +0800, Frankie Chang wrote:
> On Thu, 2020-02-06 at 22:17 -0500, Joel Fernandes wrote:
> > On Fri, Feb 07, 2020 at 11:10:23AM +0800, Frankie Chang wrote:
> > > On Wed, 2020-02-05 at 10:49 -0500, Joel Fernandes wrote:
> > > > On Wed, Feb 05, 2020 at 09:36:12AM +0000, Greg Kroah-Hartman wrote:
> > > > > On Wed, Feb 05, 2020 at 02:52:52PM +0800, Frankie Chang wrote:
> > > > > > Record start/end timestamp to binder transaction.
> > > > > > When transaction is completed or transaction is free,
> > > > > > it would be checked if transaction latency over threshold (2 sec),
> > > > > > if yes, printing related information for tracing.
> > > > > > 
> > > > > > Signed-off-by: Frankie Chang <Frankie.Chang@mediatek.com>
> > > > > > ---
> > > > > >  drivers/android/Kconfig           |    8 +++
> > > > > >  drivers/android/binder.c          |  107 +++++++++++++++++++++++++++++++++++++
> > > > > >  drivers/android/binder_internal.h |    4 ++
> > > > > >  3 files changed, 119 insertions(+)
> > > > > > 
> > > > > > diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
> > > > > > index 6fdf2ab..7ba80eb 100644
> > > > > > --- a/drivers/android/Kconfig
> > > > > > +++ b/drivers/android/Kconfig
> > > > > > @@ -54,6 +54,14 @@ config ANDROID_BINDER_IPC_SELFTEST
> > > > > >  	  exhaustively with combinations of various buffer sizes and
> > > > > >  	  alignments.
> > > > > >  
> > > > > > +config BINDER_USER_TRACKING
> > > > > > +	bool "Android Binder transaction tracking"
> > > > > > +	help
> > > > > > +	  Used for track abnormal binder transaction which is over 2 seconds,
> > > > > > +	  when the transaction is done or be free, this transaction would be
> > > > > > +	  checked whether it executed overtime.
> > > > > > +	  If yes, printing out the detail info about it.
> > > > > > +
> > > > > >  endif # if ANDROID
> > > > > >  
> > > > > >  endmenu
> > > > > > diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> > > > > > index e9bc9fc..5a352ee 100644
> > > > > > --- a/drivers/android/binder.c
> > > > > > +++ b/drivers/android/binder.c
> > > > > > @@ -76,6 +76,11 @@
> > > > > >  #include "binder_internal.h"
> > > > > >  #include "binder_trace.h"
> > > > > >  
> > > > > > +#ifdef CONFIG_BINDER_USER_TRACKING
> > > > > > +#include <linux/rtc.h>
> > > > > > +#include <linux/time.h>
> > > > > > +#endif
> > > > > > +
> > > > > >  static HLIST_HEAD(binder_deferred_list);
> > > > > >  static DEFINE_MUTEX(binder_deferred_lock);
> > > > > >  
> > > > > > @@ -591,8 +596,104 @@ struct binder_transaction {
> > > > > >  	 * during thread teardown
> > > > > >  	 */
> > > > > >  	spinlock_t lock;
> > > > > > +#ifdef CONFIG_BINDER_USER_TRACKING
> > > > > > +	struct timespec timestamp;
> > > > > > +	struct timeval tv;
> > > > > > +#endif
> > > > > >  };
> > > > > >  
> > > > > > +#ifdef CONFIG_BINDER_USER_TRACKING
> > > > > > +
> > > > > > +/*
> > > > > > + * binder_print_delay - Output info of a delay transaction
> > > > > > + * @t:          pointer to the over-time transaction
> > > > > > + */
> > > > > > +static void binder_print_delay(struct binder_transaction *t)
> > > > > > +{
> > > > > > +	struct rtc_time tm;
> > > > > > +	struct timespec *startime;
> > > > > > +	struct timespec cur, sub_t;
> > > > > > +
> > > > > > +	ktime_get_ts(&cur);
> > > > > > +	startime = &t->timestamp;
> > > > > > +	sub_t = timespec_sub(cur, *startime);
> > > > > > +
> > > > > > +	/* if transaction time is over than 2 sec,
> > > > > > +	 * show timeout warning log.
> > > > > > +	 */
> > > > > > +	if (sub_t.tv_sec < 2)
> > > > > > +		return;
> > > > > > +
> > > > > > +	rtc_time_to_tm(t->tv.tv_sec, &tm);
> > > > > > +
> > > > > > +	spin_lock(&t->lock);
> > > > > > +	pr_info_ratelimited("%d: from %d:%d to %d:%d",
> > > > > > +			    t->debug_id,
> > > > > > +			    t->from ? t->from->proc->pid : 0,
> > > > > > +			    t->from ? t->from->pid : 0,
> > > > > > +			    t->to_proc ? t->to_proc->pid : 0,
> > > > > > +			    t->to_thread ? t->to_thread->pid : 0);
> > > > > > +	spin_unlock(&t->lock);
> > > > > > +
> > > > > > +	pr_info_ratelimited(" total %u.%03ld s code %u start %lu.%03ld android %d-%02d-%02d %02d:%02d:%02d.%03lu\n",
> > > > > > +			    (unsigned int)sub_t.tv_sec,
> > > > > > +			    (sub_t.tv_nsec / NSEC_PER_MSEC),
> > > > > > +			    t->code,
> > > > > > +			    (unsigned long)startime->tv_sec,
> > > > > > +			    (startime->tv_nsec / NSEC_PER_MSEC),
> > > > > > +			    (tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday,
> > > > > > +			    tm.tm_hour, tm.tm_min, tm.tm_sec,
> > > > > > +			    (unsigned long)(t->tv.tv_usec / USEC_PER_MSEC));
> > > > > > +}
> > > > > 
> > > > > Ick, why not use a tracepoint for this instead?
> > > > > 
> > > > > And what is userspace supposed to do with this if they see it?
> > > > 
> > > > Or another option is to implement this separately outside of binder.c using
> > > > register_trace_* on the existing binder tracepoints, similar to what say the
> > > > block tracer or preempt-off tracers do. Call it, say, "binder-latency tracer".
> > > > 
> > > > That way all of this tracing code is in-kernel but outside of binder.c.
> > > > 
> > > > thanks,
> > > > 
> > > >  - Joel
> > > > 
> > > Time limitation of recording is the reason why we don't use tracepoint.
> > > In some situations, the exception is caused by a series of transactions
> > > interaction.
> > > Some abnormal transactions may be pending for a long time ago, they
> > > could not be recorded due to buffer limited.
> > 
> > register_trace_* does not use the trace buffer so I am not sure what you
> > mean. I am asking you to use tracepoints, not ftrace events.
> > 
> > thanks,
> > 
> >  - Joel
> > 
> The existing binder tracepoint may not be precise enough because there
> is no proper tracepoint which can represent the real finished time of
> transaction.

May be you can add such TP then?

> The reason why we don't put the code outside binder.c is that structure
> of binder_transaction didn't put in header file.
> If it could be moved to binder_internal.h, then we can add
> "binder-latency tracer" as you mentioned earlier.

That should be doable.

The reason I don't like custom tracing logic in core files as your patch is
doing, is that it is doubtful if a lot of people would use it to warrant
adding into a core file. In this case, probably a separate tracer (outside of
binder.c) is a better option so as to not pollute the core files.

thanks,

 - Joel

Patch
diff mbox series

diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
index 6fdf2ab..7ba80eb 100644
--- a/drivers/android/Kconfig
+++ b/drivers/android/Kconfig
@@ -54,6 +54,14 @@  config ANDROID_BINDER_IPC_SELFTEST
 	  exhaustively with combinations of various buffer sizes and
 	  alignments.
 
+config BINDER_USER_TRACKING
+	bool "Android Binder transaction tracking"
+	help
+	  Used for track abnormal binder transaction which is over 2 seconds,
+	  when the transaction is done or be free, this transaction would be
+	  checked whether it executed overtime.
+	  If yes, printing out the detail info about it.
+
 endif # if ANDROID
 
 endmenu
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index e9bc9fc..5a352ee 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -76,6 +76,11 @@ 
 #include "binder_internal.h"
 #include "binder_trace.h"
 
+#ifdef CONFIG_BINDER_USER_TRACKING
+#include <linux/rtc.h>
+#include <linux/time.h>
+#endif
+
 static HLIST_HEAD(binder_deferred_list);
 static DEFINE_MUTEX(binder_deferred_lock);
 
@@ -591,8 +596,104 @@  struct binder_transaction {
 	 * during thread teardown
 	 */
 	spinlock_t lock;
+#ifdef CONFIG_BINDER_USER_TRACKING
+	struct timespec timestamp;
+	struct timeval tv;
+#endif
 };
 
+#ifdef CONFIG_BINDER_USER_TRACKING
+
+/*
+ * binder_print_delay - Output info of a delay transaction
+ * @t:          pointer to the over-time transaction
+ */
+static void binder_print_delay(struct binder_transaction *t)
+{
+	struct rtc_time tm;
+	struct timespec *startime;
+	struct timespec cur, sub_t;
+
+	ktime_get_ts(&cur);
+	startime = &t->timestamp;
+	sub_t = timespec_sub(cur, *startime);
+
+	/* if transaction time is over than 2 sec,
+	 * show timeout warning log.
+	 */
+	if (sub_t.tv_sec < 2)
+		return;
+
+	rtc_time_to_tm(t->tv.tv_sec, &tm);
+
+	spin_lock(&t->lock);
+	pr_info_ratelimited("%d: from %d:%d to %d:%d",
+			    t->debug_id,
+			    t->from ? t->from->proc->pid : 0,
+			    t->from ? t->from->pid : 0,
+			    t->to_proc ? t->to_proc->pid : 0,
+			    t->to_thread ? t->to_thread->pid : 0);
+	spin_unlock(&t->lock);
+
+	pr_info_ratelimited(" total %u.%03ld s code %u start %lu.%03ld android %d-%02d-%02d %02d:%02d:%02d.%03lu\n",
+			    (unsigned int)sub_t.tv_sec,
+			    (sub_t.tv_nsec / NSEC_PER_MSEC),
+			    t->code,
+			    (unsigned long)startime->tv_sec,
+			    (startime->tv_nsec / NSEC_PER_MSEC),
+			    (tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday,
+			    tm.tm_hour, tm.tm_min, tm.tm_sec,
+			    (unsigned long)(t->tv.tv_usec / USEC_PER_MSEC));
+}
+
+static void binder_log_entry_setup(struct binder_transaction_log_entry *e)
+{
+	ktime_get_ts(&e->timestamp);
+	do_gettimeofday(&e->tv);
+	e->tv.tv_sec -= (sys_tz.tz_minuteswest * 60);
+}
+
+static void timestamp_copy(struct binder_transaction *t,
+			   struct binder_transaction_log_entry *e)
+{
+	memcpy(&t->timestamp, &e->timestamp, sizeof(struct timespec));
+	memcpy(&t->tv, &e->tv, sizeof(struct timeval));
+}
+
+static void print_binder_transaction_ext(struct seq_file *m,
+					 struct binder_transaction *t)
+{
+	struct rtc_time tm;
+
+	rtc_time_to_tm(t->tv.tv_sec, &tm);
+	seq_printf(m,
+		   " start %lu.%06lu android %d-%02d-%02d %02d:%02d:%02d.%03lu",
+		   (unsigned long)t->timestamp.tv_sec,
+		   (t->timestamp.tv_nsec / NSEC_PER_USEC),
+		   (tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday,
+		   tm.tm_hour, tm.tm_min, tm.tm_sec,
+		   (unsigned long)(t->tv.tv_usec / USEC_PER_MSEC));
+}
+#else
+static void binder_print_delay(struct binder_transaction *t)
+{
+}
+
+static void binder_log_entry_setup(struct binder_transaction_log_entry *e)
+{
+}
+
+static void timestamp_copy(struct binder_transaction *t,
+			   struct binder_transaction_log_entry *e)
+{
+}
+
+static void print_binder_transaction_ext(struct seq_file *m,
+					 struct binder_transaction *t)
+{
+}
+#endif
+
 /**
  * struct binder_object - union of flat binder object types
  * @hdr:   generic object header
@@ -1927,6 +2028,7 @@  static void binder_free_transaction(struct binder_transaction *t)
 	 * If the transaction has no target_proc, then
 	 * t->buffer->transaction has already been cleared.
 	 */
+	binder_print_delay(t);
 	binder_free_txn_fixups(t);
 	kfree(t);
 	binder_stats_deleted(BINDER_STAT_TRANSACTION);
@@ -2872,6 +2974,8 @@  static void binder_transaction(struct binder_proc *proc,
 	e->offsets_size = tr->offsets_size;
 	strscpy(e->context_name, proc->context->name, BINDERFS_MAX_NAME);
 
+	binder_log_entry_setup(e);
+
 	if (reply) {
 		binder_inner_proc_lock(proc);
 		in_reply_to = thread->transaction_stack;
@@ -3058,6 +3162,7 @@  static void binder_transaction(struct binder_proc *proc,
 		return_error_line = __LINE__;
 		goto err_alloc_t_failed;
 	}
+	timestamp_copy(t, e);
 	INIT_LIST_HEAD(&t->fd_fixups);
 	binder_stats_created(BINDER_STAT_TRANSACTION);
 	spin_lock_init(&t->lock);
@@ -3496,6 +3601,7 @@  static void binder_transaction(struct binder_proc *proc,
 	kfree(tcomplete);
 	binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
 err_alloc_tcomplete_failed:
+	binder_print_delay(t);
 	kfree(t);
 	binder_stats_deleted(BINDER_STAT_TRANSACTION);
 err_alloc_t_failed:
@@ -5544,6 +5650,7 @@  static void print_binder_transaction_ilocked(struct seq_file *m,
 		   t->to_thread ? t->to_thread->pid : 0,
 		   t->code, t->flags, t->priority, t->need_reply);
 	spin_unlock(&t->lock);
+	print_binder_transaction_ext(m, t);
 
 	if (proc != to_proc) {
 		/*
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index ae99109..ea52c5d 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -131,6 +131,10 @@  struct binder_transaction_log_entry {
 	uint32_t return_error;
 	uint32_t return_error_param;
 	char context_name[BINDERFS_MAX_NAME + 1];
+#ifdef CONFIG_BINDER_USER_TRACKING
+	struct timespec timestamp;
+	struct timeval tv;
+#endif
 };
 
 struct binder_transaction_log {