
locking/lockdep: Save and display stack trace of held locks

Message ID 1561363259-14705-1-git-send-email-kobe-cp.wu@mediatek.com (mailing list archive)
State New, archived
Series locking/lockdep: Save and display stack trace of held locks

Commit Message

Kobe-CP Wu June 24, 2019, 8 a.m. UTC
Save the stack trace of held locks when lock_acquire() is invoked
and display the stack trace when lockdep_print_held_locks() is
invoked. The runtime stack traces of held locks are helpful in
analyzing code flow and lockdep warnings.

Saving the stack trace of each held lock increases runtime overhead
and memory consumption. The operation is gated behind
CONFIG_LOCKDEP_TRACE_HELD_LOCK, so the impact only occurs when
CONFIG_LOCKDEP_TRACE_HELD_LOCK=y.

Signed-off-by: Kobe Wu <kobe-cp.wu@mediatek.com>
---
 include/linux/lockdep.h  |   13 +++++++++++++
 kernel/locking/lockdep.c |   22 ++++++++++++++++++++++
 lib/Kconfig.debug        |   13 +++++++++++++
 3 files changed, 48 insertions(+)
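
The idea can be illustrated with a small userspace analogue (a sketch
only; the names traced_mutex, traced_mutex_lock and traced_mutex_show
are made up for the illustration, and this is glibc/pthreads code, not
part of the patch). It records a backtrace at acquisition time and
dumps it when the held lock is listed, which is what the patch does
inside lockdep:

/*
 * Userspace illustration only. Build with:
 *   gcc -g -rdynamic -pthread demo.c -o demo
 */
#include <execinfo.h>
#include <pthread.h>
#include <stdio.h>

#define MAX_HELD_LOCK_ENTRIES 32

struct traced_mutex {
	pthread_mutex_t mutex;
	int nr_entries;
	void *entries[MAX_HELD_LOCK_ENTRIES];
};

static void traced_mutex_lock(struct traced_mutex *m)
{
	pthread_mutex_lock(&m->mutex);
	/* Save where the lock was taken, like save_held_lock_trace(). */
	m->nr_entries = backtrace(m->entries, MAX_HELD_LOCK_ENTRIES);
}

static void traced_mutex_show(struct traced_mutex *m)
{
	/* Print the saved trace, like print_held_lock_trace(). */
	backtrace_symbols_fd(m->entries, m->nr_entries, fileno(stderr));
}

static struct traced_mutex m = { .mutex = PTHREAD_MUTEX_INITIALIZER };

int main(void)
{
	traced_mutex_lock(&m);
	fprintf(stderr, "lock held, acquired at:\n");
	traced_mutex_show(&m);
	pthread_mutex_unlock(&m.mutex);
	return 0;
}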

Comments

Peter Zijlstra June 24, 2019, 11:08 a.m. UTC | #1
On Mon, Jun 24, 2019 at 04:00:59PM +0800, Kobe Wu wrote:
> Save the stack trace of held locks when lock_acquire() is invoked
> and display the stack trace when lockdep_print_held_locks() is
> invoked. The runtime stack traces of held locks are helpful in
> analyzing code flow and lockdep warnings.
> 
> Saving the stack trace of each held lock increases runtime overhead
> and memory consumption. The operation is gated behind
> CONFIG_LOCKDEP_TRACE_HELD_LOCK, so the impact only occurs when
> CONFIG_LOCKDEP_TRACE_HELD_LOCK=y.

Yeah, I don't see the point of this. If you cannot find where the lock
got taken you've bigger problems.
Kobe-CP Wu June 28, 2019, 9:42 a.m. UTC | #2
On Mon, 2019-06-24 at 13:08 +0200, Peter Zijlstra wrote:
> On Mon, Jun 24, 2019 at 04:00:59PM +0800, Kobe Wu wrote:
> > Save the stack trace of held locks when lock_acquire() is invoked
> > and display the stack trace when lockdep_print_held_locks() is
> > invoked. The runtime stack traces of held locks are helpful in
> > analyzing code flow and lockdep warnings.
> > 
> > Saving the stack trace of each held lock increases runtime overhead
> > and memory consumption. The operation is gated behind
> > CONFIG_LOCKDEP_TRACE_HELD_LOCK, so the impact only occurs when
> > CONFIG_LOCKDEP_TRACE_HELD_LOCK=y.
> 
> Yeah, I don't see the point of this. If you cannot find where the lock
> got taken you've bigger problems.

Here are some examples that show why the stack traces are helpful.

<#1>

It provides more information for debug_show_all_locks(), which is
called by other debug functions and is helpful in analyzing problems
and understanding system status.

If a task sleeps for a long time while holding several locks, it may
cause problems. The saved stack trace makes it easy to see how the
task went to sleep and gives a starting point for analyzing why it
keeps sleeping: the task went to sleep after the code captured in the
trace, so we can investigate what happened from that point in the
code flow.

In the following example, (&f->f_pos_lock) is a lock we could not
acquire for a long time. We want to know what happened to
logd.klogd/344 and why it keeps sleeping. With only the task name and
the final function entry, it is hard to understand the code flow and
the task's condition.

Showing all locks held in the system:
1 lock held by logd.klogd/344:
 #0:  (&f->f_pos_lock){+.+.+.}, at: [<ffffff8182a8ae04>] __fdget_pos+0x44/0x58
...
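
For reference, debug_show_all_locks() simply walks every task that
currently holds locks and prints each one through
lockdep_print_held_locks(); a simplified sketch of the mainline flow
(details vary by kernel version, and this is not part of the patch):

/* Simplified from kernel/locking/lockdep.c; not part of this patch. */
void debug_show_all_locks(void)
{
	struct task_struct *g, *p;

	pr_warn("\nShowing all locks held in the system:\n");

	rcu_read_lock();
	for_each_process_thread(g, p) {
		if (!p->lockdep_depth)
			continue;
		/* One "#N: (lock){....}, at: caller" line per held lock. */
		lockdep_print_held_locks(p);
		touch_nmi_watchdog();
	}
	rcu_read_unlock();
}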

<#2>

It provides more information for "BUG: task/pid still has locks held!".

The original warning looks like the following. It is not easy to see
where the lock is held, because the lock could be used in many
functions and could be wrapped by other functions.

=====================================
[ BUG: vpud/657 still has locks held! ]
4.9.117+ #2 Tainted: G S      W  O   
-------------------------------------
1 lock held by vpud/657 on CPU#6:
 #0:  (&dev->enc_mutex){+.+.+.}, at: [<ffffff8c5ca3ca74>] venc_lock+0xec/0x108
Call trace:
[<ffffff8c5be8d190>] dump_backtrace+0x0/0x2bc
[<ffffff8c5be8d188>] show_stack+0x18/0x20
[<ffffff8c5c268274>] dump_stack+0xa8/0xf8
[<ffffff8c5bf4c1e4>] debug_check_no_locks_held+0x174/0x17c
[<ffffff8c5bf9e1dc>] futex_wait_queue_me+0xf4/0x16c
[<ffffff8c5bf9bc44>] futex_wait+0x14c/0x334
[<ffffff8c5bf9a534>] do_futex+0x10c/0x16d0
[<ffffff8c5bf9f688>] compat_SyS_futex+0x100/0x158
[<ffffff8c5be83e00>] el0_svc_naked+0x34/0x38

With a stack trace for reference, it becomes much easier to
understand and resolve the problem.

1 lock held by vpud/657/[0] on CPU#6:
 #0:  (&dev->enc_mutex){+.+.+.}, at: [<ffffff8c5ca3ca74>] venc_lock+0xec/0x108
       lock_acquire+0x224/0x25c
       __mutex_lock_common+0x88/0x728
       mutex_lock_nested+0x50/0x60
       venc_lock+0xec/0x108
       venc_encode_prepare+0x20/0x68
       vcu_unlocked_ioctl+0x91c/0x1108
       vcu_unlocked_compat_ioctl+0x110/0x238
       compat_SyS_ioctl+0x128/0x244
       el0_svc_naked+0x34/0x38

For the same reason, it is also helpful in analyzing "BUG: held lock
freed!" and "INFO: possible recursive locking detected".

Patch

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 6e2377e..4e8d027 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -224,6 +224,10 @@  struct lock_chain {
  */
 #define MAX_LOCKDEP_KEYS		((1UL << MAX_LOCKDEP_KEYS_BITS) - 1)
 
+#ifdef CONFIG_LOCKDEP_TRACE_HELD_LOCK
+#define MAX_HELD_LOCK_ENTRIES 32
+#endif
+
 struct held_lock {
 	/*
 	 * One-way hash of the dependency chain up to this point. We
@@ -269,6 +273,15 @@  struct held_lock {
 	unsigned int hardirqs_off:1;
 	unsigned int references:12;					/* 32 bits */
 	unsigned int pin_count;
+
+#ifdef CONFIG_LOCKDEP_TRACE_HELD_LOCK
+	/*
+	 * When trying to acquire a lock, the stack trace will be saved
+	 * in the entries.
+	 */
+	unsigned int nr_entries;
+	unsigned long entries[MAX_HELD_LOCK_ENTRIES];
+#endif
 };
 
 /*
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index d06190f..ad7cecf 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -457,6 +457,28 @@  static int save_trace(struct lock_trace *trace)
 	return 1;
 }
 
+/* Stack-trace: record stack backtrace when trying to acquire a lock. */
+#ifdef CONFIG_LOCKDEP_TRACE_HELD_LOCK
+static void save_held_lock_trace(struct held_lock *hlock)
+{
+	hlock->nr_entries =
+		stack_trace_save(hlock->entries, MAX_HELD_LOCK_ENTRIES, 2);
+}
+
+static void print_held_lock_trace(struct held_lock *hlock)
+{
+	stack_trace_print(hlock->entries, hlock->nr_entries, 6);
+}
+#else
+static void save_held_lock_trace(struct held_lock *hlock)
+{
+}
+
+static void print_held_lock_trace(struct held_lock *hlock)
+{
+}
+#endif
+
 unsigned int nr_hardirq_chains;
 unsigned int nr_softirq_chains;
 unsigned int nr_process_chains;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index eae4395..7082843 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1285,6 +1285,19 @@  config WW_MUTEX_SELFTEST
 	  Say M if you want these self tests to build as a module.
 	  Say N if you are unsure.
 
+config LOCKDEP_TRACE_HELD_LOCK
+	bool "Lock information: save and display stack trace of held locks"
+	depends on PROVE_LOCKING
+	default n
+	help
+	  This feature saves the stack trace when a lock is acquired and
+	  displays it when a lockdep warning is reported. This can be
+	  helpful in analyzing lockdep warnings.
+
+	  Enabling this will increase runtime overhead and memory consumption.
+
+	  If unsure, say N.
+
 endmenu # lock debugging
 
 config TRACE_IRQFLAGS
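
For a rough sense of the memory cost when the option is enabled (an
estimate, assuming a 64-bit kernel and lockdep's MAX_LOCK_DEPTH of 48
held_lock slots embedded in every task_struct):

  32 entries * sizeof(unsigned long) = 32 * 8 = 256 bytes per held_lock
  48 held_lock slots * 256 bytes    ~= 12 KB added to each task_struct

plus the nr_entries counter in each slot.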