diff mbox

[Linux-v4.6-rc1] ext4: WARNING: CPU: 2 PID: 2692 at kernel/locking/lockdep.c:2017 __lock_acquire+0x180e/0x2260

Message ID 20160330093659.GS3408@twins.programming.kicks-ass.net (mailing list archive)
State New, archived
Headers show

Commit Message

Peter Zijlstra March 30, 2016, 9:36 a.m. UTC
On Tue, Mar 29, 2016 at 10:47:02AM +0200, Ingo Molnar wrote:

> > You are right; this is lockdep running into a hash collision; which is a new 
> > DEBUG_LOCKDEP test. See 9e4e7554e755 ("locking/lockdep: Detect chain_key 
> > collisions").
> 
> I've Cc:-ed Alfredo Alvarez Fernandez who added that test.

OK, so while the code in check_no_collision() seems sensible, it relies
on borken bits.

The whole chain_hlocks and /proc/lockdep_chains stuff appears to have
been buggered from the start.

The below patch should fix this.

Furthermore, our hash function has definite room for improvement.

---
 include/linux/lockdep.h       |  8 +++++---
 kernel/locking/lockdep.c      | 30 ++++++++++++++++++++++++------
 kernel/locking/lockdep_proc.c |  2 ++
 3 files changed, 31 insertions(+), 9 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Sedat Dilek March 30, 2016, 9:49 a.m. UTC | #1
On Wed, Mar 30, 2016 at 11:36 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Tue, Mar 29, 2016 at 10:47:02AM +0200, Ingo Molnar wrote:
>
>> > You are right; this is lockdep running into a hash collision; which is a new
>> > DEBUG_LOCKDEP test. See 9e4e7554e755 ("locking/lockdep: Detect chain_key
>> > collisions").
>>
>> I've Cc:-ed Alfredo Alvarez Fernandez who added that test.
>
> OK, so while the code in check_no_collision() seems sensible, it relies
> on borken bits.
>
> The whole chain_hlocks and /proc/lockdep_chains stuff appears to have
> been buggered from the start.
>
> The below patch should fix this.
>

checkpatch.pl says...

WARNING: Prefer seq_puts to seq_printf
#124: FILE: kernel/locking/lockdep_proc.c:145:
+                       seq_printf(m, "(buggered) ");

Testing your patch right now.

- Sedat -


> Furthermore, our hash function has definite room for improvement.
>
> ---
>  include/linux/lockdep.h       |  8 +++++---
>  kernel/locking/lockdep.c      | 30 ++++++++++++++++++++++++------
>  kernel/locking/lockdep_proc.c |  2 ++
>  3 files changed, 31 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
> index d026b190c530..2568c120513b 100644
> --- a/include/linux/lockdep.h
> +++ b/include/linux/lockdep.h
> @@ -196,9 +196,11 @@ struct lock_list {
>   * We record lock dependency chains, so that we can cache them:
>   */
>  struct lock_chain {
> -       u8                              irq_context;
> -       u8                              depth;
> -       u16                             base;
> +       /* see BUILD_BUG_ON()s in lookup_chain_cache() */
> +       unsigned int                    irq_context :  2,
> +                                       depth       :  6,
> +                                       base        : 24;
> +       /* 4 byte hole */
>         struct hlist_node               entry;
>         u64                             chain_key;
>  };
> diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
> index 53ab2f85d77e..91a4b7780afb 100644
> --- a/kernel/locking/lockdep.c
> +++ b/kernel/locking/lockdep.c
> @@ -2099,15 +2099,37 @@ static inline int lookup_chain_cache(struct task_struct *curr,
>         chain->irq_context = hlock->irq_context;
>         i = get_first_held_lock(curr, hlock);
>         chain->depth = curr->lockdep_depth + 1 - i;
> +
> +       BUILD_BUG_ON((1UL << 24) <= ARRAY_SIZE(chain_hlocks));
> +       BUILD_BUG_ON((1UL << 6)  <= ARRAY_SIZE(curr->held_locks));
> +       BUILD_BUG_ON((1UL << 8*sizeof(chain_hlocks[0])) <= ARRAY_SIZE(lock_classes));
> +
>         if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
>                 chain->base = nr_chain_hlocks;
> -               nr_chain_hlocks += chain->depth;
>                 for (j = 0; j < chain->depth - 1; j++, i++) {
>                         int lock_id = curr->held_locks[i].class_idx - 1;
>                         chain_hlocks[chain->base + j] = lock_id;
>                 }
>                 chain_hlocks[chain->base + j] = class - lock_classes;
>         }
> +
> +       if (nr_chain_hlocks < MAX_LOCKDEP_CHAIN_HLOCKS)
> +               nr_chain_hlocks += chain->depth;
> +
> +#ifdef CONFIG_DEBUG_LOCKDEP
> +       /*
> +        * Important for check_no_collision().
> +        */
> +       if (unlikely(nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)) {
> +               if (debug_locks_off_graph_unlock())
> +                       return 0;
> +
> +               print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
> +               dump_stack();
> +               return 0;
> +       }
> +#endif
> +
>         hlist_add_head_rcu(&chain->entry, hash_head);
>         debug_atomic_inc(chain_lookup_misses);
>         inc_chains();
> @@ -2860,11 +2882,6 @@ static int separate_irq_context(struct task_struct *curr,
>  {
>         unsigned int depth = curr->lockdep_depth;
>
> -       /*
> -        * Keep track of points where we cross into an interrupt context:
> -        */
> -       hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) +
> -                               curr->softirq_context;
>         if (depth) {
>                 struct held_lock *prev_hlock;
>
> @@ -3164,6 +3181,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
>         hlock->acquire_ip = ip;
>         hlock->instance = lock;
>         hlock->nest_lock = nest_lock;
> +       hlock->irq_context = 2*(!!curr->hardirq_context) + !!curr->softirq_context;
>         hlock->trylock = trylock;
>         hlock->read = read;
>         hlock->check = check;
> diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
> index dbb61a302548..a0f61effad25 100644
> --- a/kernel/locking/lockdep_proc.c
> +++ b/kernel/locking/lockdep_proc.c
> @@ -141,6 +141,8 @@ static int lc_show(struct seq_file *m, void *v)
>         int i;
>
>         if (v == SEQ_START_TOKEN) {
> +               if (nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)
> +                       seq_printf(m, "(buggered) ");
>                 seq_printf(m, "all lock chains:\n");
>                 return 0;
>         }
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Zijlstra March 30, 2016, 9:50 a.m. UTC | #2
On Wed, Mar 30, 2016 at 11:36:59AM +0200, Peter Zijlstra wrote:
> On Tue, Mar 29, 2016 at 10:47:02AM +0200, Ingo Molnar wrote:
> 
> > > You are right; this is lockdep running into a hash collision; which is a new 
> > > DEBUG_LOCKDEP test. See 9e4e7554e755 ("locking/lockdep: Detect chain_key 
> > > collisions").
> > 
> > I've Cc:-ed Alfredo Alvarez Fernandez who added that test.
> 
> OK, so while the code in check_no_collision() seems sensible, it relies
> on borken bits.
> 
> The whole chain_hlocks and /proc/lockdep_chains stuff appears to have
> been buggered from the start.
> 
> The below patch should fix this.

Note that unless we had more than 65536 chain_hlocks consumed the patch
would not make a difference.

> Furthermore, our hash function has definite room for improvement.

And no matter how good we make it, a u64 hash is bound to collide at
some point (or of any size really).

Also, we could make them non-fatal, returning true from
lookup_chain_cache() is always correct (_very_ expensive, but correct),
so in case of doubt we could just return true.


--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Boqun Feng March 30, 2016, 9:59 a.m. UTC | #3
Hi Peter,

On Wed, Mar 30, 2016 at 11:36:59AM +0200, Peter Zijlstra wrote:
> On Tue, Mar 29, 2016 at 10:47:02AM +0200, Ingo Molnar wrote:
> 
> > > You are right; this is lockdep running into a hash collision; which is a new 
> > > DEBUG_LOCKDEP test. See 9e4e7554e755 ("locking/lockdep: Detect chain_key 
> > > collisions").
> > 
> > I've Cc:-ed Alfredo Alvarez Fernandez who added that test.
> 
> OK, so while the code in check_no_collision() seems sensible, it relies
> on borken bits.
> 
> The whole chain_hlocks and /proc/lockdep_chains stuff appears to have
> been buggered from the start.
> 
> The below patch should fix this.
> 
> Furthermore, our hash function has definite room for improvement.
> 
> ---
>  include/linux/lockdep.h       |  8 +++++---
>  kernel/locking/lockdep.c      | 30 ++++++++++++++++++++++++------
>  kernel/locking/lockdep_proc.c |  2 ++
>  3 files changed, 31 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
> index d026b190c530..2568c120513b 100644
> --- a/include/linux/lockdep.h
> +++ b/include/linux/lockdep.h
> @@ -196,9 +196,11 @@ struct lock_list {
>   * We record lock dependency chains, so that we can cache them:
>   */
>  struct lock_chain {
> -	u8				irq_context;
> -	u8				depth;
> -	u16				base;
> +	/* see BUILD_BUG_ON()s in lookup_chain_cache() */
> +	unsigned int			irq_context :  2,
> +					depth       :  6,
> +					base	    : 24;
> +	/* 4 byte hole */
>  	struct hlist_node		entry;
>  	u64				chain_key;
>  };
> diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
> index 53ab2f85d77e..91a4b7780afb 100644
> --- a/kernel/locking/lockdep.c
> +++ b/kernel/locking/lockdep.c

[...]

> @@ -2860,11 +2882,6 @@ static int separate_irq_context(struct task_struct *curr,
>  {
>  	unsigned int depth = curr->lockdep_depth;
>  
> -	/*
> -	 * Keep track of points where we cross into an interrupt context:
> -	 */
> -	hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) +
> -				curr->softirq_context;
>  	if (depth) {
>  		struct held_lock *prev_hlock;
>  
> @@ -3164,6 +3181,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
>  	hlock->acquire_ip = ip;
>  	hlock->instance = lock;
>  	hlock->nest_lock = nest_lock;
> +	hlock->irq_context = 2*(!!curr->hardirq_context) + !!curr->softirq_context;
>  	hlock->trylock = trylock;
>  	hlock->read = read;
>  	hlock->check = check;

This is just for cleaning up, right? However ->hardirq_context and
->softirq_context only defined when CONFIG_TRACE_IRQFLAGS=y.

So we should use macro like current_hardirq_context() here? Or
considering the two helpers introduced in my RFC:

http://lkml.kernel.org/g/1455602265-16490-2-git-send-email-boqun.feng@gmail.com

if you don't think that overkills ;-)

Regards,
Boqun

[...]
Peter Zijlstra March 30, 2016, 10:36 a.m. UTC | #4
On Wed, Mar 30, 2016 at 05:59:54PM +0800, Boqun Feng wrote:
> > @@ -3164,6 +3181,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
> >  	hlock->acquire_ip = ip;
> >  	hlock->instance = lock;
> >  	hlock->nest_lock = nest_lock;
> > +	hlock->irq_context = 2*(!!curr->hardirq_context) + !!curr->softirq_context;
> >  	hlock->trylock = trylock;
> >  	hlock->read = read;
> >  	hlock->check = check;
> 
> This is just for cleaning up, right? However ->hardirq_context and
> ->softirq_context only defined when CONFIG_TRACE_IRQFLAGS=y.

Ah, that is the reason it was in a 'funny' place.

The other reason is that we're careful to reduce hardirq_context to 0,1
but don't do so for softirq_context.

> So we should use macro like current_hardirq_context() here? Or
> considering the two helpers introduced in my RFC:
> 
> http://lkml.kernel.org/g/1455602265-16490-2-git-send-email-boqun.feng@gmail.com
> 
> if you don't think that overkills ;-)

Yeah, that might work, although I would like to keep the !! on both,
makes me worry less.
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sedat Dilek March 30, 2016, 11:07 a.m. UTC | #5
On Wed, Mar 30, 2016 at 12:36 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, Mar 30, 2016 at 05:59:54PM +0800, Boqun Feng wrote:
>> > @@ -3164,6 +3181,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
>> >     hlock->acquire_ip = ip;
>> >     hlock->instance = lock;
>> >     hlock->nest_lock = nest_lock;
>> > +   hlock->irq_context = 2*(!!curr->hardirq_context) + !!curr->softirq_context;
>> >     hlock->trylock = trylock;
>> >     hlock->read = read;
>> >     hlock->check = check;
>>
>> This is just for cleaning up, right? However ->hardirq_context and
>> ->softirq_context only defined when CONFIG_TRACE_IRQFLAGS=y.
>
> Ah, that is the reason it was in a 'funny' place.
>
> The other reason is that we're careful to reduce hardirq_context to 0,1
> but don't do so for softirq_context.
>
>> So we should use macro like current_hardirq_context() here? Or
>> considering the two helpers introduced in my RFC:
>>
>> http://lkml.kernel.org/g/1455602265-16490-2-git-send-email-boqun.feng@gmail.com
>>
>> if you don't think that overkills ;-)
>
> Yeah, that might work, although I would like to keep the !! on both,
> makes me worry less.

Can you CC me on any new patches in this area?

Thanks.

- Sedat -
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Zijlstra March 30, 2016, 12:43 p.m. UTC | #6
On Wed, Mar 30, 2016 at 11:49:57AM +0200, Sedat Dilek wrote:
> On Wed, Mar 30, 2016 at 11:36 AM, Peter Zijlstra <peterz@infradead.org> wrote:

> > OK, so while the code in check_no_collision() seems sensible, it relies
> > on borken bits.
> >
> > The whole chain_hlocks and /proc/lockdep_chains stuff appears to have
> > been buggered from the start.
> >
> > The below patch should fix this.
> >
> 
> checkpatch.pl says...
> 
> WARNING: Prefer seq_puts to seq_printf
> #124: FILE: kernel/locking/lockdep_proc.c:145:
> +                       seq_printf(m, "(buggered) ");

Yeah, sod checkpatch ;-)

What's in your /proc/lockdep_stats file?
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sedat Dilek March 30, 2016, 12:46 p.m. UTC | #7
On Wed, Mar 30, 2016 at 2:43 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, Mar 30, 2016 at 11:49:57AM +0200, Sedat Dilek wrote:
>> On Wed, Mar 30, 2016 at 11:36 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>
>> > OK, so while the code in check_no_collision() seems sensible, it relies
>> > on borken bits.
>> >
>> > The whole chain_hlocks and /proc/lockdep_chains stuff appears to have
>> > been buggered from the start.
>> >
>> > The below patch should fix this.
>> >
>>
>> checkpatch.pl says...
>>
>> WARNING: Prefer seq_puts to seq_printf
>> #124: FILE: kernel/locking/lockdep_proc.c:145:
>> +                       seq_printf(m, "(buggered) ");
>
> Yeah, sod checkpatch ;-)
>
> What's in your /proc/lockdep_stats file?

Eat thiz!

$ sudo cat /proc/lockdep_stats
 lock-classes:                         2012 [max: 8191]
 direct dependencies:                  9638 [max: 32768]
 indirect dependencies:               39300
 all direct dependencies:            256286
 dependency chains:                   12869 [max: 65536]
 dependency chain hlocks:             49608 [max: 327680]
 in-hardirq chains:                     115
 in-softirq chains:                     458
 in-process chains:                   11504
 stack-trace entries:                154861 [max: 524288]
 combined max dependencies:       612572220
 hardirq-safe locks:                     61
 hardirq-unsafe locks:                 1032
 softirq-safe locks:                    169
 softirq-unsafe locks:                  949
 irq-safe locks:                        178
 irq-unsafe locks:                     1032
 hardirq-read-safe locks:                 4
 hardirq-read-unsafe locks:             226
 softirq-read-safe locks:                 8
 softirq-read-unsafe locks:             221
 irq-read-safe locks:                     9
 irq-read-unsafe locks:                 226
 uncategorized locks:                   216
 unused locks:                            0
 max locking depth:                      17
 max bfs queue depth:                   354
 chain lookup misses:                 12974
 chain lookup hits:                36326533
 cyclic checks:                       11430
 find-mask forwards checks:            3952
 find-mask backwards checks:          74700
 hardirq on events:                41715052
 hardirq off events:               41715056
 redundant hardirq ons:                 404
 redundant hardirq offs:           19500606
 softirq on events:                  220687
 softirq off events:                 220715
 redundant softirq ons:                   0
 redundant softirq offs:                  0
 debug_locks:                             1

- Sedat -
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Zijlstra March 30, 2016, 1:15 p.m. UTC | #8
On Wed, Mar 30, 2016 at 02:46:36PM +0200, Sedat Dilek wrote:
>  dependency chain hlocks:             49608 [max: 327680]

OK, so that is still below the u16 limit, so you're seeing an actual
hash collision and my patch will not cure that.

A different hash function _might_ help, but eventually this is an
unfixable problem. Our input space is (2^13)^48 = 2^(13*48) = 2^624 =
ff'n huge, reducing that to 2^64 is bound to generate a collision at some
point.

[ technically the 48 held_lock spots are not fully independent, so
  (2^13)^48 is slightly overestimating it, but the numbers are big
  enough for this to not matter much. ]


--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index d026b190c530..2568c120513b 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -196,9 +196,11 @@  struct lock_list {
  * We record lock dependency chains, so that we can cache them:
  */
 struct lock_chain {
-	u8				irq_context;
-	u8				depth;
-	u16				base;
+	/* see BUILD_BUG_ON()s in lookup_chain_cache() */
+	unsigned int			irq_context :  2,
+					depth       :  6,
+					base	    : 24;
+	/* 4 byte hole */
 	struct hlist_node		entry;
 	u64				chain_key;
 };
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 53ab2f85d77e..91a4b7780afb 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2099,15 +2099,37 @@  static inline int lookup_chain_cache(struct task_struct *curr,
 	chain->irq_context = hlock->irq_context;
 	i = get_first_held_lock(curr, hlock);
 	chain->depth = curr->lockdep_depth + 1 - i;
+
+	BUILD_BUG_ON((1UL << 24) <= ARRAY_SIZE(chain_hlocks));
+	BUILD_BUG_ON((1UL << 6)  <= ARRAY_SIZE(curr->held_locks));
+	BUILD_BUG_ON((1UL << 8*sizeof(chain_hlocks[0])) <= ARRAY_SIZE(lock_classes));
+
 	if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
 		chain->base = nr_chain_hlocks;
-		nr_chain_hlocks += chain->depth;
 		for (j = 0; j < chain->depth - 1; j++, i++) {
 			int lock_id = curr->held_locks[i].class_idx - 1;
 			chain_hlocks[chain->base + j] = lock_id;
 		}
 		chain_hlocks[chain->base + j] = class - lock_classes;
 	}
+
+	if (nr_chain_hlocks < MAX_LOCKDEP_CHAIN_HLOCKS)
+		nr_chain_hlocks += chain->depth;
+
+#ifdef CONFIG_DEBUG_LOCKDEP
+	/*
+	 * Important for check_no_collision().
+	 */
+	if (unlikely(nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)) {
+		if (debug_locks_off_graph_unlock())
+			return 0;
+
+		print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
+		dump_stack();
+		return 0;
+	}
+#endif
+
 	hlist_add_head_rcu(&chain->entry, hash_head);
 	debug_atomic_inc(chain_lookup_misses);
 	inc_chains();
@@ -2860,11 +2882,6 @@  static int separate_irq_context(struct task_struct *curr,
 {
 	unsigned int depth = curr->lockdep_depth;
 
-	/*
-	 * Keep track of points where we cross into an interrupt context:
-	 */
-	hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) +
-				curr->softirq_context;
 	if (depth) {
 		struct held_lock *prev_hlock;
 
@@ -3164,6 +3181,7 @@  static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	hlock->acquire_ip = ip;
 	hlock->instance = lock;
 	hlock->nest_lock = nest_lock;
+	hlock->irq_context = 2*(!!curr->hardirq_context) + !!curr->softirq_context;
 	hlock->trylock = trylock;
 	hlock->read = read;
 	hlock->check = check;
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index dbb61a302548..a0f61effad25 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -141,6 +141,8 @@  static int lc_show(struct seq_file *m, void *v)
 	int i;
 
 	if (v == SEQ_START_TOKEN) {
+		if (nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)
+			seq_printf(m, "(buggered) ");
 		seq_printf(m, "all lock chains:\n");
 		return 0;
 	}