diff mbox series

mm: kmemleak: Turn kmemleak_lock to spin lock and RCU primitives

Message ID 1546612153-451172-1-git-send-email-zhe.he@windriver.com (mailing list archive)
State New, archived
Headers show
Series mm: kmemleak: Turn kmemleak_lock to spin lock and RCU primitives | expand

Commit Message

He Zhe Jan. 4, 2019, 2:29 p.m. UTC
From: He Zhe <zhe.he@windriver.com>

It's not necessary to keep consistency between readers and writers of
kmemleak_lock. RCU is more proper for this case. And in order to gain better
performance, we turn the reader locks to RCU read locks and writer locks to
normal spin locks.

"time echo scan > /sys/kernel/debug/kmemleak" is improved from around 1.010s to
0.475s, without lock debug options, tested on Intel Corporation Broadwell Client
platform/Basking Ridge.

spin_lock_nested is replaced with irqsave version since the original outside
irqsave lock is gone. Otherwise we might have the following potential deadlock,
reported by lockdep.

WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected
4.20.0-standard #1 Not tainted
-----------------------------------------------------
kmemleak/163 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
000000008d7de78e (&(&object->lock)->rlock/1){+.+.}, at: scan_block+0xc4/0x1e0

and this task is already holding:
000000009178399c (&(&object->lock)->rlock){..-.}, at: scan_gray_list+0xec/0x180
which would create a new lock dependency:
 (&(&object->lock)->rlock){..-.} -> (&(&object->lock)->rlock/1){+.+.}

but this new dependency connects a HARDIRQ-irq-safe lock:
 (&(&ehci->lock)->rlock){-.-.}

snip

       CPU0                    CPU1
       ----                    ----
  lock(&(&object->lock)->rlock/1);
                               local_irq_disable();
                               lock(&(&ehci->lock)->rlock);
                               lock(&(&object->lock)->rlock);
  <Interrupt>
    lock(&(&ehci->lock)->rlock);

Signed-off-by: He Zhe <zhe.he@windriver.com>
Cc: catalin.marinas@arm.com
---
 mm/kmemleak.c | 38 ++++++++++++++++----------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

Comments

Catalin Marinas Jan. 4, 2019, 6:37 p.m. UTC | #1
On Fri, Jan 04, 2019 at 10:29:13PM +0800, zhe.he@windriver.com wrote:
> It's not necessary to keep consistency between readers and writers of
> kmemleak_lock. RCU is more proper for this case. And in order to gain better
> performance, we turn the reader locks to RCU read locks and writer locks to
> normal spin locks.

This won't work.

> @@ -515,9 +515,7 @@ static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
>  	struct kmemleak_object *object;
>  
>  	rcu_read_lock();
> -	read_lock_irqsave(&kmemleak_lock, flags);
>  	object = lookup_object(ptr, alias);
> -	read_unlock_irqrestore(&kmemleak_lock, flags);

The comment on lookup_object() states that the kmemleak_lock must be
held. That's because we don't have an RCU-like mechanism for removing
removing objects from the object_tree_root:

>  
>  	/* check whether the object is still available */
>  	if (object && !get_object(object))
> @@ -537,13 +535,13 @@ static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int ali
>  	unsigned long flags;
>  	struct kmemleak_object *object;
>  
> -	write_lock_irqsave(&kmemleak_lock, flags);
> +	spin_lock_irqsave(&kmemleak_lock, flags);
>  	object = lookup_object(ptr, alias);
>  	if (object) {
>  		rb_erase(&object->rb_node, &object_tree_root);
>  		list_del_rcu(&object->object_list);
>  	}
> -	write_unlock_irqrestore(&kmemleak_lock, flags);
> +	spin_unlock_irqrestore(&kmemleak_lock, flags);

So here, while list removal is RCU-safe, rb_erase() is not.

If you have time to implement an rb_erase_rcu(), than we could reduce
the locking in kmemleak.
He Zhe Jan. 7, 2019, 7:31 a.m. UTC | #2
On 1/5/19 2:37 AM, Catalin Marinas wrote:
> On Fri, Jan 04, 2019 at 10:29:13PM +0800, zhe.he@windriver.com wrote:
>> It's not necessary to keep consistency between readers and writers of
>> kmemleak_lock. RCU is more proper for this case. And in order to gain better
>> performance, we turn the reader locks to RCU read locks and writer locks to
>> normal spin locks.
> This won't work.
>
>> @@ -515,9 +515,7 @@ static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
>>  	struct kmemleak_object *object;
>>  
>>  	rcu_read_lock();
>> -	read_lock_irqsave(&kmemleak_lock, flags);
>>  	object = lookup_object(ptr, alias);
>> -	read_unlock_irqrestore(&kmemleak_lock, flags);
> The comment on lookup_object() states that the kmemleak_lock must be
> held. That's because we don't have an RCU-like mechanism for removing
> removing objects from the object_tree_root:
>
>>  
>>  	/* check whether the object is still available */
>>  	if (object && !get_object(object))
>> @@ -537,13 +535,13 @@ static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int ali
>>  	unsigned long flags;
>>  	struct kmemleak_object *object;
>>  
>> -	write_lock_irqsave(&kmemleak_lock, flags);
>> +	spin_lock_irqsave(&kmemleak_lock, flags);
>>  	object = lookup_object(ptr, alias);
>>  	if (object) {
>>  		rb_erase(&object->rb_node, &object_tree_root);
>>  		list_del_rcu(&object->object_list);
>>  	}
>> -	write_unlock_irqrestore(&kmemleak_lock, flags);
>> +	spin_unlock_irqrestore(&kmemleak_lock, flags);
> So here, while list removal is RCU-safe, rb_erase() is not.
>
> If you have time to implement an rb_erase_rcu(), than we could reduce
> the locking in kmemleak.

Thanks, I really neglected that rb_erase is not RCU-safe here.

I'm not sure if it is practically possible to implement rb_erase_rcu. Here
is my concern:
In the code paths starting from rb_erase, the tree is tweaked at many
places, in both __rb_erase_augmented and ____rb_erase_color. To my
understanding, there are many intermediate versions of the tree
during the erasion. In some of the versions, the tree is incomplete, i.e.
some nodes(not the one to be deleted) are invisible to readers. I'm not
sure if this is acceptable as an RCU implementation. Does it mean we
need to form a rb_erase_rcu from scratch?

And are there any other concerns about this attempt?

Let me add RCU supporters Paul and Josh here. Your advice would be
highly appreciated.

Thanks,
Zhe


>
Catalin Marinas Jan. 7, 2019, 10:10 a.m. UTC | #3
On Mon, Jan 07, 2019 at 03:31:18PM +0800, He Zhe wrote:
> On 1/5/19 2:37 AM, Catalin Marinas wrote:
> > On Fri, Jan 04, 2019 at 10:29:13PM +0800, zhe.he@windriver.com wrote:
> >> It's not necessary to keep consistency between readers and writers of
> >> kmemleak_lock. RCU is more proper for this case. And in order to gain better
> >> performance, we turn the reader locks to RCU read locks and writer locks to
> >> normal spin locks.
> > This won't work.
> >
> >> @@ -515,9 +515,7 @@ static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
> >>  	struct kmemleak_object *object;
> >>  
> >>  	rcu_read_lock();
> >> -	read_lock_irqsave(&kmemleak_lock, flags);
> >>  	object = lookup_object(ptr, alias);
> >> -	read_unlock_irqrestore(&kmemleak_lock, flags);
> > The comment on lookup_object() states that the kmemleak_lock must be
> > held. That's because we don't have an RCU-like mechanism for removing
> > removing objects from the object_tree_root:
> >
> >>  
> >>  	/* check whether the object is still available */
> >>  	if (object && !get_object(object))
> >> @@ -537,13 +535,13 @@ static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int ali
> >>  	unsigned long flags;
> >>  	struct kmemleak_object *object;
> >>  
> >> -	write_lock_irqsave(&kmemleak_lock, flags);
> >> +	spin_lock_irqsave(&kmemleak_lock, flags);
> >>  	object = lookup_object(ptr, alias);
> >>  	if (object) {
> >>  		rb_erase(&object->rb_node, &object_tree_root);
> >>  		list_del_rcu(&object->object_list);
> >>  	}
> >> -	write_unlock_irqrestore(&kmemleak_lock, flags);
> >> +	spin_unlock_irqrestore(&kmemleak_lock, flags);
> > So here, while list removal is RCU-safe, rb_erase() is not.
> >
> > If you have time to implement an rb_erase_rcu(), than we could reduce
> > the locking in kmemleak.
> 
> Thanks, I really neglected that rb_erase is not RCU-safe here.
> 
> I'm not sure if it is practically possible to implement rb_erase_rcu. Here
> is my concern:
> In the code paths starting from rb_erase, the tree is tweaked at many
> places, in both __rb_erase_augmented and ____rb_erase_color. To my
> understanding, there are many intermediate versions of the tree
> during the erasion. In some of the versions, the tree is incomplete, i.e.
> some nodes(not the one to be deleted) are invisible to readers. I'm not
> sure if this is acceptable as an RCU implementation. Does it mean we
> need to form a rb_erase_rcu from scratch?

If it's possible, I think it would help. I had a quick look as well but
as it seemed non-trivial, I moved on to something else.

> And are there any other concerns about this attempt?

No concerns if it's possible at all. In the meantime, you could try to
replace the rw_lock with a classic spinlock. There was a thread recently
and I concluded that the rw_lock is no longer necessary as we don't have
multiple readers contention.

Yet another improvement could be to drop the kmemleak_object.lock
entirely and just rely on the main kmemleak_lock. I don't think the
fine-grained locking saves us much as in most cases where it acquires
the object->lock it already holds (or may have acquired/released) the
kmemleak_lock.

Note that even if we have an rb_erase_rcu(), we'd still need to acquire
the object->lock to prevent the scanned block being de-allocated
(unmapped in the case of vmalloc()). So if we manage with a single
kmemleak_lock (spin_lock_t), it may give a similar performance boost to
what you've got without kmemleak_lock.

FTR, the original aim of RCU grace period in kmemleak was to avoid a
recursive call into the slab freeing code; it later came in handy for
some list traversal.
Paul E. McKenney Jan. 7, 2019, 6:59 p.m. UTC | #4
On Mon, Jan 07, 2019 at 03:31:18PM +0800, He Zhe wrote:
> 
> 
> On 1/5/19 2:37 AM, Catalin Marinas wrote:
> > On Fri, Jan 04, 2019 at 10:29:13PM +0800, zhe.he@windriver.com wrote:
> >> It's not necessary to keep consistency between readers and writers of
> >> kmemleak_lock. RCU is more proper for this case. And in order to gain better
> >> performance, we turn the reader locks to RCU read locks and writer locks to
> >> normal spin locks.
> > This won't work.
> >
> >> @@ -515,9 +515,7 @@ static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
> >>  	struct kmemleak_object *object;
> >>  
> >>  	rcu_read_lock();
> >> -	read_lock_irqsave(&kmemleak_lock, flags);
> >>  	object = lookup_object(ptr, alias);
> >> -	read_unlock_irqrestore(&kmemleak_lock, flags);
> > The comment on lookup_object() states that the kmemleak_lock must be
> > held. That's because we don't have an RCU-like mechanism for removing
> > removing objects from the object_tree_root:
> >
> >>  
> >>  	/* check whether the object is still available */
> >>  	if (object && !get_object(object))
> >> @@ -537,13 +535,13 @@ static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int ali
> >>  	unsigned long flags;
> >>  	struct kmemleak_object *object;
> >>  
> >> -	write_lock_irqsave(&kmemleak_lock, flags);
> >> +	spin_lock_irqsave(&kmemleak_lock, flags);
> >>  	object = lookup_object(ptr, alias);
> >>  	if (object) {
> >>  		rb_erase(&object->rb_node, &object_tree_root);
> >>  		list_del_rcu(&object->object_list);
> >>  	}
> >> -	write_unlock_irqrestore(&kmemleak_lock, flags);
> >> +	spin_unlock_irqrestore(&kmemleak_lock, flags);
> > So here, while list removal is RCU-safe, rb_erase() is not.
> >
> > If you have time to implement an rb_erase_rcu(), than we could reduce
> > the locking in kmemleak.
> 
> Thanks, I really neglected that rb_erase is not RCU-safe here.
> 
> I'm not sure if it is practically possible to implement rb_erase_rcu. Here
> is my concern:
> In the code paths starting from rb_erase, the tree is tweaked at many
> places, in both __rb_erase_augmented and ____rb_erase_color. To my
> understanding, there are many intermediate versions of the tree
> during the erasion. In some of the versions, the tree is incomplete, i.e.
> some nodes(not the one to be deleted) are invisible to readers. I'm not
> sure if this is acceptable as an RCU implementation. Does it mean we
> need to form a rb_erase_rcu from scratch?
> 
> And are there any other concerns about this attempt?
> 
> Let me add RCU supporters Paul and Josh here. Your advice would be
> highly appreciated.

If updates and rebalancing are handled carefully, it can be made to work.
The classic paper by Kung and Lehman covers the rebalancing issues:
http://www.eecs.harvard.edu/~htk/publication/1980-tods-kung-lehman.pdf

							Thanx, Paul
diff mbox series

Patch

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index f9d9dc2..ef9ea00 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -26,7 +26,7 @@ 
  *
  * The following locks and mutexes are used by kmemleak:
  *
- * - kmemleak_lock (rwlock): protects the object_list modifications and
+ * - kmemleak_lock (spinlock): protects the object_list modifications and
  *   accesses to the object_tree_root. The object_list is the main list
  *   holding the metadata (struct kmemleak_object) for the allocated memory
  *   blocks. The object_tree_root is a red black tree used to look-up
@@ -199,7 +199,7 @@  static LIST_HEAD(gray_list);
 /* search tree for object boundaries */
 static struct rb_root object_tree_root = RB_ROOT;
 /* rw_lock protecting the access to object_list and object_tree_root */
-static DEFINE_RWLOCK(kmemleak_lock);
+static DEFINE_SPINLOCK(kmemleak_lock);
 
 /* allocation caches for kmemleak internal data */
 static struct kmem_cache *object_cache;
@@ -515,9 +515,7 @@  static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
 	struct kmemleak_object *object;
 
 	rcu_read_lock();
-	read_lock_irqsave(&kmemleak_lock, flags);
 	object = lookup_object(ptr, alias);
-	read_unlock_irqrestore(&kmemleak_lock, flags);
 
 	/* check whether the object is still available */
 	if (object && !get_object(object))
@@ -537,13 +535,13 @@  static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int ali
 	unsigned long flags;
 	struct kmemleak_object *object;
 
-	write_lock_irqsave(&kmemleak_lock, flags);
+	spin_lock_irqsave(&kmemleak_lock, flags);
 	object = lookup_object(ptr, alias);
 	if (object) {
 		rb_erase(&object->rb_node, &object_tree_root);
 		list_del_rcu(&object->object_list);
 	}
-	write_unlock_irqrestore(&kmemleak_lock, flags);
+	spin_unlock_irqrestore(&kmemleak_lock, flags);
 
 	return object;
 }
@@ -617,7 +615,7 @@  static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
 	/* kernel backtrace */
 	object->trace_len = __save_stack_trace(object->trace);
 
-	write_lock_irqsave(&kmemleak_lock, flags);
+	spin_lock_irqsave(&kmemleak_lock, flags);
 
 	min_addr = min(min_addr, ptr);
 	max_addr = max(max_addr, ptr + size);
@@ -648,7 +646,7 @@  static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
 
 	list_add_tail_rcu(&object->object_list, &object_list);
 out:
-	write_unlock_irqrestore(&kmemleak_lock, flags);
+	spin_unlock_irqrestore(&kmemleak_lock, flags);
 	return object;
 }
 
@@ -1334,7 +1332,7 @@  static void scan_block(void *_start, void *_end,
 	unsigned long *end = _end - (BYTES_PER_POINTER - 1);
 	unsigned long flags;
 
-	read_lock_irqsave(&kmemleak_lock, flags);
+	rcu_read_lock();
 	for (ptr = start; ptr < end; ptr++) {
 		struct kmemleak_object *object;
 		unsigned long pointer;
@@ -1350,14 +1348,8 @@  static void scan_block(void *_start, void *_end,
 		if (pointer < min_addr || pointer >= max_addr)
 			continue;
 
-		/*
-		 * No need for get_object() here since we hold kmemleak_lock.
-		 * object->use_count cannot be dropped to 0 while the object
-		 * is still present in object_tree_root and object_list
-		 * (with updates protected by kmemleak_lock).
-		 */
 		object = lookup_object(pointer, 1);
-		if (!object)
+		if (!object || !get_object(object))
 			continue;
 		if (object == scanned)
 			/* self referenced, ignore */
@@ -1368,7 +1360,8 @@  static void scan_block(void *_start, void *_end,
 		 * previously acquired in scan_object(). These locks are
 		 * enclosed by scan_mutex.
 		 */
-		spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
+		spin_lock_irqsave_nested(&object->lock, flags,
+					 SINGLE_DEPTH_NESTING);
 		/* only pass surplus references (object already gray) */
 		if (color_gray(object)) {
 			excess_ref = object->excess_ref;
@@ -1377,21 +1370,22 @@  static void scan_block(void *_start, void *_end,
 			excess_ref = 0;
 			update_refs(object);
 		}
-		spin_unlock(&object->lock);
+		spin_unlock_irqrestore(&object->lock, flags);
 
 		if (excess_ref) {
 			object = lookup_object(excess_ref, 0);
-			if (!object)
+			if (!object || !get_object(object))
 				continue;
 			if (object == scanned)
 				/* circular reference, ignore */
 				continue;
-			spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
+			spin_lock_irqsave_nested(&object->lock, flags,
+						 SINGLE_DEPTH_NESTING);
 			update_refs(object);
-			spin_unlock(&object->lock);
+			spin_unlock_irqrestore(&object->lock, flags);
 		}
 	}
-	read_unlock_irqrestore(&kmemleak_lock, flags);
+	rcu_read_unlock();
 }
 
 /*