
[1/2] kmemleak: enable tracking for percpu pointers

Message ID 20240725041223.872472-2-ptikhomirov@virtuozzo.com (mailing list archive)
State New
Series kmemleak: support for percpu memory leak detect

Commit Message

Pavel Tikhomirov July 25, 2024, 4:12 a.m. UTC
This basically does:

- Add min_percpu_addr and max_percpu_addr to filter out unrelated data,
similarly to min_addr and max_addr;
- Set min_count for percpu pointers to 1 to start tracking them (see
the sketch in the notes below);
- Calculate the checksum of a percpu area as the xor of the crc32 of
each per-cpu copy;
- Split the pointer lookup code into a separate helper and use it
twice: once treating the value as a virtual pointer and once as a
percpu pointer.

CC: Wei Yongjun <weiyongjun1@huawei.com>
CC: Chen Jun <chenjun102@huawei.com>
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
note: excess_ref can only be a virtual pointer, at least for now
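
For illustration only, not part of the patch: a minimal sketch of the
kind of allocation that the min_count change starts reporting. The
module/function names below are made up; only alloc_percpu(),
free_percpu() and the kmemleak debugfs file are real interfaces.

#include <linux/init.h>
#include <linux/module.h>
#include <linux/percpu.h>

/*
 * Hypothetical leak: a percpu area is allocated and the only
 * reference to it is then dropped, so nothing points at it anymore.
 */
static int __init percpu_leak_demo_init(void)
{
	int __percpu *counter = alloc_percpu(int);

	if (!counter)
		return -ENOMEM;

	/* "forget" the pointer: no free_percpu(), not stored anywhere */
	return 0;
}
module_init(percpu_leak_demo_init);
MODULE_LICENSE("GPL");

With min_count now 1, a later scan ("echo scan >
/sys/kernel/debug/kmemleak", then reading the same file) should list
this object as unreferenced; before this change percpu allocations
were only scanned and never reported as leaks.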
---
 mm/kmemleak.c | 153 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 94 insertions(+), 59 deletions(-)

Comments

Catalin Marinas July 30, 2024, 2:15 p.m. UTC | #1
Hi Pavel,

On Thu, Jul 25, 2024 at 12:12:15PM +0800, Pavel Tikhomirov wrote:
> @@ -1308,12 +1319,23 @@ static bool update_checksum(struct kmemleak_object *object)
>  {
>  	u32 old_csum = object->checksum;
>  
> -	if (WARN_ON_ONCE(object->flags & (OBJECT_PHYS | OBJECT_PERCPU)))
> +	if (WARN_ON_ONCE(object->flags & OBJECT_PHYS))
>  		return false;
>  
>  	kasan_disable_current();
>  	kcsan_disable_current();
> -	object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size);
> +	if (object->flags & OBJECT_PERCPU) {
> +		unsigned int cpu;
> +
> +		object->checksum = 0;
> +		for_each_possible_cpu(cpu) {
> +			void *ptr = per_cpu_ptr((void __percpu *)object->pointer, cpu);
> +
> +			object->checksum ^= crc32(0, kasan_reset_tag((void *)ptr), object->size);
> +		}

Slight worry this may take too long for large-ish objects with a large
number of CPUs. But we can revisit if anyone complains.
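(As a purely illustrative back-of-the-envelope figure: with 1024
possible CPUs and a 4 KiB percpu object, a single checksum update
would crc32 roughly 4 MiB.)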

> +	} else {
> +		object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size);
> +	}
>  	kasan_enable_current();
>  	kcsan_enable_current();
>  
> @@ -1365,6 +1387,64 @@ static int scan_should_stop(void)
>  	return 0;
>  }
>  
> +static void scan_pointer(struct kmemleak_object *scanned,
> +			 unsigned long pointer, unsigned int objflags)

Nitpick: I'd have called this lookup_pointer or something like that.
When I first saw it, I tried to figure out why it doesn't have a size
argument but it became clear that it's not actually scanning/reading the
location at 'pointer' but simply looking the value up in various trees.
Up to you if you want to change the name to something else. The patch
looks good.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>

Thanks.
Pavel Tikhomirov July 31, 2024, 2:30 a.m. UTC | #2
On 7/30/24 22:15, Catalin Marinas wrote:
> Hi Pavel,
> 
> On Thu, Jul 25, 2024 at 12:12:15PM +0800, Pavel Tikhomirov wrote:
>> @@ -1308,12 +1319,23 @@ static bool update_checksum(struct kmemleak_object *object)
>>   {
>>   	u32 old_csum = object->checksum;
>>   
>> -	if (WARN_ON_ONCE(object->flags & (OBJECT_PHYS | OBJECT_PERCPU)))
>> +	if (WARN_ON_ONCE(object->flags & OBJECT_PHYS))
>>   		return false;
>>   
>>   	kasan_disable_current();
>>   	kcsan_disable_current();
>> -	object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size);
>> +	if (object->flags & OBJECT_PERCPU) {
>> +		unsigned int cpu;
>> +
>> +		object->checksum = 0;
>> +		for_each_possible_cpu(cpu) {
>> +			void *ptr = per_cpu_ptr((void __percpu *)object->pointer, cpu);
>> +
>> +			object->checksum ^= crc32(0, kasan_reset_tag((void *)ptr), object->size);
>> +		}
> 
> Slight worry this may take too long for large-ish objects with a large
> number of CPUs. But we can revisit if anyone complains.
> 
>> +	} else {
>> +		object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size);
>> +	}
>>   	kasan_enable_current();
>>   	kcsan_enable_current();
>>   
>> @@ -1365,6 +1387,64 @@ static int scan_should_stop(void)
>>   	return 0;
>>   }
>>   
>> +static void scan_pointer(struct kmemleak_object *scanned,
>> +			 unsigned long pointer, unsigned int objflags)
> 
> Nitpick: I'd have called this lookup_pointer or something like that.
> When I first saw it, I tried to figure out why it doesn't have a size
> argument but it became clear that it's not actually scanning/reading the
> location at 'pointer' but simply looking the value up in various trees.
> Up to you if you want to change the name to something else. The patch
> looks good.

Yes, I agree "scan_" is a bit confusing. Let's try
"pointer_update_refs", as it is more self-explanatory and describes
what the function does. "lookup_pointer" sounds like a function that
should return a pointer, just as "lookup_object" returns an object,
but we can always switch to "lookup_pointer" if you prefer.

I will send v2 with this change shortly.

> 
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> 
> Thanks.
>

Patch

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 6a540c2b27c52..7aa5727aff330 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -230,6 +230,10 @@  static int kmemleak_error;
 static unsigned long min_addr = ULONG_MAX;
 static unsigned long max_addr;
 
+/* minimum and maximum address that may be valid per-CPU pointers */
+static unsigned long min_percpu_addr = ULONG_MAX;
+static unsigned long max_percpu_addr;
+
 static struct task_struct *scan_thread;
 /* used to avoid reporting of recently allocated objects */
 static unsigned long jiffies_min_age;
@@ -300,13 +304,20 @@  static void hex_dump_object(struct seq_file *seq,
 	const u8 *ptr = (const u8 *)object->pointer;
 	size_t len;
 
-	if (WARN_ON_ONCE(object->flags & (OBJECT_PHYS | OBJECT_PERCPU)))
+	if (WARN_ON_ONCE(object->flags & OBJECT_PHYS))
 		return;
 
+	if (object->flags & OBJECT_PERCPU)
+		ptr = (const u8 *)this_cpu_ptr((void __percpu *)object->pointer);
+
 	/* limit the number of lines to HEX_MAX_LINES */
 	len = min_t(size_t, object->size, HEX_MAX_LINES * HEX_ROW_SIZE);
 
-	warn_or_seq_printf(seq, "  hex dump (first %zu bytes):\n", len);
+	if (object->flags & OBJECT_PERCPU)
+		warn_or_seq_printf(seq, "  hex dump (first %zu bytes on cpu %d):\n",
+				   len, raw_smp_processor_id());
+	else
+		warn_or_seq_printf(seq, "  hex dump (first %zu bytes):\n", len);
 	kasan_disable_current();
 	warn_or_seq_hex_dump(seq, DUMP_PREFIX_NONE, HEX_ROW_SIZE,
 			     HEX_GROUP_SIZE, kasan_reset_tag((void *)ptr), len, HEX_ASCII);
@@ -700,10 +711,14 @@  static int __link_object(struct kmemleak_object *object, unsigned long ptr,
 
 	untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr);
 	/*
-	 * Only update min_addr and max_addr with object
-	 * storing virtual address.
+	 * Only update min_addr and max_addr with objects storing a
+	 * virtual address, and update min_percpu_addr and
+	 * max_percpu_addr for per-CPU objects.
 	 */
-	if (!(objflags & (OBJECT_PHYS | OBJECT_PERCPU))) {
+	if (objflags & OBJECT_PERCPU) {
+		min_percpu_addr = min(min_percpu_addr, untagged_ptr);
+		max_percpu_addr = max(max_percpu_addr, untagged_ptr + size);
+	} else if (!(objflags & OBJECT_PHYS)) {
 		min_addr = min(min_addr, untagged_ptr);
 		max_addr = max(max_addr, untagged_ptr + size);
 	}
@@ -1059,12 +1074,8 @@  void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
 {
 	pr_debug("%s(0x%px, %zu)\n", __func__, ptr, size);
 
-	/*
-	 * Percpu allocations are only scanned and not reported as leaks
-	 * (min_count is set to 0).
-	 */
 	if (kmemleak_enabled && ptr && !IS_ERR(ptr))
-		create_object_percpu((unsigned long)ptr, size, 0, gfp);
+		create_object_percpu((unsigned long)ptr, size, 1, gfp);
 }
 EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu);
 
@@ -1308,12 +1319,23 @@  static bool update_checksum(struct kmemleak_object *object)
 {
 	u32 old_csum = object->checksum;
 
-	if (WARN_ON_ONCE(object->flags & (OBJECT_PHYS | OBJECT_PERCPU)))
+	if (WARN_ON_ONCE(object->flags & OBJECT_PHYS))
 		return false;
 
 	kasan_disable_current();
 	kcsan_disable_current();
-	object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size);
+	if (object->flags & OBJECT_PERCPU) {
+		unsigned int cpu;
+
+		object->checksum = 0;
+		for_each_possible_cpu(cpu) {
+			void *ptr = per_cpu_ptr((void __percpu *)object->pointer, cpu);
+
+			object->checksum ^= crc32(0, kasan_reset_tag((void *)ptr), object->size);
+		}
+	} else {
+		object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size);
+	}
 	kasan_enable_current();
 	kcsan_enable_current();
 
@@ -1365,6 +1387,64 @@  static int scan_should_stop(void)
 	return 0;
 }
 
+static void scan_pointer(struct kmemleak_object *scanned,
+			 unsigned long pointer, unsigned int objflags)
+{
+	struct kmemleak_object *object;
+	unsigned long untagged_ptr;
+	unsigned long excess_ref;
+
+	untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer);
+	if (objflags & OBJECT_PERCPU) {
+		if (untagged_ptr < min_percpu_addr || untagged_ptr >= max_percpu_addr)
+			return;
+	} else {
+		if (untagged_ptr < min_addr || untagged_ptr >= max_addr)
+			return;
+	}
+
+	/*
+	 * No need for get_object() here since we hold kmemleak_lock.
+	 * object->use_count cannot be dropped to 0 while the object
+	 * is still present in object_tree_root and object_list
+	 * (with updates protected by kmemleak_lock).
+	 */
+	object = __lookup_object(pointer, 1, objflags);
+	if (!object)
+		return;
+	if (object == scanned)
+		/* self referenced, ignore */
+		return;
+
+	/*
+	 * Avoid the lockdep recursive warning on object->lock being
+	 * previously acquired in scan_object(). These locks are
+	 * enclosed by scan_mutex.
+	 */
+	raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
+	/* only pass surplus references (object already gray) */
+	if (color_gray(object)) {
+		excess_ref = object->excess_ref;
+		/* no need for update_refs() if object already gray */
+	} else {
+		excess_ref = 0;
+		update_refs(object);
+	}
+	raw_spin_unlock(&object->lock);
+
+	if (excess_ref) {
+		object = lookup_object(excess_ref, 0);
+		if (!object)
+			return;
+		if (object == scanned)
+			/* circular reference, ignore */
+			return;
+		raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
+		update_refs(object);
+		raw_spin_unlock(&object->lock);
+	}
+}
+
 /*
  * Scan a memory block (exclusive range) for valid pointers and add those
  * found to the gray list.
@@ -1376,13 +1456,10 @@  static void scan_block(void *_start, void *_end,
 	unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER);
 	unsigned long *end = _end - (BYTES_PER_POINTER - 1);
 	unsigned long flags;
-	unsigned long untagged_ptr;
 
 	raw_spin_lock_irqsave(&kmemleak_lock, flags);
 	for (ptr = start; ptr < end; ptr++) {
-		struct kmemleak_object *object;
 		unsigned long pointer;
-		unsigned long excess_ref;
 
 		if (scan_should_stop())
 			break;
@@ -1391,50 +1468,8 @@  static void scan_block(void *_start, void *_end,
 		pointer = *(unsigned long *)kasan_reset_tag((void *)ptr);
 		kasan_enable_current();
 
-		untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer);
-		if (untagged_ptr < min_addr || untagged_ptr >= max_addr)
-			continue;
-
-		/*
-		 * No need for get_object() here since we hold kmemleak_lock.
-		 * object->use_count cannot be dropped to 0 while the object
-		 * is still present in object_tree_root and object_list
-		 * (with updates protected by kmemleak_lock).
-		 */
-		object = lookup_object(pointer, 1);
-		if (!object)
-			continue;
-		if (object == scanned)
-			/* self referenced, ignore */
-			continue;
-
-		/*
-		 * Avoid the lockdep recursive warning on object->lock being
-		 * previously acquired in scan_object(). These locks are
-		 * enclosed by scan_mutex.
-		 */
-		raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
-		/* only pass surplus references (object already gray) */
-		if (color_gray(object)) {
-			excess_ref = object->excess_ref;
-			/* no need for update_refs() if object already gray */
-		} else {
-			excess_ref = 0;
-			update_refs(object);
-		}
-		raw_spin_unlock(&object->lock);
-
-		if (excess_ref) {
-			object = lookup_object(excess_ref, 0);
-			if (!object)
-				continue;
-			if (object == scanned)
-				/* circular reference, ignore */
-				continue;
-			raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
-			update_refs(object);
-			raw_spin_unlock(&object->lock);
-		}
+		scan_pointer(scanned, pointer, 0);
+		scan_pointer(scanned, pointer, OBJECT_PERCPU);
 	}
 	raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
 }