diff mbox series

[v6,2/8] lib/ref_tracker: improve printing stats

Message ID 20230224-track_gt-v6-2-0dc8601fd02f@intel.com (mailing list archive)
State Not Applicable
Headers show
Series drm/i915: use ref_tracker library for tracking wakerefs | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Andrzej Hajda March 29, 2023, 7:24 a.m. UTC
In case the library is tracking busy subsystem, simply
printing stack for every active reference will spam log
with long, hard to read, redundant stack traces. To improve
readabilty following changes have been made:
- reports are printed per stack_handle - log is more compact,
- added display name for ref_tracker_dir - it will differentiate
  multiple subsystems,
- stack trace is printed indented, in the same printk call,
- info about dropped references is printed as well.

Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
---
 include/linux/ref_tracker.h | 15 ++++++--
 lib/ref_tracker.c           | 90 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 91 insertions(+), 14 deletions(-)

Comments

Andi Shyti March 29, 2023, 12:51 p.m. UTC | #1
Hi Andrzej,

[...]

> -void ref_tracker_dir_print_locked(struct ref_tracker_dir *dir,
> -				  unsigned int display_limit)
> +struct ref_tracker_dir_stats {
> +	int total;
> +	int count;
> +	struct {
> +		depot_stack_handle_t stack_handle;
> +		unsigned int count;
> +	} stacks[];
> +};
> +
> +static struct ref_tracker_dir_stats *
> +ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit)
>  {
> +	struct ref_tracker_dir_stats *stats;
>  	struct ref_tracker *tracker;
> -	unsigned int i = 0;
>  
> -	lockdep_assert_held(&dir->lock);
> +	stats = kmalloc(struct_size(stats, stacks, limit),
> +			GFP_NOWAIT | __GFP_NOWARN);
> +	if (!stats)
> +		return ERR_PTR(-ENOMEM);
> +	stats->total = 0;
> +	stats->count = 0;
>  
>  	list_for_each_entry(tracker, &dir->list, head) {
> -		if (i < display_limit) {
> -			pr_err("leaked reference.\n");
> -			if (tracker->alloc_stack_handle)
> -				stack_depot_print(tracker->alloc_stack_handle);
> -			i++;
> -		} else {
> -			break;
> +		depot_stack_handle_t stack = tracker->alloc_stack_handle;
> +		int i;
> +
> +		++stats->total;
> +		for (i = 0; i < stats->count; ++i)
> +			if (stats->stacks[i].stack_handle == stack)
> +				break;
> +		if (i >= limit)
> +			continue;
> +		if (i >= stats->count) {
> +			stats->stacks[i].stack_handle = stack;
> +			stats->stacks[i].count = 0;
> +			++stats->count;
>  		}
> +		++stats->stacks[i].count;
> +	}
> +
> +	return stats;
> +}
> +
> +void ref_tracker_dir_print_locked(struct ref_tracker_dir *dir,
> +				  unsigned int display_limit)
> +{
> +	struct ref_tracker_dir_stats *stats;
> +	unsigned int i = 0, skipped;
> +	depot_stack_handle_t stack;
> +	char *sbuf;
> +
> +	lockdep_assert_held(&dir->lock);
> +
> +	if (list_empty(&dir->list))
> +		return;
> +
> +	stats = ref_tracker_get_stats(dir, display_limit);
> +	if (IS_ERR(stats)) {
> +		pr_err("%s@%pK: couldn't get stats, error %pe\n",
> +		       dir->name, dir, stats);
> +		return;
>  	}
> +
> +	sbuf = kmalloc(STACK_BUF_SIZE, GFP_NOWAIT | __GFP_NOWARN);
> +
> +	for (i = 0, skipped = stats->total; i < stats->count; ++i) {
> +		stack = stats->stacks[i].stack_handle;
> +		if (sbuf && !stack_depot_snprint(stack, sbuf, STACK_BUF_SIZE, 4))
> +			sbuf[0] = 0;
> +		pr_err("%s@%pK has %d/%d users at\n%s\n", dir->name, dir,
> +		       stats->stacks[i].count, stats->total, sbuf);
> +		skipped -= stats->stacks[i].count;
> +	}
> +
> +	if (skipped)
> +		pr_err("%s@%pK skipped reports about %d/%d users.\n",
> +		       dir->name, dir, skipped, stats->total);
> +
> +	kfree(sbuf);
> +
> +	kfree(stats);

There's a chance of confusion here because
ref_tracker_get_stats() might need a ref_tracker_put_stats() to
go with it.

When you allocate in one function and free in another without a
clear pair (get/put, alloc/free, etc.), it can be hard to notice
and could lead to mistakes.

But in this simple situation, it's not a big problem, and I'm not
sure if having the put side is really needed.

Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com> 

Thanks,
Andi

>  }
>  EXPORT_SYMBOL(ref_tracker_dir_print_locked);
>  
> 
> -- 
> 2.34.1
diff mbox series

Patch

diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h
index 87a92f2bec1b88..fc9ef9952f01fd 100644
--- a/include/linux/ref_tracker.h
+++ b/include/linux/ref_tracker.h
@@ -17,12 +17,19 @@  struct ref_tracker_dir {
 	bool			dead;
 	struct list_head	list; /* List of active trackers */
 	struct list_head	quarantine; /* List of dead trackers */
+	char			name[32];
 #endif
 };
 
 #ifdef CONFIG_REF_TRACKER
-static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
-					unsigned int quarantine_count)
+
+/* Temporary allow two and three arguments, until consumers are converted */
+#define ref_tracker_dir_init(_d, _q, args...) _ref_tracker_dir_init(_d, _q, ##args, #_d)
+#define _ref_tracker_dir_init(_d, _q, _n, ...) __ref_tracker_dir_init(_d, _q, _n)
+
+static inline void __ref_tracker_dir_init(struct ref_tracker_dir *dir,
+					unsigned int quarantine_count,
+					const char *name)
 {
 	INIT_LIST_HEAD(&dir->list);
 	INIT_LIST_HEAD(&dir->quarantine);
@@ -31,6 +38,7 @@  static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
 	dir->dead = false;
 	refcount_set(&dir->untracked, 1);
 	refcount_set(&dir->no_tracker, 1);
+	strlcpy(dir->name, name, sizeof(dir->name));
 	stack_depot_init();
 }
 
@@ -51,7 +59,8 @@  int ref_tracker_free(struct ref_tracker_dir *dir,
 #else /* CONFIG_REF_TRACKER */
 
 static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
-					unsigned int quarantine_count)
+					unsigned int quarantine_count,
+					...)
 {
 }
 
diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c
index d4eb0929af8f96..2ffe79c90c1771 100644
--- a/lib/ref_tracker.c
+++ b/lib/ref_tracker.c
@@ -1,11 +1,16 @@ 
 // SPDX-License-Identifier: GPL-2.0-or-later
+
+#define pr_fmt(fmt) "ref_tracker: " fmt
+
 #include <linux/export.h>
+#include <linux/list_sort.h>
 #include <linux/ref_tracker.h>
 #include <linux/slab.h>
 #include <linux/stacktrace.h>
 #include <linux/stackdepot.h>
 
 #define REF_TRACKER_STACK_ENTRIES 16
+#define STACK_BUF_SIZE 1024
 
 struct ref_tracker {
 	struct list_head	head;   /* anchor into dir->list or dir->quarantine */
@@ -14,24 +19,87 @@  struct ref_tracker {
 	depot_stack_handle_t	free_stack_handle;
 };
 
-void ref_tracker_dir_print_locked(struct ref_tracker_dir *dir,
-				  unsigned int display_limit)
+struct ref_tracker_dir_stats {
+	int total;
+	int count;
+	struct {
+		depot_stack_handle_t stack_handle;
+		unsigned int count;
+	} stacks[];
+};
+
+static struct ref_tracker_dir_stats *
+ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit)
 {
+	struct ref_tracker_dir_stats *stats;
 	struct ref_tracker *tracker;
-	unsigned int i = 0;
 
-	lockdep_assert_held(&dir->lock);
+	stats = kmalloc(struct_size(stats, stacks, limit),
+			GFP_NOWAIT | __GFP_NOWARN);
+	if (!stats)
+		return ERR_PTR(-ENOMEM);
+	stats->total = 0;
+	stats->count = 0;
 
 	list_for_each_entry(tracker, &dir->list, head) {
-		if (i < display_limit) {
-			pr_err("leaked reference.\n");
-			if (tracker->alloc_stack_handle)
-				stack_depot_print(tracker->alloc_stack_handle);
-			i++;
-		} else {
-			break;
+		depot_stack_handle_t stack = tracker->alloc_stack_handle;
+		int i;
+
+		++stats->total;
+		for (i = 0; i < stats->count; ++i)
+			if (stats->stacks[i].stack_handle == stack)
+				break;
+		if (i >= limit)
+			continue;
+		if (i >= stats->count) {
+			stats->stacks[i].stack_handle = stack;
+			stats->stacks[i].count = 0;
+			++stats->count;
 		}
+		++stats->stacks[i].count;
+	}
+
+	return stats;
+}
+
+void ref_tracker_dir_print_locked(struct ref_tracker_dir *dir,
+				  unsigned int display_limit)
+{
+	struct ref_tracker_dir_stats *stats;
+	unsigned int i = 0, skipped;
+	depot_stack_handle_t stack;
+	char *sbuf;
+
+	lockdep_assert_held(&dir->lock);
+
+	if (list_empty(&dir->list))
+		return;
+
+	stats = ref_tracker_get_stats(dir, display_limit);
+	if (IS_ERR(stats)) {
+		pr_err("%s@%pK: couldn't get stats, error %pe\n",
+		       dir->name, dir, stats);
+		return;
 	}
+
+	sbuf = kmalloc(STACK_BUF_SIZE, GFP_NOWAIT | __GFP_NOWARN);
+
+	for (i = 0, skipped = stats->total; i < stats->count; ++i) {
+		stack = stats->stacks[i].stack_handle;
+		if (sbuf && !stack_depot_snprint(stack, sbuf, STACK_BUF_SIZE, 4))
+			sbuf[0] = 0;
+		pr_err("%s@%pK has %d/%d users at\n%s\n", dir->name, dir,
+		       stats->stacks[i].count, stats->total, sbuf);
+		skipped -= stats->stacks[i].count;
+	}
+
+	if (skipped)
+		pr_err("%s@%pK skipped reports about %d/%d users.\n",
+		       dir->name, dir, skipped, stats->total);
+
+	kfree(sbuf);
+
+	kfree(stats);
 }
 EXPORT_SYMBOL(ref_tracker_dir_print_locked);