
[v3,05/11] lib/ref_tracker: __ref_tracker_dir_print improve printing

Message ID 20220221232542.1481315-6-andrzej.hajda@intel.com (mailing list archive)
State New, archived
Series drm/i915: use ref_tracker library for tracking wakerefs

Commit Message

Andrzej Hajda Feb. 21, 2022, 11:25 p.m. UTC
To improve readability of ref_tracker printing, the following changes
have been performed:
- reports are printed per stack_handle - log is more compact,
- added display name for ref_tracker_dir,
- stack trace is printed indented, in the same printk call,
- total number of references is printed every time,
- print info about dropped references.

Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
---
 include/linux/ref_tracker.h | 15 +++++--
 lib/ref_tracker.c           | 90 ++++++++++++++++++++++++++++++++-----
 2 files changed, 91 insertions(+), 14 deletions(-)
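
For illustration, with these changes a leak report would look roughly like
the following (the directory name, pointer value and stack frames here are
hypothetical; the shape follows the pr_err() format strings in the patch):

  ref_tracker: netdev@ffff88810a1e0b80 has 3/5 users at
      ref_tracker_alloc+0x15c/0x1a0
      dev_hold_track+0x2d/0x40
      ...

  ref_tracker: netdev@ffff88810a1e0b80 skipped reports about 2/5 users.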

Comments

Eric Dumazet Feb. 22, 2022, 12:08 a.m. UTC | #1
On Mon, Feb 21, 2022 at 3:26 PM Andrzej Hajda <andrzej.hajda@intel.com> wrote:
>
> To improve readibility of ref_tracker printing following changes

   readability

> have been performed:
> - reports are printed per stack_handle - log is more compact,
> - added display name for ref_tracker_dir,
> - stack trace is printed indented, in the same printk call,
> - total number of references is printed every time,
> - print info about dropped references.
>
> Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
> ---
>  include/linux/ref_tracker.h | 15 +++++--
>  lib/ref_tracker.c           | 90 ++++++++++++++++++++++++++++++++-----
>  2 files changed, 91 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h
> index 3e9e9df2a41f5..a2cf1f6309adb 100644
> --- a/include/linux/ref_tracker.h
> +++ b/include/linux/ref_tracker.h
> @@ -17,12 +17,19 @@ struct ref_tracker_dir {
>         bool                    dead;
>         struct list_head        list; /* List of active trackers */
>         struct list_head        quarantine; /* List of dead trackers */
> +       char                    name[32];
>  #endif
>  };
>
>  #ifdef CONFIG_REF_TRACKER
> -static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
> -                                       unsigned int quarantine_count)
> +
> +// Temporary allow two and three arguments, until consumers are converted
> +#define ref_tracker_dir_init(_d, _q, args...) _ref_tracker_dir_init(_d, _q, ##args, #_d)
> +#define _ref_tracker_dir_init(_d, _q, _n, ...) __ref_tracker_dir_init(_d, _q, _n)
> +
> +static inline void __ref_tracker_dir_init(struct ref_tracker_dir *dir,
> +                                       unsigned int quarantine_count,
> +                                       const char *name)
>  {
>         INIT_LIST_HEAD(&dir->list);
>         INIT_LIST_HEAD(&dir->quarantine);
> @@ -31,6 +38,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
>         dir->dead = false;
>         refcount_set(&dir->untracked, 1);
>         refcount_set(&dir->no_tracker, 1);
> +       strlcpy(dir->name, name, sizeof(dir->name));
>         stack_depot_init();
>  }
>
> @@ -51,7 +59,8 @@ int ref_tracker_free(struct ref_tracker_dir *dir,
>  #else /* CONFIG_REF_TRACKER */
>
>  static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
> -                                       unsigned int quarantine_count)
> +                                       unsigned int quarantine_count,
> +                                       ...)
>  {
>  }
>
> diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c
> index 5e9f90bbf771b..ab1253fde244e 100644
> --- a/lib/ref_tracker.c
> +++ b/lib/ref_tracker.c
> @@ -1,11 +1,16 @@
>  // SPDX-License-Identifier: GPL-2.0-or-later
> +
> +#define pr_fmt(fmt) "ref_tracker: " fmt
> +
>  #include <linux/export.h>
> +#include <linux/list_sort.h>
>  #include <linux/ref_tracker.h>
>  #include <linux/slab.h>
>  #include <linux/stacktrace.h>
>  #include <linux/stackdepot.h>
>
>  #define REF_TRACKER_STACK_ENTRIES 16
> +#define STACK_BUF_SIZE 1024
>
>  struct ref_tracker {
>         struct list_head        head;   /* anchor into dir->list or dir->quarantine */
> @@ -14,24 +19,87 @@ struct ref_tracker {
>         depot_stack_handle_t    free_stack_handle;
>  };
>
> -void __ref_tracker_dir_print(struct ref_tracker_dir *dir,
> -                          unsigned int display_limit)
> +struct ref_tracker_dir_stats {
> +       int total;
> +       int count;
> +       struct {
> +               depot_stack_handle_t stack_handle;
> +               unsigned int count;
> +       } stacks[];
> +};
> +
> +static struct ref_tracker_dir_stats *
> +ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit)
>  {
> +       struct ref_tracker_dir_stats *stats;
>         struct ref_tracker *tracker;
> -       unsigned int i = 0;
>
> -       lockdep_assert_held(&dir->lock);
> +       stats = kmalloc(struct_size(stats, stacks, limit),
> +                       GFP_NOWAIT | __GFP_NOWARN);

I would be more comfortable if the allocation was done by the caller,
possibly using GFP_KERNEL and eventually kvmalloc(),
instead of under dir->lock ?
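
A minimal sketch of that suggestion, with the allocation hoisted out of the
locked region (ref_tracker_fill_stats() is a hypothetical helper that would
only walk dir->list; it is not part of this patch):

	stats = kvmalloc(struct_size(stats, stacks, limit), GFP_KERNEL);
	spin_lock_irqsave(&dir->lock, flags);
	if (stats)
		ref_tracker_fill_stats(dir, stats, limit);
	spin_unlock_irqrestore(&dir->lock, flags);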


> +       if (!stats)
> +               return ERR_PTR(-ENOMEM);
> +       stats->total = 0;
> +       stats->count = 0;
>
>         list_for_each_entry(tracker, &dir->list, head) {
> -               if (i < display_limit) {
> -                       pr_err("leaked reference.\n");
> -                       if (tracker->alloc_stack_handle)
> -                               stack_depot_print(tracker->alloc_stack_handle);
> -                       i++;
> -               } else {
> -                       break;
> +               depot_stack_handle_t stack = tracker->alloc_stack_handle;
> +               int i;
> +
> +               ++stats->total;
> +               for (i = 0; i < stats->count; ++i)
> +                       if (stats->stacks[i].stack_handle == stack)
> +                               break;
> +               if (i >= limit)
> +                       continue;
> +               if (i >= stats->count) {
> +                       stats->stacks[i].stack_handle = stack;
> +                       stats->stacks[i].count = 0;
> +                       ++stats->count;
>                 }
> +               ++stats->stacks[i].count;
> +       }
> +
> +       return stats;
> +}
> +
> +void __ref_tracker_dir_print(struct ref_tracker_dir *dir,
> +                          unsigned int display_limit)
> +{
> +       struct ref_tracker_dir_stats *stats;
> +       unsigned int i = 0, skipped;
> +       depot_stack_handle_t stack;
> +       char *sbuf;
> +
> +       lockdep_assert_held(&dir->lock);
> +
> +       if (list_empty(&dir->list))
> +               return;
> +
> +       stats = ref_tracker_get_stats(dir, display_limit);
> +       if (IS_ERR(stats)) {
> +               pr_err("%s@%pK: couldn't get stats, error %pe\n",
> +                      dir->name, dir, stats);
> +               return;
>         }
> +

> +       sbuf = kmalloc(STACK_BUF_SIZE, GFP_NOWAIT | __GFP_NOWARN);

Same remark. These allocations are most probably going to happen from process
context, I think GFP_KERNEL is more robust.

This is debugging infra, it would be sad if we give up at this point,
after storing MB of traces :)

> +
> +       for (i = 0, skipped = stats->total; i < stats->count; ++i) {
> +               stack = stats->stacks[i].stack_handle;
> +               if (sbuf && !stack_depot_snprint(stack, sbuf, STACK_BUF_SIZE, 4))
> +                       sbuf[0] = 0;
> +               pr_err("%s@%pK has %d/%d users at\n%s\n", dir->name, dir,
> +                      stats->stacks[i].count, stats->total, sbuf);
> +               skipped -= stats->stacks[i].count;
> +       }
> +
> +       if (skipped)
> +               pr_err("%s@%pK skipped reports about %d/%d users.\n",
> +                      dir->name, dir, skipped, stats->total);
> +
> +       kfree(sbuf);
> +
> +       kfree(stats);
>  }
>  EXPORT_SYMBOL(__ref_tracker_dir_print);
>
> --
> 2.25.1
>
Andrzej Hajda Feb. 22, 2022, 10:27 a.m. UTC | #2
On 22.02.2022 01:08, Eric Dumazet wrote:
> On Mon, Feb 21, 2022 at 3:26 PM Andrzej Hajda <andrzej.hajda@intel.com> wrote:
>> To improve readibility of ref_tracker printing following changes
>     readability
>
>> have been performed:
>> - reports are printed per stack_handle - log is more compact,
>> - added display name for ref_tracker_dir,
>> - stack trace is printed indented, in the same printk call,
>> - total number of references is printed every time,
>> - print info about dropped references.
>>
>> Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
>> ---
>>   include/linux/ref_tracker.h | 15 +++++--
>>   lib/ref_tracker.c           | 90 ++++++++++++++++++++++++++++++++-----
>>   2 files changed, 91 insertions(+), 14 deletions(-)
>>
>> diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h
>> index 3e9e9df2a41f5..a2cf1f6309adb 100644
>> --- a/include/linux/ref_tracker.h
>> +++ b/include/linux/ref_tracker.h
>> @@ -17,12 +17,19 @@ struct ref_tracker_dir {
>>          bool                    dead;
>>          struct list_head        list; /* List of active trackers */
>>          struct list_head        quarantine; /* List of dead trackers */
>> +       char                    name[32];
>>   #endif
>>   };
>>
>>   #ifdef CONFIG_REF_TRACKER
>> -static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
>> -                                       unsigned int quarantine_count)
>> +
>> +// Temporary allow two and three arguments, until consumers are converted
>> +#define ref_tracker_dir_init(_d, _q, args...) _ref_tracker_dir_init(_d, _q, ##args, #_d)
>> +#define _ref_tracker_dir_init(_d, _q, _n, ...) __ref_tracker_dir_init(_d, _q, _n)
>> +
>> +static inline void __ref_tracker_dir_init(struct ref_tracker_dir *dir,
>> +                                       unsigned int quarantine_count,
>> +                                       const char *name)
>>   {
>>          INIT_LIST_HEAD(&dir->list);
>>          INIT_LIST_HEAD(&dir->quarantine);
>> @@ -31,6 +38,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
>>          dir->dead = false;
>>          refcount_set(&dir->untracked, 1);
>>          refcount_set(&dir->no_tracker, 1);
>> +       strlcpy(dir->name, name, sizeof(dir->name));
>>          stack_depot_init();
>>   }
>>
>> @@ -51,7 +59,8 @@ int ref_tracker_free(struct ref_tracker_dir *dir,
>>   #else /* CONFIG_REF_TRACKER */
>>
>>   static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
>> -                                       unsigned int quarantine_count)
>> +                                       unsigned int quarantine_count,
>> +                                       ...)
>>   {
>>   }
>>
>> diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c
>> index 5e9f90bbf771b..ab1253fde244e 100644
>> --- a/lib/ref_tracker.c
>> +++ b/lib/ref_tracker.c
>> @@ -1,11 +1,16 @@
>>   // SPDX-License-Identifier: GPL-2.0-or-later
>> +
>> +#define pr_fmt(fmt) "ref_tracker: " fmt
>> +
>>   #include <linux/export.h>
>> +#include <linux/list_sort.h>
>>   #include <linux/ref_tracker.h>
>>   #include <linux/slab.h>
>>   #include <linux/stacktrace.h>
>>   #include <linux/stackdepot.h>
>>
>>   #define REF_TRACKER_STACK_ENTRIES 16
>> +#define STACK_BUF_SIZE 1024
>>
>>   struct ref_tracker {
>>          struct list_head        head;   /* anchor into dir->list or dir->quarantine */
>> @@ -14,24 +19,87 @@ struct ref_tracker {
>>          depot_stack_handle_t    free_stack_handle;
>>   };
>>
>> -void __ref_tracker_dir_print(struct ref_tracker_dir *dir,
>> -                          unsigned int display_limit)
>> +struct ref_tracker_dir_stats {
>> +       int total;
>> +       int count;
>> +       struct {
>> +               depot_stack_handle_t stack_handle;
>> +               unsigned int count;
>> +       } stacks[];
>> +};
>> +
>> +static struct ref_tracker_dir_stats *
>> +ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit)
>>   {
>> +       struct ref_tracker_dir_stats *stats;
>>          struct ref_tracker *tracker;
>> -       unsigned int i = 0;
>>
>> -       lockdep_assert_held(&dir->lock);
>> +       stats = kmalloc(struct_size(stats, stacks, limit),
>> +                       GFP_NOWAIT | __GFP_NOWARN);
> I would be more comfortable if the allocation was done by the caller,
> possibly using GFP_KERNEL and eventually kvmalloc(),
> instead of under dir->lock ?

I thought about it too, but decided to leave this change to another patch,
as it can be substantial and could open another discussion.

I am not sure what you mean by 'caller', but it could even be an external
user of the API:
1. alloc data for ref_tracker_dir_stats.
2. take locks, if necessary.
3. gather stats (ref_tracker_get_stats) atomically.
4. release taken locks.
5. print stats.

This way, allocation and printing would happen outside the locks; a rough
sketch of that flow follows below.
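
Assuming ref_tracker_get_stats() were changed to take a caller-allocated
buffer, and a hypothetical ref_tracker_dir_print_stats() helper did the
printing:

	/* 1. alloc data outside any lock */
	stats = kvmalloc(struct_size(stats, stacks, limit), GFP_KERNEL);
	if (!stats)
		return;
	/* 2.-4. hold the lock only while gathering stats atomically */
	spin_lock_irqsave(&dir->lock, flags);
	ref_tracker_get_stats(dir, stats, limit);
	spin_unlock_irqrestore(&dir->lock, flags);
	/* 5. print and free outside the lock */
	ref_tracker_dir_print_stats(dir, stats);
	kvfree(stats);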

>
>
>> +       if (!stats)
>> +               return ERR_PTR(-ENOMEM);
>> +       stats->total = 0;
>> +       stats->count = 0;
>>
>>          list_for_each_entry(tracker, &dir->list, head) {
>> -               if (i < display_limit) {
>> -                       pr_err("leaked reference.\n");
>> -                       if (tracker->alloc_stack_handle)
>> -                               stack_depot_print(tracker->alloc_stack_handle);
>> -                       i++;
>> -               } else {
>> -                       break;
>> +               depot_stack_handle_t stack = tracker->alloc_stack_handle;
>> +               int i;
>> +
>> +               ++stats->total;
>> +               for (i = 0; i < stats->count; ++i)
>> +                       if (stats->stacks[i].stack_handle == stack)
>> +                               break;
>> +               if (i >= limit)
>> +                       continue;
>> +               if (i >= stats->count) {
>> +                       stats->stacks[i].stack_handle = stack;
>> +                       stats->stacks[i].count = 0;
>> +                       ++stats->count;
>>                  }
>> +               ++stats->stacks[i].count;
>> +       }
>> +
>> +       return stats;
>> +}
>> +
>> +void __ref_tracker_dir_print(struct ref_tracker_dir *dir,
>> +                          unsigned int display_limit)
>> +{
>> +       struct ref_tracker_dir_stats *stats;
>> +       unsigned int i = 0, skipped;
>> +       depot_stack_handle_t stack;
>> +       char *sbuf;
>> +
>> +       lockdep_assert_held(&dir->lock);
>> +
>> +       if (list_empty(&dir->list))
>> +               return;
>> +
>> +       stats = ref_tracker_get_stats(dir, display_limit);
>> +       if (IS_ERR(stats)) {
>> +               pr_err("%s@%pK: couldn't get stats, error %pe\n",
>> +                      dir->name, dir, stats);
>> +               return;
>>          }
>> +
>> +       sbuf = kmalloc(STACK_BUF_SIZE, GFP_NOWAIT | __GFP_NOWARN);
> Same remark. These allocations are most probably going to happen from process
> context, I think GFP_KERNEL is more robust.

The problem is that in my scenario it can be called under a spinlock; this
is why I want non-sleeping allocations almost everywhere.

>
> This is debugging infra, it would be sad if we give up at this point,
> after storing MB of traces :)

My approach was to avoid allocations if the system is short on memory -
better to keep the system alive, and we still get the report, just without
stack traces; one can print the full stats later (for example via sysfs, or
a trigger to dmesg) - there is a good chance the bug will still be there.
If you think that is a no-go, the alternatives I see are (the second one is
sketched below):
- go back to GFP_ATOMIC,
- print the stack directly, without using stack_depot_snprint,
- pre-allocate the buffer.
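
For the second alternative, a rough sketch of the loop body printing the
stack via the existing stack_depot_print() instead of snprinting into sbuf
(this avoids the sbuf allocation, at the cost of the "stack trace in the
same printk call" property the patch introduces):

	pr_err("%s@%pK has %d/%d users at\n", dir->name, dir,
	       stats->stacks[i].count, stats->total);
	stack_depot_print(stats->stacks[i].stack_handle);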

Regards
Andrzej

>
>> +
>> +       for (i = 0, skipped = stats->total; i < stats->count; ++i) {
>> +               stack = stats->stacks[i].stack_handle;
>> +               if (sbuf && !stack_depot_snprint(stack, sbuf, STACK_BUF_SIZE, 4))
>> +                       sbuf[0] = 0;
>> +               pr_err("%s@%pK has %d/%d users at\n%s\n", dir->name, dir,
>> +                      stats->stacks[i].count, stats->total, sbuf);
>> +               skipped -= stats->stacks[i].count;
>> +       }
>> +
>> +       if (skipped)
>> +               pr_err("%s@%pK skipped reports about %d/%d users.\n",
>> +                      dir->name, dir, skipped, stats->total);
>> +
>> +       kfree(sbuf);
>> +
>> +       kfree(stats);
>>   }
>>   EXPORT_SYMBOL(__ref_tracker_dir_print);
>>
>> --
>> 2.25.1
>>

Patch

diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h
index 3e9e9df2a41f5..a2cf1f6309adb 100644
--- a/include/linux/ref_tracker.h
+++ b/include/linux/ref_tracker.h
@@ -17,12 +17,19 @@  struct ref_tracker_dir {
 	bool			dead;
 	struct list_head	list; /* List of active trackers */
 	struct list_head	quarantine; /* List of dead trackers */
+	char			name[32];
 #endif
 };
 
 #ifdef CONFIG_REF_TRACKER
-static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
-					unsigned int quarantine_count)
+
+// Temporary allow two and three arguments, until consumers are converted
+#define ref_tracker_dir_init(_d, _q, args...) _ref_tracker_dir_init(_d, _q, ##args, #_d)
+#define _ref_tracker_dir_init(_d, _q, _n, ...) __ref_tracker_dir_init(_d, _q, _n)
+
+static inline void __ref_tracker_dir_init(struct ref_tracker_dir *dir,
+					unsigned int quarantine_count,
+					const char *name)
 {
 	INIT_LIST_HEAD(&dir->list);
 	INIT_LIST_HEAD(&dir->quarantine);
@@ -31,6 +38,7 @@  static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
 	dir->dead = false;
 	refcount_set(&dir->untracked, 1);
 	refcount_set(&dir->no_tracker, 1);
+	strlcpy(dir->name, name, sizeof(dir->name));
 	stack_depot_init();
 }
 
@@ -51,7 +59,8 @@  int ref_tracker_free(struct ref_tracker_dir *dir,
 #else /* CONFIG_REF_TRACKER */
 
 static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
-					unsigned int quarantine_count)
+					unsigned int quarantine_count,
+					...)
 {
 }
 
diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c
index 5e9f90bbf771b..ab1253fde244e 100644
--- a/lib/ref_tracker.c
+++ b/lib/ref_tracker.c
@@ -1,11 +1,16 @@ 
 // SPDX-License-Identifier: GPL-2.0-or-later
+
+#define pr_fmt(fmt) "ref_tracker: " fmt
+
 #include <linux/export.h>
+#include <linux/list_sort.h>
 #include <linux/ref_tracker.h>
 #include <linux/slab.h>
 #include <linux/stacktrace.h>
 #include <linux/stackdepot.h>
 
 #define REF_TRACKER_STACK_ENTRIES 16
+#define STACK_BUF_SIZE 1024
 
 struct ref_tracker {
 	struct list_head	head;   /* anchor into dir->list or dir->quarantine */
@@ -14,24 +19,87 @@  struct ref_tracker {
 	depot_stack_handle_t	free_stack_handle;
 };
 
-void __ref_tracker_dir_print(struct ref_tracker_dir *dir,
-			   unsigned int display_limit)
+struct ref_tracker_dir_stats {
+	int total;
+	int count;
+	struct {
+		depot_stack_handle_t stack_handle;
+		unsigned int count;
+	} stacks[];
+};
+
+static struct ref_tracker_dir_stats *
+ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit)
 {
+	struct ref_tracker_dir_stats *stats;
 	struct ref_tracker *tracker;
-	unsigned int i = 0;
 
-	lockdep_assert_held(&dir->lock);
+	stats = kmalloc(struct_size(stats, stacks, limit),
+			GFP_NOWAIT | __GFP_NOWARN);
+	if (!stats)
+		return ERR_PTR(-ENOMEM);
+	stats->total = 0;
+	stats->count = 0;
 
 	list_for_each_entry(tracker, &dir->list, head) {
-		if (i < display_limit) {
-			pr_err("leaked reference.\n");
-			if (tracker->alloc_stack_handle)
-				stack_depot_print(tracker->alloc_stack_handle);
-			i++;
-		} else {
-			break;
+		depot_stack_handle_t stack = tracker->alloc_stack_handle;
+		int i;
+
+		++stats->total;
+		for (i = 0; i < stats->count; ++i)
+			if (stats->stacks[i].stack_handle == stack)
+				break;
+		if (i >= limit)
+			continue;
+		if (i >= stats->count) {
+			stats->stacks[i].stack_handle = stack;
+			stats->stacks[i].count = 0;
+			++stats->count;
 		}
+		++stats->stacks[i].count;
+	}
+
+	return stats;
+}
+
+void __ref_tracker_dir_print(struct ref_tracker_dir *dir,
+			   unsigned int display_limit)
+{
+	struct ref_tracker_dir_stats *stats;
+	unsigned int i = 0, skipped;
+	depot_stack_handle_t stack;
+	char *sbuf;
+
+	lockdep_assert_held(&dir->lock);
+
+	if (list_empty(&dir->list))
+		return;
+
+	stats = ref_tracker_get_stats(dir, display_limit);
+	if (IS_ERR(stats)) {
+		pr_err("%s@%pK: couldn't get stats, error %pe\n",
+		       dir->name, dir, stats);
+		return;
 	}
+
+	sbuf = kmalloc(STACK_BUF_SIZE, GFP_NOWAIT | __GFP_NOWARN);
+
+	for (i = 0, skipped = stats->total; i < stats->count; ++i) {
+		stack = stats->stacks[i].stack_handle;
+		if (sbuf && !stack_depot_snprint(stack, sbuf, STACK_BUF_SIZE, 4))
+			sbuf[0] = 0;
+		pr_err("%s@%pK has %d/%d users at\n%s\n", dir->name, dir,
+		       stats->stacks[i].count, stats->total, sbuf);
+		skipped -= stats->stacks[i].count;
+	}
+
+	if (skipped)
+		pr_err("%s@%pK skipped reports about %d/%d users.\n",
+		       dir->name, dir, skipped, stats->total);
+
+	kfree(sbuf);
+
+	kfree(stats);
 }
 EXPORT_SYMBOL(__ref_tracker_dir_print);
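
As a usage note on the transitional ref_tracker_dir_init() macro above: the
two-argument form falls back to the stringified dir expression as the
display name, while the three-argument form passes an explicit one. The
variable and name below are made up for illustration:

	struct ref_tracker_dir tracker_dir;

	ref_tracker_dir_init(&tracker_dir, 16);
	/* expands to __ref_tracker_dir_init(&tracker_dir, 16, "&tracker_dir") */

	ref_tracker_dir_init(&tracker_dir, 16, "wakerefs");
	/* expands to __ref_tracker_dir_init(&tracker_dir, 16, "wakerefs") */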