Message ID | 20220221232542.1481315-6-andrzej.hajda@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915: use ref_tracker library for tracking wakerefs | expand |
On Mon, Feb 21, 2022 at 3:26 PM Andrzej Hajda <andrzej.hajda@intel.com> wrote: > > To improve readibility of ref_tracker printing following changes readability > have been performed: > - reports are printed per stack_handle - log is more compact, > - added display name for ref_tracker_dir, > - stack trace is printed indented, in the same printk call, > - total number of references is printed every time, > - print info about dropped references. > > Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com> > --- > include/linux/ref_tracker.h | 15 +++++-- > lib/ref_tracker.c | 90 ++++++++++++++++++++++++++++++++----- > 2 files changed, 91 insertions(+), 14 deletions(-) > > diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h > index 3e9e9df2a41f5..a2cf1f6309adb 100644 > --- a/include/linux/ref_tracker.h > +++ b/include/linux/ref_tracker.h > @@ -17,12 +17,19 @@ struct ref_tracker_dir { > bool dead; > struct list_head list; /* List of active trackers */ > struct list_head quarantine; /* List of dead trackers */ > + char name[32]; > #endif > }; > > #ifdef CONFIG_REF_TRACKER > -static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, > - unsigned int quarantine_count) > + > +// Temporary allow two and three arguments, until consumers are converted > +#define ref_tracker_dir_init(_d, _q, args...) _ref_tracker_dir_init(_d, _q, ##args, #_d) > +#define _ref_tracker_dir_init(_d, _q, _n, ...) 
__ref_tracker_dir_init(_d, _q, _n) > + > +static inline void __ref_tracker_dir_init(struct ref_tracker_dir *dir, > + unsigned int quarantine_count, > + const char *name) > { > INIT_LIST_HEAD(&dir->list); > INIT_LIST_HEAD(&dir->quarantine); > @@ -31,6 +38,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, > dir->dead = false; > refcount_set(&dir->untracked, 1); > refcount_set(&dir->no_tracker, 1); > + strlcpy(dir->name, name, sizeof(dir->name)); > stack_depot_init(); > } > > @@ -51,7 +59,8 @@ int ref_tracker_free(struct ref_tracker_dir *dir, > #else /* CONFIG_REF_TRACKER */ > > static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, > - unsigned int quarantine_count) > + unsigned int quarantine_count, > + ...) > { > } > > diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c > index 5e9f90bbf771b..ab1253fde244e 100644 > --- a/lib/ref_tracker.c > +++ b/lib/ref_tracker.c > @@ -1,11 +1,16 @@ > // SPDX-License-Identifier: GPL-2.0-or-later > + > +#define pr_fmt(fmt) "ref_tracker: " fmt > + > #include <linux/export.h> > +#include <linux/list_sort.h> > #include <linux/ref_tracker.h> > #include <linux/slab.h> > #include <linux/stacktrace.h> > #include <linux/stackdepot.h> > > #define REF_TRACKER_STACK_ENTRIES 16 > +#define STACK_BUF_SIZE 1024 > > struct ref_tracker { > struct list_head head; /* anchor into dir->list or dir->quarantine */ > @@ -14,24 +19,87 @@ struct ref_tracker { > depot_stack_handle_t free_stack_handle; > }; > > -void __ref_tracker_dir_print(struct ref_tracker_dir *dir, > - unsigned int display_limit) > +struct ref_tracker_dir_stats { > + int total; > + int count; > + struct { > + depot_stack_handle_t stack_handle; > + unsigned int count; > + } stacks[]; > +}; > + > +static struct ref_tracker_dir_stats * > +ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit) > { > + struct ref_tracker_dir_stats *stats; > struct ref_tracker *tracker; > - unsigned int i = 0; > > - lockdep_assert_held(&dir->lock); > + stats 
= kmalloc(struct_size(stats, stacks, limit), > + GFP_NOWAIT | __GFP_NOWARN); I would be more comfortable if the allocation was done by the caller, possibly using GFP_KERNEL and evenutally kvmalloc(), instead of under dir->lock ? > + if (!stats) > + return ERR_PTR(-ENOMEM); > + stats->total = 0; > + stats->count = 0; > > list_for_each_entry(tracker, &dir->list, head) { > - if (i < display_limit) { > - pr_err("leaked reference.\n"); > - if (tracker->alloc_stack_handle) > - stack_depot_print(tracker->alloc_stack_handle); > - i++; > - } else { > - break; > + depot_stack_handle_t stack = tracker->alloc_stack_handle; > + int i; > + > + ++stats->total; > + for (i = 0; i < stats->count; ++i) > + if (stats->stacks[i].stack_handle == stack) > + break; > + if (i >= limit) > + continue; > + if (i >= stats->count) { > + stats->stacks[i].stack_handle = stack; > + stats->stacks[i].count = 0; > + ++stats->count; > } > + ++stats->stacks[i].count; > + } > + > + return stats; > +} > + > +void __ref_tracker_dir_print(struct ref_tracker_dir *dir, > + unsigned int display_limit) > +{ > + struct ref_tracker_dir_stats *stats; > + unsigned int i = 0, skipped; > + depot_stack_handle_t stack; > + char *sbuf; > + > + lockdep_assert_held(&dir->lock); > + > + if (list_empty(&dir->list)) > + return; > + > + stats = ref_tracker_get_stats(dir, display_limit); > + if (IS_ERR(stats)) { > + pr_err("%s@%pK: couldn't get stats, error %pe\n", > + dir->name, dir, stats); > + return; > } > + > + sbuf = kmalloc(STACK_BUF_SIZE, GFP_NOWAIT | __GFP_NOWARN); Same remark. These allocations are most probably going to happen from process context, I think GFP_KERNEL is more robust. 
This is debugging infra, it would be sad if we give up at this point, after storing MB of traces :) > + > + for (i = 0, skipped = stats->total; i < stats->count; ++i) { > + stack = stats->stacks[i].stack_handle; > + if (sbuf && !stack_depot_snprint(stack, sbuf, STACK_BUF_SIZE, 4)) > + sbuf[0] = 0; > + pr_err("%s@%pK has %d/%d users at\n%s\n", dir->name, dir, > + stats->stacks[i].count, stats->total, sbuf); > + skipped -= stats->stacks[i].count; > + } > + > + if (skipped) > + pr_err("%s@%pK skipped reports about %d/%d users.\n", > + dir->name, dir, skipped, stats->total); > + > + kfree(sbuf); > + > + kfree(stats); > } > EXPORT_SYMBOL(__ref_tracker_dir_print); > > -- > 2.25.1 >
On 22.02.2022 01:08, Eric Dumazet wrote: > On Mon, Feb 21, 2022 at 3:26 PM Andrzej Hajda <andrzej.hajda@intel.com> wrote: >> To improve readibility of ref_tracker printing following changes > readability > >> have been performed: >> - reports are printed per stack_handle - log is more compact, >> - added display name for ref_tracker_dir, >> - stack trace is printed indented, in the same printk call, >> - total number of references is printed every time, >> - print info about dropped references. >> >> Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com> >> --- >> include/linux/ref_tracker.h | 15 +++++-- >> lib/ref_tracker.c | 90 ++++++++++++++++++++++++++++++++----- >> 2 files changed, 91 insertions(+), 14 deletions(-) >> >> diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h >> index 3e9e9df2a41f5..a2cf1f6309adb 100644 >> --- a/include/linux/ref_tracker.h >> +++ b/include/linux/ref_tracker.h >> @@ -17,12 +17,19 @@ struct ref_tracker_dir { >> bool dead; >> struct list_head list; /* List of active trackers */ >> struct list_head quarantine; /* List of dead trackers */ >> + char name[32]; >> #endif >> }; >> >> #ifdef CONFIG_REF_TRACKER >> -static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, >> - unsigned int quarantine_count) >> + >> +// Temporary allow two and three arguments, until consumers are converted >> +#define ref_tracker_dir_init(_d, _q, args...) _ref_tracker_dir_init(_d, _q, ##args, #_d) >> +#define _ref_tracker_dir_init(_d, _q, _n, ...) 
__ref_tracker_dir_init(_d, _q, _n) >> + >> +static inline void __ref_tracker_dir_init(struct ref_tracker_dir *dir, >> + unsigned int quarantine_count, >> + const char *name) >> { >> INIT_LIST_HEAD(&dir->list); >> INIT_LIST_HEAD(&dir->quarantine); >> @@ -31,6 +38,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, >> dir->dead = false; >> refcount_set(&dir->untracked, 1); >> refcount_set(&dir->no_tracker, 1); >> + strlcpy(dir->name, name, sizeof(dir->name)); >> stack_depot_init(); >> } >> >> @@ -51,7 +59,8 @@ int ref_tracker_free(struct ref_tracker_dir *dir, >> #else /* CONFIG_REF_TRACKER */ >> >> static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, >> - unsigned int quarantine_count) >> + unsigned int quarantine_count, >> + ...) >> { >> } >> >> diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c >> index 5e9f90bbf771b..ab1253fde244e 100644 >> --- a/lib/ref_tracker.c >> +++ b/lib/ref_tracker.c >> @@ -1,11 +1,16 @@ >> // SPDX-License-Identifier: GPL-2.0-or-later >> + >> +#define pr_fmt(fmt) "ref_tracker: " fmt >> + >> #include <linux/export.h> >> +#include <linux/list_sort.h> >> #include <linux/ref_tracker.h> >> #include <linux/slab.h> >> #include <linux/stacktrace.h> >> #include <linux/stackdepot.h> >> >> #define REF_TRACKER_STACK_ENTRIES 16 >> +#define STACK_BUF_SIZE 1024 >> >> struct ref_tracker { >> struct list_head head; /* anchor into dir->list or dir->quarantine */ >> @@ -14,24 +19,87 @@ struct ref_tracker { >> depot_stack_handle_t free_stack_handle; >> }; >> >> -void __ref_tracker_dir_print(struct ref_tracker_dir *dir, >> - unsigned int display_limit) >> +struct ref_tracker_dir_stats { >> + int total; >> + int count; >> + struct { >> + depot_stack_handle_t stack_handle; >> + unsigned int count; >> + } stacks[]; >> +}; >> + >> +static struct ref_tracker_dir_stats * >> +ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit) >> { >> + struct ref_tracker_dir_stats *stats; >> struct ref_tracker *tracker; >> - 
unsigned int i = 0; >> >> - lockdep_assert_held(&dir->lock); >> + stats = kmalloc(struct_size(stats, stacks, limit), >> + GFP_NOWAIT | __GFP_NOWARN); > I would be more comfortable if the allocation was done by the caller, > possibly using GFP_KERNEL and evenutally kvmalloc(), > instead of under dir->lock ? I though also about it, but decided to left this change to another patch as the change can be substantial and could open another discussion. I am not sure what you mean by 'caller' but it could be even external user of the API: 1. alloc data for ref_tracker_dir_stats. 2. take locks, if necessary. 3. gather stats (ref_tracker_get_stats) atomically. 4. release taken locks. 5. print stats. This way, allocation and printing would happen outside locks. > > >> + if (!stats) >> + return ERR_PTR(-ENOMEM); >> + stats->total = 0; >> + stats->count = 0; >> >> list_for_each_entry(tracker, &dir->list, head) { >> - if (i < display_limit) { >> - pr_err("leaked reference.\n"); >> - if (tracker->alloc_stack_handle) >> - stack_depot_print(tracker->alloc_stack_handle); >> - i++; >> - } else { >> - break; >> + depot_stack_handle_t stack = tracker->alloc_stack_handle; >> + int i; >> + >> + ++stats->total; >> + for (i = 0; i < stats->count; ++i) >> + if (stats->stacks[i].stack_handle == stack) >> + break; >> + if (i >= limit) >> + continue; >> + if (i >= stats->count) { >> + stats->stacks[i].stack_handle = stack; >> + stats->stacks[i].count = 0; >> + ++stats->count; >> } >> + ++stats->stacks[i].count; >> + } >> + >> + return stats; >> +} >> + >> +void __ref_tracker_dir_print(struct ref_tracker_dir *dir, >> + unsigned int display_limit) >> +{ >> + struct ref_tracker_dir_stats *stats; >> + unsigned int i = 0, skipped; >> + depot_stack_handle_t stack; >> + char *sbuf; >> + >> + lockdep_assert_held(&dir->lock); >> + >> + if (list_empty(&dir->list)) >> + return; >> + >> + stats = ref_tracker_get_stats(dir, display_limit); >> + if (IS_ERR(stats)) { >> + pr_err("%s@%pK: couldn't get stats, 
error %pe\n", >> + dir->name, dir, stats); >> + return; >> } >> + >> + sbuf = kmalloc(STACK_BUF_SIZE, GFP_NOWAIT | __GFP_NOWARN); > Same remark. These allocations are most probably going to happen from process > context, I think GFP_KERNEL is more robust. The problem is that in my scenario it can be called under spinlock, this is why I want almost everywhere non-sleeping allocations. > > This is debugging infra, it would be sad if we give up at this point, > after storing MB of traces :) My approach was to avoid allocations if the system is short on memory - better to keep it alive, and we still get the report, just without stacktraces, one can print full stats later (for example via sysfs, or trigger to dmesg) - big chances that the bug will be still there. If you think that is no-go, alternatives I see: - go back to GFP_ATOMIC, - print stack directly, without using stack_depot_snprint, - pre-allocate buffer. Regards Andrzej > >> + >> + for (i = 0, skipped = stats->total; i < stats->count; ++i) { >> + stack = stats->stacks[i].stack_handle; >> + if (sbuf && !stack_depot_snprint(stack, sbuf, STACK_BUF_SIZE, 4)) >> + sbuf[0] = 0; >> + pr_err("%s@%pK has %d/%d users at\n%s\n", dir->name, dir, >> + stats->stacks[i].count, stats->total, sbuf); >> + skipped -= stats->stacks[i].count; >> + } >> + >> + if (skipped) >> + pr_err("%s@%pK skipped reports about %d/%d users.\n", >> + dir->name, dir, skipped, stats->total); >> + >> + kfree(sbuf); >> + >> + kfree(stats); >> } >> EXPORT_SYMBOL(__ref_tracker_dir_print); >> >> -- >> 2.25.1 >>
diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h index 3e9e9df2a41f5..a2cf1f6309adb 100644 --- a/include/linux/ref_tracker.h +++ b/include/linux/ref_tracker.h @@ -17,12 +17,19 @@ struct ref_tracker_dir { bool dead; struct list_head list; /* List of active trackers */ struct list_head quarantine; /* List of dead trackers */ + char name[32]; #endif }; #ifdef CONFIG_REF_TRACKER -static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, - unsigned int quarantine_count) + +// Temporary allow two and three arguments, until consumers are converted +#define ref_tracker_dir_init(_d, _q, args...) _ref_tracker_dir_init(_d, _q, ##args, #_d) +#define _ref_tracker_dir_init(_d, _q, _n, ...) __ref_tracker_dir_init(_d, _q, _n) + +static inline void __ref_tracker_dir_init(struct ref_tracker_dir *dir, + unsigned int quarantine_count, + const char *name) { INIT_LIST_HEAD(&dir->list); INIT_LIST_HEAD(&dir->quarantine); @@ -31,6 +38,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, dir->dead = false; refcount_set(&dir->untracked, 1); refcount_set(&dir->no_tracker, 1); + strlcpy(dir->name, name, sizeof(dir->name)); stack_depot_init(); } @@ -51,7 +59,8 @@ int ref_tracker_free(struct ref_tracker_dir *dir, #else /* CONFIG_REF_TRACKER */ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, - unsigned int quarantine_count) + unsigned int quarantine_count, + ...) 
{ } diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c index 5e9f90bbf771b..ab1253fde244e 100644 --- a/lib/ref_tracker.c +++ b/lib/ref_tracker.c @@ -1,11 +1,16 @@ // SPDX-License-Identifier: GPL-2.0-or-later + +#define pr_fmt(fmt) "ref_tracker: " fmt + #include <linux/export.h> +#include <linux/list_sort.h> #include <linux/ref_tracker.h> #include <linux/slab.h> #include <linux/stacktrace.h> #include <linux/stackdepot.h> #define REF_TRACKER_STACK_ENTRIES 16 +#define STACK_BUF_SIZE 1024 struct ref_tracker { struct list_head head; /* anchor into dir->list or dir->quarantine */ @@ -14,24 +19,87 @@ struct ref_tracker { depot_stack_handle_t free_stack_handle; }; -void __ref_tracker_dir_print(struct ref_tracker_dir *dir, - unsigned int display_limit) +struct ref_tracker_dir_stats { + int total; + int count; + struct { + depot_stack_handle_t stack_handle; + unsigned int count; + } stacks[]; +}; + +static struct ref_tracker_dir_stats * +ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit) { + struct ref_tracker_dir_stats *stats; struct ref_tracker *tracker; - unsigned int i = 0; - lockdep_assert_held(&dir->lock); + stats = kmalloc(struct_size(stats, stacks, limit), + GFP_NOWAIT | __GFP_NOWARN); + if (!stats) + return ERR_PTR(-ENOMEM); + stats->total = 0; + stats->count = 0; list_for_each_entry(tracker, &dir->list, head) { - if (i < display_limit) { - pr_err("leaked reference.\n"); - if (tracker->alloc_stack_handle) - stack_depot_print(tracker->alloc_stack_handle); - i++; - } else { - break; + depot_stack_handle_t stack = tracker->alloc_stack_handle; + int i; + + ++stats->total; + for (i = 0; i < stats->count; ++i) + if (stats->stacks[i].stack_handle == stack) + break; + if (i >= limit) + continue; + if (i >= stats->count) { + stats->stacks[i].stack_handle = stack; + stats->stacks[i].count = 0; + ++stats->count; } + ++stats->stacks[i].count; + } + + return stats; +} + +void __ref_tracker_dir_print(struct ref_tracker_dir *dir, + unsigned int display_limit) 
+{ + struct ref_tracker_dir_stats *stats; + unsigned int i = 0, skipped; + depot_stack_handle_t stack; + char *sbuf; + + lockdep_assert_held(&dir->lock); + + if (list_empty(&dir->list)) + return; + + stats = ref_tracker_get_stats(dir, display_limit); + if (IS_ERR(stats)) { + pr_err("%s@%pK: couldn't get stats, error %pe\n", + dir->name, dir, stats); + return; } + + sbuf = kmalloc(STACK_BUF_SIZE, GFP_NOWAIT | __GFP_NOWARN); + + for (i = 0, skipped = stats->total; i < stats->count; ++i) { + stack = stats->stacks[i].stack_handle; + if (sbuf && !stack_depot_snprint(stack, sbuf, STACK_BUF_SIZE, 4)) + sbuf[0] = 0; + pr_err("%s@%pK has %d/%d users at\n%s\n", dir->name, dir, + stats->stacks[i].count, stats->total, sbuf); + skipped -= stats->stacks[i].count; + } + + if (skipped) + pr_err("%s@%pK skipped reports about %d/%d users.\n", + dir->name, dir, skipped, stats->total); + + kfree(sbuf); + + kfree(stats); } EXPORT_SYMBOL(__ref_tracker_dir_print);
To improve readability of ref_tracker printing following changes have been performed: - reports are printed per stack_handle - log is more compact, - added display name for ref_tracker_dir, - stack trace is printed indented, in the same printk call, - total number of references is printed every time, - print info about dropped references. Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com> --- include/linux/ref_tracker.h | 15 +++++-- lib/ref_tracker.c | 90 ++++++++++++++++++++++++++++++++----- 2 files changed, 91 insertions(+), 14 deletions(-)