diff mbox series

[v3,net-next,01/23] lib: add reference counting tracking infrastructure

Message ID 20211205042217.982127-2-eric.dumazet@gmail.com (mailing list archive)
State Accepted
Delegated to: Netdev Maintainers
Headers show
Series net: add preliminary netdev refcount tracking | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count fail Series longer than 15 patches (and no cover letter)
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 7 maintainers not CCed: mcroce@microsoft.com keescook@chromium.org jarkko@kernel.org arnd@arndb.de johannes.berg@intel.com elver@google.com akpm@linux-foundation.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning CHECK: spinlock_t definition without comment WARNING: 'controled' may be misspelled - perhaps 'controlled'? WARNING: Improper SPDX comment style for 'include/linux/ref_tracker.h', please use '/*' instead WARNING: Missing or malformed SPDX-License-Identifier tag in line 1 WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 81 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Eric Dumazet Dec. 5, 2021, 4:21 a.m. UTC
From: Eric Dumazet <edumazet@google.com>

It can be hard to track where references are taken and released.

In networking, we have annoying issues at device or netns dismantles,
and we had various proposals to ease root causing them.

This patch adds new infrastructure pairing refcount increases
and decreases. This will self document code, because programmers
will have to associate increments/decrements.

This is controlled by CONFIG_REF_TRACKER which can be selected
by users of this feature.

This adds both cpu and memory costs, and thus should probably be
used with care.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
---
 include/linux/ref_tracker.h |  73 +++++++++++++++++++
 lib/Kconfig                 |   5 ++
 lib/Makefile                |   2 +
 lib/ref_tracker.c           | 140 ++++++++++++++++++++++++++++++++++++
 4 files changed, 220 insertions(+)
 create mode 100644 include/linux/ref_tracker.h
 create mode 100644 lib/ref_tracker.c

Comments

Andrzej Hajda Dec. 8, 2021, 2:09 p.m. UTC | #1
Hi Eric,


I've spotted this patchset thanks to LWN; anyway, it was merged very
quickly, and I think it missed a broader review.

As the patch touches the kernel lib, I have added a few people who could be
interested.


On 05.12.2021 05:21, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> It can be hard to track where references are taken and released.
>
> In networking, we have annoying issues at device or netns dismantles,
> and we had various proposals to ease root causing them.
>
> This patch adds new infrastructure pairing refcount increases
> and decreases. This will self document code, because programmers
> will have to associate increments/decrements.
>
> This is controled by CONFIG_REF_TRACKER which can be selected
> by users of this feature.
>
> This adds both cpu and memory costs, and thus should probably be
> used with care.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
> ---


Life is surprising: I was working on my own framework, solving the same
issue, with the intention to publish it in a few days :)


My approach was a little different:

1. Instead of creating separate framework I have extended debug_objects.

2. There were no additional fields in refcounted object and trackers - 
infrastructure of debug_objects was reused - debug_objects tracked both 
pointers of refcounted object and its users.


Have you considered using debug_objects? It seems to be a good place to put
it; I am not sure about the performance differences.

One more thing about the design - as I understand it, CONFIG_REF_TRACKER turns
on all trackers in the whole kernel; have you considered the possibility
of helpers to enable/disable tracking per class of objects?


>   include/linux/ref_tracker.h |  73 +++++++++++++++++++
>   lib/Kconfig                 |   5 ++
>   lib/Makefile                |   2 +
>   lib/ref_tracker.c           | 140 ++++++++++++++++++++++++++++++++++++
>   4 files changed, 220 insertions(+)
>   create mode 100644 include/linux/ref_tracker.h
>   create mode 100644 lib/ref_tracker.c
>
> diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h
> new file mode 100644
> index 0000000000000000000000000000000000000000..c11c9db5825cf933acf529c83db441a818135f29
> --- /dev/null
> +++ b/include/linux/ref_tracker.h
> @@ -0,0 +1,73 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +#ifndef _LINUX_REF_TRACKER_H
> +#define _LINUX_REF_TRACKER_H
> +#include <linux/refcount.h>
> +#include <linux/types.h>
> +#include <linux/spinlock.h>
> +
> +struct ref_tracker;


With sth similar to:

#ifdef CONFIG_REF_TRACKER

typedef struct ref_tracker *ref_tracker_p;
#else
typedef struct {} ref_tracker_p;
#endif

you can eliminate the unused field in users' structures.

Beside this it looks OK to me.


Regards

Andrzej


> +
> +struct ref_tracker_dir {
> +#ifdef CONFIG_REF_TRACKER
> +	spinlock_t		lock;
> +	unsigned int		quarantine_avail;
> +	refcount_t		untracked;
> +	struct list_head	list; /* List of active trackers */
> +	struct list_head	quarantine; /* List of dead trackers */
> +#endif
> +};
> +
> +#ifdef CONFIG_REF_TRACKER
> +static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
> +					unsigned int quarantine_count)
> +{
> +	INIT_LIST_HEAD(&dir->list);
> +	INIT_LIST_HEAD(&dir->quarantine);
> +	spin_lock_init(&dir->lock);
> +	dir->quarantine_avail = quarantine_count;
> +	refcount_set(&dir->untracked, 1);
> +}
> +
> +void ref_tracker_dir_exit(struct ref_tracker_dir *dir);
> +
> +void ref_tracker_dir_print(struct ref_tracker_dir *dir,
> +			   unsigned int display_limit);
> +
> +int ref_tracker_alloc(struct ref_tracker_dir *dir,
> +		      struct ref_tracker **trackerp, gfp_t gfp);
> +
> +int ref_tracker_free(struct ref_tracker_dir *dir,
> +		     struct ref_tracker **trackerp);
> +
> +#else /* CONFIG_REF_TRACKER */
> +
> +static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
> +					unsigned int quarantine_count)
> +{
> +}
> +
> +static inline void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
> +{
> +}
> +
> +static inline void ref_tracker_dir_print(struct ref_tracker_dir *dir,
> +					 unsigned int display_limit)
> +{
> +}
> +
> +static inline int ref_tracker_alloc(struct ref_tracker_dir *dir,
> +				    struct ref_tracker **trackerp,
> +				    gfp_t gfp)
> +{
> +	return 0;
> +}
> +
> +static inline int ref_tracker_free(struct ref_tracker_dir *dir,
> +				   struct ref_tracker **trackerp)
> +{
> +	return 0;
> +}
> +
> +#endif
> +
> +#endif /* _LINUX_REF_TRACKER_H */
> diff --git a/lib/Kconfig b/lib/Kconfig
> index 5e7165e6a346c9bec878b78c8c8c3d175fc98dfd..655b0e43f260bfca63240794191e3f1890b2e801 100644
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -680,6 +680,11 @@ config STACK_HASH_ORDER
>   	 Select the hash size as a power of 2 for the stackdepot hash table.
>   	 Choose a lower value to reduce the memory impact.
>   
> +config REF_TRACKER
> +	bool
> +	depends on STACKTRACE_SUPPORT
> +	select STACKDEPOT
> +
>   config SBITMAP
>   	bool
>   
> diff --git a/lib/Makefile b/lib/Makefile
> index 364c23f1557816f73aebd8304c01224a4846ac6c..c1fd9243ddb9cc1ac5252d7eb8009f9290782c4a 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -270,6 +270,8 @@ obj-$(CONFIG_STACKDEPOT) += stackdepot.o
>   KASAN_SANITIZE_stackdepot.o := n
>   KCOV_INSTRUMENT_stackdepot.o := n
>   
> +obj-$(CONFIG_REF_TRACKER) += ref_tracker.o
> +
>   libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
>   	       fdt_empty_tree.o fdt_addresses.o
>   $(foreach file, $(libfdt_files), \
> diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..0ae2e66dcf0fdb976f4cb99e747c9448b37f22cc
> --- /dev/null
> +++ b/lib/ref_tracker.c
> @@ -0,0 +1,140 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +#include <linux/export.h>
> +#include <linux/ref_tracker.h>
> +#include <linux/slab.h>
> +#include <linux/stacktrace.h>
> +#include <linux/stackdepot.h>
> +
> +#define REF_TRACKER_STACK_ENTRIES 16
> +
> +struct ref_tracker {
> +	struct list_head	head;   /* anchor into dir->list or dir->quarantine */
> +	bool			dead;
> +	depot_stack_handle_t	alloc_stack_handle;
> +	depot_stack_handle_t	free_stack_handle;
> +};
> +
> +void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
> +{
> +	struct ref_tracker *tracker, *n;
> +	unsigned long flags;
> +	bool leak = false;
> +
> +	spin_lock_irqsave(&dir->lock, flags);
> +	list_for_each_entry_safe(tracker, n, &dir->quarantine, head) {
> +		list_del(&tracker->head);
> +		kfree(tracker);
> +		dir->quarantine_avail++;
> +	}
> +	list_for_each_entry_safe(tracker, n, &dir->list, head) {
> +		pr_err("leaked reference.\n");
> +		if (tracker->alloc_stack_handle)
> +			stack_depot_print(tracker->alloc_stack_handle);
> +		leak = true;
> +		list_del(&tracker->head);
> +		kfree(tracker);
> +	}
> +	spin_unlock_irqrestore(&dir->lock, flags);
> +	WARN_ON_ONCE(leak);
> +	WARN_ON_ONCE(refcount_read(&dir->untracked) != 1);
> +}
> +EXPORT_SYMBOL(ref_tracker_dir_exit);
> +
> +void ref_tracker_dir_print(struct ref_tracker_dir *dir,
> +			   unsigned int display_limit)
> +{
> +	struct ref_tracker *tracker;
> +	unsigned long flags;
> +	unsigned int i = 0;
> +
> +	spin_lock_irqsave(&dir->lock, flags);
> +	list_for_each_entry(tracker, &dir->list, head) {
> +		if (i < display_limit) {
> +			pr_err("leaked reference.\n");
> +			if (tracker->alloc_stack_handle)
> +				stack_depot_print(tracker->alloc_stack_handle);
> +			i++;
> +		} else {
> +			break;
> +		}
> +	}
> +	spin_unlock_irqrestore(&dir->lock, flags);
> +}
> +EXPORT_SYMBOL(ref_tracker_dir_print);
> +
> +int ref_tracker_alloc(struct ref_tracker_dir *dir,
> +		      struct ref_tracker **trackerp,
> +		      gfp_t gfp)
> +{
> +	unsigned long entries[REF_TRACKER_STACK_ENTRIES];
> +	struct ref_tracker *tracker;
> +	unsigned int nr_entries;
> +	unsigned long flags;
> +
> +	*trackerp = tracker = kzalloc(sizeof(*tracker), gfp | __GFP_NOFAIL);
> +	if (unlikely(!tracker)) {
> +		pr_err_once("memory allocation failure, unreliable refcount tracker.\n");
> +		refcount_inc(&dir->untracked);
> +		return -ENOMEM;
> +	}
> +	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
> +	nr_entries = filter_irq_stacks(entries, nr_entries);
> +	tracker->alloc_stack_handle = stack_depot_save(entries, nr_entries, gfp);
> +
> +	spin_lock_irqsave(&dir->lock, flags);
> +	list_add(&tracker->head, &dir->list);
> +	spin_unlock_irqrestore(&dir->lock, flags);
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(ref_tracker_alloc);
> +
> +int ref_tracker_free(struct ref_tracker_dir *dir,
> +		     struct ref_tracker **trackerp)
> +{
> +	unsigned long entries[REF_TRACKER_STACK_ENTRIES];
> +	struct ref_tracker *tracker = *trackerp;
> +	depot_stack_handle_t stack_handle;
> +	unsigned int nr_entries;
> +	unsigned long flags;
> +
> +	if (!tracker) {
> +		refcount_dec(&dir->untracked);
> +		return -EEXIST;
> +	}
> +	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
> +	nr_entries = filter_irq_stacks(entries, nr_entries);
> +	stack_handle = stack_depot_save(entries, nr_entries, GFP_ATOMIC);
> +
> +	spin_lock_irqsave(&dir->lock, flags);
> +	if (tracker->dead) {
> +		pr_err("reference already released.\n");
> +		if (tracker->alloc_stack_handle) {
> +			pr_err("allocated in:\n");
> +			stack_depot_print(tracker->alloc_stack_handle);
> +		}
> +		if (tracker->free_stack_handle) {
> +			pr_err("freed in:\n");
> +			stack_depot_print(tracker->free_stack_handle);
> +		}
> +		spin_unlock_irqrestore(&dir->lock, flags);
> +		WARN_ON_ONCE(1);
> +		return -EINVAL;
> +	}
> +	tracker->dead = true;
> +
> +	tracker->free_stack_handle = stack_handle;
> +
> +	list_move_tail(&tracker->head, &dir->quarantine);
> +	if (!dir->quarantine_avail) {
> +		tracker = list_first_entry(&dir->quarantine, struct ref_tracker, head);
> +		list_del(&tracker->head);
> +	} else {
> +		dir->quarantine_avail--;
> +		tracker = NULL;
> +	}
> +	spin_unlock_irqrestore(&dir->lock, flags);
> +
> +	kfree(tracker);
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(ref_tracker_free);
Dmitry Vyukov Dec. 8, 2021, 2:27 p.m. UTC | #2
On Wed, 8 Dec 2021 at 15:09, Andrzej Hajda <andrzej.hajda@intel.com> wrote:
>
> Hi Eric,
>
>
> I've spotted this patchset thanks to LWN, anyway it was merged very
> quickly, I think it missed more broader review.
>
> As the patch touches kernel lib I have added few people who could be
> interested.
>
>
> On 05.12.2021 05:21, Eric Dumazet wrote:
> > From: Eric Dumazet <edumazet@google.com>
> >
> > It can be hard to track where references are taken and released.
> >
> > In networking, we have annoying issues at device or netns dismantles,
> > and we had various proposals to ease root causing them.
> >
> > This patch adds new infrastructure pairing refcount increases
> > and decreases. This will self document code, because programmers
> > will have to associate increments/decrements.
> >
> > This is controled by CONFIG_REF_TRACKER which can be selected
> > by users of this feature.
> >
> > This adds both cpu and memory costs, and thus should probably be
> > used with care.
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
> > ---
>
>
> Life is surprising, I was working on my own framework, solving the same
> issue, with intention to publish it in few days :)
>
>
> My approach was little different:
>
> 1. Instead of creating separate framework I have extended debug_objects.
>
> 2. There were no additional fields in refcounted object and trackers -
> infrastructure of debug_objects was reused - debug_objects tracked both
> pointers of refcounted object and its users.
>
> Have you considered using debug_object? it seems to be good place to put
> it there, I am not sure about performance differences.

Hi Andrzej,

How exactly did you do it? Do you have a link to your patch?
There still should be something similar to `struct ref_tracker` in
this patch, right? Or how do you match decrements with increments and
understand when a double-decrement happens?


> One more thing about design - as I understand CONFIG_REF_TRACKER turns
> on all trackers in whole kernel, have you considered possibility/helpers
> to enable/disable tracking per class of objects?
>
>
> >   include/linux/ref_tracker.h |  73 +++++++++++++++++++
> >   lib/Kconfig                 |   5 ++
> >   lib/Makefile                |   2 +
> >   lib/ref_tracker.c           | 140 ++++++++++++++++++++++++++++++++++++
> >   4 files changed, 220 insertions(+)
> >   create mode 100644 include/linux/ref_tracker.h
> >   create mode 100644 lib/ref_tracker.c
> >
> > diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..c11c9db5825cf933acf529c83db441a818135f29
> > --- /dev/null
> > +++ b/include/linux/ref_tracker.h
> > @@ -0,0 +1,73 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +#ifndef _LINUX_REF_TRACKER_H
> > +#define _LINUX_REF_TRACKER_H
> > +#include <linux/refcount.h>
> > +#include <linux/types.h>
> > +#include <linux/spinlock.h>
> > +
> > +struct ref_tracker;
>
>
> With sth similar to:
>
> #ifdef CONFIG_REF_TRACKER
>
> typedef struct ref_tracker *ref_tracker_p;
> #else
> typedef struct {} ref_tracker_p;
> #endif
>
> you can eliminate unused field in user's structures.
>
> Beside this it looks OK to me.
>
>
> Regards
>
> Andrzej
>
>
> > +
> > +struct ref_tracker_dir {
> > +#ifdef CONFIG_REF_TRACKER
> > +     spinlock_t              lock;
> > +     unsigned int            quarantine_avail;
> > +     refcount_t              untracked;
> > +     struct list_head        list; /* List of active trackers */
> > +     struct list_head        quarantine; /* List of dead trackers */
> > +#endif
> > +};
> > +
> > +#ifdef CONFIG_REF_TRACKER
> > +static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
> > +                                     unsigned int quarantine_count)
> > +{
> > +     INIT_LIST_HEAD(&dir->list);
> > +     INIT_LIST_HEAD(&dir->quarantine);
> > +     spin_lock_init(&dir->lock);
> > +     dir->quarantine_avail = quarantine_count;
> > +     refcount_set(&dir->untracked, 1);
> > +}
> > +
> > +void ref_tracker_dir_exit(struct ref_tracker_dir *dir);
> > +
> > +void ref_tracker_dir_print(struct ref_tracker_dir *dir,
> > +                        unsigned int display_limit);
> > +
> > +int ref_tracker_alloc(struct ref_tracker_dir *dir,
> > +                   struct ref_tracker **trackerp, gfp_t gfp);
> > +
> > +int ref_tracker_free(struct ref_tracker_dir *dir,
> > +                  struct ref_tracker **trackerp);
> > +
> > +#else /* CONFIG_REF_TRACKER */
> > +
> > +static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
> > +                                     unsigned int quarantine_count)
> > +{
> > +}
> > +
> > +static inline void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
> > +{
> > +}
> > +
> > +static inline void ref_tracker_dir_print(struct ref_tracker_dir *dir,
> > +                                      unsigned int display_limit)
> > +{
> > +}
> > +
> > +static inline int ref_tracker_alloc(struct ref_tracker_dir *dir,
> > +                                 struct ref_tracker **trackerp,
> > +                                 gfp_t gfp)
> > +{
> > +     return 0;
> > +}
> > +
> > +static inline int ref_tracker_free(struct ref_tracker_dir *dir,
> > +                                struct ref_tracker **trackerp)
> > +{
> > +     return 0;
> > +}
> > +
> > +#endif
> > +
> > +#endif /* _LINUX_REF_TRACKER_H */
> > diff --git a/lib/Kconfig b/lib/Kconfig
> > index 5e7165e6a346c9bec878b78c8c8c3d175fc98dfd..655b0e43f260bfca63240794191e3f1890b2e801 100644
> > --- a/lib/Kconfig
> > +++ b/lib/Kconfig
> > @@ -680,6 +680,11 @@ config STACK_HASH_ORDER
> >        Select the hash size as a power of 2 for the stackdepot hash table.
> >        Choose a lower value to reduce the memory impact.
> >
> > +config REF_TRACKER
> > +     bool
> > +     depends on STACKTRACE_SUPPORT
> > +     select STACKDEPOT
> > +
> >   config SBITMAP
> >       bool
> >
> > diff --git a/lib/Makefile b/lib/Makefile
> > index 364c23f1557816f73aebd8304c01224a4846ac6c..c1fd9243ddb9cc1ac5252d7eb8009f9290782c4a 100644
> > --- a/lib/Makefile
> > +++ b/lib/Makefile
> > @@ -270,6 +270,8 @@ obj-$(CONFIG_STACKDEPOT) += stackdepot.o
> >   KASAN_SANITIZE_stackdepot.o := n
> >   KCOV_INSTRUMENT_stackdepot.o := n
> >
> > +obj-$(CONFIG_REF_TRACKER) += ref_tracker.o
> > +
> >   libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
> >              fdt_empty_tree.o fdt_addresses.o
> >   $(foreach file, $(libfdt_files), \
> > diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..0ae2e66dcf0fdb976f4cb99e747c9448b37f22cc
> > --- /dev/null
> > +++ b/lib/ref_tracker.c
> > @@ -0,0 +1,140 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +#include <linux/export.h>
> > +#include <linux/ref_tracker.h>
> > +#include <linux/slab.h>
> > +#include <linux/stacktrace.h>
> > +#include <linux/stackdepot.h>
> > +
> > +#define REF_TRACKER_STACK_ENTRIES 16
> > +
> > +struct ref_tracker {
> > +     struct list_head        head;   /* anchor into dir->list or dir->quarantine */
> > +     bool                    dead;
> > +     depot_stack_handle_t    alloc_stack_handle;
> > +     depot_stack_handle_t    free_stack_handle;
> > +};
> > +
> > +void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
> > +{
> > +     struct ref_tracker *tracker, *n;
> > +     unsigned long flags;
> > +     bool leak = false;
> > +
> > +     spin_lock_irqsave(&dir->lock, flags);
> > +     list_for_each_entry_safe(tracker, n, &dir->quarantine, head) {
> > +             list_del(&tracker->head);
> > +             kfree(tracker);
> > +             dir->quarantine_avail++;
> > +     }
> > +     list_for_each_entry_safe(tracker, n, &dir->list, head) {
> > +             pr_err("leaked reference.\n");
> > +             if (tracker->alloc_stack_handle)
> > +                     stack_depot_print(tracker->alloc_stack_handle);
> > +             leak = true;
> > +             list_del(&tracker->head);
> > +             kfree(tracker);
> > +     }
> > +     spin_unlock_irqrestore(&dir->lock, flags);
> > +     WARN_ON_ONCE(leak);
> > +     WARN_ON_ONCE(refcount_read(&dir->untracked) != 1);
> > +}
> > +EXPORT_SYMBOL(ref_tracker_dir_exit);
> > +
> > +void ref_tracker_dir_print(struct ref_tracker_dir *dir,
> > +                        unsigned int display_limit)
> > +{
> > +     struct ref_tracker *tracker;
> > +     unsigned long flags;
> > +     unsigned int i = 0;
> > +
> > +     spin_lock_irqsave(&dir->lock, flags);
> > +     list_for_each_entry(tracker, &dir->list, head) {
> > +             if (i < display_limit) {
> > +                     pr_err("leaked reference.\n");
> > +                     if (tracker->alloc_stack_handle)
> > +                             stack_depot_print(tracker->alloc_stack_handle);
> > +                     i++;
> > +             } else {
> > +                     break;
> > +             }
> > +     }
> > +     spin_unlock_irqrestore(&dir->lock, flags);
> > +}
> > +EXPORT_SYMBOL(ref_tracker_dir_print);
> > +
> > +int ref_tracker_alloc(struct ref_tracker_dir *dir,
> > +                   struct ref_tracker **trackerp,
> > +                   gfp_t gfp)
> > +{
> > +     unsigned long entries[REF_TRACKER_STACK_ENTRIES];
> > +     struct ref_tracker *tracker;
> > +     unsigned int nr_entries;
> > +     unsigned long flags;
> > +
> > +     *trackerp = tracker = kzalloc(sizeof(*tracker), gfp | __GFP_NOFAIL);
> > +     if (unlikely(!tracker)) {
> > +             pr_err_once("memory allocation failure, unreliable refcount tracker.\n");
> > +             refcount_inc(&dir->untracked);
> > +             return -ENOMEM;
> > +     }
> > +     nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
> > +     nr_entries = filter_irq_stacks(entries, nr_entries);
> > +     tracker->alloc_stack_handle = stack_depot_save(entries, nr_entries, gfp);
> > +
> > +     spin_lock_irqsave(&dir->lock, flags);
> > +     list_add(&tracker->head, &dir->list);
> > +     spin_unlock_irqrestore(&dir->lock, flags);
> > +     return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(ref_tracker_alloc);
> > +
> > +int ref_tracker_free(struct ref_tracker_dir *dir,
> > +                  struct ref_tracker **trackerp)
> > +{
> > +     unsigned long entries[REF_TRACKER_STACK_ENTRIES];
> > +     struct ref_tracker *tracker = *trackerp;
> > +     depot_stack_handle_t stack_handle;
> > +     unsigned int nr_entries;
> > +     unsigned long flags;
> > +
> > +     if (!tracker) {
> > +             refcount_dec(&dir->untracked);
> > +             return -EEXIST;
> > +     }
> > +     nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
> > +     nr_entries = filter_irq_stacks(entries, nr_entries);
> > +     stack_handle = stack_depot_save(entries, nr_entries, GFP_ATOMIC);
> > +
> > +     spin_lock_irqsave(&dir->lock, flags);
> > +     if (tracker->dead) {
> > +             pr_err("reference already released.\n");
> > +             if (tracker->alloc_stack_handle) {
> > +                     pr_err("allocated in:\n");
> > +                     stack_depot_print(tracker->alloc_stack_handle);
> > +             }
> > +             if (tracker->free_stack_handle) {
> > +                     pr_err("freed in:\n");
> > +                     stack_depot_print(tracker->free_stack_handle);
> > +             }
> > +             spin_unlock_irqrestore(&dir->lock, flags);
> > +             WARN_ON_ONCE(1);
> > +             return -EINVAL;
> > +     }
> > +     tracker->dead = true;
> > +
> > +     tracker->free_stack_handle = stack_handle;
> > +
> > +     list_move_tail(&tracker->head, &dir->quarantine);
> > +     if (!dir->quarantine_avail) {
> > +             tracker = list_first_entry(&dir->quarantine, struct ref_tracker, head);
> > +             list_del(&tracker->head);
> > +     } else {
> > +             dir->quarantine_avail--;
> > +             tracker = NULL;
> > +     }
> > +     spin_unlock_irqrestore(&dir->lock, flags);
> > +
> > +     kfree(tracker);
> > +     return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(ref_tracker_free);
Jakub Kicinski Dec. 8, 2021, 2:59 p.m. UTC | #3
On Wed, 8 Dec 2021 15:09:17 +0100 Andrzej Hajda wrote:
> On 05.12.2021 05:21, Eric Dumazet wrote:
> > From: Eric Dumazet <edumazet@google.com>
> >
> > It can be hard to track where references are taken and released.
> >
> > In networking, we have annoying issues at device or netns dismantles,
> > and we had various proposals to ease root causing them.
> >
> > This patch adds new infrastructure pairing refcount increases
> > and decreases. This will self document code, because programmers
> > will have to associate increments/decrements.
> >
> > This is controled by CONFIG_REF_TRACKER which can be selected
> > by users of this feature.
> >
> > This adds both cpu and memory costs, and thus should probably be
> > used with care.
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
> 
> Life is surprising, I was working on my own framework, solving the same 
> issue, with intention to publish it in few days :)
>
> My approach was little different:
> 
> 1. Instead of creating separate framework I have extended debug_objects.
> 
> 2. There were no additional fields in refcounted object and trackers - 
> infrastructure of debug_objects was reused - debug_objects tracked both 
> pointers of refcounted object and its users.
> 
> 
> Have you considered using debug_object? it seems to be good place to put 
> it there, I am not sure about performance differences.

Something along these lines?

https://lore.kernel.org/all/20211117174723.2305681-1-kuba@kernel.org/

;)

> One more thing about design - as I understand CONFIG_REF_TRACKER turns 
> on all trackers in whole kernel, have you considered possibility/helpers 
> to enable/disable tracking per class of objects?
Andrzej Hajda Dec. 8, 2021, 3:04 p.m. UTC | #4
On 08.12.2021 15:27, Dmitry Vyukov wrote:
> On Wed, 8 Dec 2021 at 15:09, Andrzej Hajda <andrzej.hajda@intel.com> wrote:
>> Hi Eric,
>>
>>
>> I've spotted this patchset thanks to LWN, anyway it was merged very
>> quickly, I think it missed more broader review.
>>
>> As the patch touches kernel lib I have added few people who could be
>> interested.
>>
>>
>> On 05.12.2021 05:21, Eric Dumazet wrote:
>>> From: Eric Dumazet <edumazet@google.com>
>>>
>>> It can be hard to track where references are taken and released.
>>>
>>> In networking, we have annoying issues at device or netns dismantles,
>>> and we had various proposals to ease root causing them.
>>>
>>> This patch adds new infrastructure pairing refcount increases
>>> and decreases. This will self document code, because programmers
>>> will have to associate increments/decrements.
>>>
>>> This is controled by CONFIG_REF_TRACKER which can be selected
>>> by users of this feature.
>>>
>>> This adds both cpu and memory costs, and thus should probably be
>>> used with care.
>>>
>>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>>> Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
>>> ---
>>
>> Life is surprising, I was working on my own framework, solving the same
>> issue, with intention to publish it in few days :)
>>
>>
>> My approach was little different:
>>
>> 1. Instead of creating separate framework I have extended debug_objects.
>>
>> 2. There were no additional fields in refcounted object and trackers -
>> infrastructure of debug_objects was reused - debug_objects tracked both
>> pointers of refcounted object and its users.
>>
>> Have you considered using debug_object? it seems to be good place to put
>> it there, I am not sure about performance differences.
> Hi Andrzej,
>
> How exactly did you do it? Do you have a link to your patch?
> There still should be something similar to `struct ref_tracker` in
> this patch, right? Or how do you match decrements with increments and
> understand when a double-decrement happens?


When taking/dropping a reference, the user should pass a pointer to the
object that uses the reference (the user).

And this pointer is tracked by debug_objects:

- on taking a reference: the pointer is added to an in-framework array
associated with that reference,

- on dropping a reference: the framework checks if the pointer is in the
array/quarantine, and a bug is reported accordingly.

- on destroying a reference: a bug is reported if the users array is not empty,

- on taking/dropping a reference to a non-existing/destroyed object: a bug
is reported.


So instead of adding a tracker field to the user and passing it to the
framework, the address of the user itself is passed to the framework.


Regards

Andrzej
Andrzej Hajda Dec. 8, 2021, 3:11 p.m. UTC | #5
On 08.12.2021 15:59, Jakub Kicinski wrote:
> On Wed, 8 Dec 2021 15:09:17 +0100 Andrzej Hajda wrote:
>> On 05.12.2021 05:21, Eric Dumazet wrote:
>>> From: Eric Dumazet <edumazet@google.com>
>>>
>>> It can be hard to track where references are taken and released.
>>>
>>> In networking, we have annoying issues at device or netns dismantles,
>>> and we had various proposals to ease root causing them.
>>>
>>> This patch adds new infrastructure pairing refcount increases
>>> and decreases. This will self document code, because programmers
>>> will have to associate increments/decrements.
>>>
>>> This is controled by CONFIG_REF_TRACKER which can be selected
>>> by users of this feature.
>>>
>>> This adds both cpu and memory costs, and thus should probably be
>>> used with care.
>>>
>>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>>> Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
>> Life is surprising, I was working on my own framework, solving the same
>> issue, with intention to publish it in few days :)
>>
>> My approach was little different:
>>
>> 1. Instead of creating separate framework I have extended debug_objects.
>>
>> 2. There were no additional fields in refcounted object and trackers -
>> infrastructure of debug_objects was reused - debug_objects tracked both
>> pointers of refcounted object and its users.
>>
>>
>> Have you considered using debug_object? it seems to be good place to put
>> it there, I am not sure about performance differences.
> Something along these lines?
>
> https://lore.kernel.org/all/20211117174723.2305681-1-kuba@kernel.org/
>
> ;)


Oops, I was not clear in my last sentence; I meant "extend debug_objects
with tracking users of the object" :)


Regards

Andrzej


>
>> One more thing about design - as I understand CONFIG_REF_TRACKER turns
>> on all trackers in whole kernel, have you considered possibility/helpers
>> to enable/disable tracking per class of objects?
Jiri Slaby Dec. 15, 2021, 10:18 a.m. UTC | #6
On 05. 12. 21, 5:21, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> It can be hard to track where references are taken and released.
> 
> In networking, we have annoying issues at device or netns dismantles,
> and we had various proposals to ease root causing them.
...
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -680,6 +680,11 @@ config STACK_HASH_ORDER
>   	 Select the hash size as a power of 2 for the stackdepot hash table.
>   	 Choose a lower value to reduce the memory impact.
>   
> +config REF_TRACKER
> +	bool
> +	depends on STACKTRACE_SUPPORT
> +	select STACKDEPOT

Hi,

I have to:
+       select STACKDEPOT_ALWAYS_INIT
here. Otherwise I see this during boot:

> BUG: unable to handle page fault for address: 00000000001e6f80
> #PF: supervisor read access in kernel mode
> #PF: error_code(0x0000) - not-present page
> PGD 0 P4D 0
> Oops: 0000 [#1] PREEMPT SMP PTI
> CPU: 1 PID: 1 Comm: swapper/0 Tainted: G          I       5.16.0-rc5-next-20211214-vanilla+ #46 2756e36611a8c8a8271884ae04571fc88e1cb566
> Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
> RIP: 0010:__stack_depot_save (lib/stackdepot.c:373) 
> Code: 04 31 fb 83 fe 03 77 97 83 fe 02 74 7a 83 fe 03 74 72 83 fe 01 74 73 48 8b 05 45 ec 11 02 89 d9 81 e1 ff ff 0f 00 48 8d 0c c8 <48> 8b 29 48 85 ed 75 12 e9 9f 00 00 00 48 8b 6d 00 48 85 ed 0f 84
> All code
> ========
>    0:	04 31                	add    $0x31,%al
>    2:	fb                   	sti    
>    3:	83 fe 03             	cmp    $0x3,%esi
>    6:	77 97                	ja     0xffffffffffffff9f
>    8:	83 fe 02             	cmp    $0x2,%esi
>    b:	74 7a                	je     0x87
>    d:	83 fe 03             	cmp    $0x3,%esi
>   10:	74 72                	je     0x84
>   12:	83 fe 01             	cmp    $0x1,%esi
>   15:	74 73                	je     0x8a
>   17:	48 8b 05 45 ec 11 02 	mov    0x211ec45(%rip),%rax        # 0x211ec63
>   1e:	89 d9                	mov    %ebx,%ecx
>   20:	81 e1 ff ff 0f 00    	and    $0xfffff,%ecx
>   26:	48 8d 0c c8          	lea    (%rax,%rcx,8),%rcx
>   2a:*	48 8b 29             	mov    (%rcx),%rbp		<-- trapping instruction
>   2d:	48 85 ed             	test   %rbp,%rbp
>   30:	75 12                	jne    0x44
>   32:	e9 9f 00 00 00       	jmp    0xd6
>   37:	48 8b 6d 00          	mov    0x0(%rbp),%rbp
>   3b:	48 85 ed             	test   %rbp,%rbp
>   3e:	0f                   	.byte 0xf
>   3f:	84                   	.byte 0x84
> 
> Code starting with the faulting instruction
> ===========================================
>    0:	48 8b 29             	mov    (%rcx),%rbp
>    3:	48 85 ed             	test   %rbp,%rbp
>    6:	75 12                	jne    0x1a
>    8:	e9 9f 00 00 00       	jmp    0xac
>    d:	48 8b 6d 00          	mov    0x0(%rbp),%rbp
>   11:	48 85 ed             	test   %rbp,%rbp
>   14:	0f                   	.byte 0xf
>   15:	84                   	.byte 0x84
> RSP: 0000:ffffb3f700027b78 EFLAGS: 00010206
> RAX: 0000000000000000 RBX: 000000004ea3cdf0 RCX: 00000000001e6f80
> RDX: 000000000000000d RSI: 0000000000000002 RDI: 00000000793ec676
> RBP: ffff8b578094f4d0 R08: 0000000043abc8c3 R09: 000000000000000d
> R10: 0000000000000015 R11: 000000000000001c R12: 0000000000000001
> R13: 0000000000000cc0 R14: ffffb3f700027bd8 R15: 000000000000000d
> FS:  0000000000000000(0000) GS:ffff8b5845c80000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00000000001e6f80 CR3: 0000000199410000 CR4: 00000000000006e0
> Call Trace:
> <TASK>
> ref_tracker_alloc (lib/ref_tracker.c:84) 
> net_rx_queue_update_kobjects (net/core/net-sysfs.c:1049 net/core/net-sysfs.c:1101) 
> netdev_register_kobject (net/core/net-sysfs.c:1761 net/core/net-sysfs.c:2012) 
> register_netdevice (net/core/dev.c:9660) 
> register_netdev (net/core/dev.c:9784) 
> loopback_net_init (drivers/net/loopback.c:217) 
> ops_init (net/core/net_namespace.c:140) 
> register_pernet_operations (net/core/net_namespace.c:1148 net/core/net_namespace.c:1217) 
> register_pernet_device (net/core/net_namespace.c:1304) 
> net_dev_init (net/core/dev.c:11014) 
> ? sysctl_core_init (net/core/dev.c:10958) 
> do_one_initcall (init/main.c:1303) 
> kernel_init_freeable (init/main.c:1377 init/main.c:1394 init/main.c:1413 init/main.c:1618) 
> ? rest_init (init/main.c:1499) 
> kernel_init (init/main.c:1509) 
> ret_from_fork (arch/x86/entry/entry_64.S:301) 
> </TASK>
> Modules linked in:
> CR2: 00000000001e6f80
> ---[ end trace 0000000000000000 ]---
> RIP: 0010:__stack_depot_save (lib/stackdepot.c:373) 
> Code: 04 31 fb 83 fe 03 77 97 83 fe 02 74 7a 83 fe 03 74 72 83 fe 01 74 73 48 8b 05 45 ec 11 02 89 d9 81 e1 ff ff 0f 00 48 8d 0c c8 <48> 8b 29 48 85 ed 75 12 e9 9f 00 00 00 48 8b 6d 00 48 85 ed 0f 84
> All code
> ========
>    0:	04 31                	add    $0x31,%al
>    2:	fb                   	sti    
>    3:	83 fe 03             	cmp    $0x3,%esi
>    6:	77 97                	ja     0xffffffffffffff9f
>    8:	83 fe 02             	cmp    $0x2,%esi
>    b:	74 7a                	je     0x87
>    d:	83 fe 03             	cmp    $0x3,%esi
>   10:	74 72                	je     0x84
>   12:	83 fe 01             	cmp    $0x1,%esi
>   15:	74 73                	je     0x8a
>   17:	48 8b 05 45 ec 11 02 	mov    0x211ec45(%rip),%rax        # 0x211ec63
>   1e:	89 d9                	mov    %ebx,%ecx
>   20:	81 e1 ff ff 0f 00    	and    $0xfffff,%ecx
>   26:	48 8d 0c c8          	lea    (%rax,%rcx,8),%rcx
>   2a:*	48 8b 29             	mov    (%rcx),%rbp		<-- trapping instruction
>   2d:	48 85 ed             	test   %rbp,%rbp
>   30:	75 12                	jne    0x44
>   32:	e9 9f 00 00 00       	jmp    0xd6
>   37:	48 8b 6d 00          	mov    0x0(%rbp),%rbp
>   3b:	48 85 ed             	test   %rbp,%rbp
>   3e:	0f                   	.byte 0xf
>   3f:	84                   	.byte 0x84
> 
> Code starting with the faulting instruction
> ===========================================
>    0:	48 8b 29             	mov    (%rcx),%rbp
>    3:	48 85 ed             	test   %rbp,%rbp
>    6:	75 12                	jne    0x1a
>    8:	e9 9f 00 00 00       	jmp    0xac
>    d:	48 8b 6d 00          	mov    0x0(%rbp),%rbp
>   11:	48 85 ed             	test   %rbp,%rbp
>   14:	0f                   	.byte 0xf
>   15:	84                   	.byte 0x84
> RSP: 0000:ffffb3f700027b78 EFLAGS: 00010206
> RAX: 0000000000000000 RBX: 000000004ea3cdf0 RCX: 00000000001e6f80
> RDX: 000000000000000d RSI: 0000000000000002 RDI: 00000000793ec676
> RBP: ffff8b578094f4d0 R08: 0000000043abc8c3 R09: 000000000000000d
> R10: 0000000000000015 R11: 000000000000001c R12: 0000000000000001
> R13: 0000000000000cc0 R14: ffffb3f700027bd8 R15: 000000000000000d
> FS:  0000000000000000(0000) GS:ffff8b5845c80000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00000000001e6f80 CR3: 0000000199410000 CR4: 00000000000006e0

regards,
Eric Dumazet Dec. 15, 2021, 10:38 a.m. UTC | #7
On Wed, Dec 15, 2021 at 2:18 AM Jiri Slaby <jirislaby@gmail.com> wrote:
>
> On 05. 12. 21, 5:21, Eric Dumazet wrote:
> > From: Eric Dumazet <edumazet@google.com>
> >
> > It can be hard to track where references are taken and released.
> >
> > In networking, we have annoying issues at device or netns dismantles,
> > and we had various proposals to ease root causing them.
> ...
> > --- a/lib/Kconfig
> > +++ b/lib/Kconfig
> > @@ -680,6 +680,11 @@ config STACK_HASH_ORDER
> >        Select the hash size as a power of 2 for the stackdepot hash table.
> >        Choose a lower value to reduce the memory impact.
> >
> > +config REF_TRACKER
> > +     bool
> > +     depends on STACKTRACE_SUPPORT
> > +     select STACKDEPOT
>
> Hi,
>
> I have to:
> +       select STACKDEPOT_ALWAYS_INIT
> here. Otherwise I see this during boot:
>

Thanks, I am adding Vlastimil Babka to the CC

This stuff has been added in
commit e88cc9f5e2e7a5d28a1adf12615840fab4cbebfd
Author: Vlastimil Babka <vbabka@suse.cz>
Date:   Tue Dec 14 21:50:42 2021 +0000

    lib/stackdepot: allow optional init and stack_table allocation by kvmalloc()



> > BUG: unable to handle page fault for address: 00000000001e6f80
> > #PF: supervisor read access in kernel mode
> > #PF: error_code(0x0000) - not-present page
> > PGD 0 P4D 0
> > Oops: 0000 [#1] PREEMPT SMP PTI
> > CPU: 1 PID: 1 Comm: swapper/0 Tainted: G          I       5.16.0-rc5-next-20211214-vanilla+ #46 2756e36611a8c8a8271884ae04571fc88e1cb566
> > Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
> > RIP: 0010:__stack_depot_save (lib/stackdepot.c:373)
> > Code: 04 31 fb 83 fe 03 77 97 83 fe 02 74 7a 83 fe 03 74 72 83 fe 01 74 73 48 8b 05 45 ec 11 02 89 d9 81 e1 ff ff 0f 00 48 8d 0c c8 <48> 8b 29 48 85 ed 75 12 e9 9f 00 00 00 48 8b 6d 00 48 85 ed 0f 84
> > All code
> > ========
> >    0: 04 31                   add    $0x31,%al
> >    2: fb                      sti
> >    3: 83 fe 03                cmp    $0x3,%esi
> >    6: 77 97                   ja     0xffffffffffffff9f
> >    8: 83 fe 02                cmp    $0x2,%esi
> >    b: 74 7a                   je     0x87
> >    d: 83 fe 03                cmp    $0x3,%esi
> >   10: 74 72                   je     0x84
> >   12: 83 fe 01                cmp    $0x1,%esi
> >   15: 74 73                   je     0x8a
> >   17: 48 8b 05 45 ec 11 02    mov    0x211ec45(%rip),%rax        # 0x211ec63
> >   1e: 89 d9                   mov    %ebx,%ecx
> >   20: 81 e1 ff ff 0f 00       and    $0xfffff,%ecx
> >   26: 48 8d 0c c8             lea    (%rax,%rcx,8),%rcx
> >   2a:*        48 8b 29                mov    (%rcx),%rbp              <-- trapping instruction
> >   2d: 48 85 ed                test   %rbp,%rbp
> >   30: 75 12                   jne    0x44
> >   32: e9 9f 00 00 00          jmp    0xd6
> >   37: 48 8b 6d 00             mov    0x0(%rbp),%rbp
> >   3b: 48 85 ed                test   %rbp,%rbp
> >   3e: 0f                      .byte 0xf
> >   3f: 84                      .byte 0x84
> >
> > Code starting with the faulting instruction
> > ===========================================
> >    0: 48 8b 29                mov    (%rcx),%rbp
> >    3: 48 85 ed                test   %rbp,%rbp
> >    6: 75 12                   jne    0x1a
> >    8: e9 9f 00 00 00          jmp    0xac
> >    d: 48 8b 6d 00             mov    0x0(%rbp),%rbp
> >   11: 48 85 ed                test   %rbp,%rbp
> >   14: 0f                      .byte 0xf
> >   15: 84                      .byte 0x84
> > RSP: 0000:ffffb3f700027b78 EFLAGS: 00010206
> > RAX: 0000000000000000 RBX: 000000004ea3cdf0 RCX: 00000000001e6f80
> > RDX: 000000000000000d RSI: 0000000000000002 RDI: 00000000793ec676
> > RBP: ffff8b578094f4d0 R08: 0000000043abc8c3 R09: 000000000000000d
> > R10: 0000000000000015 R11: 000000000000001c R12: 0000000000000001
> > R13: 0000000000000cc0 R14: ffffb3f700027bd8 R15: 000000000000000d
> > FS:  0000000000000000(0000) GS:ffff8b5845c80000(0000) knlGS:0000000000000000
> > CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > CR2: 00000000001e6f80 CR3: 0000000199410000 CR4: 00000000000006e0
> > Call Trace:
> > <TASK>
> > ref_tracker_alloc (lib/ref_tracker.c:84)
> > net_rx_queue_update_kobjects (net/core/net-sysfs.c:1049 net/core/net-sysfs.c:1101)
> > netdev_register_kobject (net/core/net-sysfs.c:1761 net/core/net-sysfs.c:2012)
> > register_netdevice (net/core/dev.c:9660)
> > register_netdev (net/core/dev.c:9784)
> > loopback_net_init (drivers/net/loopback.c:217)
> > ops_init (net/core/net_namespace.c:140)
> > register_pernet_operations (net/core/net_namespace.c:1148 net/core/net_namespace.c:1217)
> > register_pernet_device (net/core/net_namespace.c:1304)
> > net_dev_init (net/core/dev.c:11014)
> > ? sysctl_core_init (net/core/dev.c:10958)
> > do_one_initcall (init/main.c:1303)
> > kernel_init_freeable (init/main.c:1377 init/main.c:1394 init/main.c:1413 init/main.c:1618)
> > ? rest_init (init/main.c:1499)
> > kernel_init (init/main.c:1509)
> > ret_from_fork (arch/x86/entry/entry_64.S:301)
> > </TASK>
> > Modules linked in:
> > CR2: 00000000001e6f80
> > ---[ end trace 0000000000000000 ]---
> > RIP: 0010:__stack_depot_save (lib/stackdepot.c:373)
> > Code: 04 31 fb 83 fe 03 77 97 83 fe 02 74 7a 83 fe 03 74 72 83 fe 01 74 73 48 8b 05 45 ec 11 02 89 d9 81 e1 ff ff 0f 00 48 8d 0c c8 <48> 8b 29 48 85 ed 75 12 e9 9f 00 00 00 48 8b 6d 00 48 85 ed 0f 84
> > All code
> > ========
> >    0: 04 31                   add    $0x31,%al
> >    2: fb                      sti
> >    3: 83 fe 03                cmp    $0x3,%esi
> >    6: 77 97                   ja     0xffffffffffffff9f
> >    8: 83 fe 02                cmp    $0x2,%esi
> >    b: 74 7a                   je     0x87
> >    d: 83 fe 03                cmp    $0x3,%esi
> >   10: 74 72                   je     0x84
> >   12: 83 fe 01                cmp    $0x1,%esi
> >   15: 74 73                   je     0x8a
> >   17: 48 8b 05 45 ec 11 02    mov    0x211ec45(%rip),%rax        # 0x211ec63
> >   1e: 89 d9                   mov    %ebx,%ecx
> >   20: 81 e1 ff ff 0f 00       and    $0xfffff,%ecx
> >   26: 48 8d 0c c8             lea    (%rax,%rcx,8),%rcx
> >   2a:*        48 8b 29                mov    (%rcx),%rbp              <-- trapping instruction
> >   2d: 48 85 ed                test   %rbp,%rbp
> >   30: 75 12                   jne    0x44
> >   32: e9 9f 00 00 00          jmp    0xd6
> >   37: 48 8b 6d 00             mov    0x0(%rbp),%rbp
> >   3b: 48 85 ed                test   %rbp,%rbp
> >   3e: 0f                      .byte 0xf
> >   3f: 84                      .byte 0x84
> >
> > Code starting with the faulting instruction
> > ===========================================
> >    0: 48 8b 29                mov    (%rcx),%rbp
> >    3: 48 85 ed                test   %rbp,%rbp
> >    6: 75 12                   jne    0x1a
> >    8: e9 9f 00 00 00          jmp    0xac
> >    d: 48 8b 6d 00             mov    0x0(%rbp),%rbp
> >   11: 48 85 ed                test   %rbp,%rbp
> >   14: 0f                      .byte 0xf
> >   15: 84                      .byte 0x84
> > RSP: 0000:ffffb3f700027b78 EFLAGS: 00010206
> > RAX: 0000000000000000 RBX: 000000004ea3cdf0 RCX: 00000000001e6f80
> > RDX: 000000000000000d RSI: 0000000000000002 RDI: 00000000793ec676
> > RBP: ffff8b578094f4d0 R08: 0000000043abc8c3 R09: 000000000000000d
> > R10: 0000000000000015 R11: 000000000000001c R12: 0000000000000001
> > R13: 0000000000000cc0 R14: ffffb3f700027bd8 R15: 000000000000000d
> > FS:  0000000000000000(0000) GS:ffff8b5845c80000(0000) knlGS:0000000000000000
> > CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > CR2: 00000000001e6f80 CR3: 0000000199410000 CR4: 00000000000006e0
>
> regards,
> --
> js
Eric Dumazet Dec. 15, 2021, 10:41 a.m. UTC | #8
On Wed, Dec 15, 2021 at 2:38 AM Eric Dumazet <edumazet@google.com> wrote:
>
> On Wed, Dec 15, 2021 at 2:18 AM Jiri Slaby <jirislaby@gmail.com> wrote:
> >
> > On 05. 12. 21, 5:21, Eric Dumazet wrote:
> > > From: Eric Dumazet <edumazet@google.com>
> > >
> > > It can be hard to track where references are taken and released.
> > >
> > > In networking, we have annoying issues at device or netns dismantles,
> > > and we had various proposals to ease root causing them.
> > ...
> > > --- a/lib/Kconfig
> > > +++ b/lib/Kconfig
> > > @@ -680,6 +680,11 @@ config STACK_HASH_ORDER
> > >        Select the hash size as a power of 2 for the stackdepot hash table.
> > >        Choose a lower value to reduce the memory impact.
> > >
> > > +config REF_TRACKER
> > > +     bool
> > > +     depends on STACKTRACE_SUPPORT
> > > +     select STACKDEPOT
> >
> > Hi,
> >
> > I have to:
> > +       select STACKDEPOT_ALWAYS_INIT
> > here. Otherwise I see this during boot:
> >
>
> Thanks, I am adding Vlastimil Babka to the CC
>
> This stuff has been added in
> commit e88cc9f5e2e7a5d28a1adf12615840fab4cbebfd
> Author: Vlastimil Babka <vbabka@suse.cz>
> Date:   Tue Dec 14 21:50:42 2021 +0000
>
>     lib/stackdepot: allow optional init and stack_table allocation by kvmalloc()
>
>

(This is a problem because this patch is not yet in net-next, so I really do
not know how this issue should be handled)

>
> > > BUG: unable to handle page fault for address: 00000000001e6f80
> > > #PF: supervisor read access in kernel mode
> > > #PF: error_code(0x0000) - not-present page
> > > PGD 0 P4D 0
> > > Oops: 0000 [#1] PREEMPT SMP PTI
> > > CPU: 1 PID: 1 Comm: swapper/0 Tainted: G          I       5.16.0-rc5-next-20211214-vanilla+ #46 2756e36611a8c8a8271884ae04571fc88e1cb566
> > > Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
> > > RIP: 0010:__stack_depot_save (lib/stackdepot.c:373)
> > > Code: 04 31 fb 83 fe 03 77 97 83 fe 02 74 7a 83 fe 03 74 72 83 fe 01 74 73 48 8b 05 45 ec 11 02 89 d9 81 e1 ff ff 0f 00 48 8d 0c c8 <48> 8b 29 48 85 ed 75 12 e9 9f 00 00 00 48 8b 6d 00 48 85 ed 0f 84
> > > All code
> > > ========
> > >    0: 04 31                   add    $0x31,%al
> > >    2: fb                      sti
> > >    3: 83 fe 03                cmp    $0x3,%esi
> > >    6: 77 97                   ja     0xffffffffffffff9f
> > >    8: 83 fe 02                cmp    $0x2,%esi
> > >    b: 74 7a                   je     0x87
> > >    d: 83 fe 03                cmp    $0x3,%esi
> > >   10: 74 72                   je     0x84
> > >   12: 83 fe 01                cmp    $0x1,%esi
> > >   15: 74 73                   je     0x8a
> > >   17: 48 8b 05 45 ec 11 02    mov    0x211ec45(%rip),%rax        # 0x211ec63
> > >   1e: 89 d9                   mov    %ebx,%ecx
> > >   20: 81 e1 ff ff 0f 00       and    $0xfffff,%ecx
> > >   26: 48 8d 0c c8             lea    (%rax,%rcx,8),%rcx
> > >   2a:*        48 8b 29                mov    (%rcx),%rbp              <-- trapping instruction
> > >   2d: 48 85 ed                test   %rbp,%rbp
> > >   30: 75 12                   jne    0x44
> > >   32: e9 9f 00 00 00          jmp    0xd6
> > >   37: 48 8b 6d 00             mov    0x0(%rbp),%rbp
> > >   3b: 48 85 ed                test   %rbp,%rbp
> > >   3e: 0f                      .byte 0xf
> > >   3f: 84                      .byte 0x84
> > >
> > > Code starting with the faulting instruction
> > > ===========================================
> > >    0: 48 8b 29                mov    (%rcx),%rbp
> > >    3: 48 85 ed                test   %rbp,%rbp
> > >    6: 75 12                   jne    0x1a
> > >    8: e9 9f 00 00 00          jmp    0xac
> > >    d: 48 8b 6d 00             mov    0x0(%rbp),%rbp
> > >   11: 48 85 ed                test   %rbp,%rbp
> > >   14: 0f                      .byte 0xf
> > >   15: 84                      .byte 0x84
> > > RSP: 0000:ffffb3f700027b78 EFLAGS: 00010206
> > > RAX: 0000000000000000 RBX: 000000004ea3cdf0 RCX: 00000000001e6f80
> > > RDX: 000000000000000d RSI: 0000000000000002 RDI: 00000000793ec676
> > > RBP: ffff8b578094f4d0 R08: 0000000043abc8c3 R09: 000000000000000d
> > > R10: 0000000000000015 R11: 000000000000001c R12: 0000000000000001
> > > R13: 0000000000000cc0 R14: ffffb3f700027bd8 R15: 000000000000000d
> > > FS:  0000000000000000(0000) GS:ffff8b5845c80000(0000) knlGS:0000000000000000
> > > CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > CR2: 00000000001e6f80 CR3: 0000000199410000 CR4: 00000000000006e0
> > > Call Trace:
> > > <TASK>
> > > ref_tracker_alloc (lib/ref_tracker.c:84)
> > > net_rx_queue_update_kobjects (net/core/net-sysfs.c:1049 net/core/net-sysfs.c:1101)
> > > netdev_register_kobject (net/core/net-sysfs.c:1761 net/core/net-sysfs.c:2012)
> > > register_netdevice (net/core/dev.c:9660)
> > > register_netdev (net/core/dev.c:9784)
> > > loopback_net_init (drivers/net/loopback.c:217)
> > > ops_init (net/core/net_namespace.c:140)
> > > register_pernet_operations (net/core/net_namespace.c:1148 net/core/net_namespace.c:1217)
> > > register_pernet_device (net/core/net_namespace.c:1304)
> > > net_dev_init (net/core/dev.c:11014)
> > > ? sysctl_core_init (net/core/dev.c:10958)
> > > do_one_initcall (init/main.c:1303)
> > > kernel_init_freeable (init/main.c:1377 init/main.c:1394 init/main.c:1413 init/main.c:1618)
> > > ? rest_init (init/main.c:1499)
> > > kernel_init (init/main.c:1509)
> > > ret_from_fork (arch/x86/entry/entry_64.S:301)
> > > </TASK>
> > > Modules linked in:
> > > CR2: 00000000001e6f80
> > > ---[ end trace 0000000000000000 ]---
> > > RIP: 0010:__stack_depot_save (lib/stackdepot.c:373)
> > > Code: 04 31 fb 83 fe 03 77 97 83 fe 02 74 7a 83 fe 03 74 72 83 fe 01 74 73 48 8b 05 45 ec 11 02 89 d9 81 e1 ff ff 0f 00 48 8d 0c c8 <48> 8b 29 48 85 ed 75 12 e9 9f 00 00 00 48 8b 6d 00 48 85 ed 0f 84
> > > All code
> > > ========
> > >    0: 04 31                   add    $0x31,%al
> > >    2: fb                      sti
> > >    3: 83 fe 03                cmp    $0x3,%esi
> > >    6: 77 97                   ja     0xffffffffffffff9f
> > >    8: 83 fe 02                cmp    $0x2,%esi
> > >    b: 74 7a                   je     0x87
> > >    d: 83 fe 03                cmp    $0x3,%esi
> > >   10: 74 72                   je     0x84
> > >   12: 83 fe 01                cmp    $0x1,%esi
> > >   15: 74 73                   je     0x8a
> > >   17: 48 8b 05 45 ec 11 02    mov    0x211ec45(%rip),%rax        # 0x211ec63
> > >   1e: 89 d9                   mov    %ebx,%ecx
> > >   20: 81 e1 ff ff 0f 00       and    $0xfffff,%ecx
> > >   26: 48 8d 0c c8             lea    (%rax,%rcx,8),%rcx
> > >   2a:*        48 8b 29                mov    (%rcx),%rbp              <-- trapping instruction
> > >   2d: 48 85 ed                test   %rbp,%rbp
> > >   30: 75 12                   jne    0x44
> > >   32: e9 9f 00 00 00          jmp    0xd6
> > >   37: 48 8b 6d 00             mov    0x0(%rbp),%rbp
> > >   3b: 48 85 ed                test   %rbp,%rbp
> > >   3e: 0f                      .byte 0xf
> > >   3f: 84                      .byte 0x84
> > >
> > > Code starting with the faulting instruction
> > > ===========================================
> > >    0: 48 8b 29                mov    (%rcx),%rbp
> > >    3: 48 85 ed                test   %rbp,%rbp
> > >    6: 75 12                   jne    0x1a
> > >    8: e9 9f 00 00 00          jmp    0xac
> > >    d: 48 8b 6d 00             mov    0x0(%rbp),%rbp
> > >   11: 48 85 ed                test   %rbp,%rbp
> > >   14: 0f                      .byte 0xf
> > >   15: 84                      .byte 0x84
> > > RSP: 0000:ffffb3f700027b78 EFLAGS: 00010206
> > > RAX: 0000000000000000 RBX: 000000004ea3cdf0 RCX: 00000000001e6f80
> > > RDX: 000000000000000d RSI: 0000000000000002 RDI: 00000000793ec676
> > > RBP: ffff8b578094f4d0 R08: 0000000043abc8c3 R09: 000000000000000d
> > > R10: 0000000000000015 R11: 000000000000001c R12: 0000000000000001
> > > R13: 0000000000000cc0 R14: ffffb3f700027bd8 R15: 000000000000000d
> > > FS:  0000000000000000(0000) GS:ffff8b5845c80000(0000) knlGS:0000000000000000
> > > CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > CR2: 00000000001e6f80 CR3: 0000000199410000 CR4: 00000000000006e0
> >
> > regards,
> > --
> > js
Vlastimil Babka Dec. 15, 2021, 10:57 a.m. UTC | #9
On 12/15/21 11:41, Eric Dumazet wrote:
> On Wed, Dec 15, 2021 at 2:38 AM Eric Dumazet <edumazet@google.com> wrote:
>>
>> On Wed, Dec 15, 2021 at 2:18 AM Jiri Slaby <jirislaby@gmail.com> wrote:
>> >
>> > On 05. 12. 21, 5:21, Eric Dumazet wrote:
>> > > From: Eric Dumazet <edumazet@google.com>
>> > >
>> > > It can be hard to track where references are taken and released.
>> > >
>> > > In networking, we have annoying issues at device or netns dismantles,
>> > > and we had various proposals to ease root causing them.
>> > ...
>> > > --- a/lib/Kconfig
>> > > +++ b/lib/Kconfig
>> > > @@ -680,6 +680,11 @@ config STACK_HASH_ORDER
>> > >        Select the hash size as a power of 2 for the stackdepot hash table.
>> > >        Choose a lower value to reduce the memory impact.
>> > >
>> > > +config REF_TRACKER
>> > > +     bool
>> > > +     depends on STACKTRACE_SUPPORT
>> > > +     select STACKDEPOT
>> >
>> > Hi,
>> >
>> > I have to:
>> > +       select STACKDEPOT_ALWAYS_INIT
>> > here. Otherwise I see this during boot:
>> >
>>
>> Thanks, I am adding Vlastimil Babka to the CC
>>
>> This stuff has been added in
>> commit e88cc9f5e2e7a5d28a1adf12615840fab4cbebfd
>> Author: Vlastimil Babka <vbabka@suse.cz>
>> Date:   Tue Dec 14 21:50:42 2021 +0000
>>
>>     lib/stackdepot: allow optional init and stack_table allocation by kvmalloc()
>>
>>
> 
> (This is a problem because this patch is not yet in net-next, so I really do
> not know how this issue should be handled)

Looks like multiple new users of stackdepot start appearing as soon as I
touch it :)

The way we solved this with a new DRM user was Andrew adding a fixup to my
patch referenced above, in his "after-next" section of mm tree.
Should work here as well.

----8<----
From 0fa1f25925c05f8c5c4f776913d84904fb4c03a1 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Wed, 15 Dec 2021 11:52:10 +0100
Subject: [PATCH] lib/stackdepot: allow optional init and stack_table
 allocation by kvmalloc() - fixup4

Due to 4e66934eaadc ("lib: add reference counting tracking infrastructure")
landing recently to net-next adding a new stack depot user in lib/ref_tracker.c
we need to add an appropriate call to stack_depot_init() there as well.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/ref_tracker.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h
index c11c9db5825c..60f3453be23e 100644
--- a/include/linux/ref_tracker.h
+++ b/include/linux/ref_tracker.h
@@ -4,6 +4,7 @@
 #include <linux/refcount.h>
 #include <linux/types.h>
 #include <linux/spinlock.h>
+#include <linux/stackdepot.h>
 
 struct ref_tracker;
 
@@ -26,6 +27,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
 	spin_lock_init(&dir->lock);
 	dir->quarantine_avail = quarantine_count;
 	refcount_set(&dir->untracked, 1);
+	stack_depot_init();
 }
 
 void ref_tracker_dir_exit(struct ref_tracker_dir *dir);
Eric Dumazet Dec. 15, 2021, 11:08 a.m. UTC | #10
On Wed, Dec 15, 2021 at 2:57 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>
>
> On 12/15/21 11:41, Eric Dumazet wrote:
> > On Wed, Dec 15, 2021 at 2:38 AM Eric Dumazet <edumazet@google.com> wrote:
> >>
> >> On Wed, Dec 15, 2021 at 2:18 AM Jiri Slaby <jirislaby@gmail.com> wrote:
> >> >
> >> > On 05. 12. 21, 5:21, Eric Dumazet wrote:
> >> > > From: Eric Dumazet <edumazet@google.com>
> >> > >
> >> > > It can be hard to track where references are taken and released.
> >> > >
> >> > > In networking, we have annoying issues at device or netns dismantles,
> >> > > and we had various proposals to ease root causing them.
> >> > ...
> >> > > --- a/lib/Kconfig
> >> > > +++ b/lib/Kconfig
> >> > > @@ -680,6 +680,11 @@ config STACK_HASH_ORDER
> >> > >        Select the hash size as a power of 2 for the stackdepot hash table.
> >> > >        Choose a lower value to reduce the memory impact.
> >> > >
> >> > > +config REF_TRACKER
> >> > > +     bool
> >> > > +     depends on STACKTRACE_SUPPORT
> >> > > +     select STACKDEPOT
> >> >
> >> > Hi,
> >> >
> >> > I have to:
> >> > +       select STACKDEPOT_ALWAYS_INIT
> >> > here. Otherwise I see this during boot:
> >> >
> >>
> >> Thanks, I am adding Vlastimil Babka to the CC
> >>
> >> This stuff has been added in
> >> commit e88cc9f5e2e7a5d28a1adf12615840fab4cbebfd
> >> Author: Vlastimil Babka <vbabka@suse.cz>
> >> Date:   Tue Dec 14 21:50:42 2021 +0000
> >>
> >>     lib/stackdepot: allow optional init and stack_table allocation by kvmalloc()
> >>
> >>
> >
> > (This is a problem because this patch is not yet in net-next, so I really do
> > not know how this issue should be handled)
>
> Looks like multiple new users of stackdepot start appearing as soon as I
> touch it :)
>
> The way we solved this with a new DRM user was Andrew adding a fixup to my
> patch referenced above, in his "after-next" section of mm tree.
> Should work here as well.
>
> ----8<----
> From 0fa1f25925c05f8c5c4f776913d84904fb4c03a1 Mon Sep 17 00:00:00 2001
> From: Vlastimil Babka <vbabka@suse.cz>
> Date: Wed, 15 Dec 2021 11:52:10 +0100
> Subject: [PATCH] lib/stackdepot: allow optional init and stack_table
>  allocation by kvmalloc() - fixup4
>
> Due to 4e66934eaadc ("lib: add reference counting tracking infrastructure")
> landing recently to net-next adding a new stack depot user in lib/ref_tracker.c
> we need to add an appropriate call to stack_depot_init() there as well.
>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---

I guess this minimal fix will do.

In the future, when net-next (or net tree) has everything in place,
I will probably un-inline ref_tracker_dir_init() to avoid pulling
all these includes...

Reviewed-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jiri Slab <jirislaby@gmail.com>
Jiri Slaby Dec. 15, 2021, 11:09 a.m. UTC | #11
On 15. 12. 21, 12:08, Eric Dumazet wrote:
> Reported-by: Jiri Slab <jirislaby@gmail.com>

(I am not the allocator :P.)
Eric Dumazet Dec. 15, 2021, 11:25 a.m. UTC | #12
On Wed, Dec 15, 2021 at 3:09 AM Jiri Slaby <jirislaby@gmail.com> wrote:
>
> On 15. 12. 21, 12:08, Eric Dumazet wrote:
> > Reported-by: Jiri Slab <jirislaby@gmail.com>
>
> (I am not the allocator :P.)

Ah, it took me a while to understand, sorry for the typo ;)
diff mbox series

Patch

diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h
new file mode 100644
index 0000000000000000000000000000000000000000..c11c9db5825cf933acf529c83db441a818135f29
--- /dev/null
+++ b/include/linux/ref_tracker.h
@@ -0,0 +1,73 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+#ifndef _LINUX_REF_TRACKER_H
+#define _LINUX_REF_TRACKER_H
+#include <linux/refcount.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+
+struct ref_tracker;
+
+struct ref_tracker_dir {
+#ifdef CONFIG_REF_TRACKER
+	spinlock_t		lock;
+	unsigned int		quarantine_avail;
+	refcount_t		untracked;
+	struct list_head	list; /* List of active trackers */
+	struct list_head	quarantine; /* List of dead trackers */
+#endif
+};
+
+#ifdef CONFIG_REF_TRACKER
+static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
+					unsigned int quarantine_count)
+{
+	INIT_LIST_HEAD(&dir->list);
+	INIT_LIST_HEAD(&dir->quarantine);
+	spin_lock_init(&dir->lock);
+	dir->quarantine_avail = quarantine_count;
+	refcount_set(&dir->untracked, 1);
+}
+
+void ref_tracker_dir_exit(struct ref_tracker_dir *dir);
+
+void ref_tracker_dir_print(struct ref_tracker_dir *dir,
+			   unsigned int display_limit);
+
+int ref_tracker_alloc(struct ref_tracker_dir *dir,
+		      struct ref_tracker **trackerp, gfp_t gfp);
+
+int ref_tracker_free(struct ref_tracker_dir *dir,
+		     struct ref_tracker **trackerp);
+
+#else /* CONFIG_REF_TRACKER */
+
+static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
+					unsigned int quarantine_count)
+{
+}
+
+static inline void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
+{
+}
+
+static inline void ref_tracker_dir_print(struct ref_tracker_dir *dir,
+					 unsigned int display_limit)
+{
+}
+
+static inline int ref_tracker_alloc(struct ref_tracker_dir *dir,
+				    struct ref_tracker **trackerp,
+				    gfp_t gfp)
+{
+	return 0;
+}
+
+static inline int ref_tracker_free(struct ref_tracker_dir *dir,
+				   struct ref_tracker **trackerp)
+{
+	return 0;
+}
+
+#endif
+
+#endif /* _LINUX_REF_TRACKER_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 5e7165e6a346c9bec878b78c8c8c3d175fc98dfd..655b0e43f260bfca63240794191e3f1890b2e801 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -680,6 +680,11 @@  config STACK_HASH_ORDER
 	 Select the hash size as a power of 2 for the stackdepot hash table.
 	 Choose a lower value to reduce the memory impact.
 
+config REF_TRACKER
+	bool
+	depends on STACKTRACE_SUPPORT
+	select STACKDEPOT
+
 config SBITMAP
 	bool
 
diff --git a/lib/Makefile b/lib/Makefile
index 364c23f1557816f73aebd8304c01224a4846ac6c..c1fd9243ddb9cc1ac5252d7eb8009f9290782c4a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -270,6 +270,8 @@  obj-$(CONFIG_STACKDEPOT) += stackdepot.o
 KASAN_SANITIZE_stackdepot.o := n
 KCOV_INSTRUMENT_stackdepot.o := n
 
+obj-$(CONFIG_REF_TRACKER) += ref_tracker.o
+
 libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
 	       fdt_empty_tree.o fdt_addresses.o
 $(foreach file, $(libfdt_files), \
diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c
new file mode 100644
index 0000000000000000000000000000000000000000..0ae2e66dcf0fdb976f4cb99e747c9448b37f22cc
--- /dev/null
+++ b/lib/ref_tracker.c
@@ -0,0 +1,140 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/export.h>
+#include <linux/ref_tracker.h>
+#include <linux/slab.h>
+#include <linux/stacktrace.h>
+#include <linux/stackdepot.h>
+
+#define REF_TRACKER_STACK_ENTRIES 16
+
+/* One tracked reference.  Lives on dir->list while the reference is held,
+ * then moves to dir->quarantine after release so a double free can still
+ * be diagnosed with both stack traces.
+ */
+struct ref_tracker {
+	struct list_head	head;   /* anchor into dir->list or dir->quarantine */
+	bool			dead;	/* set once ref_tracker_free() released it */
+	depot_stack_handle_t	alloc_stack_handle;	/* stack captured at alloc */
+	depot_stack_handle_t	free_stack_handle;	/* stack captured at free */
+};
+
+/* Tear down @dir.  Quarantined trackers (already released by their owners)
+ * are simply freed.  Any tracker still on dir->list is a reference that was
+ * never freed: its allocation stack is printed and a WARN is raised.  A
+ * final WARN fires if "untracked" did not return to its initial bias of 1,
+ * i.e. some allocation-failure (untracked) references were never released.
+ */
+void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
+{
+	struct ref_tracker *tracker, *n;
+	unsigned long flags;
+	bool leak = false;
+
+	spin_lock_irqsave(&dir->lock, flags);
+	list_for_each_entry_safe(tracker, n, &dir->quarantine, head) {
+		list_del(&tracker->head);
+		kfree(tracker);
+		dir->quarantine_avail++;
+	}
+	list_for_each_entry_safe(tracker, n, &dir->list, head) {
+		/* Still live at directory teardown: report the leak. */
+		pr_err("leaked reference.\n");
+		if (tracker->alloc_stack_handle)
+			stack_depot_print(tracker->alloc_stack_handle);
+		leak = true;
+		list_del(&tracker->head);
+		kfree(tracker);
+	}
+	spin_unlock_irqrestore(&dir->lock, flags);
+	WARN_ON_ONCE(leak);
+	WARN_ON_ONCE(refcount_read(&dir->untracked) != 1);
+}
+EXPORT_SYMBOL(ref_tracker_dir_exit);
+
+/* Report currently live (not yet freed) references held in @dir, printing
+ * the allocation stack of at most @display_limit of them.
+ */
+void ref_tracker_dir_print(struct ref_tracker_dir *dir,
+			   unsigned int display_limit)
+{
+	unsigned int printed = 0;
+	struct ref_tracker *t;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dir->lock, flags);
+	list_for_each_entry(t, &dir->list, head) {
+		if (printed++ >= display_limit)
+			break;
+		pr_err("leaked reference.\n");
+		if (t->alloc_stack_handle)
+			stack_depot_print(t->alloc_stack_handle);
+	}
+	spin_unlock_irqrestore(&dir->lock, flags);
+}
+EXPORT_SYMBOL(ref_tracker_dir_print);
+
+/* Start tracking one reference owned by @dir.  On success *trackerp holds
+ * the tracker and 0 is returned.  If the tracker cannot be allocated the
+ * reference is still counted (in dir->untracked) and -ENOMEM is returned,
+ * so refcounting stays balanced even without a stack trace.
+ */
+int ref_tracker_alloc(struct ref_tracker_dir *dir,
+		      struct ref_tracker **trackerp,
+		      gfp_t gfp)
+{
+	unsigned long entries[REF_TRACKER_STACK_ENTRIES];
+	gfp_t gfp_mask = gfp;
+	struct ref_tracker *tracker;
+	unsigned int nr_entries;
+	unsigned long flags;
+
+	/* __GFP_NOFAIL is only valid for allocations that can sleep:
+	 * request it only when the caller allows direct reclaim, instead
+	 * of unconditionally (which is invalid for e.g. GFP_ATOMIC).
+	 */
+	if (gfp & __GFP_DIRECT_RECLAIM)
+		gfp_mask |= __GFP_NOFAIL;
+	*trackerp = tracker = kzalloc(sizeof(*tracker), gfp_mask);
+	if (unlikely(!tracker)) {
+		pr_err_once("memory allocation failure, unreliable refcount tracker.\n");
+		refcount_inc(&dir->untracked);
+		return -ENOMEM;
+	}
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
+	nr_entries = filter_irq_stacks(entries, nr_entries);
+	tracker->alloc_stack_handle = stack_depot_save(entries, nr_entries, gfp);
+
+	spin_lock_irqsave(&dir->lock, flags);
+	list_add(&tracker->head, &dir->list);
+	spin_unlock_irqrestore(&dir->lock, flags);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ref_tracker_alloc);
+
+/* Release one tracked reference.  A NULL *trackerp means the reference was
+ * never tracked (allocation failed in ref_tracker_alloc()): the untracked
+ * counter is decremented and -EEXIST returned.  A second free of the same
+ * tracker is detected via tracker->dead: both the allocation and first
+ * free stacks are printed, a WARN fires, and -EINVAL is returned.  Freed
+ * trackers are parked on dir->quarantine (up to quarantine_count entries,
+ * oldest evicted first) so later double-free reports stay possible.
+ */
+int ref_tracker_free(struct ref_tracker_dir *dir,
+		     struct ref_tracker **trackerp)
+{
+	unsigned long entries[REF_TRACKER_STACK_ENTRIES];
+	struct ref_tracker *tracker = *trackerp;
+	depot_stack_handle_t stack_handle;
+	unsigned int nr_entries;
+	unsigned long flags;
+
+	if (!tracker) {
+		refcount_dec(&dir->untracked);
+		return -EEXIST;
+	}
+	/* Capture the free stack before taking dir->lock. */
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
+	nr_entries = filter_irq_stacks(entries, nr_entries);
+	stack_handle = stack_depot_save(entries, nr_entries, GFP_ATOMIC);
+
+	spin_lock_irqsave(&dir->lock, flags);
+	if (tracker->dead) {
+		/* Double free: show where it was taken and first released. */
+		pr_err("reference already released.\n");
+		if (tracker->alloc_stack_handle) {
+			pr_err("allocated in:\n");
+			stack_depot_print(tracker->alloc_stack_handle);
+		}
+		if (tracker->free_stack_handle) {
+			pr_err("freed in:\n");
+			stack_depot_print(tracker->free_stack_handle);
+		}
+		spin_unlock_irqrestore(&dir->lock, flags);
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
+	tracker->dead = true;
+
+	tracker->free_stack_handle = stack_handle;
+
+	/* Park in quarantine; if full, evict the oldest dead tracker. */
+	list_move_tail(&tracker->head, &dir->quarantine);
+	if (!dir->quarantine_avail) {
+		tracker = list_first_entry(&dir->quarantine, struct ref_tracker, head);
+		list_del(&tracker->head);
+	} else {
+		dir->quarantine_avail--;
+		tracker = NULL;
+	}
+	spin_unlock_irqrestore(&dir->lock, flags);
+
+	/* kfree() outside the lock; tracker is NULL if nothing was evicted. */
+	kfree(tracker);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ref_tracker_free);