
lib/stackdepot: Use page allocator if both slab and memblock is unavailable

Message ID YhrrM7NTYXG5JluY@ip-172-31-19-208.ap-northeast-1.compute.internal (mailing list archive)
State New
Series lib/stackdepot: Use page allocator if both slab and memblock is unavailable

Commit Message

Hyeonggon Yoo Feb. 27, 2022, 3:08 a.m. UTC
After commit 2dba5eb1c73b ("lib/stackdepot: allow optional init and
stack_table allocation by kvmalloc()"), stack_depot_init() is called
later if CONFIG_STACKDEPOT_ALWAYS_INIT=n to avoid unnecessary memory
usage. It allocates stack_table using memblock_alloc() or kvmalloc(),
depending on whether the slab allocator is available.

But when stack_depot_init() is called while creating boot slab caches,
neither the slab allocator nor memblock is available, so the kernel
crashes. Allocate stack_table from the page allocator when both the
slab allocator and memblock are unavailable.

Limit the size of stack_table when using the page allocator because
vmalloc() is also unavailable in kmem_cache_init(); it must not be
larger than (PAGE_SIZE << (MAX_ORDER - 1)).

This patch was tested with both CONFIG_STACKDEPOT_ALWAYS_INIT=y and =n.

Fixes: 2dba5eb1c73b ("lib/stackdepot: allow optional init and stack_table allocation by kvmalloc()")
Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
---
 lib/stackdepot.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)
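
For a sense of scale, here is the size cap worked out with common defaults
(a sketch assuming 4K pages, MAX_ORDER = 11, 8-byte pointers and the default
CONFIG_STACK_HASH_ORDER = 20; the real numbers depend on the configuration):

        /* requested table:  (1 << 20) buckets * 8 bytes             = 8 MiB */
        /* page-order cap:   PAGE_SIZE << (MAX_ORDER - 1) = 4K << 10 = 4 MiB */
        /* stack_hash_size thus drops to 4 MiB / 8 = 512K buckets, which is  */
        /* still a power of two, so hash & (stack_hash_size - 1) stays valid */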

Comments

kernel test robot Feb. 27, 2022, 5:06 a.m. UTC | #1
Hi Hyeonggon,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v5.17-rc5 next-20220225]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Hyeonggon-Yoo/lib-stackdepot-Use-page-allocator-if-both-slab-and-memblock-is-unavailable/20220227-111029
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 2293be58d6a18cab800e25e42081bacb75c05752
config: hexagon-randconfig-r005-20220227 (https://download.01.org/0day-ci/archive/20220227/202202271324.VLaJlMYW-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project d271fc04d5b97b12e6b797c6067d3c96a8d7470e)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/fd37f88eccc357002cc03a6a5fac60fb42552bc7
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Hyeonggon-Yoo/lib-stackdepot-Use-page-allocator-if-both-slab-and-memblock-is-unavailable/20220227-111029
        git checkout fd37f88eccc357002cc03a6a5fac60fb42552bc7
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> lib/stackdepot.c:187:11: warning: comparison of distinct pointer types ('typeof (size) *' (aka 'unsigned int *') and 'typeof ((1UL << 14) << (11 - 1)) *' (aka 'unsigned long *')) [-Wcompare-distinct-pointer-types]
                           size = min(size, PAGE_SIZE << (MAX_ORDER - 1));
                                  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/minmax.h:45:19: note: expanded from macro 'min'
   #define min(x, y)       __careful_cmp(x, y, <)
                           ^~~~~~~~~~~~~~~~~~~~~~
   include/linux/minmax.h:36:24: note: expanded from macro '__careful_cmp'
           __builtin_choose_expr(__safe_cmp(x, y), \
                                 ^~~~~~~~~~~~~~~~
   include/linux/minmax.h:26:4: note: expanded from macro '__safe_cmp'
                   (__typecheck(x, y) && __no_side_effects(x, y))
                    ^~~~~~~~~~~~~~~~~
   include/linux/minmax.h:20:28: note: expanded from macro '__typecheck'
           (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
                      ~~~~~~~~~~~~~~ ^  ~~~~~~~~~~~~~~
   1 warning generated.


vim +187 lib/stackdepot.c

   168	
   169	/*
   170	 * __ref because of memblock_alloc(), which will not be actually called after
   171	 * the __init code is gone, because at that point slab_is_available() is true
   172	 */
   173	__ref int stack_depot_init(void)
   174	{
   175		static DEFINE_MUTEX(stack_depot_init_mutex);
   176	
   177		mutex_lock(&stack_depot_init_mutex);
   178		if (!stack_depot_disable && !stack_table) {
   179			size_t size = (stack_hash_size * sizeof(struct stack_record *));
   180			int i;
   181	
   182			if (slab_is_available()) {
   183				pr_info("Stack Depot allocating hash table with kvmalloc\n");
   184				stack_table = kvmalloc(size, GFP_KERNEL);
   185			} else if (totalram_pages() > 0) {
   186				/* Reduce size because vmalloc may be unavailable */
 > 187				size = min(size, PAGE_SIZE << (MAX_ORDER - 1));
   188				stack_hash_size = size / sizeof(struct stack_record *);
   189	
   190				pr_info("Stack Depot allocating hash table with __get_free_pages\n");
   191				stack_table = (struct stack_record **)
   192					      __get_free_pages(GFP_KERNEL, get_order(size));
   193			} else {
   194				pr_info("Stack Depot allocating hash table with memblock_alloc\n");
   195				stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
   196			}
   197	
   198			if (stack_table) {
   199				pr_info("Stack Depot hash table size=%u\n", stack_hash_size);
   200				for (i = 0; i < stack_hash_size;  i++)
   201					stack_table[i] = NULL;
   202			} else {
   203				pr_err("Stack Depot hash table allocation failed, disabling\n");
   204				stack_depot_disable = true;
   205				mutex_unlock(&stack_depot_init_mutex);
   206				return -ENOMEM;
   207			}
   208		}
   209		mutex_unlock(&stack_depot_init_mutex);
   210		return 0;
   211	}
   212	EXPORT_SYMBOL_GPL(stack_depot_init);
   213	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot Feb. 27, 2022, 10 a.m. UTC | #2
Hi Hyeonggon,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v5.17-rc5 next-20220225]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Hyeonggon-Yoo/lib-stackdepot-Use-page-allocator-if-both-slab-and-memblock-is-unavailable/20220227-111029
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 2293be58d6a18cab800e25e42081bacb75c05752
config: i386-randconfig-s002 (https://download.01.org/0day-ci/archive/20220227/202202271714.D69JHjzb-lkp@intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.4-dirty
        # https://github.com/0day-ci/linux/commit/fd37f88eccc357002cc03a6a5fac60fb42552bc7
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Hyeonggon-Yoo/lib-stackdepot-Use-page-allocator-if-both-slab-and-memblock-is-unavailable/20220227-111029
        git checkout fd37f88eccc357002cc03a6a5fac60fb42552bc7
        # save the config file to linux build tree
        mkdir build_dir
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)
>> lib/stackdepot.c:187:32: sparse: sparse: incompatible types in comparison expression (different type sizes):
>> lib/stackdepot.c:187:32: sparse:    unsigned int *
>> lib/stackdepot.c:187:32: sparse:    unsigned long *

vim +187 lib/stackdepot.c

   168	
   169	/*
   170	 * __ref because of memblock_alloc(), which will not be actually called after
   171	 * the __init code is gone, because at that point slab_is_available() is true
   172	 */
   173	__ref int stack_depot_init(void)
   174	{
   175		static DEFINE_MUTEX(stack_depot_init_mutex);
   176	
   177		mutex_lock(&stack_depot_init_mutex);
   178		if (!stack_depot_disable && !stack_table) {
   179			size_t size = (stack_hash_size * sizeof(struct stack_record *));
   180			int i;
   181	
   182			if (slab_is_available()) {
   183				pr_info("Stack Depot allocating hash table with kvmalloc\n");
   184				stack_table = kvmalloc(size, GFP_KERNEL);
   185			} else if (totalram_pages() > 0) {
   186				/* Reduce size because vmalloc may be unavailable */
 > 187				size = min(size, PAGE_SIZE << (MAX_ORDER - 1));
   188				stack_hash_size = size / sizeof(struct stack_record *);
   189	
   190				pr_info("Stack Depot allocating hash table with __get_free_pages\n");
   191				stack_table = (struct stack_record **)
   192					      __get_free_pages(GFP_KERNEL, get_order(size));
   193			} else {
   194				pr_info("Stack Depot allocating hash table with memblock_alloc\n");
   195				stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
   196			}
   197	
   198			if (stack_table) {
   199				pr_info("Stack Depot hash table size=%u\n", stack_hash_size);
   200				for (i = 0; i < stack_hash_size;  i++)
   201					stack_table[i] = NULL;
   202			} else {
   203				pr_err("Stack Depot hash table allocation failed, disabling\n");
   204				stack_depot_disable = true;
   205				mutex_unlock(&stack_depot_init_mutex);
   206				return -ENOMEM;
   207			}
   208		}
   209		mutex_unlock(&stack_depot_init_mutex);
   210		return 0;
   211	}
   212	EXPORT_SYMBOL_GPL(stack_depot_init);
   213	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
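
Both reports above flag the same line: size is a size_t (an unsigned int on
these 32-bit configs) while PAGE_SIZE << (MAX_ORDER - 1) is unsigned long, so
min()'s type check complains. A minimal, untested way a respin could silence
it is to force a common type with min_t(), e.g.:

-		size = min(size, PAGE_SIZE << (MAX_ORDER - 1));
+		size = min_t(size_t, size, PAGE_SIZE << (MAX_ORDER - 1));
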
Marco Elver Feb. 28, 2022, 7 a.m. UTC | #3
On Sun, 27 Feb 2022 at 04:08, Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
>
> After commit 2dba5eb1c73b ("lib/stackdepot: allow optional init and
> stack_table allocation by kvmalloc()"), stack_depot_init() is called
> later if CONFIG_STACKDEPOT_ALWAYS_INIT=n to remove unnecessary memory
> usage. It allocates stack_table using memblock_alloc() or kvmalloc()
> depending on availability of slab allocator.
>
> But when stack_depot_init() is called while creating boot slab caches,
> both slab allocator and memblock is not available. So kernel crashes.
> Allocate stack_table from page allocator when both slab allocator and
> memblock is unavailable.

This is odd - who is calling stack_depot_init() while neither slab nor
memblock are available? Do you have a stacktrace?

> Limit size of stack_table when using page allocator because vmalloc()
> is also unavailable in kmem_cache_init(). it must not be larger than
> (PAGE_SIZE << (MAX_ORDER - 1)).
>
> This patch was tested on both CONFIG_STACKDEPOT_ALWAYS_INIT=y and n.
>
> Fixes: 2dba5eb1c73b ("lib/stackdepot: allow optional init and stack_table allocation by kvmalloc()")
> Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> ---
>  lib/stackdepot.c | 28 +++++++++++++++++++++-------
>  1 file changed, 21 insertions(+), 7 deletions(-)
>
> diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> index bf5ba9af0500..606f80ae2bf7 100644
> --- a/lib/stackdepot.c
> +++ b/lib/stackdepot.c
> @@ -73,6 +73,14 @@ static int next_slab_inited;
>  static size_t depot_offset;
>  static DEFINE_RAW_SPINLOCK(depot_lock);
>
> +static unsigned int stack_hash_size = (1 << CONFIG_STACK_HASH_ORDER);
> +static inline unsigned int stack_hash_mask(void)
> +{
> +       return stack_hash_size - 1;
> +}
> +
> +#define STACK_HASH_SEED 0x9747b28c
> +
>  static bool init_stack_slab(void **prealloc)
>  {
>         if (!*prealloc)
> @@ -142,10 +150,6 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
>         return stack;
>  }
>
> -#define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER)
> -#define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
> -#define STACK_HASH_SEED 0x9747b28c
> -
>  static bool stack_depot_disable;
>  static struct stack_record **stack_table;
>
> @@ -172,18 +176,28 @@ __ref int stack_depot_init(void)
>
>         mutex_lock(&stack_depot_init_mutex);
>         if (!stack_depot_disable && !stack_table) {
> -               size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
> +               size_t size = (stack_hash_size * sizeof(struct stack_record *));
>                 int i;
>
>                 if (slab_is_available()) {
>                         pr_info("Stack Depot allocating hash table with kvmalloc\n");
>                         stack_table = kvmalloc(size, GFP_KERNEL);
> +               } else if (totalram_pages() > 0) {
> +                       /* Reduce size because vmalloc may be unavailable */
> +                       size = min(size, PAGE_SIZE << (MAX_ORDER - 1));
> +                       stack_hash_size = size / sizeof(struct stack_record *);
> +
> +                       pr_info("Stack Depot allocating hash table with __get_free_pages\n");
> +                       stack_table = (struct stack_record **)
> +                                     __get_free_pages(GFP_KERNEL, get_order(size));
>                 } else {
>                         pr_info("Stack Depot allocating hash table with memblock_alloc\n");
>                         stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
>                 }
> +
>                 if (stack_table) {
> -                       for (i = 0; i < STACK_HASH_SIZE;  i++)
> +                       pr_info("Stack Depot hash table size=%u\n", stack_hash_size);
> +                       for (i = 0; i < stack_hash_size;  i++)
>                                 stack_table[i] = NULL;
>                 } else {
>                         pr_err("Stack Depot hash table allocation failed, disabling\n");
> @@ -363,7 +377,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
>                 goto fast_exit;
>
>         hash = hash_stack(entries, nr_entries);
> -       bucket = &stack_table[hash & STACK_HASH_MASK];
> +       bucket = &stack_table[hash & stack_hash_mask()];
>
>         /*
>          * Fast path: look the stack trace up without locking.
> --
> 2.33.1
Hyeonggon Yoo Feb. 28, 2022, 10:05 a.m. UTC | #4
On Mon, Feb 28, 2022 at 08:00:00AM +0100, Marco Elver wrote:
> On Sun, 27 Feb 2022 at 04:08, Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
> >
> > After commit 2dba5eb1c73b ("lib/stackdepot: allow optional init and
> > stack_table allocation by kvmalloc()"), stack_depot_init() is called
> > later if CONFIG_STACKDEPOT_ALWAYS_INIT=n to remove unnecessary memory
> > usage. It allocates stack_table using memblock_alloc() or kvmalloc()
> > depending on availability of slab allocator.
> >
> > But when stack_depot_init() is called while creating boot slab caches,
> > both slab allocator and memblock is not available. So kernel crashes.
> > Allocate stack_table from page allocator when both slab allocator and
> > memblock is unavailable.
> 
> This is odd - who is calling stack_depot_init() while neither slab nor
> memblock are available? 

It's not merged yet - but Oliver's patch (2/5) in his series [1] does:
If user is debugging cache, it calls stack_depot_init() when creating
cache.

> @@ -4221,6 +4220,9 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
>  	s->remote_node_defrag_ratio = 1000;
>  #endif
> 
> +	if (s->flags & SLAB_STORE_USER && IS_ENABLED(CONFIG_STACKDEPOT))
> +		stack_depot_init();
> +

Oliver's patch series enables stack depot when arch supports stacktrace,
to store slab objects' stack traces. (as slub debugging feature.)

Because slub debugging is turned on by default, the commit 2dba5eb1c73b
("lib/stackdepot: allow optional init and stack_table allocation by
kvmalloc()") made stack_depot_init() can be called later.

With Oliver's patch applied, stack_depot_init() can be called in
contexts below:

  1) only memblock available (for kasan)
  2) only buddy available, vmalloc/memblock unavailable (for boot caches)
  3) buddy/slab available, vmalloc/memblock unavailable (vmap_area cache)
  4) buddy/slab/vmalloc available, memblock unavailable (other caches)

SLUB supports enabling debugging for specific cache by passing
slub_debug boot parameter. As slab caches can be created in
various context, stack_depot_init() should consider all contexts above.

Writing this, I realized my patch does not handle case 3).. I'll send v3.

[1] https://lore.kernel.org/linux-mm/YhoakP7Kih%2FYUgiN@ip-172-31-19-208.ap-northeast-1.compute.internal/T/#t
[2] https://git.kernel.org/pub/scm/linux/kernel/git/vbabka/linux.git/log/?h=slub-stackdepot-v1
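
A closer look at why case 3) is a problem for this version (a sketch; the key
assumption is kvmalloc()'s fallback behaviour for large sizes): with slab up,
the patch takes the kvmalloc() branch, and a multi-megabyte kvmalloc() is
likely to fall back to vmalloc(), which is exactly what is not ready yet when
the vmap_area bootstrap caches are created:

        if (slab_is_available()) {
                /*
                 * kvmalloc() tries kmalloc() first and falls back to
                 * vmalloc() for large sizes; in case 3) that fallback
                 * cannot be used yet, so this branch would also need a
                 * capped kmalloc() or __get_free_pages() allocation.
                 */
                stack_table = kvmalloc(size, GFP_KERNEL);
        }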

> Do you have a stacktrace?

Yeah, here:

You can reproduce this on vbabka's slab-stackdepot-v1 branch [2] with
slub_debug=U, and CONFIG_STACKDEPOT_ALWAYS_INIT=n

[    0.000000] Stack Depot allocating hash table with memblock_alloc
[    0.000000] Unable to handle kernel paging request at virtual address ffff000097400000
[    0.000000] Mem abort info:
[    0.000000]   ESR = 0x96000047
[    0.000000]   EC = 0x25: DABT (current EL), IL = 32 bits
[    0.000000]   SET = 0, FnV = 0
[    0.000000]   EA = 0, S1PTW = 0
[    0.000000]   FSC = 0x07: level 3 translation fault
[    0.000000] Data abort info:
[    0.000000]   ISV = 0, ISS = 0x00000047
[    0.000000]   CM = 0, WnR = 1
[    0.000000] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000041719000
[    0.000000] [ffff000097400000] pgd=18000000dcff8003, p4d=18000000dcff8003, pud=18000000dcbfe003, pmd=18000000dcb43003, pte=00680000d7400706
[    0.000000] Internal error: Oops: 96000047 [#1] PREEMPT SMP
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.17.0-rc1-11918-gbf5d03166d75 #51
[    0.000000] Hardware name: linux,dummy-virt (DT)
[    0.000000] pstate: 400000c5 (nZcv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[    0.000000] pc : __memset+0x16c/0x188
[    0.000000] lr : memblock_alloc_try_nid+0xcc/0xe4
[    0.000000] sp : ffff800009a33cd0
[    0.000000] x29: ffff800009a33cd0 x28: 0000000041720018 x27: ffff800009362640
[    0.000000] x26: ffff800009362640 x25: 0000000000000000 x24: 0000000000000000
[    0.000000] x23: 0000000000002000 x22: ffff80000932bb50 x21: 00000000ffffffff
[    0.000000] x20: ffff000097400000 x19: 0000000000800000 x18: ffffffffffffffff
[    0.000000] x17: 373578302f383278 x16: 302b657461657263 x15: 0000001000000000
[    0.000000] x14: 0000000000000360 x13: 0000000000009f8c x12: 00000000dcb0c070
[    0.000000] x11: 0000001000000000 x10: 00000000004ea000 x9 : 0000000000000000
[    0.000000] x8 : ffff000097400000 x7 : 0000000000000000 x6 : 000000000000003f
[    0.000000] x5 : 0000000000000040 x4 : 0000000000000000 x3 : 0000000000000004
[    0.000000] x2 : 00000000007fffc0 x1 : 0000000000000000 x0 : ffff000097400000
[    0.000000] Call trace:
[    0.000000]  __memset+0x16c/0x188
[    0.000000]  stack_depot_init+0xc8/0x100
[    0.000000]  __kmem_cache_create+0x454/0x570
[    0.000000]  create_boot_cache+0xa0/0xe0
[    0.000000]  kmem_cache_init+0xf8/0x204
[    0.000000]  start_kernel+0x3ec/0x668
[    0.000000]  __primary_switched+0xc0/0xc8
[    0.000000] Code: 91010108 54ffff4a 8b040108 cb050042 (d50b7428)
[    0.000000] ---[ end trace 0000000000000000 ]---
[    0.000000] Kernel panic - not syncing: Attempted to kill the idle task!
[    0.000000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]---

Thanks!

> > Limit size of stack_table when using page allocator because vmalloc()
> > is also unavailable in kmem_cache_init(). it must not be larger than
> > (PAGE_SIZE << (MAX_ORDER - 1)).
> >
> > This patch was tested on both CONFIG_STACKDEPOT_ALWAYS_INIT=y and n.
> >
> > Fixes: 2dba5eb1c73b ("lib/stackdepot: allow optional init and stack_table allocation by kvmalloc()")
> > Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> > ---
> >  lib/stackdepot.c | 28 +++++++++++++++++++++-------
> >  1 file changed, 21 insertions(+), 7 deletions(-)
> >
> > diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> > index bf5ba9af0500..606f80ae2bf7 100644
> > --- a/lib/stackdepot.c
> > +++ b/lib/stackdepot.c
> > @@ -73,6 +73,14 @@ static int next_slab_inited;
> >  static size_t depot_offset;
> >  static DEFINE_RAW_SPINLOCK(depot_lock);
> >
> > +static unsigned int stack_hash_size = (1 << CONFIG_STACK_HASH_ORDER);
> > +static inline unsigned int stack_hash_mask(void)
> > +{
> > +       return stack_hash_size - 1;
> > +}
> > +
> > +#define STACK_HASH_SEED 0x9747b28c
> > +
> >  static bool init_stack_slab(void **prealloc)
> >  {
> >         if (!*prealloc)
> > @@ -142,10 +150,6 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
> >         return stack;
> >  }
> >
> > -#define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER)
> > -#define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
> > -#define STACK_HASH_SEED 0x9747b28c
> > -
> >  static bool stack_depot_disable;
> >  static struct stack_record **stack_table;
> >
> > @@ -172,18 +176,28 @@ __ref int stack_depot_init(void)
> >
> >         mutex_lock(&stack_depot_init_mutex);
> >         if (!stack_depot_disable && !stack_table) {
> > -               size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
> > +               size_t size = (stack_hash_size * sizeof(struct stack_record *));
> >                 int i;
> >
> >                 if (slab_is_available()) {
> >                         pr_info("Stack Depot allocating hash table with kvmalloc\n");
> >                         stack_table = kvmalloc(size, GFP_KERNEL);
> > +               } else if (totalram_pages() > 0) {
> > +                       /* Reduce size because vmalloc may be unavailable */
> > +                       size = min(size, PAGE_SIZE << (MAX_ORDER - 1));
> > +                       stack_hash_size = size / sizeof(struct stack_record *);
> > +
> > +                       pr_info("Stack Depot allocating hash table with __get_free_pages\n");
> > +                       stack_table = (struct stack_record **)
> > +                                     __get_free_pages(GFP_KERNEL, get_order(size));
> >                 } else {
> >                         pr_info("Stack Depot allocating hash table with memblock_alloc\n");
> >                         stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
> >                 }
> > +
> >                 if (stack_table) {
> > -                       for (i = 0; i < STACK_HASH_SIZE;  i++)
> > +                       pr_info("Stack Depot hash table size=%u\n", stack_hash_size);
> > +                       for (i = 0; i < stack_hash_size;  i++)
> >                                 stack_table[i] = NULL;
> >                 } else {
> >                         pr_err("Stack Depot hash table allocation failed, disabling\n");
> > @@ -363,7 +377,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
> >                 goto fast_exit;
> >
> >         hash = hash_stack(entries, nr_entries);
> > -       bucket = &stack_table[hash & STACK_HASH_MASK];
> > +       bucket = &stack_table[hash & stack_hash_mask()];
> >
> >         /*
> >          * Fast path: look the stack trace up without locking.
> > --
> > 2.33.1
Marco Elver Feb. 28, 2022, 10:50 a.m. UTC | #5
On Mon, 28 Feb 2022 at 11:05, Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
[...]
> > This is odd - who is calling stack_depot_init() while neither slab nor
> > memblock are available?
>
> It's not merged yet - but Oliver's patch (2/5) in his series [1] does:
> If user is debugging cache, it calls stack_depot_init() when creating
> cache.
>
> > @@ -4221,6 +4220,9 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
> >       s->remote_node_defrag_ratio = 1000;
> >  #endif
> >
> > +     if (s->flags & SLAB_STORE_USER && IS_ENABLED(CONFIG_STACKDEPOT))
> > +             stack_depot_init();
> > +
>
> Oliver's patch series enables stack depot when arch supports stacktrace,
> to store slab objects' stack traces. (as slub debugging feature.)
>
> Because slub debugging is turned on by default, the commit 2dba5eb1c73b
> ("lib/stackdepot: allow optional init and stack_table allocation by
> kvmalloc()") made stack_depot_init() can be called later.
>
> With Oliver's patch applied, stack_depot_init() can be called in
> contexts below:
>
>   1) only memblock available (for kasan)
>   2) only buddy available, vmalloc/memblock unavailable (for boot caches)
>   3) buddy/slab available, vmalloc/memblock unavailable (vmap_area cache)
>   4) buddy/slab/vmalloc available, memblock unavailable (other caches)
>
> SLUB supports enabling debugging for specific cache by passing
> slub_debug boot parameter. As slab caches can be created in
> various context, stack_depot_init() should consider all contexts above.
>
> Writing this, I realized my patch does not handle case 3).. I'll send v3.
>
> [1] https://lore.kernel.org/linux-mm/YhoakP7Kih%2FYUgiN@ip-172-31-19-208.ap-northeast-1.compute.internal/T/#t
> [2] https://git.kernel.org/pub/scm/linux/kernel/git/vbabka/linux.git/log/?h=slub-stackdepot-v1
>
> > Do you have a stacktrace?
>
> Yeah, here:
>
> You can reproduce this on vbabka's slab-stackdepot-v1 branch [2] with
> slub_debug=U, and CONFIG_STACKDEPOT_ALWAYS_INIT=n
>
[...]
> [    0.000000] Call trace:
> [    0.000000]  __memset+0x16c/0x188
> [    0.000000]  stack_depot_init+0xc8/0x100
> [    0.000000]  __kmem_cache_create+0x454/0x570
> [    0.000000]  create_boot_cache+0xa0/0xe0

I think even before this point you have all the information required
to determine if stackdepot will be required. It's available after
setup_slub_debug().

So why can't you just call stack_depot_init() somewhere else and avoid
all this complexity?

> [    0.000000]  kmem_cache_init+0xf8/0x204
> [    0.000000]  start_kernel+0x3ec/0x668
> [    0.000000]  __primary_switched+0xc0/0xc8
> [    0.000000] Code: 91010108 54ffff4a 8b040108 cb050042 (d50b7428)
> [    0.000000] ---[ end trace 0000000000000000 ]---
> [    0.000000] Kernel panic - not syncing: Attempted to kill the idle task!
> [    0.000000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]---
Hyeonggon Yoo Feb. 28, 2022, 11:48 a.m. UTC | #6
On Mon, Feb 28, 2022 at 11:50:49AM +0100, Marco Elver wrote:
> On Mon, 28 Feb 2022 at 11:05, Hyeonggon Yoo <42.hyeyoo@gmail.com> wrote:
> [...]
> > > This is odd - who is calling stack_depot_init() while neither slab nor
> > > memblock are available?
> >
> > It's not merged yet - but Oliver's patch (2/5) in his series [1] does:
> > If user is debugging cache, it calls stack_depot_init() when creating
> > cache.
> >
> > > @@ -4221,6 +4220,9 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
> > >       s->remote_node_defrag_ratio = 1000;
> > >  #endif
> > >
> > > +     if (s->flags & SLAB_STORE_USER && IS_ENABLED(CONFIG_STACKDEPOT))
> > > +             stack_depot_init();
> > > +
> >
> > Oliver's patch series enables stack depot when arch supports stacktrace,
> > to store slab objects' stack traces. (as slub debugging feature.)
> >
> > Because slub debugging is turned on by default, the commit 2dba5eb1c73b
> > ("lib/stackdepot: allow optional init and stack_table allocation by
> > kvmalloc()") made stack_depot_init() can be called later.
> >
> > With Oliver's patch applied, stack_depot_init() can be called in
> > contexts below:
> >
> >   1) only memblock available (for kasan)
> >   2) only buddy available, vmalloc/memblock unavailable (for boot caches)
> >   3) buddy/slab available, vmalloc/memblock unavailable (vmap_area cache)
> >   4) buddy/slab/vmalloc available, memblock unavailable (other caches)
> >
> > SLUB supports enabling debugging for specific cache by passing
> > slub_debug boot parameter. As slab caches can be created in
> > various context, stack_depot_init() should consider all contexts above.
> >
> > Writing this, I realized my patch does not handle case 3).. I'll send v3.
> >
> > [1] https://lore.kernel.org/linux-mm/YhoakP7Kih%2FYUgiN@ip-172-31-19-208.ap-northeast-1.compute.internal/T/#t
> > [2] https://git.kernel.org/pub/scm/linux/kernel/git/vbabka/linux.git/log/?h=slub-stackdepot-v1
> >
> > > Do you have a stacktrace?
> >
> > Yeah, here:
> >
> > You can reproduce this on vbabka's slab-stackdepot-v1 branch [2] with
> > slub_debug=U, and CONFIG_STACKDEPOT_ALWAYS_INIT=n
> >
> [...]
> > [    0.000000] Call trace:
> > [    0.000000]  __memset+0x16c/0x188
> > [    0.000000]  stack_depot_init+0xc8/0x100
> > [    0.000000]  __kmem_cache_create+0x454/0x570
> > [    0.000000]  create_boot_cache+0xa0/0xe0
> 
> I think even before this point you have all the information required
> to determine if stackdepot will be required. It's available after
> setup_slub_debug().
> 
> So why can't you just call stack_depot_init() somewhere else and avoid
> all this complexity?
>

You are right. That is much simpler and sounds good, as SLUB does not
support enabling the SLAB_STORE_USER flag after the system is up.

I'll try this approach.
Thank you!

> > [    0.000000]  kmem_cache_init+0xf8/0x204
> > [    0.000000]  start_kernel+0x3ec/0x668
> > [    0.000000]  __primary_switched+0xc0/0xc8
> > [    0.000000] Code: 91010108 54ffff4a 8b040108 cb050042 (d50b7428)
> > [    0.000000] ---[ end trace 0000000000000000 ]---
> > [    0.000000] Kernel panic - not syncing: Attempted to kill the idle task!
> > [    0.000000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]---
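
A very rough sketch of the direction suggested above (hypothetical code, not
the v3 that was actually posted): since slub_debug= is parsed by an
early_param, whether any boot cache will need SLAB_STORE_USER is already
known early in start_kernel(), so stack depot could be initialized while
memblock_alloc() still works, for example:

        /*
         * Hypothetical placement, somewhere in start_kernel() before
         * mm_init() hands memblock's memory over to the buddy allocator.
         * slub_debug_store_user_requested() is an assumed helper set up by
         * setup_slub_debug(); it does not exist in the posted patches.
         */
        if (IS_ENABLED(CONFIG_STACKDEPOT) && slub_debug_store_user_requested())
                stack_depot_init();     /* still takes the memblock_alloc() path */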

Patch

diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index bf5ba9af0500..606f80ae2bf7 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -73,6 +73,14 @@  static int next_slab_inited;
 static size_t depot_offset;
 static DEFINE_RAW_SPINLOCK(depot_lock);
 
+static unsigned int stack_hash_size = (1 << CONFIG_STACK_HASH_ORDER);
+static inline unsigned int stack_hash_mask(void)
+{
+	return stack_hash_size - 1;
+}
+
+#define STACK_HASH_SEED 0x9747b28c
+
 static bool init_stack_slab(void **prealloc)
 {
 	if (!*prealloc)
@@ -142,10 +150,6 @@  depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
 	return stack;
 }
 
-#define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER)
-#define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
-#define STACK_HASH_SEED 0x9747b28c
-
 static bool stack_depot_disable;
 static struct stack_record **stack_table;
 
@@ -172,18 +176,28 @@  __ref int stack_depot_init(void)
 
 	mutex_lock(&stack_depot_init_mutex);
 	if (!stack_depot_disable && !stack_table) {
-		size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
+		size_t size = (stack_hash_size * sizeof(struct stack_record *));
 		int i;
 
 		if (slab_is_available()) {
 			pr_info("Stack Depot allocating hash table with kvmalloc\n");
 			stack_table = kvmalloc(size, GFP_KERNEL);
+		} else if (totalram_pages() > 0) {
+			/* Reduce size because vmalloc may be unavailable */
+			size = min(size, PAGE_SIZE << (MAX_ORDER - 1));
+			stack_hash_size = size / sizeof(struct stack_record *);
+
+			pr_info("Stack Depot allocating hash table with __get_free_pages\n");
+			stack_table = (struct stack_record **)
+				      __get_free_pages(GFP_KERNEL, get_order(size));
 		} else {
 			pr_info("Stack Depot allocating hash table with memblock_alloc\n");
 			stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
 		}
+
 		if (stack_table) {
-			for (i = 0; i < STACK_HASH_SIZE;  i++)
+			pr_info("Stack Depot hash table size=%u\n", stack_hash_size);
+			for (i = 0; i < stack_hash_size;  i++)
 				stack_table[i] = NULL;
 		} else {
 			pr_err("Stack Depot hash table allocation failed, disabling\n");
@@ -363,7 +377,7 @@  depot_stack_handle_t __stack_depot_save(unsigned long *entries,
 		goto fast_exit;
 
 	hash = hash_stack(entries, nr_entries);
-	bucket = &stack_table[hash & STACK_HASH_MASK];
+	bucket = &stack_table[hash & stack_hash_mask()];
 
 	/*
 	 * Fast path: look the stack trace up without locking.
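
As an aside, a minimal sketch of how a stack depot consumer ends up exercising
this hash table (illustration only; the SLUB integration in Oliver's series is
more involved than this):

        unsigned long entries[16], *stored;
        unsigned int nr;
        depot_stack_handle_t handle;

        nr = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
        /* hashes the trace and looks it up in (or adds it to) stack_table */
        handle = stack_depot_save(entries, nr, GFP_KERNEL);
        /* later, the compact handle is expanded back into the saved entries */
        nr = stack_depot_fetch(handle, &stored);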