diff mbox series

[v2,1/6] lib/stackdepot: allow requesting early initialization dynamically

Message ID 20220302173122.11939-2-vbabka@suse.cz (mailing list archive)
State New
Headers show
Series SLUB debugfs improvements based on stackdepot | expand

Commit Message

Vlastimil Babka March 2, 2022, 5:31 p.m. UTC
In a later patch we want to add stackdepot support for object owner
tracking in slub caches, which is enabled by slub_debug boot parameter.
This creates a bootstrap problem as some caches are created early in
boot when slab_is_available() is false and thus stack_depot_init()
tries to use memblock. But, as reported by Hyeonggon Yoo [1] we are
already beyond memblock_free_all(). Ideally memblock allocation should
fail, yet it succeeds, but later the system crashes, which is a
separately handled issue.

To resolve this bootstrap issue in a robust way, this patch adds another
way to request stack_depot_early_init(), which happens at a well-defined
point of time. In addition to build-time CONFIG_STACKDEPOT_ALWAYS_INIT,
code that's e.g. processing boot parameters (which happens early enough)
can set a new variable stack_depot_want_early_init as true.

In this patch we also convert page_owner to this approach. While it
doesn't have the bootstrap issue as slub, it's also a functionality
enabled by a boot param and can thus request stack_depot_early_init()
with memblock allocation instead of later initialization with
kvmalloc().

[1] https://lore.kernel.org/all/YhnUcqyeMgCrWZbd@ip-172-31-19-208.ap-northeast-1.compute.internal/

Reported-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/stackdepot.h | 16 ++++++++++++++--
 lib/stackdepot.c           |  2 ++
 mm/page_owner.c            |  9 ++++++---
 3 files changed, 22 insertions(+), 5 deletions(-)

Comments

Marco Elver March 2, 2022, 5:47 p.m. UTC | #1
On Wed, Mar 02, 2022 at 06:31PM +0100, Vlastimil Babka wrote:
> In a later patch we want to add stackdepot support for object owner
> tracking in slub caches, which is enabled by slub_debug boot parameter.
> This creates a bootstrap problem as some caches are created early in
> boot when slab_is_available() is false and thus stack_depot_init()
> tries to use memblock. But, as reported by Hyeonggon Yoo [1] we are
> already beyond memblock_free_all(). Ideally memblock allocation should
> fail, yet it succeeds, but later the system crashes, which is a
> separately handled issue.
> 
> To resolve this boostrap issue in a robust way, this patch adds another
> way to request stack_depot_early_init(), which happens at a well-defined
> point of time. In addition to build-time CONFIG_STACKDEPOT_ALWAYS_INIT,
> code that's e.g. processing boot parmeters (which happens early enough)
> can set a new variable stack_depot_want_early_init as true.

Agree, I think this is the best solution.

> In this patch we also convert page_owner to this approach. While it
> doesn't have the bootstrap issue as slub, it's also a functionality
> enabled by a boot param and can thus request stack_depot_early_init()
> with memblock allocation instead of later initialization with
> kvmalloc().
> 
> [1] https://lore.kernel.org/all/YhnUcqyeMgCrWZbd@ip-172-31-19-208.ap-northeast-1.compute.internal/
> 
> Reported-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  include/linux/stackdepot.h | 16 ++++++++++++++--
>  lib/stackdepot.c           |  2 ++
>  mm/page_owner.c            |  9 ++++++---
>  3 files changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
> index 17f992fe6355..1217ba2b636e 100644
> --- a/include/linux/stackdepot.h
> +++ b/include/linux/stackdepot.h
> @@ -15,6 +15,8 @@
>  
>  typedef u32 depot_stack_handle_t;
>  
> +extern bool stack_depot_want_early_init;
> +
>  depot_stack_handle_t __stack_depot_save(unsigned long *entries,
>  					unsigned int nr_entries,
>  					gfp_t gfp_flags, bool can_alloc);
> @@ -26,11 +28,21 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
>   * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot
>   * enabled as part of mm_init(), for subsystems where it's known at compile time
>   * that stack depot will be used.
> + *
> + * Another alternative is to set stack_depot_want_early_init as true, when the
> + * decision to use stack depot is taken e.g. when evaluating kernel boot
> + * parameters, which precedes the call to stack_depot_want_early_init().
>   */
>  int stack_depot_init(void);

I think for stack_depot_init() it might now be convenient to provide a
no-op version automatically if !STACKDEPOT, which would avoid
some 'if (.. && IS_ENABLED(CONFIG_STACKDEPOT))' in a later patch.

Similarly, for stack_depot_want_early_init, where instead you could
simply provide stack_depot_want_early_init() as a function, which simply
sets a boolean __stack_depot_want_early_init. If !STACKDEPOT, it'll also
just be a no-op function.

>  
> -#ifdef CONFIG_STACKDEPOT_ALWAYS_INIT
> -static inline int stack_depot_early_init(void)	{ return stack_depot_init(); }
> +#ifdef CONFIG_STACKDEPOT
> +static inline int stack_depot_early_init(void)
> +{
> +	if (IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT)
> +	    || stack_depot_want_early_init)
> +		return stack_depot_init();
> +	return 0;
> +}
>  #else
>  static inline int stack_depot_early_init(void)	{ return 0; }
>  #endif
> diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> index bf5ba9af0500..02e2b5fcbf3b 100644
> --- a/lib/stackdepot.c
> +++ b/lib/stackdepot.c
> @@ -66,6 +66,8 @@ struct stack_record {
>  	unsigned long entries[];	/* Variable-sized array of entries. */
>  };
>  
> +bool stack_depot_want_early_init = false;
> +

This can be __initdata, right?

>  static void *stack_slabs[STACK_ALLOC_MAX_SLABS];
>  
>  static int depot_index;
> diff --git a/mm/page_owner.c b/mm/page_owner.c
> index 99e360df9465..40dce2b81d13 100644
> --- a/mm/page_owner.c
> +++ b/mm/page_owner.c
> @@ -42,7 +42,12 @@ static void init_early_allocated_pages(void);
>  
>  static int __init early_page_owner_param(char *buf)
>  {
> -	return kstrtobool(buf, &page_owner_enabled);
> +	int ret = kstrtobool(buf, &page_owner_enabled);
> +
> +	if (page_owner_enabled)
> +		stack_depot_want_early_init = true;
> +
> +	return ret;
>  }
>  early_param("page_owner", early_page_owner_param);
>  
> @@ -80,8 +85,6 @@ static __init void init_page_owner(void)
>  	if (!page_owner_enabled)
>  		return;
>  
> -	stack_depot_init();
> -
>  	register_dummy_stack();
>  	register_failure_stack();
>  	register_early_stack();
> -- 
> 2.35.1
>
Mike Rapoport March 2, 2022, 6:01 p.m. UTC | #2
On Wed, Mar 02, 2022 at 06:31:17PM +0100, Vlastimil Babka wrote:
> In a later patch we want to add stackdepot support for object owner
> tracking in slub caches, which is enabled by slub_debug boot parameter.
> This creates a bootstrap problem as some caches are created early in
> boot when slab_is_available() is false and thus stack_depot_init()
> tries to use memblock. But, as reported by Hyeonggon Yoo [1] we are
> already beyond memblock_free_all(). Ideally memblock allocation should
> fail, yet it succeeds, but later the system crashes, which is a
> separately handled issue.
> 
> To resolve this boostrap issue in a robust way, this patch adds another
> way to request stack_depot_early_init(), which happens at a well-defined
> point of time. In addition to build-time CONFIG_STACKDEPOT_ALWAYS_INIT,
> code that's e.g. processing boot parmeters (which happens early enough)
> can set a new variable stack_depot_want_early_init as true.
> 
> In this patch we also convert page_owner to this approach. While it
> doesn't have the bootstrap issue as slub, it's also a functionality
> enabled by a boot param and can thus request stack_depot_early_init()
> with memblock allocation instead of later initialization with
> kvmalloc().
> 
> [1] https://lore.kernel.org/all/YhnUcqyeMgCrWZbd@ip-172-31-19-208.ap-northeast-1.compute.internal/
> 
> Reported-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  include/linux/stackdepot.h | 16 ++++++++++++++--
>  lib/stackdepot.c           |  2 ++
>  mm/page_owner.c            |  9 ++++++---
>  3 files changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
> index 17f992fe6355..1217ba2b636e 100644
> --- a/include/linux/stackdepot.h
> +++ b/include/linux/stackdepot.h
> @@ -15,6 +15,8 @@
>  
>  typedef u32 depot_stack_handle_t;
>  
> +extern bool stack_depot_want_early_init;
> +
>  depot_stack_handle_t __stack_depot_save(unsigned long *entries,
>  					unsigned int nr_entries,
>  					gfp_t gfp_flags, bool can_alloc);
> @@ -26,11 +28,21 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
>   * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot
>   * enabled as part of mm_init(), for subsystems where it's known at compile time
>   * that stack depot will be used.
> + *
> + * Another alternative is to set stack_depot_want_early_init as true, when the
> + * decision to use stack depot is taken e.g. when evaluating kernel boot
> + * parameters, which precedes the call to stack_depot_want_early_init().
>   */
>  int stack_depot_init(void);
>  
> -#ifdef CONFIG_STACKDEPOT_ALWAYS_INIT
> -static inline int stack_depot_early_init(void)	{ return stack_depot_init(); }
> +#ifdef CONFIG_STACKDEPOT
> +static inline int stack_depot_early_init(void)
> +{
> +	if (IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT)
> +	    || stack_depot_want_early_init)
> +		return stack_depot_init();
> +	return 0;
> +}

I'd also suggest splitting memblock allocation from stack_depot_init() to
stack_depot_early_init().

>  #else
>  static inline int stack_depot_early_init(void)	{ return 0; }
>  #endif
> diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> index bf5ba9af0500..02e2b5fcbf3b 100644
> --- a/lib/stackdepot.c
> +++ b/lib/stackdepot.c
> @@ -66,6 +66,8 @@ struct stack_record {
>  	unsigned long entries[];	/* Variable-sized array of entries. */
>  };
>  
> +bool stack_depot_want_early_init = false;
> +
>  static void *stack_slabs[STACK_ALLOC_MAX_SLABS];
>  
>  static int depot_index;
> diff --git a/mm/page_owner.c b/mm/page_owner.c
> index 99e360df9465..40dce2b81d13 100644
> --- a/mm/page_owner.c
> +++ b/mm/page_owner.c
> @@ -42,7 +42,12 @@ static void init_early_allocated_pages(void);
>  
>  static int __init early_page_owner_param(char *buf)
>  {
> -	return kstrtobool(buf, &page_owner_enabled);
> +	int ret = kstrtobool(buf, &page_owner_enabled);
> +
> +	if (page_owner_enabled)
> +		stack_depot_want_early_init = true;
> +
> +	return ret;
>  }
>  early_param("page_owner", early_page_owner_param);
>  
> @@ -80,8 +85,6 @@ static __init void init_page_owner(void)
>  	if (!page_owner_enabled)
>  		return;
>  
> -	stack_depot_init();
> -
>  	register_dummy_stack();
>  	register_failure_stack();
>  	register_early_stack();
> -- 
> 2.35.1
>
Vlastimil Babka March 2, 2022, 6:02 p.m. UTC | #3
On 3/2/22 18:47, Marco Elver wrote:
> On Wed, Mar 02, 2022 at 06:31PM +0100, Vlastimil Babka wrote:
>> In a later patch we want to add stackdepot support for object owner
>> tracking in slub caches, which is enabled by slub_debug boot parameter.
>> This creates a bootstrap problem as some caches are created early in
>> boot when slab_is_available() is false and thus stack_depot_init()
>> tries to use memblock. But, as reported by Hyeonggon Yoo [1] we are
>> already beyond memblock_free_all(). Ideally memblock allocation should
>> fail, yet it succeeds, but later the system crashes, which is a
>> separately handled issue.
>> 
>> To resolve this boostrap issue in a robust way, this patch adds another
>> way to request stack_depot_early_init(), which happens at a well-defined
>> point of time. In addition to build-time CONFIG_STACKDEPOT_ALWAYS_INIT,
>> code that's e.g. processing boot parmeters (which happens early enough)
>> can set a new variable stack_depot_want_early_init as true.
> 
> Agree, I think this is the best solution.

Thanks.

>> In this patch we also convert page_owner to this approach. While it
>> doesn't have the bootstrap issue as slub, it's also a functionality
>> enabled by a boot param and can thus request stack_depot_early_init()
>> with memblock allocation instead of later initialization with
>> kvmalloc().
>> 
>> [1] https://lore.kernel.org/all/YhnUcqyeMgCrWZbd@ip-172-31-19-208.ap-northeast-1.compute.internal/
>> 
>> Reported-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
>> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
>> ---
>>  include/linux/stackdepot.h | 16 ++++++++++++++--
>>  lib/stackdepot.c           |  2 ++
>>  mm/page_owner.c            |  9 ++++++---
>>  3 files changed, 22 insertions(+), 5 deletions(-)
>> 
>> diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
>> index 17f992fe6355..1217ba2b636e 100644
>> --- a/include/linux/stackdepot.h
>> +++ b/include/linux/stackdepot.h
>> @@ -15,6 +15,8 @@
>>  
>>  typedef u32 depot_stack_handle_t;
>>  
>> +extern bool stack_depot_want_early_init;
>> +
>>  depot_stack_handle_t __stack_depot_save(unsigned long *entries,
>>  					unsigned int nr_entries,
>>  					gfp_t gfp_flags, bool can_alloc);
>> @@ -26,11 +28,21 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
>>   * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot
>>   * enabled as part of mm_init(), for subsystems where it's known at compile time
>>   * that stack depot will be used.
>> + *
>> + * Another alternative is to set stack_depot_want_early_init as true, when the
>> + * decision to use stack depot is taken e.g. when evaluating kernel boot
>> + * parameters, which precedes the call to stack_depot_want_early_init().
>>   */
>>  int stack_depot_init(void);
> 
> I think for stack_depot_init() it might now be convenient to provide a
> no-op version automatically if !STACKDEPOT, which would avoid
> some 'if (.. && IS_ENABLED(CONFIG_STACKDEPOT))' in a later patch.
> 
> Similarly, for stack_depot_want_early_init, where instead you could
> simply provide stack_depot_want_early_init() as a function, which simply
> sets a boolean __stack_depot_want_early_init. If !STACKDEPOT, it'll also
> just be a no-op function.

Yeah, makes sense. I guess I have patch 3/6 wrong now anyway as with
!STACKDEPOT it should fail linking due to missing stack_depot_want_early_init...

>>  
>> -#ifdef CONFIG_STACKDEPOT_ALWAYS_INIT
>> -static inline int stack_depot_early_init(void)	{ return stack_depot_init(); }
>> +#ifdef CONFIG_STACKDEPOT
>> +static inline int stack_depot_early_init(void)
>> +{
>> +	if (IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT)
>> +	    || stack_depot_want_early_init)
>> +		return stack_depot_init();
>> +	return 0;
>> +}
>>  #else
>>  static inline int stack_depot_early_init(void)	{ return 0; }
>>  #endif
>> diff --git a/lib/stackdepot.c b/lib/stackdepot.c
>> index bf5ba9af0500..02e2b5fcbf3b 100644
>> --- a/lib/stackdepot.c
>> +++ b/lib/stackdepot.c
>> @@ -66,6 +66,8 @@ struct stack_record {
>>  	unsigned long entries[];	/* Variable-sized array of entries. */
>>  };
>>  
>> +bool stack_depot_want_early_init = false;
>> +
> 
> This can be __initdata, right?
 
I initially thought so too, but in include/linux/init.h I found
 * Don't forget to initialize data not at file scope, i.e. within a function,
 * as gcc otherwise puts the data into the bss section and not into the init
 * section.
But maybe that's just outdated as everyone seems to init them at file scope.

>>  static void *stack_slabs[STACK_ALLOC_MAX_SLABS];
>>  
>>  static int depot_index;
>> diff --git a/mm/page_owner.c b/mm/page_owner.c
>> index 99e360df9465..40dce2b81d13 100644
>> --- a/mm/page_owner.c
>> +++ b/mm/page_owner.c
>> @@ -42,7 +42,12 @@ static void init_early_allocated_pages(void);
>>  
>>  static int __init early_page_owner_param(char *buf)
>>  {
>> -	return kstrtobool(buf, &page_owner_enabled);
>> +	int ret = kstrtobool(buf, &page_owner_enabled);
>> +
>> +	if (page_owner_enabled)
>> +		stack_depot_want_early_init = true;
>> +
>> +	return ret;
>>  }
>>  early_param("page_owner", early_page_owner_param);
>>  
>> @@ -80,8 +85,6 @@ static __init void init_page_owner(void)
>>  	if (!page_owner_enabled)
>>  		return;
>>  
>> -	stack_depot_init();
>> -
>>  	register_dummy_stack();
>>  	register_failure_stack();
>>  	register_early_stack();
>> -- 
>> 2.35.1
>>
Marco Elver March 2, 2022, 6:15 p.m. UTC | #4
On Wed, 2 Mar 2022 at 19:02, Vlastimil Babka <vbabka@suse.cz> wrote:
[...]
> > Similarly, for stack_depot_want_early_init, where instead you could
> > simply provide stack_depot_want_early_init() as a function, which simply
> > sets a boolean __stack_depot_want_early_init. If !STACKDEPOT, it'll also
> > just be a no-op function.
>
> Yeah, makes sense. I guess I have patch 3/6 wrong now anyway as with
> !STACKDEPOT it should fail linking due to missing stack_depot_want_early_init...

Right. It probably still worked because the compiler likely optimizes
out the dead call, but you never know...

> >> +bool stack_depot_want_early_init = false;
> >> +
> >
> > This can be __initdata, right?
>
> I initially thought so too, but in include/linux/init.h found
>  * Don't forget to initialize data not at file scope, i.e. within a function,
>  * as gcc otherwise puts the data into the bss section and not into the init
>  * section.
> But maybe that's just outdated as everyone seems to init them at file scope.

I think that comment is just about static variables inside functions?
Here it's at file scope, so that caveat shouldn't apply. As an aside,
you could omit '= false' because it'd zero-init by default.

Thanks,
-- Marco
diff mbox series

Patch

diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 17f992fe6355..1217ba2b636e 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -15,6 +15,8 @@ 
 
 typedef u32 depot_stack_handle_t;
 
+extern bool stack_depot_want_early_init;
+
 depot_stack_handle_t __stack_depot_save(unsigned long *entries,
 					unsigned int nr_entries,
 					gfp_t gfp_flags, bool can_alloc);
@@ -26,11 +28,21 @@  depot_stack_handle_t __stack_depot_save(unsigned long *entries,
  * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot
  * enabled as part of mm_init(), for subsystems where it's known at compile time
  * that stack depot will be used.
+ *
+ * Another alternative is to set stack_depot_want_early_init as true, when the
+ * decision to use stack depot is taken e.g. when evaluating kernel boot
+ * parameters, which precedes the call to stack_depot_early_init().
  */
 int stack_depot_init(void);
 
-#ifdef CONFIG_STACKDEPOT_ALWAYS_INIT
-static inline int stack_depot_early_init(void)	{ return stack_depot_init(); }
+#ifdef CONFIG_STACKDEPOT
+static inline int stack_depot_early_init(void)
+{
+	if (IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT)
+	    || stack_depot_want_early_init)
+		return stack_depot_init();
+	return 0;
+}
 #else
 static inline int stack_depot_early_init(void)	{ return 0; }
 #endif
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index bf5ba9af0500..02e2b5fcbf3b 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -66,6 +66,8 @@  struct stack_record {
 	unsigned long entries[];	/* Variable-sized array of entries. */
 };
 
+bool stack_depot_want_early_init = false;
+
 static void *stack_slabs[STACK_ALLOC_MAX_SLABS];
 
 static int depot_index;
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 99e360df9465..40dce2b81d13 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -42,7 +42,12 @@  static void init_early_allocated_pages(void);
 
 static int __init early_page_owner_param(char *buf)
 {
-	return kstrtobool(buf, &page_owner_enabled);
+	int ret = kstrtobool(buf, &page_owner_enabled);
+
+	if (page_owner_enabled)
+		stack_depot_want_early_init = true;
+
+	return ret;
 }
 early_param("page_owner", early_page_owner_param);
 
@@ -80,8 +85,6 @@  static __init void init_page_owner(void)
 	if (!page_owner_enabled)
 		return;
 
-	stack_depot_init();
-
 	register_dummy_stack();
 	register_failure_stack();
 	register_early_stack();