diff mbox series

[v2,6/7] mm, slab: add static key for should_failslab()

Message ID 20240620-fault-injection-statickeys-v2-6-e23947d3d84b@suse.cz (mailing list archive)
State Changes Requested
Headers show
Series static key support for error injection functions | expand

Commit Message

Vlastimil Babka June 19, 2024, 10:49 p.m. UTC
Since commit 4f6923fbb352 ("mm: make should_failslab always available for
fault injection") should_failslab() is unconditionally a noinline
function. This adds visible overhead to the slab allocation hotpath,
even if the function is empty. With CONFIG_FAILSLAB=y there's additional
overhead, even when the functionality is not activated by a boot
parameter or via debugfs.

The overhead can be eliminated with a static key around the callsite.
Fault injection and error injection frameworks including bpf can now be
told that this function has a static key associated, and are able to
enable and disable it accordingly.

Additionally, compile out all relevant code if neither CONFIG_FAILSLAB
nor CONFIG_FUNCTION_ERROR_INJECTION is enabled. When only the latter is
not enabled, make should_failslab() static inline instead of noinline.

To demonstrate the reduced overhead of calling an empty
should_failslab() function, a kernel build with
CONFIG_FUNCTION_ERROR_INJECTION enabled but CONFIG_FAILSLAB disabled,
and CPU mitigations enabled, was used in a qemu-kvm (virtme-ng) on AMD
Ryzen 7 2700 machine, and execution of a program trying to open() a
non-existent file was measured 3 times:

    for (int i = 0; i < 10000000; i++) {
        open("non_existent", O_RDONLY);
    }

After this patch, the measured real time was 4.3% smaller. Using perf
profiling it was verified that should_failslab was gone from the
profile.

With CONFIG_FAILSLAB also enabled, the patched kernel performace was
unaffected, as expected, while unpatched kernel's performance was worse,
resulting in the relative speedup being 10.5%. This means it no longer
needs to be an option suitable only for debug kernel builds.

Acked-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/fault-inject.h |  4 +++-
 mm/failslab.c                |  2 +-
 mm/slab.h                    |  3 +++
 mm/slub.c                    | 30 +++++++++++++++++++++++++++---
 4 files changed, 34 insertions(+), 5 deletions(-)

Comments

Vlastimil Babka June 25, 2024, 2:24 p.m. UTC | #1
On 6/20/24 12:49 AM, Vlastimil Babka wrote:
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3874,13 +3874,37 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
>  			0, sizeof(void *));
>  }
>  
> -noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> +#if defined(CONFIG_FUNCTION_ERROR_INJECTION) || defined(CONFIG_FAILSLAB)
> +DEFINE_STATIC_KEY_FALSE(should_failslab_active);
> +
> +#ifdef CONFIG_FUNCTION_ERROR_INJECTION
> +noinline
> +#else
> +static inline
> +#endif
> +int should_failslab(struct kmem_cache *s, gfp_t gfpflags)

Note that it has been found that (regardless of this series) gcc may clone
this to a should_failslab.constprop.0 in case the function is empty because
__should_failslab is compiled out (CONFIG_FAILSLAB=n). The "noinline"
doesn't help - the original function stays but only the clone is actually
being called, thus overriding the original function achieves nothing, see:
https://github.com/bpftrace/bpftrace/issues/3258

So we could use __noclone to prevent that, and I was thinking by adding
something this to error-injection.h:

#ifdef CONFIG_FUNCTION_ERROR_INJECTION
#define __error_injectable(alternative)		noinline __noclone
#else
#define __error_injectable(alternative)		alternative
#endif

and the usage here would be:

__error_injectable(static inline) int should_failslab(...)

Does that look acceptable, or is it too confusing that "static inline" is
specified there as the storage class to use when error injection is actually
disabled?

>  {
>  	if (__should_failslab(s, gfpflags))
>  		return -ENOMEM;
>  	return 0;
>  }
> -ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
> +ALLOW_ERROR_INJECTION_KEY(should_failslab, ERRNO, &should_failslab_active);
> +
> +static __always_inline int should_failslab_wrapped(struct kmem_cache *s,
> +						   gfp_t gfp)
> +{
> +	if (static_branch_unlikely(&should_failslab_active))
> +		return should_failslab(s, gfp);
> +	else
> +		return 0;
> +}
> +#else
> +static __always_inline int should_failslab_wrapped(struct kmem_cache *s,
> +						   gfp_t gfp)
> +{
> +	return false;
> +}
> +#endif
>  
>  static __fastpath_inline
>  struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
> @@ -3889,7 +3913,7 @@ struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
>  
>  	might_alloc(flags);
>  
> -	if (unlikely(should_failslab(s, flags)))
> +	if (should_failslab_wrapped(s, flags))
>  		return NULL;
>  
>  	return s;
>
Alexei Starovoitov June 25, 2024, 5:12 p.m. UTC | #2
On Tue, Jun 25, 2024 at 7:24 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 6/20/24 12:49 AM, Vlastimil Babka wrote:
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -3874,13 +3874,37 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
> >                       0, sizeof(void *));
> >  }
> >
> > -noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> > +#if defined(CONFIG_FUNCTION_ERROR_INJECTION) || defined(CONFIG_FAILSLAB)
> > +DEFINE_STATIC_KEY_FALSE(should_failslab_active);
> > +
> > +#ifdef CONFIG_FUNCTION_ERROR_INJECTION
> > +noinline
> > +#else
> > +static inline
> > +#endif
> > +int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>
> Note that it has been found that (regardless of this series) gcc may clone
> this to a should_failslab.constprop.0 in case the function is empty because
> __should_failslab is compiled out (CONFIG_FAILSLAB=n). The "noinline"
> doesn't help - the original function stays but only the clone is actually
> being called, thus overriding the original function achieves nothing, see:
> https://github.com/bpftrace/bpftrace/issues/3258
>
> So we could use __noclone to prevent that, and I was thinking by adding
> something this to error-injection.h:
>
> #ifdef CONFIG_FUNCTION_ERROR_INJECTION
> #define __error_injectable(alternative)         noinline __noclone

To prevent such compiler transformations we typically use
__used noinline

We didn't have a need for __noclone yet. If __used is enough I'd stick to that.
Vlastimil Babka June 25, 2024, 5:53 p.m. UTC | #3
On 6/25/24 7:12 PM, Alexei Starovoitov wrote:
> On Tue, Jun 25, 2024 at 7:24 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>>
>> On 6/20/24 12:49 AM, Vlastimil Babka wrote:
>> > --- a/mm/slub.c
>> > +++ b/mm/slub.c
>> > @@ -3874,13 +3874,37 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
>> >                       0, sizeof(void *));
>> >  }
>> >
>> > -noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>> > +#if defined(CONFIG_FUNCTION_ERROR_INJECTION) || defined(CONFIG_FAILSLAB)
>> > +DEFINE_STATIC_KEY_FALSE(should_failslab_active);
>> > +
>> > +#ifdef CONFIG_FUNCTION_ERROR_INJECTION
>> > +noinline
>> > +#else
>> > +static inline
>> > +#endif
>> > +int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>>
>> Note that it has been found that (regardless of this series) gcc may clone
>> this to a should_failslab.constprop.0 in case the function is empty because
>> __should_failslab is compiled out (CONFIG_FAILSLAB=n). The "noinline"
>> doesn't help - the original function stays but only the clone is actually
>> being called, thus overriding the original function achieves nothing, see:
>> https://github.com/bpftrace/bpftrace/issues/3258
>>
>> So we could use __noclone to prevent that, and I was thinking by adding
>> something this to error-injection.h:
>>
>> #ifdef CONFIG_FUNCTION_ERROR_INJECTION
>> #define __error_injectable(alternative)         noinline __noclone
> 
> To prevent such compiler transformations we typically use
> __used noinline
> 
> We didn't have a need for __noclone yet. If __used is enough I'd stick to that.

__used made no difference here (gcc 13.3), __noclone did
diff mbox series

Patch

diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index cfe75cc1bac4..0d0fa94dc1c8 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -107,9 +107,11 @@  static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 }
 #endif /* CONFIG_FAIL_PAGE_ALLOC */
 
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
 int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
+#endif
 #ifdef CONFIG_FAILSLAB
-extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
+bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
 #else
 static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
 {
diff --git a/mm/failslab.c b/mm/failslab.c
index ffc420c0e767..878fd08e5dac 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -9,7 +9,7 @@  static struct {
 	bool ignore_gfp_reclaim;
 	bool cache_filter;
 } failslab = {
-	.attr = FAULT_ATTR_INITIALIZER,
+	.attr = FAULT_ATTR_INITIALIZER_KEY(&should_failslab_active.key),
 	.ignore_gfp_reclaim = true,
 	.cache_filter = false,
 };
diff --git a/mm/slab.h b/mm/slab.h
index 5f8f47c5bee0..792e19cb37b8 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -11,6 +11,7 @@ 
 #include <linux/memcontrol.h>
 #include <linux/kfence.h>
 #include <linux/kasan.h>
+#include <linux/jump_label.h>
 
 /*
  * Internal slab definitions
@@ -160,6 +161,8 @@  static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)
  */
 #define slab_page(s) folio_page(slab_folio(s), 0)
 
+DECLARE_STATIC_KEY_FALSE(should_failslab_active);
+
 /*
  * If network-based swap is enabled, sl*b must keep track of whether pages
  * were allocated from pfmemalloc reserves.
diff --git a/mm/slub.c b/mm/slub.c
index 0809760cf789..11980aa94631 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3874,13 +3874,37 @@  static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
 			0, sizeof(void *));
 }
 
-noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
+#if defined(CONFIG_FUNCTION_ERROR_INJECTION) || defined(CONFIG_FAILSLAB)
+DEFINE_STATIC_KEY_FALSE(should_failslab_active);
+
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+noinline
+#else
+static inline
+#endif
+int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
 {
 	if (__should_failslab(s, gfpflags))
 		return -ENOMEM;
 	return 0;
 }
-ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
+ALLOW_ERROR_INJECTION_KEY(should_failslab, ERRNO, &should_failslab_active);
+
+static __always_inline int should_failslab_wrapped(struct kmem_cache *s,
+						   gfp_t gfp)
+{
+	if (static_branch_unlikely(&should_failslab_active))
+		return should_failslab(s, gfp);
+	else
+		return 0;
+}
+#else
+static __always_inline int should_failslab_wrapped(struct kmem_cache *s,
+						   gfp_t gfp)
+{
+	return false;
+}
+#endif
 
 static __fastpath_inline
 struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
@@ -3889,7 +3913,7 @@  struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
 
 	might_alloc(flags);
 
-	if (unlikely(should_failslab(s, flags)))
+	if (should_failslab_wrapped(s, flags))
 		return NULL;
 
 	return s;