Message ID | 20231120-slab-remove-slab-v2-20-9c9c70177183@suse.cz (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | remove the SLAB allocator | expand |
On Mon, Nov 20, 2023 at 07:34:31PM +0100, Vlastimil Babka wrote: > With allocation fastpaths no longer divided between two .c files, we > have better inlining, however checking the disassembly of > kmem_cache_alloc() reveals we can do better to make the fastpaths > smaller and move the less common situations out of line or to separate > functions, to reduce instruction cache pressure. > > - split memcg pre/post alloc hooks to inlined checks that use likely() > to assume there will be no objcg handling necessary, and non-inline > functions doing the actual handling > > - add some more likely/unlikely() to pre/post alloc hooks to indicate > which scenarios should be out of line > > - change gfp_allowed_mask handling in slab_post_alloc_hook() so the > code can be optimized away when kasan/kmsan/kmemleak is configured out > > bloat-o-meter shows: > add/remove: 4/2 grow/shrink: 1/8 up/down: 521/-2924 (-2403) > Function old new delta > __memcg_slab_post_alloc_hook - 461 +461 > kmem_cache_alloc_bulk 775 791 +16 > __pfx_should_failslab.constprop - 16 +16 > __pfx___memcg_slab_post_alloc_hook - 16 +16 > should_failslab.constprop - 12 +12 > __pfx_memcg_slab_post_alloc_hook 16 - -16 > kmem_cache_alloc_lru 1295 1023 -272 > kmem_cache_alloc_node 1118 817 -301 > kmem_cache_alloc 1076 772 -304 > kmalloc_node_trace 1149 838 -311 > kmalloc_trace 1102 789 -313 > __kmalloc_node_track_caller 1393 1080 -313 > __kmalloc_node 1397 1082 -315 > __kmalloc 1374 1059 -315 > memcg_slab_post_alloc_hook 464 - -464 > > Note that gcc still decided to inline __memcg_pre_alloc_hook(), but the > code is out of line. Forcing noinline did not improve the results. As a > result the fastpaths are shorter and overal code size is reduced. 
> > Signed-off-by: Vlastimil Babka <vbabka@suse.cz> > --- > mm/slub.c | 89 ++++++++++++++++++++++++++++++++++++++------------------------- > 1 file changed, 54 insertions(+), 35 deletions(-) > > diff --git a/mm/slub.c b/mm/slub.c > index 5683f1d02e4f..77d259f3d592 100644 > --- a/mm/slub.c > +++ b/mm/slub.c > @@ -1866,25 +1866,17 @@ static inline size_t obj_full_size(struct kmem_cache *s) > /* > * Returns false if the allocation should fail. > */ > -static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, > - struct list_lru *lru, > - struct obj_cgroup **objcgp, > - size_t objects, gfp_t flags) > +static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s, > + struct list_lru *lru, > + struct obj_cgroup **objcgp, > + size_t objects, gfp_t flags) > { > - struct obj_cgroup *objcg; > - > - if (!memcg_kmem_online()) > - return true; > - > - if (!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)) > - return true; > - > /* > * The obtained objcg pointer is safe to use within the current scope, > * defined by current task or set_active_memcg() pair. > * obj_cgroup_get() is used to get a permanent reference. > */ > - objcg = current_obj_cgroup(); > + struct obj_cgroup *objcg = current_obj_cgroup(); > if (!objcg) > return true; > > @@ -1907,17 +1899,34 @@ static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, > return true; > } > > -static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, > - struct obj_cgroup *objcg, > - gfp_t flags, size_t size, > - void **p) > +/* > + * Returns false if the allocation should fail. 
> + */ > +static __fastpath_inline > +bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, struct list_lru *lru, > + struct obj_cgroup **objcgp, size_t objects, > + gfp_t flags) > +{ > + if (!memcg_kmem_online()) > + return true; > + > + if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT))) > + return true; > + > + return likely(__memcg_slab_pre_alloc_hook(s, lru, objcgp, objects, > + flags)); > +} > + > +static void __memcg_slab_post_alloc_hook(struct kmem_cache *s, > + struct obj_cgroup *objcg, > + gfp_t flags, size_t size, > + void **p) > { > struct slab *slab; > unsigned long off; > size_t i; > > - if (!memcg_kmem_online() || !objcg) > - return; > + flags &= gfp_allowed_mask; > > for (i = 0; i < size; i++) { > if (likely(p[i])) { > @@ -1940,6 +1949,16 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, > } > } > > +static __fastpath_inline > +void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg, > + gfp_t flags, size_t size, void **p) > +{ > + if (likely(!memcg_kmem_online() || !objcg)) > + return; > + > + return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p); > +} > + > static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, > void **p, int objects) > { > @@ -3709,34 +3728,34 @@ noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags) > } > ALLOW_ERROR_INJECTION(should_failslab, ERRNO); > > -static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, > - struct list_lru *lru, > - struct obj_cgroup **objcgp, > - size_t size, gfp_t flags) > +static __fastpath_inline > +struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, > + struct list_lru *lru, > + struct obj_cgroup **objcgp, > + size_t size, gfp_t flags) > { > flags &= gfp_allowed_mask; > > might_alloc(flags); > > - if (should_failslab(s, flags)) > + if (unlikely(should_failslab(s, flags))) > return NULL; > > - if (!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags)) > + if 
(unlikely(!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags))) > return NULL; > > return s; > } > > -static inline void slab_post_alloc_hook(struct kmem_cache *s, > - struct obj_cgroup *objcg, gfp_t flags, > - size_t size, void **p, bool init, > - unsigned int orig_size) > +static __fastpath_inline > +void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg, > + gfp_t flags, size_t size, void **p, bool init, > + unsigned int orig_size) > { > unsigned int zero_size = s->object_size; > bool kasan_init = init; > size_t i; > - > - flags &= gfp_allowed_mask; > + gfp_t init_flags = flags & gfp_allowed_mask; > > /* > * For kmalloc object, the allocated memory size(object_size) is likely > @@ -3769,13 +3788,13 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, > * As p[i] might get tagged, memset and kmemleak hook come after KASAN. > */ > for (i = 0; i < size; i++) { > - p[i] = kasan_slab_alloc(s, p[i], flags, kasan_init); > + p[i] = kasan_slab_alloc(s, p[i], init_flags, kasan_init); > if (p[i] && init && (!kasan_init || > !kasan_has_integrated_init())) > memset(p[i], 0, zero_size); > kmemleak_alloc_recursive(p[i], s->object_size, 1, > - s->flags, flags); > - kmsan_slab_alloc(s, p[i], flags); > + s->flags, init_flags); > + kmsan_slab_alloc(s, p[i], init_flags); > } > > memcg_slab_post_alloc_hook(s, objcg, flags, size, p); > @@ -3799,7 +3818,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list > bool init = false; > > s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags); > - if (!s) > + if (unlikely(!s)) > return NULL; > > object = kfence_alloc(s, orig_size, gfpflags); > > -- Looks good to me, Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> > 2.42.1 > >
diff --git a/mm/slub.c b/mm/slub.c index 5683f1d02e4f..77d259f3d592 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1866,25 +1866,17 @@ static inline size_t obj_full_size(struct kmem_cache *s) /* * Returns false if the allocation should fail. */ -static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, - struct list_lru *lru, - struct obj_cgroup **objcgp, - size_t objects, gfp_t flags) +static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s, + struct list_lru *lru, + struct obj_cgroup **objcgp, + size_t objects, gfp_t flags) { - struct obj_cgroup *objcg; - - if (!memcg_kmem_online()) - return true; - - if (!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)) - return true; - /* * The obtained objcg pointer is safe to use within the current scope, * defined by current task or set_active_memcg() pair. * obj_cgroup_get() is used to get a permanent reference. */ - objcg = current_obj_cgroup(); + struct obj_cgroup *objcg = current_obj_cgroup(); if (!objcg) return true; @@ -1907,17 +1899,34 @@ static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, return true; } -static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, - struct obj_cgroup *objcg, - gfp_t flags, size_t size, - void **p) +/* + * Returns false if the allocation should fail. 
+ */ +static __fastpath_inline +bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, struct list_lru *lru, + struct obj_cgroup **objcgp, size_t objects, + gfp_t flags) +{ + if (!memcg_kmem_online()) + return true; + + if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT))) + return true; + + return likely(__memcg_slab_pre_alloc_hook(s, lru, objcgp, objects, + flags)); +} + +static void __memcg_slab_post_alloc_hook(struct kmem_cache *s, + struct obj_cgroup *objcg, + gfp_t flags, size_t size, + void **p) { struct slab *slab; unsigned long off; size_t i; - if (!memcg_kmem_online() || !objcg) - return; + flags &= gfp_allowed_mask; for (i = 0; i < size; i++) { if (likely(p[i])) { @@ -1940,6 +1949,16 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, } } +static __fastpath_inline +void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg, + gfp_t flags, size_t size, void **p) +{ + if (likely(!memcg_kmem_online() || !objcg)) + return; + + return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p); +} + static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, int objects) { @@ -3709,34 +3728,34 @@ noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags) } ALLOW_ERROR_INJECTION(should_failslab, ERRNO); -static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, - struct list_lru *lru, - struct obj_cgroup **objcgp, - size_t size, gfp_t flags) +static __fastpath_inline +struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, + struct list_lru *lru, + struct obj_cgroup **objcgp, + size_t size, gfp_t flags) { flags &= gfp_allowed_mask; might_alloc(flags); - if (should_failslab(s, flags)) + if (unlikely(should_failslab(s, flags))) return NULL; - if (!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags)) + if (unlikely(!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags))) return NULL; return s; } -static inline void slab_post_alloc_hook(struct kmem_cache 
*s, - struct obj_cgroup *objcg, gfp_t flags, - size_t size, void **p, bool init, - unsigned int orig_size) +static __fastpath_inline +void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg, + gfp_t flags, size_t size, void **p, bool init, + unsigned int orig_size) { unsigned int zero_size = s->object_size; bool kasan_init = init; size_t i; - - flags &= gfp_allowed_mask; + gfp_t init_flags = flags & gfp_allowed_mask; /* * For kmalloc object, the allocated memory size(object_size) is likely @@ -3769,13 +3788,13 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, * As p[i] might get tagged, memset and kmemleak hook come after KASAN. */ for (i = 0; i < size; i++) { - p[i] = kasan_slab_alloc(s, p[i], flags, kasan_init); + p[i] = kasan_slab_alloc(s, p[i], init_flags, kasan_init); if (p[i] && init && (!kasan_init || !kasan_has_integrated_init())) memset(p[i], 0, zero_size); kmemleak_alloc_recursive(p[i], s->object_size, 1, - s->flags, flags); - kmsan_slab_alloc(s, p[i], flags); + s->flags, init_flags); + kmsan_slab_alloc(s, p[i], init_flags); } memcg_slab_post_alloc_hook(s, objcg, flags, size, p); @@ -3799,7 +3818,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list bool init = false; s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags); - if (!s) + if (unlikely(!s)) return NULL; object = kfence_alloc(s, orig_size, gfpflags);
With allocation fastpaths no longer divided between two .c files, we have better inlining; however, checking the disassembly of kmem_cache_alloc() reveals we can do better to make the fastpaths smaller and move the less common situations out of line or to separate functions, to reduce instruction cache pressure. - split memcg pre/post alloc hooks into inlined checks that use likely() to assume there will be no objcg handling necessary, and non-inline functions doing the actual handling - add some more likely/unlikely() to pre/post alloc hooks to indicate which scenarios should be out of line - change gfp_allowed_mask handling in slab_post_alloc_hook() so the code can be optimized away when kasan/kmsan/kmemleak is configured out bloat-o-meter shows: add/remove: 4/2 grow/shrink: 1/8 up/down: 521/-2924 (-2403) Function old new delta __memcg_slab_post_alloc_hook - 461 +461 kmem_cache_alloc_bulk 775 791 +16 __pfx_should_failslab.constprop - 16 +16 __pfx___memcg_slab_post_alloc_hook - 16 +16 should_failslab.constprop - 12 +12 __pfx_memcg_slab_post_alloc_hook 16 - -16 kmem_cache_alloc_lru 1295 1023 -272 kmem_cache_alloc_node 1118 817 -301 kmem_cache_alloc 1076 772 -304 kmalloc_node_trace 1149 838 -311 kmalloc_trace 1102 789 -313 __kmalloc_node_track_caller 1393 1080 -313 __kmalloc_node 1397 1082 -315 __kmalloc 1374 1059 -315 memcg_slab_post_alloc_hook 464 - -464 Note that gcc still decided to inline __memcg_slab_pre_alloc_hook(), but the code is out of line. Forcing noinline did not improve the results. As a result, the fastpaths are shorter and overall code size is reduced. Signed-off-by: Vlastimil Babka <vbabka@suse.cz> --- mm/slub.c | 89 ++++++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 54 insertions(+), 35 deletions(-)