
[v2,21/21] mm/slub: optimize free fast path code layout

Message ID 20231120-slab-remove-slab-v2-21-9c9c70177183@suse.cz (mailing list archive)
State Mainlined
Commit ecf9a253ce120082ce0a8aff806c4de4865cfcc5
Series remove the SLAB allocator

Commit Message

Vlastimil Babka Nov. 20, 2023, 6:34 p.m. UTC
Inspection of kmem_cache_free() disassembly showed we could make the
fast path smaller by providing a few more hints to the compiler, and
by splitting memcg_slab_free_hook() into an inline part that only
checks whether there is work to do, and an out-of-line part that does
the actual uncharge.
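
For readers skimming the archive, here is a minimal sketch of the
pattern being applied (the function names below are illustrative only,
not taken from the patch): keep a tiny always-inlined wrapper that does
nothing but the cheap checks, and push the rarely needed work into a
separate non-inlined function so it stays out of the caller's hot
instruction stream. The real split is visible in the
memcg_slab_free_hook() hunk further down, where the wrapper is marked
__fastpath_inline.

/* Sketch only; names are made up for illustration. */

/* out of line: the uncommon, heavier work */
static void do_rare_work(void **p, int objects)
{
	for (int i = 0; i < objects; i++) {
		/* ... the actual per-object work would go here ... */
	}
	(void)p;	/* placeholder; a real implementation uses p[i] */
}

/* inlined into every caller: only the cheap "is there work?" test */
static inline __attribute__((__always_inline__))
void maybe_do_work(void **p, int objects, void *state)
{
	if (__builtin_expect(!state, 1))	/* common case: nothing to do */
		return;

	do_rare_work(p, objects);
}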

bloat-o-meter results:
add/remove: 2/0 grow/shrink: 0/3 up/down: 286/-554 (-268)
Function                                     old     new   delta
__memcg_slab_free_hook                         -     270    +270
__pfx___memcg_slab_free_hook                   -      16     +16
kfree                                        828     665    -163
kmem_cache_free                             1116     948    -168
kmem_cache_free_bulk.part                   1701    1478    -223

Checking kmem_cache_free() disassembly now shows the non-fastpath
cases are handled out of line, which should reduce instruction cache
usage.
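
As background rather than something this patch introduces: likely() and
unlikely() are thin wrappers around the compilers' __builtin_expect()
(the kernel's definitions live in include/linux/compiler.h), which lets
GCC/Clang lay out the expected path as straight-line fall-through code
and keep the cold branch target away from the hot text.

/* Standalone illustration mirroring the kernel's definitions. */
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)

int check_and_free(void *object)
{
	if (unlikely(!object))	/* branch treated as cold by the compiler */
		return -1;

	/* expected fall-through path stays contiguous in the hot text */
	return 0;
}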

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 mm/slub.c | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

Comments

Hyeonggon Yoo Dec. 7, 2023, 2:40 a.m. UTC | #1
On Mon, Nov 20, 2023 at 07:34:32PM +0100, Vlastimil Babka wrote:
> Inspection of kmem_cache_free() disassembly showed we could make the
> fast path smaller by providing a few more hints to the compiler, and
> by splitting memcg_slab_free_hook() into an inline part that only
> checks whether there is work to do, and an out-of-line part that does
> the actual uncharge.
> 
> bloat-o-meter results:
> add/remove: 2/0 grow/shrink: 0/3 up/down: 286/-554 (-268)
> Function                                     old     new   delta
> __memcg_slab_free_hook                         -     270    +270
> __pfx___memcg_slab_free_hook                   -      16     +16
> kfree                                        828     665    -163
> kmem_cache_free                             1116     948    -168
> kmem_cache_free_bulk.part                   1701    1478    -223
> 
> Checking kmem_cache_free() disassembly now shows the non-fastpath
> cases are handled out of line, which should reduce instruction cache
> usage.
> 
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  mm/slub.c | 40 ++++++++++++++++++++++++----------------
>  1 file changed, 24 insertions(+), 16 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index 77d259f3d592..3f8b95757106 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1959,20 +1959,11 @@ void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
>  	return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
>  }
>  
> -static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
> -					void **p, int objects)
> +static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
> +				   void **p, int objects,
> +				   struct obj_cgroup **objcgs)
>  {
> -	struct obj_cgroup **objcgs;
> -	int i;
> -
> -	if (!memcg_kmem_online())
> -		return;
> -
> -	objcgs = slab_objcgs(slab);
> -	if (!objcgs)
> -		return;
> -
> -	for (i = 0; i < objects; i++) {
> +	for (int i = 0; i < objects; i++) {
>  		struct obj_cgroup *objcg;
>  		unsigned int off;
>  
> @@ -1988,6 +1979,22 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
>  		obj_cgroup_put(objcg);
>  	}
>  }
> +
> +static __fastpath_inline
> +void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
> +			  int objects)
> +{
> +	struct obj_cgroup **objcgs;
> +
> +	if (!memcg_kmem_online())
> +		return;
> +
> +	objcgs = slab_objcgs(slab);
> +	if (likely(!objcgs))
> +		return;
> +
> +	__memcg_slab_free_hook(s, slab, p, objects, objcgs);
> +}
>  #else /* CONFIG_MEMCG_KMEM */
>  static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
>  {
> @@ -2047,7 +2054,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
>  	 * The initialization memset's clear the object and the metadata,
>  	 * but don't touch the SLAB redzone.
>  	 */
> -	if (init) {
> +	if (unlikely(init)) {
>  		int rsize;
>  
>  		if (!kasan_has_integrated_init())
> @@ -2083,7 +2090,8 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
>  		next = get_freepointer(s, object);
>  
>  		/* If object's reuse doesn't have to be delayed */
> -		if (!slab_free_hook(s, object, slab_want_init_on_free(s))) {
> +		if (likely(!slab_free_hook(s, object,
> +					   slab_want_init_on_free(s)))) {
>  			/* Move object to the new freelist */
>  			set_freepointer(s, object, *head);
>  			*head = object;
> @@ -4282,7 +4290,7 @@ static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab,
>  	 * With KASAN enabled slab_free_freelist_hook modifies the freelist
>  	 * to remove objects, whose reuse must be delayed.
>  	 */
> -	if (slab_free_freelist_hook(s, &head, &tail, &cnt))
> +	if (likely(slab_free_freelist_hook(s, &head, &tail, &cnt)))
>  		do_slab_free(s, slab, head, tail, cnt, addr);
>  }
>  
> 
> -- 

Looks good to me,
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

> 2.42.1
> 
>

Patch

diff --git a/mm/slub.c b/mm/slub.c
index 77d259f3d592..3f8b95757106 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1959,20 +1959,11 @@  void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
 	return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
 }
 
-static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
-					void **p, int objects)
+static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
+				   void **p, int objects,
+				   struct obj_cgroup **objcgs)
 {
-	struct obj_cgroup **objcgs;
-	int i;
-
-	if (!memcg_kmem_online())
-		return;
-
-	objcgs = slab_objcgs(slab);
-	if (!objcgs)
-		return;
-
-	for (i = 0; i < objects; i++) {
+	for (int i = 0; i < objects; i++) {
 		struct obj_cgroup *objcg;
 		unsigned int off;
 
@@ -1988,6 +1979,22 @@  static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
 		obj_cgroup_put(objcg);
 	}
 }
+
+static __fastpath_inline
+void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
+			  int objects)
+{
+	struct obj_cgroup **objcgs;
+
+	if (!memcg_kmem_online())
+		return;
+
+	objcgs = slab_objcgs(slab);
+	if (likely(!objcgs))
+		return;
+
+	__memcg_slab_free_hook(s, slab, p, objects, objcgs);
+}
 #else /* CONFIG_MEMCG_KMEM */
 static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
 {
@@ -2047,7 +2054,7 @@  static __always_inline bool slab_free_hook(struct kmem_cache *s,
 	 * The initialization memset's clear the object and the metadata,
 	 * but don't touch the SLAB redzone.
 	 */
-	if (init) {
+	if (unlikely(init)) {
 		int rsize;
 
 		if (!kasan_has_integrated_init())
@@ -2083,7 +2090,8 @@  static inline bool slab_free_freelist_hook(struct kmem_cache *s,
 		next = get_freepointer(s, object);
 
 		/* If object's reuse doesn't have to be delayed */
-		if (!slab_free_hook(s, object, slab_want_init_on_free(s))) {
+		if (likely(!slab_free_hook(s, object,
+					   slab_want_init_on_free(s)))) {
 			/* Move object to the new freelist */
 			set_freepointer(s, object, *head);
 			*head = object;
@@ -4282,7 +4290,7 @@  static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab,
 	 * With KASAN enabled slab_free_freelist_hook modifies the freelist
 	 * to remove objects, whose reuse must be delayed.
 	 */
-	if (slab_free_freelist_hook(s, &head, &tail, &cnt))
+	if (likely(slab_free_freelist_hook(s, &head, &tail, &cnt)))
 		do_slab_free(s, slab, head, tail, cnt, addr);
 }