diff mbox series

[RFC,2/4] mm, slab: move slab_memcg hooks to mm/memcontrol.c

Message ID 20240301-slab-memcg-v1-2-359328a46596@suse.cz (mailing list archive)
State New
Headers show
Series memcg_kmem hooks refactoring and kmem_cache_charge() | expand

Commit Message

Vlastimil Babka March 1, 2024, 5:07 p.m. UTC
The hooks make multiple calls to functions in mm/memcontrol.c, including
to th current_obj_cgroup() marked __always_inline. It might be faster to
make a single call to the hook in mm/memcontrol.c instead. The hooks
also don't use almost anything from mm/slub.c. obj_full_size() can move
with the hooks and cache_vmstat_idx() to the internal mm/slab.h

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 mm/memcontrol.c |  90 ++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/slab.h       |  10 ++++++
 mm/slub.c       | 100 --------------------------------------------------------
 3 files changed, 100 insertions(+), 100 deletions(-)

Comments

Roman Gushchin March 12, 2024, 6:56 p.m. UTC | #1
On Fri, Mar 01, 2024 at 06:07:09PM +0100, Vlastimil Babka wrote:
> The hooks make multiple calls to functions in mm/memcontrol.c, including
> to th current_obj_cgroup() marked __always_inline. It might be faster to
> make a single call to the hook in mm/memcontrol.c instead. The hooks
> also don't use almost anything from mm/slub.c. obj_full_size() can move
> with the hooks and cache_vmstat_idx() to the internal mm/slab.h
> 
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  mm/memcontrol.c |  90 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  mm/slab.h       |  10 ++++++
>  mm/slub.c       | 100 --------------------------------------------------------
>  3 files changed, 100 insertions(+), 100 deletions(-)

Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>

Btw, even before your change:
$ cat mm/memcontrol.c | wc -l
8318
so I wonder if soon we might want to split it into some smaller parts.

Thanks!

> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index e4c8735e7c85..37ee9356a26c 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3575,6 +3575,96 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
>  	refill_obj_stock(objcg, size, true);
>  }
>  
> +static inline size_t obj_full_size(struct kmem_cache *s)
> +{
> +	/*
> +	 * For each accounted object there is an extra space which is used
> +	 * to store obj_cgroup membership. Charge it too.
> +	 */
> +	return s->size + sizeof(struct obj_cgroup *);
> +}
> +
> +bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
> +				  gfp_t flags, size_t size, void **p)
> +{
> +	struct obj_cgroup *objcg;
> +	struct slab *slab;
> +	unsigned long off;
> +	size_t i;
> +
> +	/*
> +	 * The obtained objcg pointer is safe to use within the current scope,
> +	 * defined by current task or set_active_memcg() pair.
> +	 * obj_cgroup_get() is used to get a permanent reference.
> +	 */
> +	objcg = current_obj_cgroup();
> +	if (!objcg)
> +		return true;
> +
> +	/*
> +	 * slab_alloc_node() avoids the NULL check, so we might be called with a
> +	 * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill
> +	 * the whole requested size.
> +	 * return success as there's nothing to free back
> +	 */
> +	if (unlikely(*p == NULL))
> +		return true;
> +
> +	flags &= gfp_allowed_mask;
> +
> +	if (lru) {
> +		int ret;
> +		struct mem_cgroup *memcg;
> +
> +		memcg = get_mem_cgroup_from_objcg(objcg);
> +		ret = memcg_list_lru_alloc(memcg, lru, flags);
> +		css_put(&memcg->css);
> +
> +		if (ret)
> +			return false;
> +	}
> +
> +	if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
> +		return false;
> +
> +	for (i = 0; i < size; i++) {
> +		slab = virt_to_slab(p[i]);
> +
> +		if (!slab_objcgs(slab) &&
> +		    memcg_alloc_slab_cgroups(slab, s, flags, false)) {
> +			obj_cgroup_uncharge(objcg, obj_full_size(s));
> +			continue;
> +		}
> +
> +		off = obj_to_index(s, slab, p[i]);
> +		obj_cgroup_get(objcg);
> +		slab_objcgs(slab)[off] = objcg;
> +		mod_objcg_state(objcg, slab_pgdat(slab),
> +				cache_vmstat_idx(s), obj_full_size(s));
> +	}
> +
> +	return true;
> +}
> +
> +void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
> +			    void **p, int objects, struct obj_cgroup **objcgs)
> +{
> +	for (int i = 0; i < objects; i++) {
> +		struct obj_cgroup *objcg;
> +		unsigned int off;
> +
> +		off = obj_to_index(s, slab, p[i]);
> +		objcg = objcgs[off];
> +		if (!objcg)
> +			continue;
> +
> +		objcgs[off] = NULL;
> +		obj_cgroup_uncharge(objcg, obj_full_size(s));
> +		mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
> +				-obj_full_size(s));
> +		obj_cgroup_put(objcg);
> +	}
> +}
>  #endif /* CONFIG_MEMCG_KMEM */
>  
>  /*
> diff --git a/mm/slab.h b/mm/slab.h
> index 54deeb0428c6..3f170673fa55 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -541,6 +541,12 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla
>  	return false;
>  }
>  
> +static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
> +{
> +	return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
> +		NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
> +}
> +
>  #ifdef CONFIG_MEMCG_KMEM
>  /*
>   * slab_objcgs - get the object cgroups vector associated with a slab
> @@ -564,6 +570,10 @@ int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
>  				 gfp_t gfp, bool new_slab);
>  void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
>  		     enum node_stat_item idx, int nr);
> +bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
> +				  gfp_t flags, size_t size, void **p);
> +void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
> +			    void **p, int objects, struct obj_cgroup **objcgs);
>  #else /* CONFIG_MEMCG_KMEM */
>  static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
>  {
> diff --git a/mm/slub.c b/mm/slub.c
> index 7022a1246bab..64da169d672a 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1875,12 +1875,6 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
>  #endif
>  #endif /* CONFIG_SLUB_DEBUG */
>  
> -static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
> -{
> -	return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
> -		NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
> -}
> -
>  #ifdef CONFIG_MEMCG_KMEM
>  static inline void memcg_free_slab_cgroups(struct slab *slab)
>  {
> @@ -1888,79 +1882,6 @@ static inline void memcg_free_slab_cgroups(struct slab *slab)
>  	slab->memcg_data = 0;
>  }
>  
> -static inline size_t obj_full_size(struct kmem_cache *s)
> -{
> -	/*
> -	 * For each accounted object there is an extra space which is used
> -	 * to store obj_cgroup membership. Charge it too.
> -	 */
> -	return s->size + sizeof(struct obj_cgroup *);
> -}
> -
> -static bool __memcg_slab_post_alloc_hook(struct kmem_cache *s,
> -					 struct list_lru *lru,
> -					 gfp_t flags, size_t size,
> -					 void **p)
> -{
> -	struct obj_cgroup *objcg;
> -	struct slab *slab;
> -	unsigned long off;
> -	size_t i;
> -
> -	/*
> -	 * The obtained objcg pointer is safe to use within the current scope,
> -	 * defined by current task or set_active_memcg() pair.
> -	 * obj_cgroup_get() is used to get a permanent reference.
> -	 */
> -	objcg = current_obj_cgroup();
> -	if (!objcg)
> -		return true;
> -
> -	/*
> -	 * slab_alloc_node() avoids the NULL check, so we might be called with a
> -	 * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill
> -	 * the whole requested size.
> -	 * return success as there's nothing to free back
> -	 */
> -	if (unlikely(*p == NULL))
> -		return true;
> -
> -	flags &= gfp_allowed_mask;
> -
> -	if (lru) {
> -		int ret;
> -		struct mem_cgroup *memcg;
> -
> -		memcg = get_mem_cgroup_from_objcg(objcg);
> -		ret = memcg_list_lru_alloc(memcg, lru, flags);
> -		css_put(&memcg->css);
> -
> -		if (ret)
> -			return false;
> -	}
> -
> -	if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
> -		return false;
> -
> -	for (i = 0; i < size; i++) {
> -		slab = virt_to_slab(p[i]);
> -
> -		if (!slab_objcgs(slab) &&
> -		    memcg_alloc_slab_cgroups(slab, s, flags, false)) {
> -			obj_cgroup_uncharge(objcg, obj_full_size(s));
> -			continue;
> -		}
> -
> -		off = obj_to_index(s, slab, p[i]);
> -		obj_cgroup_get(objcg);
> -		slab_objcgs(slab)[off] = objcg;
> -		mod_objcg_state(objcg, slab_pgdat(slab),
> -				cache_vmstat_idx(s), obj_full_size(s));
> -	}
> -
> -	return true;
> -}
> -
>  static void memcg_alloc_abort_single(struct kmem_cache *s, void *object);
>  
>  static __fastpath_inline
> @@ -1986,27 +1907,6 @@ bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
>  	return false;
>  }
>  
> -static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
> -				   void **p, int objects,
> -				   struct obj_cgroup **objcgs)
> -{
> -	for (int i = 0; i < objects; i++) {
> -		struct obj_cgroup *objcg;
> -		unsigned int off;
> -
> -		off = obj_to_index(s, slab, p[i]);
> -		objcg = objcgs[off];
> -		if (!objcg)
> -			continue;
> -
> -		objcgs[off] = NULL;
> -		obj_cgroup_uncharge(objcg, obj_full_size(s));
> -		mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
> -				-obj_full_size(s));
> -		obj_cgroup_put(objcg);
> -	}
> -}
> -
>  static __fastpath_inline
>  void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
>  			  int objects)
> 
> -- 
> 2.44.0
> 
>
Matthew Wilcox March 12, 2024, 7:32 p.m. UTC | #2
On Tue, Mar 12, 2024 at 11:56:31AM -0700, Roman Gushchin wrote:
> On Fri, Mar 01, 2024 at 06:07:09PM +0100, Vlastimil Babka wrote:
> > The hooks make multiple calls to functions in mm/memcontrol.c, including
> > to th current_obj_cgroup() marked __always_inline. It might be faster to
> > make a single call to the hook in mm/memcontrol.c instead. The hooks
> > also don't use almost anything from mm/slub.c. obj_full_size() can move
> > with the hooks and cache_vmstat_idx() to the internal mm/slab.h
> > 
> > Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> > ---
> >  mm/memcontrol.c |  90 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  mm/slab.h       |  10 ++++++
> >  mm/slub.c       | 100 --------------------------------------------------------
> >  3 files changed, 100 insertions(+), 100 deletions(-)
> 
> Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
> 
> Btw, even before your change:
> $ cat mm/memcontrol.c | wc -l
> 8318
> so I wonder if soon we might want to split it into some smaller parts.

If we are going to split it, perhaps a mm/memcg-v1.c would make sense,
because I certainly don't have a good idea about what's v1 and what's
v2.  And maybe we could even conditionally compile the v1 file ;-)
Roman Gushchin March 12, 2024, 8:36 p.m. UTC | #3
On Tue, Mar 12, 2024 at 07:32:57PM +0000, Matthew Wilcox wrote:
> On Tue, Mar 12, 2024 at 11:56:31AM -0700, Roman Gushchin wrote:
> > On Fri, Mar 01, 2024 at 06:07:09PM +0100, Vlastimil Babka wrote:
> > > The hooks make multiple calls to functions in mm/memcontrol.c, including
> > > to th current_obj_cgroup() marked __always_inline. It might be faster to
> > > make a single call to the hook in mm/memcontrol.c instead. The hooks
> > > also don't use almost anything from mm/slub.c. obj_full_size() can move
> > > with the hooks and cache_vmstat_idx() to the internal mm/slab.h
> > > 
> > > Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> > > ---
> > >  mm/memcontrol.c |  90 ++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  mm/slab.h       |  10 ++++++
> > >  mm/slub.c       | 100 --------------------------------------------------------
> > >  3 files changed, 100 insertions(+), 100 deletions(-)
> > 
> > Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
> > 
> > Btw, even before your change:
> > $ cat mm/memcontrol.c | wc -l
> > 8318
> > so I wonder if soon we might want to split it into some smaller parts.
> 
> If we are going to split it, perhaps a mm/memcg-v1.c would make sense,
> because I certainly don't have a good idea about what's v1 and what's
> v2.  And maybe we could even conditionally compile the v1 file ;-)

Good call.
We already have cgroup/cgroup-v1.c and cgroup/legacy_freezer.c.

Thanks!
diff mbox series

Patch

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e4c8735e7c85..37ee9356a26c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3575,6 +3575,96 @@  void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
 	refill_obj_stock(objcg, size, true);
 }
 
+static inline size_t obj_full_size(struct kmem_cache *s)
+{
+	/*
+	 * For each accounted object there is an extra space which is used
+	 * to store obj_cgroup membership. Charge it too.
+	 */
+	return s->size + sizeof(struct obj_cgroup *);
+}
+
+bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
+				  gfp_t flags, size_t size, void **p)
+{
+	struct obj_cgroup *objcg;
+	struct slab *slab;
+	unsigned long off;
+	size_t i;
+
+	/*
+	 * The obtained objcg pointer is safe to use within the current scope,
+	 * defined by current task or set_active_memcg() pair.
+	 * obj_cgroup_get() is used to get a permanent reference.
+	 */
+	objcg = current_obj_cgroup();
+	if (!objcg)
+		return true;
+
+	/*
+	 * slab_alloc_node() avoids the NULL check, so we might be called with a
+	 * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill
+	 * the whole requested size.
+	 * return success as there's nothing to free back
+	 */
+	if (unlikely(*p == NULL))
+		return true;
+
+	flags &= gfp_allowed_mask;
+
+	if (lru) {
+		int ret;
+		struct mem_cgroup *memcg;
+
+		memcg = get_mem_cgroup_from_objcg(objcg);
+		ret = memcg_list_lru_alloc(memcg, lru, flags);
+		css_put(&memcg->css);
+
+		if (ret)
+			return false;
+	}
+
+	if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
+		return false;
+
+	for (i = 0; i < size; i++) {
+		slab = virt_to_slab(p[i]);
+
+		if (!slab_objcgs(slab) &&
+		    memcg_alloc_slab_cgroups(slab, s, flags, false)) {
+			obj_cgroup_uncharge(objcg, obj_full_size(s));
+			continue;
+		}
+
+		off = obj_to_index(s, slab, p[i]);
+		obj_cgroup_get(objcg);
+		slab_objcgs(slab)[off] = objcg;
+		mod_objcg_state(objcg, slab_pgdat(slab),
+				cache_vmstat_idx(s), obj_full_size(s));
+	}
+
+	return true;
+}
+
+void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
+			    void **p, int objects, struct obj_cgroup **objcgs)
+{
+	for (int i = 0; i < objects; i++) {
+		struct obj_cgroup *objcg;
+		unsigned int off;
+
+		off = obj_to_index(s, slab, p[i]);
+		objcg = objcgs[off];
+		if (!objcg)
+			continue;
+
+		objcgs[off] = NULL;
+		obj_cgroup_uncharge(objcg, obj_full_size(s));
+		mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
+				-obj_full_size(s));
+		obj_cgroup_put(objcg);
+	}
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 /*
diff --git a/mm/slab.h b/mm/slab.h
index 54deeb0428c6..3f170673fa55 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -541,6 +541,12 @@  static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla
 	return false;
 }
 
+static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
+{
+	return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
+		NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
+}
+
 #ifdef CONFIG_MEMCG_KMEM
 /*
  * slab_objcgs - get the object cgroups vector associated with a slab
@@ -564,6 +570,10 @@  int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
 				 gfp_t gfp, bool new_slab);
 void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
 		     enum node_stat_item idx, int nr);
+bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
+				  gfp_t flags, size_t size, void **p);
+void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
+			    void **p, int objects, struct obj_cgroup **objcgs);
 #else /* CONFIG_MEMCG_KMEM */
 static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
 {
diff --git a/mm/slub.c b/mm/slub.c
index 7022a1246bab..64da169d672a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1875,12 +1875,6 @@  static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
 #endif
 #endif /* CONFIG_SLUB_DEBUG */
 
-static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
-{
-	return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
-		NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
-}
-
 #ifdef CONFIG_MEMCG_KMEM
 static inline void memcg_free_slab_cgroups(struct slab *slab)
 {
@@ -1888,79 +1882,6 @@  static inline void memcg_free_slab_cgroups(struct slab *slab)
 	slab->memcg_data = 0;
 }
 
-static inline size_t obj_full_size(struct kmem_cache *s)
-{
-	/*
-	 * For each accounted object there is an extra space which is used
-	 * to store obj_cgroup membership. Charge it too.
-	 */
-	return s->size + sizeof(struct obj_cgroup *);
-}
-
-static bool __memcg_slab_post_alloc_hook(struct kmem_cache *s,
-					 struct list_lru *lru,
-					 gfp_t flags, size_t size,
-					 void **p)
-{
-	struct obj_cgroup *objcg;
-	struct slab *slab;
-	unsigned long off;
-	size_t i;
-
-	/*
-	 * The obtained objcg pointer is safe to use within the current scope,
-	 * defined by current task or set_active_memcg() pair.
-	 * obj_cgroup_get() is used to get a permanent reference.
-	 */
-	objcg = current_obj_cgroup();
-	if (!objcg)
-		return true;
-
-	/*
-	 * slab_alloc_node() avoids the NULL check, so we might be called with a
-	 * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill
-	 * the whole requested size.
-	 * return success as there's nothing to free back
-	 */
-	if (unlikely(*p == NULL))
-		return true;
-
-	flags &= gfp_allowed_mask;
-
-	if (lru) {
-		int ret;
-		struct mem_cgroup *memcg;
-
-		memcg = get_mem_cgroup_from_objcg(objcg);
-		ret = memcg_list_lru_alloc(memcg, lru, flags);
-		css_put(&memcg->css);
-
-		if (ret)
-			return false;
-	}
-
-	if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
-		return false;
-
-	for (i = 0; i < size; i++) {
-		slab = virt_to_slab(p[i]);
-
-		if (!slab_objcgs(slab) &&
-		    memcg_alloc_slab_cgroups(slab, s, flags, false)) {
-			obj_cgroup_uncharge(objcg, obj_full_size(s));
-			continue;
-		}
-
-		off = obj_to_index(s, slab, p[i]);
-		obj_cgroup_get(objcg);
-		slab_objcgs(slab)[off] = objcg;
-		mod_objcg_state(objcg, slab_pgdat(slab),
-				cache_vmstat_idx(s), obj_full_size(s));
-	}
-
-	return true;
-}
-
 static void memcg_alloc_abort_single(struct kmem_cache *s, void *object);
 
 static __fastpath_inline
@@ -1986,27 +1907,6 @@  bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
 	return false;
 }
 
-static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
-				   void **p, int objects,
-				   struct obj_cgroup **objcgs)
-{
-	for (int i = 0; i < objects; i++) {
-		struct obj_cgroup *objcg;
-		unsigned int off;
-
-		off = obj_to_index(s, slab, p[i]);
-		objcg = objcgs[off];
-		if (!objcg)
-			continue;
-
-		objcgs[off] = NULL;
-		obj_cgroup_uncharge(objcg, obj_full_size(s));
-		mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
-				-obj_full_size(s));
-		obj_cgroup_put(objcg);
-	}
-}
-
 static __fastpath_inline
 void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
 			  int objects)