Message ID | 20240301-slab-memcg-v1-2-359328a46596@suse.cz (mailing list archive)
---|---
State | New
Series | memcg_kmem hooks refactoring and kmem_cache_charge()
On Fri, Mar 01, 2024 at 06:07:09PM +0100, Vlastimil Babka wrote:
> The hooks make multiple calls to functions in mm/memcontrol.c, including
> to th current_obj_cgroup() marked __always_inline. It might be faster to
> make a single call to the hook in mm/memcontrol.c instead. The hooks
> also don't use almost anything from mm/slub.c. obj_full_size() can move
> with the hooks and cache_vmstat_idx() to the internal mm/slab.h
>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  mm/memcontrol.c |  90 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  mm/slab.h       |  10 ++++++
>  mm/slub.c       | 100 --------------------------------------------------------
>  3 files changed, 100 insertions(+), 100 deletions(-)

Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>

Btw, even before your change:
$ cat mm/memcontrol.c | wc -l
8318
so I wonder if soon we might want to split it into some smaller parts.

Thanks!

> [...]
On Tue, Mar 12, 2024 at 11:56:31AM -0700, Roman Gushchin wrote:
> On Fri, Mar 01, 2024 at 06:07:09PM +0100, Vlastimil Babka wrote:
> > [...]
>
> Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
>
> Btw, even before your change:
> $ cat mm/memcontrol.c | wc -l
> 8318
> so I wonder if soon we might want to split it into some smaller parts.

If we are going to split it, perhaps a mm/memcg-v1.c would make sense,
because I certainly don't have a good idea about what's v1 and what's
v2. And maybe we could even conditionally compile the v1 file ;-)
On Tue, Mar 12, 2024 at 07:32:57PM +0000, Matthew Wilcox wrote:
> On Tue, Mar 12, 2024 at 11:56:31AM -0700, Roman Gushchin wrote:
> > On Fri, Mar 01, 2024 at 06:07:09PM +0100, Vlastimil Babka wrote:
> > > [...]
> >
> > Btw, even before your change:
> > $ cat mm/memcontrol.c | wc -l
> > 8318
> > so I wonder if soon we might want to split it into some smaller parts.
>
> If we are going to split it, perhaps a mm/memcg-v1.c would make sense,
> because I certainly don't have a good idea about what's v1 and what's
> v2. And maybe we could even conditionally compile the v1 file ;-)

Good call. We already have cgroup/cgroup-v1.c and cgroup/legacy_freezer.c.

Thanks!
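To illustrate the conditional-compilation idea above: a minimal sketch, assuming a hypothetical CONFIG_MEMCG_V1 option and a mm/memcg-v1.c split that are not part of this series, of how a v1-only helper could be stubbed out for v2-only builds, mirroring how kernel/cgroup/cgroup-v1.c is handled for cgroup core. The function name below is likewise hypothetical.

/*
 * Hypothetical sketch -- not part of this series. If cgroup v1-only memcg
 * code lived in mm/memcg-v1.c behind a (hypothetical) CONFIG_MEMCG_V1
 * option, callers in mm/memcontrol.c could fall back to empty stubs when
 * that file is compiled out.
 */
struct mem_cgroup;

#ifdef CONFIG_MEMCG_V1
void memcg_v1_event_check(struct mem_cgroup *memcg);	/* built from mm/memcg-v1.c */
#else
static inline void memcg_v1_event_check(struct mem_cgroup *memcg) { }
#endif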
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e4c8735e7c85..37ee9356a26c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3575,6 +3575,96 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
 	refill_obj_stock(objcg, size, true);
 }
 
+static inline size_t obj_full_size(struct kmem_cache *s)
+{
+	/*
+	 * For each accounted object there is an extra space which is used
+	 * to store obj_cgroup membership. Charge it too.
+	 */
+	return s->size + sizeof(struct obj_cgroup *);
+}
+
+bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
+				  gfp_t flags, size_t size, void **p)
+{
+	struct obj_cgroup *objcg;
+	struct slab *slab;
+	unsigned long off;
+	size_t i;
+
+	/*
+	 * The obtained objcg pointer is safe to use within the current scope,
+	 * defined by current task or set_active_memcg() pair.
+	 * obj_cgroup_get() is used to get a permanent reference.
+	 */
+	objcg = current_obj_cgroup();
+	if (!objcg)
+		return true;
+
+	/*
+	 * slab_alloc_node() avoids the NULL check, so we might be called with a
+	 * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill
+	 * the whole requested size.
+	 * return success as there's nothing to free back
+	 */
+	if (unlikely(*p == NULL))
+		return true;
+
+	flags &= gfp_allowed_mask;
+
+	if (lru) {
+		int ret;
+		struct mem_cgroup *memcg;
+
+		memcg = get_mem_cgroup_from_objcg(objcg);
+		ret = memcg_list_lru_alloc(memcg, lru, flags);
+		css_put(&memcg->css);
+
+		if (ret)
+			return false;
+	}
+
+	if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
+		return false;
+
+	for (i = 0; i < size; i++) {
+		slab = virt_to_slab(p[i]);
+
+		if (!slab_objcgs(slab) &&
+		    memcg_alloc_slab_cgroups(slab, s, flags, false)) {
+			obj_cgroup_uncharge(objcg, obj_full_size(s));
+			continue;
+		}
+
+		off = obj_to_index(s, slab, p[i]);
+		obj_cgroup_get(objcg);
+		slab_objcgs(slab)[off] = objcg;
+		mod_objcg_state(objcg, slab_pgdat(slab),
+				cache_vmstat_idx(s), obj_full_size(s));
+	}
+
+	return true;
+}
+
+void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
+			    void **p, int objects, struct obj_cgroup **objcgs)
+{
+	for (int i = 0; i < objects; i++) {
+		struct obj_cgroup *objcg;
+		unsigned int off;
+
+		off = obj_to_index(s, slab, p[i]);
+		objcg = objcgs[off];
+		if (!objcg)
+			continue;
+
+		objcgs[off] = NULL;
+		obj_cgroup_uncharge(objcg, obj_full_size(s));
+		mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
+				-obj_full_size(s));
+		obj_cgroup_put(objcg);
+	}
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 /*
diff --git a/mm/slab.h b/mm/slab.h
index 54deeb0428c6..3f170673fa55 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -541,6 +541,12 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla
 	return false;
 }
 
+static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
+{
+	return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
+		NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
+}
+
 #ifdef CONFIG_MEMCG_KMEM
 /*
  * slab_objcgs - get the object cgroups vector associated with a slab
@@ -564,6 +570,10 @@ int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
 		       gfp_t gfp, bool new_slab);
 void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
 		     enum node_stat_item idx, int nr);
+bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
+				  gfp_t flags, size_t size, void **p);
+void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
+			    void **p, int objects, struct obj_cgroup **objcgs);
 #else /* CONFIG_MEMCG_KMEM */
 static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
 {
diff --git a/mm/slub.c b/mm/slub.c
index 7022a1246bab..64da169d672a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1875,12 +1875,6 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
 #endif
 #endif /* CONFIG_SLUB_DEBUG */
 
-static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
-{
-	return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
-		NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
-}
-
 #ifdef CONFIG_MEMCG_KMEM
 static inline void memcg_free_slab_cgroups(struct slab *slab)
 {
@@ -1888,79 +1882,6 @@ static inline void memcg_free_slab_cgroups(struct slab *slab)
 	slab->memcg_data = 0;
 }
 
-static inline size_t obj_full_size(struct kmem_cache *s)
-{
-	/*
-	 * For each accounted object there is an extra space which is used
-	 * to store obj_cgroup membership. Charge it too.
-	 */
-	return s->size + sizeof(struct obj_cgroup *);
-}
-
-static bool __memcg_slab_post_alloc_hook(struct kmem_cache *s,
-					 struct list_lru *lru,
-					 gfp_t flags, size_t size,
-					 void **p)
-{
-	struct obj_cgroup *objcg;
-	struct slab *slab;
-	unsigned long off;
-	size_t i;
-
-	/*
-	 * The obtained objcg pointer is safe to use within the current scope,
-	 * defined by current task or set_active_memcg() pair.
-	 * obj_cgroup_get() is used to get a permanent reference.
-	 */
-	objcg = current_obj_cgroup();
-	if (!objcg)
-		return true;
-
-	/*
-	 * slab_alloc_node() avoids the NULL check, so we might be called with a
-	 * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill
-	 * the whole requested size.
-	 * return success as there's nothing to free back
-	 */
-	if (unlikely(*p == NULL))
-		return true;
-
-	flags &= gfp_allowed_mask;
-
-	if (lru) {
-		int ret;
-		struct mem_cgroup *memcg;
-
-		memcg = get_mem_cgroup_from_objcg(objcg);
-		ret = memcg_list_lru_alloc(memcg, lru, flags);
-		css_put(&memcg->css);
-
-		if (ret)
-			return false;
-	}
-
-	if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
-		return false;
-
-	for (i = 0; i < size; i++) {
-		slab = virt_to_slab(p[i]);
-
-		if (!slab_objcgs(slab) &&
-		    memcg_alloc_slab_cgroups(slab, s, flags, false)) {
-			obj_cgroup_uncharge(objcg, obj_full_size(s));
-			continue;
-		}
-
-		off = obj_to_index(s, slab, p[i]);
-		obj_cgroup_get(objcg);
-		slab_objcgs(slab)[off] = objcg;
-		mod_objcg_state(objcg, slab_pgdat(slab),
-				cache_vmstat_idx(s), obj_full_size(s));
-	}
-
-	return true;
-}
-
 static void memcg_alloc_abort_single(struct kmem_cache *s, void *object);
 
 static __fastpath_inline
@@ -1986,27 +1907,6 @@ bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
 	return false;
 }
 
-static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
-				   void **p, int objects,
-				   struct obj_cgroup **objcgs)
-{
-	for (int i = 0; i < objects; i++) {
-		struct obj_cgroup *objcg;
-		unsigned int off;
-
-		off = obj_to_index(s, slab, p[i]);
-		objcg = objcgs[off];
-		if (!objcg)
-			continue;
-
-		objcgs[off] = NULL;
-		obj_cgroup_uncharge(objcg, obj_full_size(s));
-		mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
-				-obj_full_size(s));
-		obj_cgroup_put(objcg);
-	}
-}
-
 static __fastpath_inline
 void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
 			  int objects)
The hooks make multiple calls to functions in mm/memcontrol.c, including
to current_obj_cgroup(), which is marked __always_inline. It might be
faster to make a single call to the hook in mm/memcontrol.c instead. The
hooks also use almost nothing from mm/slub.c: obj_full_size() can move
to mm/memcontrol.c along with the hooks, and cache_vmstat_idx() to the
internal mm/slab.h.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 mm/memcontrol.c |  90 ++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/slab.h       |  10 ++++++
 mm/slub.c       | 100 --------------------------------------------------------
 3 files changed, 100 insertions(+), 100 deletions(-)
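With the hook bodies out of line, the wrapper that stays in mm/slub.c reduces to the inlined fast-path checks plus one call into mm/memcontrol.c. Below is a minimal sketch of the post-alloc wrapper reconstructed from the hunk context above; it is not the verbatim kernel code, and the failure-handling details in particular are assumptions.

/*
 * Simplified sketch of the mm/slub.c wrapper left after this patch
 * (reconstructed, not verbatim). The slow path is a single call into
 * mm/memcontrol.c.
 */
static __fastpath_inline
bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
				gfp_t flags, size_t size, void **p)
{
	/* Fast path: kmem accounting is not enabled at all. */
	if (likely(!memcg_kmem_online()))
		return true;

	/* Fast path: neither the gfp flags nor the cache ask for accounting. */
	if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)))
		return true;

	/* One out-of-line call into mm/memcontrol.c does the real work. */
	if (likely(__memcg_slab_post_alloc_hook(s, lru, flags, size, p)))
		return true;

	/* Charging failed: give the freshly allocated objects back. */
	if (likely(size == 1)) {
		memcg_alloc_abort_single(s, *p);
		*p = NULL;
	} else {
		kmem_cache_free_bulk(s, size, p);
	}
	return false;
}

The point of the refactoring is visible here: the two likely() checks still inline into the allocator fast paths, while everything that touches memcg internals sits behind the single __memcg_slab_post_alloc_hook() call.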