
[RFC,1/4] mm, slab: move memcg charging to post-alloc hook

Message ID 20240301-slab-memcg-v1-1-359328a46596@suse.cz (mailing list archive)
State New
Series memcg_kmem hooks refactoring and kmem_cache_charge()

Commit Message

Vlastimil Babka March 1, 2024, 5:07 p.m. UTC
The MEMCG_KMEM integration with slab currently relies on two hooks
during allocation. memcg_slab_pre_alloc_hook() determines the objcg and
charges it, and memcg_slab_post_alloc_hook() assigns the objcg pointer
to the allocated object(s).

As Linus pointed out, this is unnecessarily complex. Failing to charge
due to memcg limits should be rare, so we can optimistically allocate
the object(s) and do the charging together with assigning the objcg
pointer in a single post_alloc hook. In the rare case the charging
fails, we can free the object(s) back.

This simplifies the code (no need to pass around the objcg pointer) and
potentially allows charging to be separated from allocation in cases
where the allocation is commonly freed right away, so the memcg handling
overhead can be saved.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/all/CAHk-=whYOOdM7jWy5jdrAm8LxcgCMFyk2bt8fYYvZzM4U-zAQA@mail.gmail.com/
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 mm/slub.c | 180 +++++++++++++++++++++++++++-----------------------------------
 1 file changed, 77 insertions(+), 103 deletions(-)
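
A condensed sketch of the resulting single-object allocation path (boiled
down from the diff below; kfence and debug details omitted, so this is an
illustration rather than the literal patch code):

static void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru,
		gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
{
	void *object;
	bool init = false;

	/* only debug/failslab checks remain here, no memcg work */
	s = slab_pre_alloc_hook(s, gfpflags);
	if (unlikely(!s))
		return NULL;

	/* allocate optimistically, without an up-front memcg charge */
	object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
	init = slab_want_init_on_alloc(gfpflags, s);

	/*
	 * Charges the objcg and assigns it to the object; if the charge
	 * fails (rare), the object is freed back and set to NULL, so the
	 * caller simply sees an allocation failure.
	 */
	slab_post_alloc_hook(s, lru, gfpflags, 1, &object, init, orig_size);

	return object;
}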

Comments

Roman Gushchin March 12, 2024, 6:52 p.m. UTC | #1
On Fri, Mar 01, 2024 at 06:07:08PM +0100, Vlastimil Babka wrote:
> The MEMCG_KMEM integration with slab currently relies on two hooks
> during allocation. memcg_slab_pre_alloc_hook() determines the objcg and
> charges it, and memcg_slab_post_alloc_hook() assigns the objcg pointer
> to the allocated object(s).
> 
> As Linus pointed out, this is unnecessarily complex. Failing to charge
> due to memcg limits should be rare, so we can optimistically allocate
> the object(s) and do the charging together with assigning the objcg
> pointer in a single post_alloc hook. In the rare case the charging
> fails, we can free the object(s) back.
> 
> This simplifies the code (no need to pass around the objcg pointer) and
> potentially allows charging to be separated from allocation in cases
> where the allocation is commonly freed right away, so the memcg handling
> overhead can be saved.
> 
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Link: https://lore.kernel.org/all/CAHk-=whYOOdM7jWy5jdrAm8LxcgCMFyk2bt8fYYvZzM4U-zAQA@mail.gmail.com/
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

Nice cleanup, Vlastimil!
Couple of small nits below, but otherwise, please, add my

Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>

Thanks!

> ---
>  mm/slub.c | 180 +++++++++++++++++++++++++++-----------------------------------
>  1 file changed, 77 insertions(+), 103 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index 2ef88bbf56a3..7022a1246bab 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1897,23 +1897,36 @@ static inline size_t obj_full_size(struct kmem_cache *s)
>  	return s->size + sizeof(struct obj_cgroup *);
>  }
>  
> -/*
> - * Returns false if the allocation should fail.
> - */
> -static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
> -					struct list_lru *lru,
> -					struct obj_cgroup **objcgp,
> -					size_t objects, gfp_t flags)
> +static bool __memcg_slab_post_alloc_hook(struct kmem_cache *s,
> +					 struct list_lru *lru,
> +					 gfp_t flags, size_t size,
> +					 void **p)
>  {
> +	struct obj_cgroup *objcg;
> +	struct slab *slab;
> +	unsigned long off;
> +	size_t i;
> +
>  	/*
>  	 * The obtained objcg pointer is safe to use within the current scope,
>  	 * defined by current task or set_active_memcg() pair.
>  	 * obj_cgroup_get() is used to get a permanent reference.
>  	 */
> -	struct obj_cgroup *objcg = current_obj_cgroup();
> +	objcg = current_obj_cgroup();
>  	if (!objcg)
>  		return true;
>  
> +	/*
> +	 * slab_alloc_node() avoids the NULL check, so we might be called with a
> +	 * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill
> +	 * the whole requested size.
> +	 * return success as there's nothing to free back
> +	 */
> +	if (unlikely(*p == NULL))
> +		return true;

Probably better to move this check up? current_obj_cgroup() != NULL check is more
expensive.

> +
> +	flags &= gfp_allowed_mask;
> +
>  	if (lru) {
>  		int ret;
>  		struct mem_cgroup *memcg;
> @@ -1926,71 +1939,51 @@ static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
>  			return false;
>  	}
>  
> -	if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
> +	if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
>  		return false;
>  
> -	*objcgp = objcg;
> +	for (i = 0; i < size; i++) {
> +		slab = virt_to_slab(p[i]);

Not specific to this change, but I wonder if it makes sense to introduce virt_to_slab()
variant without any extra checks for this and similar cases, where we know for sure
that p resides on a slab page. What do you think?
Matthew Wilcox March 12, 2024, 6:59 p.m. UTC | #2
On Tue, Mar 12, 2024 at 11:52:46AM -0700, Roman Gushchin wrote:
> On Fri, Mar 01, 2024 at 06:07:08PM +0100, Vlastimil Babka wrote:
> > @@ -1926,71 +1939,51 @@ static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
> >  			return false;
> >  	}
> >  
> > -	if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
> > +	if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
> >  		return false;
> >  
> > -	*objcgp = objcg;
> > +	for (i = 0; i < size; i++) {
> > +		slab = virt_to_slab(p[i]);
> 
> Not specific to this change, but I wonder if it makes sense to introduce virt_to_slab()
> variant without any extra checks for this and similar cases, where we know for sure
> that p resides on a slab page. What do you think?

You'd only save a single test_bit() ... is it really worth doing?
Cache misses are the expensive thing, not instructions.  And debugging
time: if somehow p[i] becomes not-on-a-slab-anymore, getting a NULL
pointer splat here before we go any further might be worth all the CPU
time wasted doing that test_bit().
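
For reference, the existing helper looks roughly like this (paraphrased
from mm/slab.h); the variant Roman has in mind would drop the
folio_test_slab() check, i.e. the single test_bit() Matthew refers to.
The unchecked variant and its name below are a hypothetical sketch, not
code in the tree:

/* returns NULL when the address is not backed by a slab page */
static inline struct slab *virt_to_slab(const void *addr)
{
	struct folio *folio = virt_to_folio(addr);

	if (!folio_test_slab(folio))	/* the test_bit() in question */
		return NULL;

	return folio_slab(folio);
}

/* hypothetical variant: callers must guarantee @addr is on a slab page */
static inline struct slab *virt_to_slab_unchecked(const void *addr)
{
	return folio_slab(virt_to_folio(addr));
}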
Roman Gushchin March 12, 2024, 8:35 p.m. UTC | #3
On Tue, Mar 12, 2024 at 06:59:37PM +0000, Matthew Wilcox wrote:
> On Tue, Mar 12, 2024 at 11:52:46AM -0700, Roman Gushchin wrote:
> > On Fri, Mar 01, 2024 at 06:07:08PM +0100, Vlastimil Babka wrote:
> > > @@ -1926,71 +1939,51 @@ static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
> > >  			return false;
> > >  	}
> > >  
> > > -	if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
> > > +	if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
> > >  		return false;
> > >  
> > > -	*objcgp = objcg;
> > > +	for (i = 0; i < size; i++) {
> > > +		slab = virt_to_slab(p[i]);
> > 
> > Not specific to this change, but I wonder if it makes sense to introduce virt_to_slab()
> > variant without any extra checks for this and similar cases, where we know for sure
> > that p resides on a slab page. What do you think?
> 
> You'd only save a single test_bit() ... is it really worth doing?
> Cache misses are the expensive thing, not instructions.

I agree here, unlikely it will produce a significant difference.

> And debugging
> time: if somehow p[i] becomes not-on-a-slab-anymore, getting a NULL
> pointer splat here before we go any further might be worth all the CPU
> time wasted doing that test_bit().

Well, I don't know if it's a realistic concern here; it's hard to imagine
how p[i] wouldn't belong to a slab page without something like major
memory corruption.

Overall I agree it's not a big deal and the current code is fine.

Thanks!
Vlastimil Babka March 13, 2024, 10:55 a.m. UTC | #4
On 3/12/24 19:52, Roman Gushchin wrote:
> On Fri, Mar 01, 2024 at 06:07:08PM +0100, Vlastimil Babka wrote:
>> The MEMCG_KMEM integration with slab currently relies on two hooks
>> during allocation. memcg_slab_pre_alloc_hook() determines the objcg and
>> charges it, and memcg_slab_post_alloc_hook() assigns the objcg pointer
>> to the allocated object(s).
>>
>> As Linus pointed out, this is unnecessarily complex. Failing to charge
>> due to memcg limits should be rare, so we can optimistically allocate
>> the object(s) and do the charging together with assigning the objcg
>> pointer in a single post_alloc hook. In the rare case the charging
>> fails, we can free the object(s) back.
>>
>> This simplifies the code (no need to pass around the objcg pointer) and
>> potentially allows charging to be separated from allocation in cases
>> where the allocation is commonly freed right away, so the memcg handling
>> overhead can be saved.
>>
>> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
>> Link: https://lore.kernel.org/all/CAHk-=whYOOdM7jWy5jdrAm8LxcgCMFyk2bt8fYYvZzM4U-zAQA@mail.gmail.com/
>> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> 
> Nice cleanup, Vlastimil!
> Couple of small nits below, but otherwise, please, add my
> 
> Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>

Thanks!

>>  	/*
>>  	 * The obtained objcg pointer is safe to use within the current scope,
>>  	 * defined by current task or set_active_memcg() pair.
>>  	 * obj_cgroup_get() is used to get a permanent reference.
>>  	 */
>> -	struct obj_cgroup *objcg = current_obj_cgroup();
>> +	objcg = current_obj_cgroup();
>>  	if (!objcg)
>>  		return true;
>>  
>> +	/*
>> +	 * slab_alloc_node() avoids the NULL check, so we might be called with a
>> +	 * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill
>> +	 * the whole requested size.
>> +	 * return success as there's nothing to free back
>> +	 */
>> +	if (unlikely(*p == NULL))
>> +		return true;
> 
> Probably better to move this check up? current_obj_cgroup() != NULL check is more
> expensive.

It probably doesn't matter in practice anyway, but my thinking was that
*p == NULL is so rare (the object allocation failed) it shouldn't matter
that we did current_obj_cgroup() uselessly in case it happens.
OTOH current_obj_cgroup() returning NULL is not that rare (?) so it
could be useful to not check *p in those cases?

>> +
>> +	flags &= gfp_allowed_mask;
>> +
>>  	if (lru) {
>>  		int ret;
>>  		struct mem_cgroup *memcg;
>> @@ -1926,71 +1939,51 @@ static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
>>  			return false;
>>  	}
>>  
>> -	if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
>> +	if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
>>  		return false;
>>  
>> -	*objcgp = objcg;
>> +	for (i = 0; i < size; i++) {
>> +		slab = virt_to_slab(p[i]);
> 
> Not specific to this change, but I wonder if it makes sense to introduce virt_to_slab()
> variant without any extra checks for this and similar cases, where we know for sure
> that p resides on a slab page. What do you think?
Roman Gushchin March 13, 2024, 5:34 p.m. UTC | #5
On Wed, Mar 13, 2024 at 11:55:04AM +0100, Vlastimil Babka wrote:
> On 3/12/24 19:52, Roman Gushchin wrote:
> > On Fri, Mar 01, 2024 at 06:07:08PM +0100, Vlastimil Babka wrote:
> >> The MEMCG_KMEM integration with slab currently relies on two hooks
> >> during allocation. memcg_slab_pre_alloc_hook() determines the objcg and
> >> charges it, and memcg_slab_post_alloc_hook() assigns the objcg pointer
> >> to the allocated object(s).
> >>
> >> As Linus pointed out, this is unnecessarily complex. Failing to charge
> >> due to memcg limits should be rare, so we can optimistically allocate
> >> the object(s) and do the charging together with assigning the objcg
> >> pointer in a single post_alloc hook. In the rare case the charging
> >> fails, we can free the object(s) back.
> >>
> >> This simplifies the code (no need to pass around the objcg pointer) and
> >> potentially allows charging to be separated from allocation in cases
> >> where the allocation is commonly freed right away, so the memcg handling
> >> overhead can be saved.
> >>
> >> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> >> Link: https://lore.kernel.org/all/CAHk-=whYOOdM7jWy5jdrAm8LxcgCMFyk2bt8fYYvZzM4U-zAQA@mail.gmail.com/
> >> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> > 
> > Nice cleanup, Vlastimil!
> > Couple of small nits below, but otherwise, please, add my
> > 
> > Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
> 
> Thanks!
> 
> >>  	/*
> >>  	 * The obtained objcg pointer is safe to use within the current scope,
> >>  	 * defined by current task or set_active_memcg() pair.
> >>  	 * obj_cgroup_get() is used to get a permanent reference.
> >>  	 */
> >> -	struct obj_cgroup *objcg = current_obj_cgroup();
> >> +	objcg = current_obj_cgroup();
> >>  	if (!objcg)
> >>  		return true;
> >>  
> >> +	/*
> >> +	 * slab_alloc_node() avoids the NULL check, so we might be called with a
> >> +	 * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill
> >> +	 * the whole requested size.
> >> +	 * return success as there's nothing to free back
> >> +	 */
> >> +	if (unlikely(*p == NULL))
> >> +		return true;
> > 
> > Probably better to move this check up? current_obj_cgroup() != NULL check is more
> > expensive.
> 
> It probably doesn't matter in practice anyway, but my thinking was that
> *p == NULL is so rare (the object allocation failed) it shouldn't matter
> that we did current_obj_cgroup() uselessly in case it happens.
> OTOH current_obj_cgroup() returning NULL is not that rare (?) so it
> could be useful to not check *p in those cases?

I see... Hm, I'd generally expect speculative execution of the second check
anyway (especially with an unlikely() hint for the first one), and because,
as you said, *p == NULL is almost never true, the additional cost is zero.
But the same holds for the other ordering, so it really doesn't matter much.
Thanks for explaining your logic, it wasn't obvious to me.
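
To make the two orderings under discussion concrete (illustration only,
surrounding code elided):

	/* ordering in the patch: bail out early when there is no objcg,
	 * the more common case; *p is only tested when kmem accounting
	 * is actually active */
	objcg = current_obj_cgroup();
	if (!objcg)
		return true;

	if (unlikely(*p == NULL))
		return true;

	/* suggested alternative: do the cheap pointer check first, so a
	 * failed allocation never pays for current_obj_cgroup() */
	if (unlikely(*p == NULL))
		return true;

	objcg = current_obj_cgroup();
	if (!objcg)
		return true;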
Chengming Zhou March 15, 2024, 3:23 a.m. UTC | #6
On 2024/3/2 01:07, Vlastimil Babka wrote:
> The MEMCG_KMEM integration with slab currently relies on two hooks
> during allocation. memcg_slab_pre_alloc_hook() determines the objcg and
> charges it, and memcg_slab_post_alloc_hook() assigns the objcg pointer
> to the allocated object(s).
> 
> As Linus pointed out, this is unnecessarily complex. Failing to charge
> due to memcg limits should be rare, so we can optimistically allocate
> the object(s) and do the charging together with assigning the objcg
> pointer in a single post_alloc hook. In the rare case the charging
> fails, we can free the object(s) back.
> 
> This simplifies the code (no need to pass around the objcg pointer) and
> potentially allows charging to be separated from allocation in cases
> where the allocation is commonly freed right away, so the memcg handling
> overhead can be saved.
> 
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Link: https://lore.kernel.org/all/CAHk-=whYOOdM7jWy5jdrAm8LxcgCMFyk2bt8fYYvZzM4U-zAQA@mail.gmail.com/
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

Nice!

Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev>

Thanks.

> ---
>  mm/slub.c | 180 +++++++++++++++++++++++++++-----------------------------------
>  1 file changed, 77 insertions(+), 103 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index 2ef88bbf56a3..7022a1246bab 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1897,23 +1897,36 @@ static inline size_t obj_full_size(struct kmem_cache *s)
>  	return s->size + sizeof(struct obj_cgroup *);
>  }
>  
> -/*
> - * Returns false if the allocation should fail.
> - */
> -static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
> -					struct list_lru *lru,
> -					struct obj_cgroup **objcgp,
> -					size_t objects, gfp_t flags)
> +static bool __memcg_slab_post_alloc_hook(struct kmem_cache *s,
> +					 struct list_lru *lru,
> +					 gfp_t flags, size_t size,
> +					 void **p)
>  {
> +	struct obj_cgroup *objcg;
> +	struct slab *slab;
> +	unsigned long off;
> +	size_t i;
> +
>  	/*
>  	 * The obtained objcg pointer is safe to use within the current scope,
>  	 * defined by current task or set_active_memcg() pair.
>  	 * obj_cgroup_get() is used to get a permanent reference.
>  	 */
> -	struct obj_cgroup *objcg = current_obj_cgroup();
> +	objcg = current_obj_cgroup();
>  	if (!objcg)
>  		return true;
>  
> +	/*
> +	 * slab_alloc_node() avoids the NULL check, so we might be called with a
> +	 * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill
> +	 * the whole requested size.
> +	 * return success as there's nothing to free back
> +	 */
> +	if (unlikely(*p == NULL))
> +		return true;
> +
> +	flags &= gfp_allowed_mask;
> +
>  	if (lru) {
>  		int ret;
>  		struct mem_cgroup *memcg;
> @@ -1926,71 +1939,51 @@ static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
>  			return false;
>  	}
>  
> -	if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
> +	if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
>  		return false;
>  
> -	*objcgp = objcg;
> +	for (i = 0; i < size; i++) {
> +		slab = virt_to_slab(p[i]);
> +
> +		if (!slab_objcgs(slab) &&
> +		    memcg_alloc_slab_cgroups(slab, s, flags, false)) {
> +			obj_cgroup_uncharge(objcg, obj_full_size(s));
> +			continue;
> +		}
> +
> +		off = obj_to_index(s, slab, p[i]);
> +		obj_cgroup_get(objcg);
> +		slab_objcgs(slab)[off] = objcg;
> +		mod_objcg_state(objcg, slab_pgdat(slab),
> +				cache_vmstat_idx(s), obj_full_size(s));
> +	}
> +
>  	return true;
>  }
>  
> -/*
> - * Returns false if the allocation should fail.
> - */
> +static void memcg_alloc_abort_single(struct kmem_cache *s, void *object);
> +
>  static __fastpath_inline
> -bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
> -			       struct obj_cgroup **objcgp, size_t objects,
> -			       gfp_t flags)
> +bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
> +				gfp_t flags, size_t size, void **p)
>  {
> -	if (!memcg_kmem_online())
> +	if (likely(!memcg_kmem_online()))
>  		return true;
>  
>  	if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)))
>  		return true;
>  
> -	return likely(__memcg_slab_pre_alloc_hook(s, lru, objcgp, objects,
> -						  flags));
> -}
> -
> -static void __memcg_slab_post_alloc_hook(struct kmem_cache *s,
> -					 struct obj_cgroup *objcg,
> -					 gfp_t flags, size_t size,
> -					 void **p)
> -{
> -	struct slab *slab;
> -	unsigned long off;
> -	size_t i;
> -
> -	flags &= gfp_allowed_mask;
> -
> -	for (i = 0; i < size; i++) {
> -		if (likely(p[i])) {
> -			slab = virt_to_slab(p[i]);
> -
> -			if (!slab_objcgs(slab) &&
> -			    memcg_alloc_slab_cgroups(slab, s, flags, false)) {
> -				obj_cgroup_uncharge(objcg, obj_full_size(s));
> -				continue;
> -			}
> +	if (likely(__memcg_slab_post_alloc_hook(s, lru, flags, size, p)))
> +		return true;
>  
> -			off = obj_to_index(s, slab, p[i]);
> -			obj_cgroup_get(objcg);
> -			slab_objcgs(slab)[off] = objcg;
> -			mod_objcg_state(objcg, slab_pgdat(slab),
> -					cache_vmstat_idx(s), obj_full_size(s));
> -		} else {
> -			obj_cgroup_uncharge(objcg, obj_full_size(s));
> -		}
> +	if (likely(size == 1)) {
> +		memcg_alloc_abort_single(s, p);
> +		*p = NULL;
> +	} else {
> +		kmem_cache_free_bulk(s, size, p);
>  	}
> -}
> -
> -static __fastpath_inline
> -void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
> -				gfp_t flags, size_t size, void **p)
> -{
> -	if (likely(!memcg_kmem_online() || !objcg))
> -		return;
>  
> -	return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
> +	return false;
>  }
>  
>  static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
> @@ -2029,14 +2022,6 @@ void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
>  
>  	__memcg_slab_free_hook(s, slab, p, objects, objcgs);
>  }
> -
> -static inline
> -void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
> -			   struct obj_cgroup *objcg)
> -{
> -	if (objcg)
> -		obj_cgroup_uncharge(objcg, objects * obj_full_size(s));
> -}
>  #else /* CONFIG_MEMCG_KMEM */
>  static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
>  {
> @@ -2047,31 +2032,18 @@ static inline void memcg_free_slab_cgroups(struct slab *slab)
>  {
>  }
>  
> -static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
> -					     struct list_lru *lru,
> -					     struct obj_cgroup **objcgp,
> -					     size_t objects, gfp_t flags)
> -{
> -	return true;
> -}
> -
> -static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
> -					      struct obj_cgroup *objcg,
> +static inline bool memcg_slab_post_alloc_hook(struct kmem_cache *s,
> +					      struct list_lru *lru,
>  					      gfp_t flags, size_t size,
>  					      void **p)
>  {
> +	return true;
>  }
>  
>  static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
>  					void **p, int objects)
>  {
>  }
> -
> -static inline
> -void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
> -				 struct obj_cgroup *objcg)
> -{
> -}
>  #endif /* CONFIG_MEMCG_KMEM */
>  
>  /*
> @@ -3751,10 +3723,7 @@ noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>  ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
>  
>  static __fastpath_inline
> -struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
> -				       struct list_lru *lru,
> -				       struct obj_cgroup **objcgp,
> -				       size_t size, gfp_t flags)
> +struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
>  {
>  	flags &= gfp_allowed_mask;
>  
> @@ -3763,14 +3732,11 @@ struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
>  	if (unlikely(should_failslab(s, flags)))
>  		return NULL;
>  
> -	if (unlikely(!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags)))
> -		return NULL;
> -
>  	return s;
>  }
>  
>  static __fastpath_inline
> -void slab_post_alloc_hook(struct kmem_cache *s,	struct obj_cgroup *objcg,
> +bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
>  			  gfp_t flags, size_t size, void **p, bool init,
>  			  unsigned int orig_size)
>  {
> @@ -3819,7 +3785,7 @@ void slab_post_alloc_hook(struct kmem_cache *s,	struct obj_cgroup *objcg,
>  		kmsan_slab_alloc(s, p[i], init_flags);
>  	}
>  
> -	memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
> +	return memcg_slab_post_alloc_hook(s, lru, flags, size, p);
>  }
>  
>  /*
> @@ -3836,10 +3802,9 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
>  		gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
>  {
>  	void *object;
> -	struct obj_cgroup *objcg = NULL;
>  	bool init = false;
>  
> -	s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
> +	s = slab_pre_alloc_hook(s, gfpflags);
>  	if (unlikely(!s))
>  		return NULL;
>  
> @@ -3856,8 +3821,10 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
>  	/*
>  	 * When init equals 'true', like for kzalloc() family, only
>  	 * @orig_size bytes might be zeroed instead of s->object_size
> +	 * In case this fails due to memcg_slab_post_alloc_hook(),
> +	 * object is set to NULL
>  	 */
> -	slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size);
> +	slab_post_alloc_hook(s, lru, gfpflags, 1, &object, init, orig_size);
>  
>  	return object;
>  }
> @@ -4300,6 +4267,16 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
>  		do_slab_free(s, slab, object, object, 1, addr);
>  }
>  
> +#ifdef CONFIG_MEMCG_KMEM
> +/* Do not inline the rare memcg charging failed path into the allocation path */
> +static noinline
> +void memcg_alloc_abort_single(struct kmem_cache *s, void *object)
> +{
> +	if (likely(slab_free_hook(s, object, slab_want_init_on_free(s))))
> +		do_slab_free(s, virt_to_slab(object), object, object, 1, _RET_IP_);
> +}
> +#endif
> +
>  static __fastpath_inline
>  void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head,
>  		    void *tail, void **p, int cnt, unsigned long addr)
> @@ -4635,29 +4612,26 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
>  			  void **p)
>  {
>  	int i;
> -	struct obj_cgroup *objcg = NULL;
>  
>  	if (!size)
>  		return 0;
>  
> -	/* memcg and kmem_cache debug support */
> -	s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
> +	s = slab_pre_alloc_hook(s, flags);
>  	if (unlikely(!s))
>  		return 0;
>  
>  	i = __kmem_cache_alloc_bulk(s, flags, size, p);
> +	if (unlikely(i == 0))
> +		return 0;
>  
>  	/*
>  	 * memcg and kmem_cache debug support and memory initialization.
>  	 * Done outside of the IRQ disabled fastpath loop.
>  	 */
> -	if (likely(i != 0)) {
> -		slab_post_alloc_hook(s, objcg, flags, size, p,
> -			slab_want_init_on_alloc(flags, s), s->object_size);
> -	} else {
> -		memcg_slab_alloc_error_hook(s, size, objcg);
> +	if (unlikely(!slab_post_alloc_hook(s, NULL, flags, size, p,
> +		    slab_want_init_on_alloc(flags, s), s->object_size))) {
> +		return 0;
>  	}
> -
>  	return i;
>  }
>  EXPORT_SYMBOL(kmem_cache_alloc_bulk);
>

Patch

diff --git a/mm/slub.c b/mm/slub.c
index 2ef88bbf56a3..7022a1246bab 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1897,23 +1897,36 @@  static inline size_t obj_full_size(struct kmem_cache *s)
 	return s->size + sizeof(struct obj_cgroup *);
 }
 
-/*
- * Returns false if the allocation should fail.
- */
-static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
-					struct list_lru *lru,
-					struct obj_cgroup **objcgp,
-					size_t objects, gfp_t flags)
+static bool __memcg_slab_post_alloc_hook(struct kmem_cache *s,
+					 struct list_lru *lru,
+					 gfp_t flags, size_t size,
+					 void **p)
 {
+	struct obj_cgroup *objcg;
+	struct slab *slab;
+	unsigned long off;
+	size_t i;
+
 	/*
 	 * The obtained objcg pointer is safe to use within the current scope,
 	 * defined by current task or set_active_memcg() pair.
 	 * obj_cgroup_get() is used to get a permanent reference.
 	 */
-	struct obj_cgroup *objcg = current_obj_cgroup();
+	objcg = current_obj_cgroup();
 	if (!objcg)
 		return true;
 
+	/*
+	 * slab_alloc_node() avoids the NULL check, so we might be called with a
+	 * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill
+	 * the whole requested size.
+	 * return success as there's nothing to free back
+	 */
+	if (unlikely(*p == NULL))
+		return true;
+
+	flags &= gfp_allowed_mask;
+
 	if (lru) {
 		int ret;
 		struct mem_cgroup *memcg;
@@ -1926,71 +1939,51 @@  static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
 			return false;
 	}
 
-	if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
+	if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
 		return false;
 
-	*objcgp = objcg;
+	for (i = 0; i < size; i++) {
+		slab = virt_to_slab(p[i]);
+
+		if (!slab_objcgs(slab) &&
+		    memcg_alloc_slab_cgroups(slab, s, flags, false)) {
+			obj_cgroup_uncharge(objcg, obj_full_size(s));
+			continue;
+		}
+
+		off = obj_to_index(s, slab, p[i]);
+		obj_cgroup_get(objcg);
+		slab_objcgs(slab)[off] = objcg;
+		mod_objcg_state(objcg, slab_pgdat(slab),
+				cache_vmstat_idx(s), obj_full_size(s));
+	}
+
 	return true;
 }
 
-/*
- * Returns false if the allocation should fail.
- */
+static void memcg_alloc_abort_single(struct kmem_cache *s, void *object);
+
 static __fastpath_inline
-bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
-			       struct obj_cgroup **objcgp, size_t objects,
-			       gfp_t flags)
+bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
+				gfp_t flags, size_t size, void **p)
 {
-	if (!memcg_kmem_online())
+	if (likely(!memcg_kmem_online()))
 		return true;
 
 	if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)))
 		return true;
 
-	return likely(__memcg_slab_pre_alloc_hook(s, lru, objcgp, objects,
-						  flags));
-}
-
-static void __memcg_slab_post_alloc_hook(struct kmem_cache *s,
-					 struct obj_cgroup *objcg,
-					 gfp_t flags, size_t size,
-					 void **p)
-{
-	struct slab *slab;
-	unsigned long off;
-	size_t i;
-
-	flags &= gfp_allowed_mask;
-
-	for (i = 0; i < size; i++) {
-		if (likely(p[i])) {
-			slab = virt_to_slab(p[i]);
-
-			if (!slab_objcgs(slab) &&
-			    memcg_alloc_slab_cgroups(slab, s, flags, false)) {
-				obj_cgroup_uncharge(objcg, obj_full_size(s));
-				continue;
-			}
+	if (likely(__memcg_slab_post_alloc_hook(s, lru, flags, size, p)))
+		return true;
 
-			off = obj_to_index(s, slab, p[i]);
-			obj_cgroup_get(objcg);
-			slab_objcgs(slab)[off] = objcg;
-			mod_objcg_state(objcg, slab_pgdat(slab),
-					cache_vmstat_idx(s), obj_full_size(s));
-		} else {
-			obj_cgroup_uncharge(objcg, obj_full_size(s));
-		}
+	if (likely(size == 1)) {
+		memcg_alloc_abort_single(s, p);
+		*p = NULL;
+	} else {
+		kmem_cache_free_bulk(s, size, p);
 	}
-}
-
-static __fastpath_inline
-void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
-				gfp_t flags, size_t size, void **p)
-{
-	if (likely(!memcg_kmem_online() || !objcg))
-		return;
 
-	return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
+	return false;
 }
 
 static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
@@ -2029,14 +2022,6 @@  void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
 
 	__memcg_slab_free_hook(s, slab, p, objects, objcgs);
 }
-
-static inline
-void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
-			   struct obj_cgroup *objcg)
-{
-	if (objcg)
-		obj_cgroup_uncharge(objcg, objects * obj_full_size(s));
-}
 #else /* CONFIG_MEMCG_KMEM */
 static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
 {
@@ -2047,31 +2032,18 @@  static inline void memcg_free_slab_cgroups(struct slab *slab)
 {
 }
 
-static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
-					     struct list_lru *lru,
-					     struct obj_cgroup **objcgp,
-					     size_t objects, gfp_t flags)
-{
-	return true;
-}
-
-static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
-					      struct obj_cgroup *objcg,
+static inline bool memcg_slab_post_alloc_hook(struct kmem_cache *s,
+					      struct list_lru *lru,
 					      gfp_t flags, size_t size,
 					      void **p)
 {
+	return true;
 }
 
 static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
 					void **p, int objects)
 {
 }
-
-static inline
-void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
-				 struct obj_cgroup *objcg)
-{
-}
 #endif /* CONFIG_MEMCG_KMEM */
 
 /*
@@ -3751,10 +3723,7 @@  noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
 ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
 
 static __fastpath_inline
-struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
-				       struct list_lru *lru,
-				       struct obj_cgroup **objcgp,
-				       size_t size, gfp_t flags)
+struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
 {
 	flags &= gfp_allowed_mask;
 
@@ -3763,14 +3732,11 @@  struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
 	if (unlikely(should_failslab(s, flags)))
 		return NULL;
 
-	if (unlikely(!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags)))
-		return NULL;
-
 	return s;
 }
 
 static __fastpath_inline
-void slab_post_alloc_hook(struct kmem_cache *s,	struct obj_cgroup *objcg,
+bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
 			  gfp_t flags, size_t size, void **p, bool init,
 			  unsigned int orig_size)
 {
@@ -3819,7 +3785,7 @@  void slab_post_alloc_hook(struct kmem_cache *s,	struct obj_cgroup *objcg,
 		kmsan_slab_alloc(s, p[i], init_flags);
 	}
 
-	memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
+	return memcg_slab_post_alloc_hook(s, lru, flags, size, p);
 }
 
 /*
@@ -3836,10 +3802,9 @@  static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 		gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
 {
 	void *object;
-	struct obj_cgroup *objcg = NULL;
 	bool init = false;
 
-	s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
+	s = slab_pre_alloc_hook(s, gfpflags);
 	if (unlikely(!s))
 		return NULL;
 
@@ -3856,8 +3821,10 @@  static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 	/*
 	 * When init equals 'true', like for kzalloc() family, only
 	 * @orig_size bytes might be zeroed instead of s->object_size
+	 * In case this fails due to memcg_slab_post_alloc_hook(),
+	 * object is set to NULL
 	 */
-	slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size);
+	slab_post_alloc_hook(s, lru, gfpflags, 1, &object, init, orig_size);
 
 	return object;
 }
@@ -4300,6 +4267,16 @@  void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
 		do_slab_free(s, slab, object, object, 1, addr);
 }
 
+#ifdef CONFIG_MEMCG_KMEM
+/* Do not inline the rare memcg charging failed path into the allocation path */
+static noinline
+void memcg_alloc_abort_single(struct kmem_cache *s, void *object)
+{
+	if (likely(slab_free_hook(s, object, slab_want_init_on_free(s))))
+		do_slab_free(s, virt_to_slab(object), object, object, 1, _RET_IP_);
+}
+#endif
+
 static __fastpath_inline
 void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head,
 		    void *tail, void **p, int cnt, unsigned long addr)
@@ -4635,29 +4612,26 @@  int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 			  void **p)
 {
 	int i;
-	struct obj_cgroup *objcg = NULL;
 
 	if (!size)
 		return 0;
 
-	/* memcg and kmem_cache debug support */
-	s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
+	s = slab_pre_alloc_hook(s, flags);
 	if (unlikely(!s))
 		return 0;
 
 	i = __kmem_cache_alloc_bulk(s, flags, size, p);
+	if (unlikely(i == 0))
+		return 0;
 
 	/*
 	 * memcg and kmem_cache debug support and memory initialization.
 	 * Done outside of the IRQ disabled fastpath loop.
 	 */
-	if (likely(i != 0)) {
-		slab_post_alloc_hook(s, objcg, flags, size, p,
-			slab_want_init_on_alloc(flags, s), s->object_size);
-	} else {
-		memcg_slab_alloc_error_hook(s, size, objcg);
+	if (unlikely(!slab_post_alloc_hook(s, NULL, flags, size, p,
+		    slab_want_init_on_alloc(flags, s), s->object_size))) {
+		return 0;
 	}
-
 	return i;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_bulk);