
[v3,2/5] mm/memcg: Introduce obj_cgroup_uncharge_mod_state()

Message ID 20210414012027.5352-3-longman@redhat.com
State New, archived
Series mm/memcg: Reduce kmemcache memory accounting overhead

Commit Message

Waiman Long April 14, 2021, 1:20 a.m. UTC
In memcg_slab_free_hook()/pcpu_memcg_free_hook(), obj_cgroup_uncharge()
is followed by mod_objcg_state()/mod_memcg_state(). Each of these
function calls goes through a separate irq_save/irq_restore cycle. That
is inefficient.  Introduce a new function obj_cgroup_uncharge_mod_state()
that combines them with a single irq_save/irq_restore cycle.
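
Condensed from the slab free hook in the diff below, the change at the
call sites amounts to the following (simplified for illustration):

	/* Before: each helper opens its own irq-off window. */
	obj_cgroup_uncharge(objcg, obj_full_size(s));
	mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s),
			-obj_full_size(s));

	/* After: both operations share a single irq-off window. */
	obj_cgroup_uncharge_mod_state(objcg, obj_full_size(s),
				      page_pgdat(page),
				      cache_vmstat_idx(s));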

Signed-off-by: Waiman Long <longman@redhat.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <guro@fb.com>
---
 include/linux/memcontrol.h |  2 ++
 mm/memcontrol.c            | 31 +++++++++++++++++++++++++++----
 mm/percpu.c                |  9 ++-------
 mm/slab.h                  |  6 +++---
 4 files changed, 34 insertions(+), 14 deletions(-)

Comments

Masayoshi Mizuma April 15, 2021, 3:27 a.m. UTC | #1
On Tue, Apr 13, 2021 at 09:20:24PM -0400, Waiman Long wrote:
> In memcg_slab_free_hook()/pcpu_memcg_free_hook(), obj_cgroup_uncharge()
> is followed by mod_objcg_state()/mod_memcg_state(). Each of these
> function calls goes through a separate irq_save/irq_restore cycle. That
> is inefficient.  Introduce a new function obj_cgroup_uncharge_mod_state()
> that combines them with a single irq_save/irq_restore cycle.
> 
> Signed-off-by: Waiman Long <longman@redhat.com>
> Reviewed-by: Shakeel Butt <shakeelb@google.com>
> Acked-by: Roman Gushchin <guro@fb.com>
> ---
>  include/linux/memcontrol.h |  2 ++
>  mm/memcontrol.c            | 31 +++++++++++++++++++++++++++----
>  mm/percpu.c                |  9 ++-------
>  mm/slab.h                  |  6 +++---
>  4 files changed, 34 insertions(+), 14 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 95f12996e66c..6890f999c1a3 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -1592,6 +1592,8 @@ struct obj_cgroup *get_obj_cgroup_from_current(void);
>  
>  int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size);
>  void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);
> +void obj_cgroup_uncharge_mod_state(struct obj_cgroup *objcg, size_t size,
> +				   struct pglist_data *pgdat, int idx);
>  
>  extern struct static_key_false memcg_kmem_enabled_key;
>  
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index d66e1e38f8ac..b19100c68aa0 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3225,12 +3225,9 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
>  	return false;
>  }
>  
> -static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
> +static void __refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
>  {
>  	struct memcg_stock_pcp *stock;
> -	unsigned long flags;
> -
> -	local_irq_save(flags);
>  
>  	stock = this_cpu_ptr(&memcg_stock);
>  	if (stock->cached_objcg != objcg) { /* reset if necessary */
> @@ -3243,7 +3240,14 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
>  
>  	if (stock->nr_bytes > PAGE_SIZE)
>  		drain_obj_stock(stock);
> +}
> +
> +static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
> +{
> +	unsigned long flags;
>  
> +	local_irq_save(flags);
> +	__refill_obj_stock(objcg, nr_bytes);
>  	local_irq_restore(flags);
>  }
>  
> @@ -3292,6 +3296,25 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
>  	refill_obj_stock(objcg, size);
>  }
>  
> +void obj_cgroup_uncharge_mod_state(struct obj_cgroup *objcg, size_t size,
> +				   struct pglist_data *pgdat, int idx)
> +{
> +	unsigned long flags;
> +	struct mem_cgroup *memcg;
> +	struct lruvec *lruvec = NULL;
> +
> +	local_irq_save(flags);
> +	__refill_obj_stock(objcg, size);
> +
> +	rcu_read_lock();
> +	memcg = obj_cgroup_memcg(objcg);
> +	if (pgdat)
> +		lruvec = mem_cgroup_lruvec(memcg, pgdat);
> +	__mod_memcg_lruvec_state(memcg, lruvec, idx, -(int)size);
> +	rcu_read_unlock();
> +	local_irq_restore(flags);
> +}
> +
>  #endif /* CONFIG_MEMCG_KMEM */
>  
>  /*
> diff --git a/mm/percpu.c b/mm/percpu.c
> index 23308113a5ff..fd7aad6d7f90 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -1631,13 +1631,8 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
>  	objcg = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT];
>  	chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL;
>  
> -	obj_cgroup_uncharge(objcg, size * num_possible_cpus());
> -
> -	rcu_read_lock();
> -	mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
> -			-(size * num_possible_cpus()));
> -	rcu_read_unlock();
> -
> +	obj_cgroup_uncharge_mod_state(objcg, size * num_possible_cpus(),
> +				      NULL, MEMCG_PERCPU_B);
>  	obj_cgroup_put(objcg);
>  }
>  
> diff --git a/mm/slab.h b/mm/slab.h
> index bc6c7545e487..677cdc52e641 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -366,9 +366,9 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
>  			continue;
>  
>  		objcgs[off] = NULL;
> -		obj_cgroup_uncharge(objcg, obj_full_size(s));
> -		mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s),
> -				-obj_full_size(s));
> +		obj_cgroup_uncharge_mod_state(objcg, obj_full_size(s),
> +					      page_pgdat(page),
> +					      cache_vmstat_idx(s));
>  		obj_cgroup_put(objcg);
>  	}
>  }
> -- 
> 2.18.1
> 

Please feel free to add:

Tested-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>

Thanks!
Masa
Johannes Weiner April 15, 2021, 4:30 p.m. UTC | #2
On Tue, Apr 13, 2021 at 09:20:24PM -0400, Waiman Long wrote:
> In memcg_slab_free_hook()/pcpu_memcg_free_hook(), obj_cgroup_uncharge()
> is followed by mod_objcg_state()/mod_memcg_state(). Each of these
> function calls goes through a separate irq_save/irq_restore cycle. That
> is inefficient.  Introduce a new function obj_cgroup_uncharge_mod_state()
> that combines them with a single irq_save/irq_restore cycle.
>
> @@ -3292,6 +3296,25 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
>  	refill_obj_stock(objcg, size);
>  }
>  
> +void obj_cgroup_uncharge_mod_state(struct obj_cgroup *objcg, size_t size,
> +				   struct pglist_data *pgdat, int idx)

The optimization makes sense.

But please don't combine independent operations like this into a
single function. It makes for an unclear parameter list, it's a pain
in the behind to change the constituent operations later on, and it
has a habit of attracting more random bools over time. E.g. what if
the caller already has irqs disabled? What if it KNOWS that irqs are
enabled and it could use local_irq_disable() instead of save?

Just provide an __obj_cgroup_uncharge() that assumes irqs are
disabled, combine with the existing __mod_memcg_lruvec_state(), and
bubble the irq handling up to those callsites which know better.
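
For illustration, that split might look roughly like this (a hypothetical
sketch reusing the helpers from this patch; __obj_cgroup_uncharge() is
only the proposed name, not an existing function):

	/* Assumes the caller has already disabled irqs. */
	void __obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
	{
		__refill_obj_stock(objcg, size);
	}

	/* A call site owns the irq section and batches both operations: */
	local_irq_save(flags);
	__obj_cgroup_uncharge(objcg, obj_full_size(s));
	rcu_read_lock();
	memcg = obj_cgroup_memcg(objcg);
	lruvec = mem_cgroup_lruvec(memcg, page_pgdat(page));
	__mod_memcg_lruvec_state(memcg, lruvec, cache_vmstat_idx(s),
				 -obj_full_size(s));
	rcu_read_unlock();
	local_irq_restore(flags);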
Waiman Long April 15, 2021, 4:35 p.m. UTC | #3
On 4/15/21 12:30 PM, Johannes Weiner wrote:
> On Tue, Apr 13, 2021 at 09:20:24PM -0400, Waiman Long wrote:
>> In memcg_slab_free_hook()/pcpu_memcg_free_hook(), obj_cgroup_uncharge()
>> is followed by mod_objcg_state()/mod_memcg_state(). Each of these
>> function calls goes through a separate irq_save/irq_restore cycle. That
>> is inefficient.  Introduce a new function obj_cgroup_uncharge_mod_state()
>> that combines them with a single irq_save/irq_restore cycle.
>>
>> @@ -3292,6 +3296,25 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
>>   	refill_obj_stock(objcg, size);
>>   }
>>   
>> +void obj_cgroup_uncharge_mod_state(struct obj_cgroup *objcg, size_t size,
>> +				   struct pglist_data *pgdat, int idx)
> The optimization makes sense.
>
> But please don't combine independent operations like this into a
> single function. It makes for an unclear parameter list, it's a pain
> in the behind to change the constituent operations later on, and it
> has a habit of attracting more random bools over time. E.g. what if
> the caller already has irqs disabled? What if it KNOWS that irqs are
> enabled and it could use local_irq_disable() instead of save?
>
> Just provide an __obj_cgroup_uncharge() that assumes irqs are
> disabled, combine with the existing __mod_memcg_lruvec_state(), and
> bubble the irq handling up to those callsites which know better.
>
That will also work. However, the reason I did that was because of patch 
5 in the series. I could put the get_obj_stock() and put_obj_stock() 
code in slab.h and allow them to be used directly in various places,
but hiding them in one function is easier.

Anyway, I can change the patch if you think that is the right way.

Cheers,
Longman
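
Patch 5 itself is not shown on this page; judging from the discussion
below, the helpers being referred to plausibly take a shape like this
(an assumed sketch with a single per-cpu stock, not the actual series
code):

	/* Pick the cheapest protection for the current context. */
	static inline struct memcg_stock_pcp *get_obj_stock(unsigned long *pflags)
	{
		if (likely(in_task())) {
			*pflags = 0UL;
			preempt_disable();	/* task context: preemption off suffices */
		} else {
			local_irq_save(*pflags);	/* irq context: irqs must be off */
		}
		return this_cpu_ptr(&memcg_stock);
	}

	static inline void put_obj_stock(unsigned long flags)
	{
		if (likely(in_task()))
			preempt_enable();
		else
			local_irq_restore(flags);
	}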
Johannes Weiner April 15, 2021, 6:10 p.m. UTC | #4
On Thu, Apr 15, 2021 at 12:35:45PM -0400, Waiman Long wrote:
> On 4/15/21 12:30 PM, Johannes Weiner wrote:
> > On Tue, Apr 13, 2021 at 09:20:24PM -0400, Waiman Long wrote:
> > > In memcg_slab_free_hook()/pcpu_memcg_free_hook(), obj_cgroup_uncharge()
> > > is followed by mod_objcg_state()/mod_memcg_state(). Each of these
> > > function calls goes through a separate irq_save/irq_restore cycle. That
> > > is inefficient.  Introduce a new function obj_cgroup_uncharge_mod_state()
> > > that combines them with a single irq_save/irq_restore cycle.
> > > 
> > > @@ -3292,6 +3296,25 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
> > >   	refill_obj_stock(objcg, size);
> > >   }
> > > +void obj_cgroup_uncharge_mod_state(struct obj_cgroup *objcg, size_t size,
> > > +				   struct pglist_data *pgdat, int idx)
> > The optimization makes sense.
> > 
> > But please don't combine independent operations like this into a
> > single function. It makes for an unclear parameter list, it's a pain
> > in the behind to change the constituent operations later on, and it
> > has a habit of attracting more random bools over time. E.g. what if
> > the caller already has irqs disabled? What if it KNOWS that irqs are
> > enabled and it could use local_irq_disable() instead of save?
> > 
> > Just provide an __obj_cgroup_uncharge() that assumes irqs are
> > disabled, combine with the existing __mod_memcg_lruvec_state(), and
> > bubble the irq handling up to those callsites which know better.
> > 
> That will also work. However, the reason I did that was because of patch 5
> in the series. I could put the get_obj_stock() and put_obj_stock() code in
> slab.h and allow them to be used directly in various places, but hiding
> them in one function is easier.

Yeah it's more obvious after getting to patch 5.

But with the irq disabling gone entirely, is there still an incentive
to combine the atomic section at all? Disabling preemption is pretty
cheap, so it wouldn't matter to just do it twice.

I.e. couldn't the final sequence in slab code simply be

	objcg_uncharge()
	mod_objcg_state()

again and each function disables preemption (and in the rare case
irqs) as it sees fit?

You lose the irqsoff batching in the cold path, but as you say, hit
rates are pretty good, and it doesn't seem worth complicating the code
for the cold path.
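
Concretely, "each function disables preemption as it sees fit" could be
sketched as below (hypothetical; it assumes task and irq context never
update the same per-cpu state concurrently, e.g. via separate task and
irq stocks):

	void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
			     int idx, int nr)
	{
		struct mem_cgroup *memcg;
		struct lruvec *lruvec;
		unsigned long flags = 0;

		if (likely(in_task()))
			preempt_disable();	/* cheap, common case */
		else
			local_irq_save(flags);	/* rare: update from irq context */

		rcu_read_lock();
		memcg = obj_cgroup_memcg(objcg);
		lruvec = mem_cgroup_lruvec(memcg, pgdat);
		__mod_memcg_lruvec_state(memcg, lruvec, idx, nr);
		rcu_read_unlock();

		if (likely(in_task()))
			preempt_enable();
		else
			local_irq_restore(flags);
	}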
Waiman Long April 15, 2021, 6:47 p.m. UTC | #5
On 4/15/21 2:10 PM, Johannes Weiner wrote:
> On Thu, Apr 15, 2021 at 12:35:45PM -0400, Waiman Long wrote:
>> On 4/15/21 12:30 PM, Johannes Weiner wrote:
>>> On Tue, Apr 13, 2021 at 09:20:24PM -0400, Waiman Long wrote:
>>>> In memcg_slab_free_hook()/pcpu_memcg_free_hook(), obj_cgroup_uncharge()
>>>> is followed by mod_objcg_state()/mod_memcg_state(). Each of these
>>>> function calls goes through a separate irq_save/irq_restore cycle. That
>>>> is inefficient.  Introduce a new function obj_cgroup_uncharge_mod_state()
>>>> that combines them with a single irq_save/irq_restore cycle.
>>>>
>>>> @@ -3292,6 +3296,25 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
>>>>    	refill_obj_stock(objcg, size);
>>>>    }
>>>> +void obj_cgroup_uncharge_mod_state(struct obj_cgroup *objcg, size_t size,
>>>> +				   struct pglist_data *pgdat, int idx)
>>> The optimization makes sense.
>>>
>>> But please don't combine independent operations like this into a
>>> single function. It makes for an unclear parameter list, it's a pain
>>> in the behind to change the constituent operations later on, and it
>>> has a habit of attracting more random bools over time. E.g. what if
>>> the caller already has irqs disabled? What if it KNOWS that irqs are
>>> enabled and it could use local_irq_disable() instead of save?
>>>
>>> Just provide an __obj_cgroup_uncharge() that assumes irqs are
>>> disabled, combine with the existing __mod_memcg_lruvec_state(), and
>>> bubble the irq handling up to those callsites which know better.
>>>
>> That will also work. However, the reason I did that was because of patch 5
>> in the series. I could put the get_obj_stock() and put_obj_stock() code in
>> slab.h and allow them to be used directly in various places, but hiding
>> them in one function is easier.
> Yeah it's more obvious after getting to patch 5.
>
> But with the irq disabling gone entirely, is there still an incentive
> to combine the atomic section at all? Disabling preemption is pretty
> cheap, so it wouldn't matter to just do it twice.
>
> I.e. couldn't the final sequence in slab code simply be
>
> 	objcg_uncharge()
> 	mod_objcg_state()
>
> again and each function disables preemption (and in the rare case
> irqs) as it sees fit?
>
> You lose the irqsoff batching in the cold path, but as you say, hit
> rates are pretty good, and it doesn't seem worth complicating the code
> for the cold path.
>
That does make sense, though a little bit of performance may be lost. I 
will try that out to see how it works out performance-wise.

Cheers,
Longman
Johannes Weiner April 15, 2021, 7:40 p.m. UTC | #6
On Thu, Apr 15, 2021 at 02:47:31PM -0400, Waiman Long wrote:
> On 4/15/21 2:10 PM, Johannes Weiner wrote:
> > On Thu, Apr 15, 2021 at 12:35:45PM -0400, Waiman Long wrote:
> > > On 4/15/21 12:30 PM, Johannes Weiner wrote:
> > > > On Tue, Apr 13, 2021 at 09:20:24PM -0400, Waiman Long wrote:
> > > > > In memcg_slab_free_hook()/pcpu_memcg_free_hook(), obj_cgroup_uncharge()
> > > > > is followed by mod_objcg_state()/mod_memcg_state(). Each of these
> > > > > function calls goes through a separate irq_save/irq_restore cycle. That
> > > > > is inefficient.  Introduce a new function obj_cgroup_uncharge_mod_state()
> > > > > that combines them with a single irq_save/irq_restore cycle.
> > > > > 
> > > > > @@ -3292,6 +3296,25 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
> > > > >    	refill_obj_stock(objcg, size);
> > > > >    }
> > > > > +void obj_cgroup_uncharge_mod_state(struct obj_cgroup *objcg, size_t size,
> > > > > +				   struct pglist_data *pgdat, int idx)
> > > > The optimization makes sense.
> > > > 
> > > > But please don't combine independent operations like this into a
> > > > single function. It makes for an unclear parameter list, it's a pain
> > > > in the behind to change the constituent operations later on, and it
> > > > has a habit of attracting more random bools over time. E.g. what if
> > > > the caller already has irqs disabled? What if it KNOWS that irqs are
> > > > enabled and it could use local_irq_disable() instead of save?
> > > > 
> > > > Just provide an __obj_cgroup_uncharge() that assumes irqs are
> > > > disabled, combine with the existing __mod_memcg_lruvec_state(), and
> > > > bubble the irq handling up to those callsites which know better.
> > > > 
> > > That will also work. However, the reason I did that was because of patch 5
> > > in the series. I could put the get_obj_stock() and put_obj_stock() code in
> > > slab.h and allow them to be used directly in various places, but hiding
> > > them in one function is easier.
> > Yeah it's more obvious after getting to patch 5.
> > 
> > But with the irq disabling gone entirely, is there still an incentive
> > to combine the atomic section at all? Disabling preemption is pretty
> > cheap, so it wouldn't matter to just do it twice.
> > 
> > I.e. couldn't the final sequence in slab code simply be
> > 
> > 	objcg_uncharge()
> > 	mod_objcg_state()
> > 
> > again and each function disables preemption (and in the rare case
> > irqs) as it sees fit?
> > 
> > You lose the irqsoff batching in the cold path, but as you say, hit
> > rates are pretty good, and it doesn't seem worth complicating the code
> > for the cold path.
> > 
> That does make sense, though a little bit of performance may be lost. I will
> try that out to see how it works out performance-wise.

Thanks.

Even if we still end up doing it, it's great to have that cost
isolated, so we know how much extra code complexity corresponds to how
much performance gain. It seems the task/irq split could otherwise be
a pretty localized change with no API implications.
Waiman Long April 15, 2021, 7:44 p.m. UTC | #7
On 4/15/21 3:40 PM, Johannes Weiner wrote:
> On Thu, Apr 15, 2021 at 02:47:31PM -0400, Waiman Long wrote:
>> On 4/15/21 2:10 PM, Johannes Weiner wrote:
>>> On Thu, Apr 15, 2021 at 12:35:45PM -0400, Waiman Long wrote:
>>>> On 4/15/21 12:30 PM, Johannes Weiner wrote:
>>>>> On Tue, Apr 13, 2021 at 09:20:24PM -0400, Waiman Long wrote:
>>>>>> In memcg_slab_free_hook()/pcpu_memcg_free_hook(), obj_cgroup_uncharge()
>>>>>> is followed by mod_objcg_state()/mod_memcg_state(). Each of these
>>>>>> function calls goes through a separate irq_save/irq_restore cycle. That
>>>>>> is inefficient.  Introduce a new function obj_cgroup_uncharge_mod_state()
>>>>>> that combines them with a single irq_save/irq_restore cycle.
>>>>>>
>>>>>> @@ -3292,6 +3296,25 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
>>>>>>     	refill_obj_stock(objcg, size);
>>>>>>     }
>>>>>> +void obj_cgroup_uncharge_mod_state(struct obj_cgroup *objcg, size_t size,
>>>>>> +				   struct pglist_data *pgdat, int idx)
>>>>> The optimization makes sense.
>>>>>
>>>>> But please don't combine independent operations like this into a
>>>>> single function. It makes for an unclear parameter list, it's a pain
>>>>> in the behind to change the constituent operations later on, and it
>>>>> has a habit of attracting more random bools over time. E.g. what if
>>>>> the caller already has irqs disabled? What if it KNOWS that irqs are
>>>>> enabled and it could use local_irq_disable() instead of save?
>>>>>
>>>>> Just provide an __obj_cgroup_uncharge() that assumes irqs are
>>>>> disabled, combine with the existing __mod_memcg_lruvec_state(), and
>>>>> bubble the irq handling up to those callsites which know better.
>>>>>
>>>> That will also work. However, the reason I did that was because of patch 5
>>>> in the series. I could put the get_obj_stock() and put_obj_stock() code in
>>>> slab.h and allow them to be used directly in various places, but hiding
>>>> them in one function is easier.
>>> Yeah it's more obvious after getting to patch 5.
>>>
>>> But with the irq disabling gone entirely, is there still an incentive
>>> to combine the atomic section at all? Disabling preemption is pretty
>>> cheap, so it wouldn't matter to just do it twice.
>>>
>>> I.e. couldn't the final sequence in slab code simply be
>>>
>>> 	objcg_uncharge()
>>> 	mod_objcg_state()
>>>
>>> again and each function disables preemption (and in the rare case
>>> irqs) as it sees fit?
>>>
>>> You lose the irqsoff batching in the cold path, but as you say, hit
>>> rates are pretty good, and it doesn't seem worth complicating the code
>>> for the cold path.
>>>
>> That does make sense, though a little bit of performance may be lost. I will
>> try that out to see how it works out performance-wise.
> Thanks.
>
> Even if we still end up doing it, it's great to have that cost
> isolated, so we know how much extra code complexity corresponds to how
> much performance gain. It seems the task/irq split could otherwise be
> a pretty localized change with no API implications.
>
I still want to move the mod_objcg_state() function to memcontrol.c, though,
as I don't want to put any obj_stock stuff in mm/slab.h.

Cheers,
Longman
Johannes Weiner April 15, 2021, 8:19 p.m. UTC | #8
On Thu, Apr 15, 2021 at 03:44:56PM -0400, Waiman Long wrote:
> On 4/15/21 3:40 PM, Johannes Weiner wrote:
> > On Thu, Apr 15, 2021 at 02:47:31PM -0400, Waiman Long wrote:
> > > On 4/15/21 2:10 PM, Johannes Weiner wrote:
> > > > On Thu, Apr 15, 2021 at 12:35:45PM -0400, Waiman Long wrote:
> > > > > On 4/15/21 12:30 PM, Johannes Weiner wrote:
> > > > > > On Tue, Apr 13, 2021 at 09:20:24PM -0400, Waiman Long wrote:
> > > > > > > In memcg_slab_free_hook()/pcpu_memcg_free_hook(), obj_cgroup_uncharge()
> > > > > > > is followed by mod_objcg_state()/mod_memcg_state(). Each of these
> > > > > > > function calls goes through a separate irq_save/irq_restore cycle. That
> > > > > > > is inefficient.  Introduce a new function obj_cgroup_uncharge_mod_state()
> > > > > > > that combines them with a single irq_save/irq_restore cycle.
> > > > > > > 
> > > > > > > @@ -3292,6 +3296,25 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
> > > > > > >     	refill_obj_stock(objcg, size);
> > > > > > >     }
> > > > > > > +void obj_cgroup_uncharge_mod_state(struct obj_cgroup *objcg, size_t size,
> > > > > > > +				   struct pglist_data *pgdat, int idx)
> > > > > > The optimization makes sense.
> > > > > > 
> > > > > > But please don't combine independent operations like this into a
> > > > > > single function. It makes for an unclear parameter list, it's a pain
> > > > > > in the behind to change the constituent operations later on, and it
> > > > > > has a habit of attracting more random bools over time. E.g. what if
> > > > > > the caller already has irqs disabled? What if it KNOWS that irqs are
> > > > > > enabled and it could use local_irq_disable() instead of save?
> > > > > > 
> > > > > > Just provide an __obj_cgroup_uncharge() that assumes irqs are
> > > > > > disabled, combine with the existing __mod_memcg_lruvec_state(), and
> > > > > > bubble the irq handling up to those callsites which know better.
> > > > > > 
> > > > > That will also work. However, the reason I did that was because of patch 5
> > > > > in the series. I could put the get_obj_stock() and put_obj_stock() code in
> > > > > slab.h and allow them to be used directly in various places, but hiding
> > > > > them in one function is easier.
> > > > Yeah it's more obvious after getting to patch 5.
> > > > 
> > > > But with the irq disabling gone entirely, is there still an incentive
> > > > to combine the atomic section at all? Disabling preemption is pretty
> > > > cheap, so it wouldn't matter to just do it twice.
> > > > 
> > > > I.e. couldn't the final sequence in slab code simply be
> > > > 
> > > > 	objcg_uncharge()
> > > > 	mod_objcg_state()
> > > > 
> > > > again and each function disables preemption (and in the rare case
> > > > irqs) as it sees fit?
> > > > 
> > > > You lose the irqsoff batching in the cold path, but as you say, hit
> > > > rates are pretty good, and it doesn't seem worth complicating the code
> > > > for the cold path.
> > > > 
> > > That does make sense, though a little bit of performance may be lost. I will
> > > try that out to see how it works out performance-wise.
> > Thanks.
> > 
> > Even if we still end up doing it, it's great to have that cost
> > isolated, so we know how much extra code complexity corresponds to how
> > much performance gain. It seems the task/irq split could otherwise be
> > a pretty localized change with no API implications.
> > 
> I still want to move the mod_objcg_state() function to memcontrol.c, though,
> as I don't want to put any obj_stock stuff in mm/slab.h.

No objection from me!

That's actually a nice cleanup, IMO. Not sure why it was separated
from the rest of the objcg interface implementation to begin with.

Patch

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 95f12996e66c..6890f999c1a3 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1592,6 +1592,8 @@ struct obj_cgroup *get_obj_cgroup_from_current(void);
 
 int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size);
 void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);
+void obj_cgroup_uncharge_mod_state(struct obj_cgroup *objcg, size_t size,
+				   struct pglist_data *pgdat, int idx);
 
 extern struct static_key_false memcg_kmem_enabled_key;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d66e1e38f8ac..b19100c68aa0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3225,12 +3225,9 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
 	return false;
 }
 
-static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
+static void __refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 {
 	struct memcg_stock_pcp *stock;
-	unsigned long flags;
-
-	local_irq_save(flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
 	if (stock->cached_objcg != objcg) { /* reset if necessary */
@@ -3243,7 +3240,14 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 
 	if (stock->nr_bytes > PAGE_SIZE)
 		drain_obj_stock(stock);
+}
+
+static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
+{
+	unsigned long flags;
 
+	local_irq_save(flags);
+	__refill_obj_stock(objcg, nr_bytes);
 	local_irq_restore(flags);
 }
 
@@ -3292,6 +3296,25 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
 	refill_obj_stock(objcg, size);
 }
 
+void obj_cgroup_uncharge_mod_state(struct obj_cgroup *objcg, size_t size,
+				   struct pglist_data *pgdat, int idx)
+{
+	unsigned long flags;
+	struct mem_cgroup *memcg;
+	struct lruvec *lruvec = NULL;
+
+	local_irq_save(flags);
+	__refill_obj_stock(objcg, size);
+
+	rcu_read_lock();
+	memcg = obj_cgroup_memcg(objcg);
+	if (pgdat)
+		lruvec = mem_cgroup_lruvec(memcg, pgdat);
+	__mod_memcg_lruvec_state(memcg, lruvec, idx, -(int)size);
+	rcu_read_unlock();
+	local_irq_restore(flags);
+}
+
 #endif /* CONFIG_MEMCG_KMEM */
 
 /*
diff --git a/mm/percpu.c b/mm/percpu.c
index 23308113a5ff..fd7aad6d7f90 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1631,13 +1631,8 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
 	objcg = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT];
 	chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL;
 
-	obj_cgroup_uncharge(objcg, size * num_possible_cpus());
-
-	rcu_read_lock();
-	mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
-			-(size * num_possible_cpus()));
-	rcu_read_unlock();
-
+	obj_cgroup_uncharge_mod_state(objcg, size * num_possible_cpus(),
+				      NULL, MEMCG_PERCPU_B);
 	obj_cgroup_put(objcg);
 }
 
diff --git a/mm/slab.h b/mm/slab.h
index bc6c7545e487..677cdc52e641 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -366,9 +366,9 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
 			continue;
 
 		objcgs[off] = NULL;
-		obj_cgroup_uncharge(objcg, obj_full_size(s));
-		mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s),
-				-obj_full_size(s));
+		obj_cgroup_uncharge_mod_state(objcg, obj_full_size(s),
+					      page_pgdat(page),
+					      cache_vmstat_idx(s));
 		obj_cgroup_put(objcg);
 	}
 }