diff mbox series

mm/memcontrol: update lruvec counters in mem_cgroup_move_account

Message ID 157112699975.7360.1062614888388489788.stgit@buzz (mailing list archive)
State New, archived
Headers show
Series mm/memcontrol: update lruvec counters in mem_cgroup_move_account | expand

Commit Message

Konstantin Khlebnikov Oct. 15, 2019, 8:09 a.m. UTC
Mapped, dirty and writeback pages are also counted in per-lruvec stats.
These counters need to be updated when a page is moved between cgroups.

Fixes: 00f3ca2c2d66 ("mm: memcontrol: per-lruvec stats infrastructure")
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
---
 mm/memcontrol.c |   18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

Comments

Michal Hocko Oct. 15, 2019, 8:20 a.m. UTC | #1
On Tue 15-10-19 11:09:59, Konstantin Khlebnikov wrote:
> Mapped, dirty and writeback pages are also counted in per-lruvec stats.
> These counters needs update when page is moved between cgroups.

Please describe the user visible effect.

> Fixes: 00f3ca2c2d66 ("mm: memcontrol: per-lruvec stats infrastructure")
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>

We want Cc: stable I suspect because broken stats might be really
misleading.

The patch looks ok to me otherwise
Acked-by: Michal Hocko <mhocko@suse.com>

> ---
>  mm/memcontrol.c |   18 ++++++++++++------
>  1 file changed, 12 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index bdac56009a38..363106578876 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -5420,6 +5420,8 @@ static int mem_cgroup_move_account(struct page *page,
>  				   struct mem_cgroup *from,
>  				   struct mem_cgroup *to)
>  {
> +	struct lruvec *from_vec, *to_vec;
> +	struct pglist_data *pgdat;
>  	unsigned long flags;
>  	unsigned int nr_pages = compound ? hpage_nr_pages(page) : 1;
>  	int ret;
> @@ -5443,11 +5445,15 @@ static int mem_cgroup_move_account(struct page *page,
>  
>  	anon = PageAnon(page);
>  
> +	pgdat = page_pgdat(page);
> +	from_vec = mem_cgroup_lruvec(pgdat, from);
> +	to_vec = mem_cgroup_lruvec(pgdat, to);
> +
>  	spin_lock_irqsave(&from->move_lock, flags);
>  
>  	if (!anon && page_mapped(page)) {
> -		__mod_memcg_state(from, NR_FILE_MAPPED, -nr_pages);
> -		__mod_memcg_state(to, NR_FILE_MAPPED, nr_pages);
> +		__mod_lruvec_state(from_vec, NR_FILE_MAPPED, -nr_pages);
> +		__mod_lruvec_state(to_vec, NR_FILE_MAPPED, nr_pages);
>  	}
>  
>  	/*
> @@ -5459,14 +5465,14 @@ static int mem_cgroup_move_account(struct page *page,
>  		struct address_space *mapping = page_mapping(page);
>  
>  		if (mapping_cap_account_dirty(mapping)) {
> -			__mod_memcg_state(from, NR_FILE_DIRTY, -nr_pages);
> -			__mod_memcg_state(to, NR_FILE_DIRTY, nr_pages);
> +			__mod_lruvec_state(from_vec, NR_FILE_DIRTY, -nr_pages);
> +			__mod_lruvec_state(to_vec, NR_FILE_DIRTY, nr_pages);
>  		}
>  	}
>  
>  	if (PageWriteback(page)) {
> -		__mod_memcg_state(from, NR_WRITEBACK, -nr_pages);
> -		__mod_memcg_state(to, NR_WRITEBACK, nr_pages);
> +		__mod_lruvec_state(from_vec, NR_WRITEBACK, -nr_pages);
> +		__mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages);
>  	}
>  
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
Konstantin Khlebnikov Oct. 15, 2019, 8:44 a.m. UTC | #2
On 15/10/2019 11.20, Michal Hocko wrote:
> On Tue 15-10-19 11:09:59, Konstantin Khlebnikov wrote:
>> Mapped, dirty and writeback pages are also counted in per-lruvec stats.
>> These counters needs update when page is moved between cgroups.
> 
> Please describe the user visible effect.

Surprisingly I don't see any users at this moment.
So, there is no effect in mainline kernel.

>> Fixes: 00f3ca2c2d66 ("mm: memcontrol: per-lruvec stats infrastructure")
>> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> 
> We want Cc: stable I suspect because broken stats might be really
> misleading.
> 
> The patch looks ok to me otherwise
> Acked-by: Michal Hocko <mhocko@suse.com>
> 
>> ---
>>   mm/memcontrol.c |   18 ++++++++++++------
>>   1 file changed, 12 insertions(+), 6 deletions(-)
>>
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index bdac56009a38..363106578876 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -5420,6 +5420,8 @@ static int mem_cgroup_move_account(struct page *page,
>>   				   struct mem_cgroup *from,
>>   				   struct mem_cgroup *to)
>>   {
>> +	struct lruvec *from_vec, *to_vec;
>> +	struct pglist_data *pgdat;
>>   	unsigned long flags;
>>   	unsigned int nr_pages = compound ? hpage_nr_pages(page) : 1;
>>   	int ret;
>> @@ -5443,11 +5445,15 @@ static int mem_cgroup_move_account(struct page *page,
>>   
>>   	anon = PageAnon(page);
>>   
>> +	pgdat = page_pgdat(page);
>> +	from_vec = mem_cgroup_lruvec(pgdat, from);
>> +	to_vec = mem_cgroup_lruvec(pgdat, to);
>> +
>>   	spin_lock_irqsave(&from->move_lock, flags);
>>   
>>   	if (!anon && page_mapped(page)) {
>> -		__mod_memcg_state(from, NR_FILE_MAPPED, -nr_pages);
>> -		__mod_memcg_state(to, NR_FILE_MAPPED, nr_pages);
>> +		__mod_lruvec_state(from_vec, NR_FILE_MAPPED, -nr_pages);
>> +		__mod_lruvec_state(to_vec, NR_FILE_MAPPED, nr_pages);
>>   	}
>>   
>>   	/*
>> @@ -5459,14 +5465,14 @@ static int mem_cgroup_move_account(struct page *page,
>>   		struct address_space *mapping = page_mapping(page);
>>   
>>   		if (mapping_cap_account_dirty(mapping)) {
>> -			__mod_memcg_state(from, NR_FILE_DIRTY, -nr_pages);
>> -			__mod_memcg_state(to, NR_FILE_DIRTY, nr_pages);
>> +			__mod_lruvec_state(from_vec, NR_FILE_DIRTY, -nr_pages);
>> +			__mod_lruvec_state(to_vec, NR_FILE_DIRTY, nr_pages);
>>   		}
>>   	}
>>   
>>   	if (PageWriteback(page)) {
>> -		__mod_memcg_state(from, NR_WRITEBACK, -nr_pages);
>> -		__mod_memcg_state(to, NR_WRITEBACK, nr_pages);
>> +		__mod_lruvec_state(from_vec, NR_WRITEBACK, -nr_pages);
>> +		__mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages);
>>   	}
>>   
>>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>
Michal Hocko Oct. 15, 2019, 10:36 a.m. UTC | #3
On Tue 15-10-19 11:44:22, Konstantin Khlebnikov wrote:
> On 15/10/2019 11.20, Michal Hocko wrote:
> > On Tue 15-10-19 11:09:59, Konstantin Khlebnikov wrote:
> > > Mapped, dirty and writeback pages are also counted in per-lruvec stats.
> > > These counters needs update when page is moved between cgroups.
> > 
> > Please describe the user visible effect.
> 
> Surprisingly I don't see any users at this moment.
> So, there is no effect in mainline kernel.

Those counters are exported right? Or do we exclude them for v1?

> > > Fixes: 00f3ca2c2d66 ("mm: memcontrol: per-lruvec stats infrastructure")
> > > Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> > 
> > We want Cc: stable I suspect because broken stats might be really
> > misleading.
> > 
> > The patch looks ok to me otherwise
> > Acked-by: Michal Hocko <mhocko@suse.com>
> > 
> > > ---
> > >   mm/memcontrol.c |   18 ++++++++++++------
> > >   1 file changed, 12 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index bdac56009a38..363106578876 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -5420,6 +5420,8 @@ static int mem_cgroup_move_account(struct page *page,
> > >   				   struct mem_cgroup *from,
> > >   				   struct mem_cgroup *to)
> > >   {
> > > +	struct lruvec *from_vec, *to_vec;
> > > +	struct pglist_data *pgdat;
> > >   	unsigned long flags;
> > >   	unsigned int nr_pages = compound ? hpage_nr_pages(page) : 1;
> > >   	int ret;
> > > @@ -5443,11 +5445,15 @@ static int mem_cgroup_move_account(struct page *page,
> > >   	anon = PageAnon(page);
> > > +	pgdat = page_pgdat(page);
> > > +	from_vec = mem_cgroup_lruvec(pgdat, from);
> > > +	to_vec = mem_cgroup_lruvec(pgdat, to);
> > > +
> > >   	spin_lock_irqsave(&from->move_lock, flags);
> > >   	if (!anon && page_mapped(page)) {
> > > -		__mod_memcg_state(from, NR_FILE_MAPPED, -nr_pages);
> > > -		__mod_memcg_state(to, NR_FILE_MAPPED, nr_pages);
> > > +		__mod_lruvec_state(from_vec, NR_FILE_MAPPED, -nr_pages);
> > > +		__mod_lruvec_state(to_vec, NR_FILE_MAPPED, nr_pages);
> > >   	}
> > >   	/*
> > > @@ -5459,14 +5465,14 @@ static int mem_cgroup_move_account(struct page *page,
> > >   		struct address_space *mapping = page_mapping(page);
> > >   		if (mapping_cap_account_dirty(mapping)) {
> > > -			__mod_memcg_state(from, NR_FILE_DIRTY, -nr_pages);
> > > -			__mod_memcg_state(to, NR_FILE_DIRTY, nr_pages);
> > > +			__mod_lruvec_state(from_vec, NR_FILE_DIRTY, -nr_pages);
> > > +			__mod_lruvec_state(to_vec, NR_FILE_DIRTY, nr_pages);
> > >   		}
> > >   	}
> > >   	if (PageWriteback(page)) {
> > > -		__mod_memcg_state(from, NR_WRITEBACK, -nr_pages);
> > > -		__mod_memcg_state(to, NR_WRITEBACK, nr_pages);
> > > +		__mod_lruvec_state(from_vec, NR_WRITEBACK, -nr_pages);
> > > +		__mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages);
> > >   	}
> > >   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> >
Konstantin Khlebnikov Oct. 15, 2019, 10:49 a.m. UTC | #4
On 15/10/2019 13.36, Michal Hocko wrote:
> On Tue 15-10-19 11:44:22, Konstantin Khlebnikov wrote:
>> On 15/10/2019 11.20, Michal Hocko wrote:
>>> On Tue 15-10-19 11:09:59, Konstantin Khlebnikov wrote:
>>>> Mapped, dirty and writeback pages are also counted in per-lruvec stats.
>>>> These counters needs update when page is moved between cgroups.
>>>
>>> Please describe the user visible effect.
>>
>> Surprisingly I don't see any users at this moment.
>> So, there is no effect in mainline kernel.
> 
> Those counters are exported right? Or do we exclude them for v1?

It seems per-lruvec statistics are not exposed anywhere.
And the per-lruvec NR_FILE_MAPPED, NR_FILE_DIRTY, NR_WRITEBACK counters never had users.

I've found this because I'm using mem_cgroup_move_account for recharging
pages at mlock and playing right now with debug for memory cgroup which
validates statistics and counters when cgroup dies.

> 
>>>> Fixes: 00f3ca2c2d66 ("mm: memcontrol: per-lruvec stats infrastructure")
>>>> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
>>>
>>> We want Cc: stable I suspect because broken stats might be really
>>> misleading.
>>>
>>> The patch looks ok to me otherwise
>>> Acked-by: Michal Hocko <mhocko@suse.com>
>>>
>>>> ---
>>>>    mm/memcontrol.c |   18 ++++++++++++------
>>>>    1 file changed, 12 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>>>> index bdac56009a38..363106578876 100644
>>>> --- a/mm/memcontrol.c
>>>> +++ b/mm/memcontrol.c
>>>> @@ -5420,6 +5420,8 @@ static int mem_cgroup_move_account(struct page *page,
>>>>    				   struct mem_cgroup *from,
>>>>    				   struct mem_cgroup *to)
>>>>    {
>>>> +	struct lruvec *from_vec, *to_vec;
>>>> +	struct pglist_data *pgdat;
>>>>    	unsigned long flags;
>>>>    	unsigned int nr_pages = compound ? hpage_nr_pages(page) : 1;
>>>>    	int ret;
>>>> @@ -5443,11 +5445,15 @@ static int mem_cgroup_move_account(struct page *page,
>>>>    	anon = PageAnon(page);
>>>> +	pgdat = page_pgdat(page);
>>>> +	from_vec = mem_cgroup_lruvec(pgdat, from);
>>>> +	to_vec = mem_cgroup_lruvec(pgdat, to);
>>>> +
>>>>    	spin_lock_irqsave(&from->move_lock, flags);
>>>>    	if (!anon && page_mapped(page)) {
>>>> -		__mod_memcg_state(from, NR_FILE_MAPPED, -nr_pages);
>>>> -		__mod_memcg_state(to, NR_FILE_MAPPED, nr_pages);
>>>> +		__mod_lruvec_state(from_vec, NR_FILE_MAPPED, -nr_pages);
>>>> +		__mod_lruvec_state(to_vec, NR_FILE_MAPPED, nr_pages);
>>>>    	}
>>>>    	/*
>>>> @@ -5459,14 +5465,14 @@ static int mem_cgroup_move_account(struct page *page,
>>>>    		struct address_space *mapping = page_mapping(page);
>>>>    		if (mapping_cap_account_dirty(mapping)) {
>>>> -			__mod_memcg_state(from, NR_FILE_DIRTY, -nr_pages);
>>>> -			__mod_memcg_state(to, NR_FILE_DIRTY, nr_pages);
>>>> +			__mod_lruvec_state(from_vec, NR_FILE_DIRTY, -nr_pages);
>>>> +			__mod_lruvec_state(to_vec, NR_FILE_DIRTY, nr_pages);
>>>>    		}
>>>>    	}
>>>>    	if (PageWriteback(page)) {
>>>> -		__mod_memcg_state(from, NR_WRITEBACK, -nr_pages);
>>>> -		__mod_memcg_state(to, NR_WRITEBACK, nr_pages);
>>>> +		__mod_lruvec_state(from_vec, NR_WRITEBACK, -nr_pages);
>>>> +		__mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages);
>>>>    	}
>>>>    #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>>
>
Michal Hocko Oct. 15, 2019, 11:04 a.m. UTC | #5
On Tue 15-10-19 13:49:14, Konstantin Khlebnikov wrote:
> On 15/10/2019 13.36, Michal Hocko wrote:
> > On Tue 15-10-19 11:44:22, Konstantin Khlebnikov wrote:
> > > On 15/10/2019 11.20, Michal Hocko wrote:
> > > > On Tue 15-10-19 11:09:59, Konstantin Khlebnikov wrote:
> > > > > Mapped, dirty and writeback pages are also counted in per-lruvec stats.
> > > > > These counters needs update when page is moved between cgroups.
> > > > 
> > > > Please describe the user visible effect.
> > > 
> > > Surprisingly I don't see any users at this moment.
> > > So, there is no effect in mainline kernel.
> > 
> > Those counters are exported right? Or do we exclude them for v1?
> 
> It seems per-lruvec statistics is not exposed anywhere.
> And per-lruvec NR_FILE_MAPPED, NR_FILE_DIRTY, NR_WRITEBACK never had users.

So why do we have it in the first place? I have to say that counters
as we have them now are really clear as mud. This is really begging for
a clean up.
Johannes Weiner Oct. 15, 2019, 1:53 p.m. UTC | #6
On Tue, Oct 15, 2019 at 11:09:59AM +0300, Konstantin Khlebnikov wrote:
> Mapped, dirty and writeback pages are also counted in per-lruvec stats.
> These counters needs update when page is moved between cgroups.
> 
> Fixes: 00f3ca2c2d66 ("mm: memcontrol: per-lruvec stats infrastructure")
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>

Acked-by: Johannes Weiner <hannes@cmpxchg.org>

Please mention in the changelog that currently nobody is *consuming*
the lruvec versions of these counters and that there is no
user-visible effect. Thanks
Konstantin Khlebnikov Oct. 15, 2019, 2:04 p.m. UTC | #7
On 15/10/2019 16.53, Johannes Weiner wrote:
> On Tue, Oct 15, 2019 at 11:09:59AM +0300, Konstantin Khlebnikov wrote:
>> Mapped, dirty and writeback pages are also counted in per-lruvec stats.
>> These counters needs update when page is moved between cgroups.
>>
>> Fixes: 00f3ca2c2d66 ("mm: memcontrol: per-lruvec stats infrastructure")
>> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> 
> Acked-by: Johannes Weiner <hannes@cmpxchg.org>
> 
> Please mention in the changelog that currently is nobody *consuming*
> the lruvec versions of these counters and that there is no
> user-visible effect. Thanks
> 

Maybe just kill all these per-lruvec counters?
I see only one user which has no alternative data source: WORKINGSET_ACTIVATE.

This will save some memory: 32 * sizeof(long) * nr_nodes * nr_cpus bytes
Johannes Weiner Oct. 15, 2019, 2:31 p.m. UTC | #8
On Tue, Oct 15, 2019 at 01:04:01PM +0200, Michal Hocko wrote:
> On Tue 15-10-19 13:49:14, Konstantin Khlebnikov wrote:
> > On 15/10/2019 13.36, Michal Hocko wrote:
> > > On Tue 15-10-19 11:44:22, Konstantin Khlebnikov wrote:
> > > > On 15/10/2019 11.20, Michal Hocko wrote:
> > > > > On Tue 15-10-19 11:09:59, Konstantin Khlebnikov wrote:
> > > > > > Mapped, dirty and writeback pages are also counted in per-lruvec stats.
> > > > > > These counters needs update when page is moved between cgroups.
> > > > > 
> > > > > Please describe the user visible effect.
> > > > 
> > > > Surprisingly I don't see any users at this moment.
> > > > So, there is no effect in mainline kernel.
> > > 
> > > Those counters are exported right? Or do we exclude them for v1?
> > 
> > It seems per-lruvec statistics is not exposed anywhere.
> > And per-lruvec NR_FILE_MAPPED, NR_FILE_DIRTY, NR_WRITEBACK never had users.
> 
> So why do we have it in the first place? I have to say that counters
> as we have them now are really clear as mud. This is really begging for
> a clean up.

IMO This is going in the right direction. The goal is to have all
vmstat items accounted per lruvec - the intersection of the node and
the memcg - to further integrate memcg into the traditional VM code
and eliminate differences between them. We use the lruvec counters
quite extensively in reclaim already, since the lruvec is the primary
context for page reclaim. More consumers will follow in pending
patches. This patch cleans up some stragglers.

The only counters we can't have in the lruvec are the legacy memcg
ones that are accounted to the memcg without a node context:
MEMCG_RSS, MEMCG_CACHE etc. We should eventually replace them with
per-lruvec accounted NR_ANON_PAGES, NR_FILE_PAGES etc - tracked by
generic VM code, not inside memcg, further reducing the size of the
memory controller. But it'll require some work in the page creation
path, as that accounting happens before the memcg commit right now.

Then we can get rid of memcg_stat_item and the_memcg_page_state
API. And we should be able to do for_each_node() summing of the lruvec
counters to produce memory.stat output, and drop memcg->vmstats_local,
memcg->vmstats_percpu, memcg->vmstats and memcg->vmevents altogether.
Johannes Weiner Oct. 15, 2019, 3:24 p.m. UTC | #9
On Tue, Oct 15, 2019 at 05:04:44PM +0300, Konstantin Khlebnikov wrote:
> On 15/10/2019 16.53, Johannes Weiner wrote:
> > On Tue, Oct 15, 2019 at 11:09:59AM +0300, Konstantin Khlebnikov wrote:
> > > Mapped, dirty and writeback pages are also counted in per-lruvec stats.
> > > These counters needs update when page is moved between cgroups.
> > > 
> > > Fixes: 00f3ca2c2d66 ("mm: memcontrol: per-lruvec stats infrastructure")
> > > Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> > 
> > Acked-by: Johannes Weiner <hannes@cmpxchg.org>
> > 
> > Please mention in the changelog that currently is nobody *consuming*
> > the lruvec versions of these counters and that there is no
> > user-visible effect. Thanks
> > 
> 
> Maybe just kill all these per-lruvec counters?
> I see only one user which have no alternative data source: WORKINGSET_ACTIVATE.
> 
> This will save some memory: 32 * sizeof(long) * nr_nodes * nr_cpus bytes

This is backwards, see my reply to Michal, as well as the patches at
https://lore.kernel.org/linux-mm/20190603210746.15800-1-hannes@cmpxchg.org/

We're not using the lruvec counters in all places where we should.
Konstantin Khlebnikov Oct. 16, 2019, 8:25 a.m. UTC | #10
On 15/10/2019 17.31, Johannes Weiner wrote:
> On Tue, Oct 15, 2019 at 01:04:01PM +0200, Michal Hocko wrote:
>> On Tue 15-10-19 13:49:14, Konstantin Khlebnikov wrote:
>>> On 15/10/2019 13.36, Michal Hocko wrote:
>>>> On Tue 15-10-19 11:44:22, Konstantin Khlebnikov wrote:
>>>>> On 15/10/2019 11.20, Michal Hocko wrote:
>>>>>> On Tue 15-10-19 11:09:59, Konstantin Khlebnikov wrote:
>>>>>>> Mapped, dirty and writeback pages are also counted in per-lruvec stats.
>>>>>>> These counters needs update when page is moved between cgroups.
>>>>>>
>>>>>> Please describe the user visible effect.
>>>>>
>>>>> Surprisingly I don't see any users at this moment.
>>>>> So, there is no effect in mainline kernel.
>>>>
>>>> Those counters are exported right? Or do we exclude them for v1?
>>>
>>> It seems per-lruvec statistics is not exposed anywhere.
>>> And per-lruvec NR_FILE_MAPPED, NR_FILE_DIRTY, NR_WRITEBACK never had users.
>>
>> So why do we have it in the first place? I have to say that counters
>> as we have them now are really clear as mud. This is really begging for
>> a clean up.
> 
> IMO This is going in the right direction. The goal is to have all
> vmstat items accounted per lruvec - the intersection of the node and
> the memcg - to further integrate memcg into the traditional VM code
> and eliminate differences between them. We use the lruvec counters
> quite extensively in reclaim already, since the lruvec is the primary
> context for page reclaim. More consumers will follow in pending
> patches. This patch cleans up some stragglers.
> 
> The only counters we can't have in the lruvec are the legacy memcg
> ones that are accounted to the memcg without a node context:
> MEMCG_RSS, MEMCG_CACHE etc. We should eventually replace them with
> per-lruvec accounted NR_ANON_PAGES, NR_FILE_PAGES etc - tracked by
> generic VM code, not inside memcg, further reducing the size of the
> memory controller. But it'll require some work in the page creation
> path, as that accounting happens before the memcg commit right now.
> 
> Then we can get rid of memcg_stat_item and the_memcg_page_state
> API. And we should be able to do for_each_node() summing of the lruvec
> counters to produce memory.stat output, and drop memcg->vmstats_local,
> memcg->vmstats_percpu, memcg->vmstats and memcg->vmevents altogether.
> 

Ok, I see where it goes.
Some years ago I've worked on something similar.
Including linking page directly with its lruvec and moving lru_lock into lruvec.

Indeed VM code must be split per-node except accounting matters.
But summing per-node counters might be costly for balance_dirty_pages.
Probably memcg needs own dirty pages counter with per-cpu batching.
diff mbox series

Patch

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bdac56009a38..363106578876 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5420,6 +5420,8 @@  static int mem_cgroup_move_account(struct page *page,
 				   struct mem_cgroup *from,
 				   struct mem_cgroup *to)
 {
+	struct lruvec *from_vec, *to_vec;
+	struct pglist_data *pgdat;
 	unsigned long flags;
 	unsigned int nr_pages = compound ? hpage_nr_pages(page) : 1;
 	int ret;
@@ -5443,11 +5445,15 @@  static int mem_cgroup_move_account(struct page *page,
 
 	anon = PageAnon(page);
 
+	pgdat = page_pgdat(page);
+	from_vec = mem_cgroup_lruvec(pgdat, from);
+	to_vec = mem_cgroup_lruvec(pgdat, to);
+
 	spin_lock_irqsave(&from->move_lock, flags);
 
 	if (!anon && page_mapped(page)) {
-		__mod_memcg_state(from, NR_FILE_MAPPED, -nr_pages);
-		__mod_memcg_state(to, NR_FILE_MAPPED, nr_pages);
+		__mod_lruvec_state(from_vec, NR_FILE_MAPPED, -nr_pages);
+		__mod_lruvec_state(to_vec, NR_FILE_MAPPED, nr_pages);
 	}
 
 	/*
@@ -5459,14 +5465,14 @@  static int mem_cgroup_move_account(struct page *page,
 		struct address_space *mapping = page_mapping(page);
 
 		if (mapping_cap_account_dirty(mapping)) {
-			__mod_memcg_state(from, NR_FILE_DIRTY, -nr_pages);
-			__mod_memcg_state(to, NR_FILE_DIRTY, nr_pages);
+			__mod_lruvec_state(from_vec, NR_FILE_DIRTY, -nr_pages);
+			__mod_lruvec_state(to_vec, NR_FILE_DIRTY, nr_pages);
 		}
 	}
 
 	if (PageWriteback(page)) {
-		__mod_memcg_state(from, NR_WRITEBACK, -nr_pages);
-		__mod_memcg_state(to, NR_WRITEBACK, nr_pages);
+		__mod_lruvec_state(from_vec, NR_WRITEBACK, -nr_pages);
+		__mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages);
 	}
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE