
[v7,02/10] mm/memcg: fold lru_lock in lock_page_lru

Message ID 1577264666-246071-3-git-send-email-alex.shi@linux.alibaba.com (mailing list archive)
State New, archived
Series per lruvec lru_lock for memcg

Commit Message

Alex Shi Dec. 25, 2019, 9:04 a.m. UTC
From commit_charge's explanation and the mem_cgroup_commit_charge
comments, as well as the call path when lrucare is true, the lru_lock is
only there to guard against task migration (which would lead to
move_account). So it isn't needed when !PageLRU, and is better folded
into the PageLRU check to reduce lock contention.

Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: cgroups@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
---
 mm/memcontrol.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)
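
For orientation, here is the patched helper condensed into plain C. This
merely restates the diff shown at the bottom of the page (and, as the review
below notes, it is still missing a PageLRU re-check under the lock):

static void lock_page_lru(struct page *page, int *isolated)
{
	if (PageLRU(page)) {
		pg_data_t *pgdat = page_pgdat(page);
		struct lruvec *lruvec;

		spin_lock_irq(&pgdat->lru_lock);
		lruvec = mem_cgroup_page_lruvec(page, pgdat);
		ClearPageLRU(page);
		del_page_from_lru_list(page, lruvec, page_lru(page));
		*isolated = 1;
	} else
		*isolated = 0;
}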

Comments

Konstantin Khlebnikov Jan. 10, 2020, 8:49 a.m. UTC | #1
On 25/12/2019 12.04, Alex Shi wrote:
>  From commit_charge's explanation and the mem_cgroup_commit_charge
> comments, as well as the call path when lrucare is true, the lru_lock is
> only there to guard against task migration (which would lead to
> move_account). So it isn't needed when !PageLRU, and is better folded
> into the PageLRU check to reduce lock contention.
> 
> Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Matthew Wilcox <willy@infradead.org>
> Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: cgroups@vger.kernel.org
> Cc: linux-mm@kvack.org
> Cc: linux-kernel@vger.kernel.org
> ---
>   mm/memcontrol.c | 9 ++++-----
>   1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index c5b5f74cfd4d..0ad10caabc3d 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2572,12 +2572,11 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
>   
>   static void lock_page_lru(struct page *page, int *isolated)
>   {
> -	pg_data_t *pgdat = page_pgdat(page);
> -
> -	spin_lock_irq(&pgdat->lru_lock);
>   	if (PageLRU(page)) {
> +		pg_data_t *pgdat = page_pgdat(page);
>   		struct lruvec *lruvec;
>   
> +		spin_lock_irq(&pgdat->lru_lock);

That's wrong. Here PageLRU must be checked again under lru_lock.


Also I don't like these functions:
- called lock/unlock but actually also isolates
- used just once
- pgdat evaluated twice

>   		lruvec = mem_cgroup_page_lruvec(page, pgdat);
>   		ClearPageLRU(page);
>   		del_page_from_lru_list(page, lruvec, page_lru(page));
> @@ -2588,17 +2587,17 @@ static void lock_page_lru(struct page *page, int *isolated)
>   
>   static void unlock_page_lru(struct page *page, int isolated)
>   {
> -	pg_data_t *pgdat = page_pgdat(page);
>   
>   	if (isolated) {
> +		pg_data_t *pgdat = page_pgdat(page);
>   		struct lruvec *lruvec;
>   
>   		lruvec = mem_cgroup_page_lruvec(page, pgdat);
>   		VM_BUG_ON_PAGE(PageLRU(page), page);
>   		SetPageLRU(page);
>   		add_page_to_lru_list(page, lruvec, page_lru(page));
> +		spin_unlock_irq(&pgdat->lru_lock);
>   	}
> -	spin_unlock_irq(&pgdat->lru_lock);
>   }
>   
>   static void commit_charge(struct page *page, struct mem_cgroup *memcg,
>
Alex Shi Jan. 13, 2020, 9:45 a.m. UTC | #2
On 2020/1/10 at 4:49 PM, Konstantin Khlebnikov wrote:
> On 25/12/2019 12.04, Alex Shi wrote:
>>  From commit_charge's explanation and the mem_cgroup_commit_charge
>> comments, as well as the call path when lrucare is true, the lru_lock is
>> only there to guard against task migration (which would lead to
>> move_account). So it isn't needed when !PageLRU, and is better folded
>> into the PageLRU check to reduce lock contention.
>>
>> Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
>> Cc: Johannes Weiner <hannes@cmpxchg.org>
>> Cc: Michal Hocko <mhocko@kernel.org>
>> Cc: Matthew Wilcox <willy@infradead.org>
>> Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: cgroups@vger.kernel.org
>> Cc: linux-mm@kvack.org
>> Cc: linux-kernel@vger.kernel.org
>> ---
>>   mm/memcontrol.c | 9 ++++-----
>>   1 file changed, 4 insertions(+), 5 deletions(-)
>>
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index c5b5f74cfd4d..0ad10caabc3d 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -2572,12 +2572,11 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
>>     static void lock_page_lru(struct page *page, int *isolated)
>>   {
>> -    pg_data_t *pgdat = page_pgdat(page);
>> -
>> -    spin_lock_irq(&pgdat->lru_lock);
>>       if (PageLRU(page)) {
>> +        pg_data_t *pgdat = page_pgdat(page);
>>           struct lruvec *lruvec;
>>   +        spin_lock_irq(&pgdat->lru_lock);
> 
> That's wrong. Here PageLRU must be checked again under lru_lock.
Hi, Konstantin,

To keep the original logic, we could take the lock and then release it for
!PageLRU, but I still can't figure out the problem scenario. Would you like
to give more hints?


> 
> 
> Also I don't like these functions:
> - called lock/unlock but actually also isolates
> - used just once
> - pgdat evaluated twice

That's right. I will fold these functions into commit_charge.

Thanks
Alex
Konstantin Khlebnikov Jan. 13, 2020, 9:55 a.m. UTC | #3
On 13/01/2020 12.45, Alex Shi wrote:
> 
> 
> On 2020/1/10 at 4:49 PM, Konstantin Khlebnikov wrote:
>> On 25/12/2019 12.04, Alex Shi wrote:
>>>   From commit_charge's explanation and the mem_cgroup_commit_charge
>>> comments, as well as the call path when lrucare is true, the lru_lock is
>>> only there to guard against task migration (which would lead to
>>> move_account). So it isn't needed when !PageLRU, and is better folded
>>> into the PageLRU check to reduce lock contention.
>>>
>>> Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
>>> Cc: Johannes Weiner <hannes@cmpxchg.org>
>>> Cc: Michal Hocko <mhocko@kernel.org>
>>> Cc: Matthew Wilcox <willy@infradead.org>
>>> Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
>>> Cc: Andrew Morton <akpm@linux-foundation.org>
>>> Cc: cgroups@vger.kernel.org
>>> Cc: linux-mm@kvack.org
>>> Cc: linux-kernel@vger.kernel.org
>>> ---
>>>    mm/memcontrol.c | 9 ++++-----
>>>    1 file changed, 4 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>>> index c5b5f74cfd4d..0ad10caabc3d 100644
>>> --- a/mm/memcontrol.c
>>> +++ b/mm/memcontrol.c
>>> @@ -2572,12 +2572,11 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
>>>      static void lock_page_lru(struct page *page, int *isolated)
>>>    {
>>> -    pg_data_t *pgdat = page_pgdat(page);
>>> -
>>> -    spin_lock_irq(&pgdat->lru_lock);
>>>        if (PageLRU(page)) {
>>> +        pg_data_t *pgdat = page_pgdat(page);
>>>            struct lruvec *lruvec;
>>>    +        spin_lock_irq(&pgdat->lru_lock);
>>
>> That's wrong. Here PageLRU must be checked again under lru_lock.
> Hi, Konstantin,
> 
> To keep the original logic, we could take the lock and then release it for
> !PageLRU, but I still can't figure out the problem scenario. Would you like
> to give more hints?

That's a trivial race: the page could be isolated from the LRU between

if (PageLRU(page))
and
spin_lock_irq(&pgdat->lru_lock);
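
A minimal sketch of one way to close that window, keeping the patch's rule
that lru_lock stays held only when the page was actually isolated (an
illustration, not the final code):

static void lock_page_lru(struct page *page, int *isolated)
{
	*isolated = 0;
	if (PageLRU(page)) {
		pg_data_t *pgdat = page_pgdat(page);

		spin_lock_irq(&pgdat->lru_lock);
		/* Re-check under the lock: the page may have been
		 * isolated from the LRU while we were waiting. */
		if (PageLRU(page)) {
			struct lruvec *lruvec;

			lruvec = mem_cgroup_page_lruvec(page, pgdat);
			ClearPageLRU(page);
			del_page_from_lru_list(page, lruvec, page_lru(page));
			*isolated = 1;
		} else
			spin_unlock_irq(&pgdat->lru_lock);
	}
}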

> 
> 
>>
>>
>> Also I don't like these functions:
>> - called lock/unlock but actually also isolates
>> - used just once
>> - pgdat evaluated twice
> 
> That's right. I will fold these functions into commit_charge.
> 
> Thanks
> Alex
>
Alex Shi Jan. 13, 2020, 12:47 p.m. UTC | #4
On 2020/1/13 at 5:55 PM, Konstantin Khlebnikov wrote:
>>>>
>>>> index c5b5f74cfd4d..0ad10caabc3d 100644
>>>> --- a/mm/memcontrol.c
>>>> +++ b/mm/memcontrol.c
>>>> @@ -2572,12 +2572,11 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
>>>>      static void lock_page_lru(struct page *page, int *isolated)
>>>>    {
>>>> -    pg_data_t *pgdat = page_pgdat(page);
>>>> -
>>>> -    spin_lock_irq(&pgdat->lru_lock);
>>>>        if (PageLRU(page)) {
>>>> +        pg_data_t *pgdat = page_pgdat(page);
>>>>            struct lruvec *lruvec;
>>>>    +        spin_lock_irq(&pgdat->lru_lock);
>>>
>>> That's wrong. Here PageLRU must be checked again under lru_lock.
>> Hi, Konstantin,
>>
>> To keep the original logic, we could take the lock and then release it for
>> !PageLRU, but I still can't figure out the problem scenario. Would you like
>> to give more hints?
> 
> That's a trivial race: the page could be isolated from the LRU between
> 
> if (PageLRU(page))
> and
> spin_lock_irq(&pgdat->lru_lock);

Yes, it could be a problem. I guess the following change could be helpful;
I will update it in the new version.

Thanks a lot!
Alex

-static void lock_page_lru(struct page *page, int *isolated)
-{
-       pg_data_t *pgdat = page_pgdat(page);
-
-       spin_lock_irq(&pgdat->lru_lock);
-       if (PageLRU(page)) {
-               struct lruvec *lruvec;
-
-               lruvec = mem_cgroup_page_lruvec(page, pgdat);
-               ClearPageLRU(page);
-               del_page_from_lru_list(page, lruvec, page_lru(page));
-               *isolated = 1;
-       } else
-               *isolated = 0;
-}
-
-static void unlock_page_lru(struct page *page, int isolated)
-{
-       pg_data_t *pgdat = page_pgdat(page);
-
-       if (isolated) {
-               struct lruvec *lruvec;
-
-               lruvec = mem_cgroup_page_lruvec(page, pgdat);
-               VM_BUG_ON_PAGE(PageLRU(page), page);
-               SetPageLRU(page);
-               add_page_to_lru_list(page, lruvec, page_lru(page));
-       }
-       spin_unlock_irq(&pgdat->lru_lock);
-}
-
 static void commit_charge(struct page *page, struct mem_cgroup *memcg,
                          bool lrucare)
 {
-       int isolated;
+       struct lruvec *lruvec = NULL;

        VM_BUG_ON_PAGE(page->mem_cgroup, page);

@@ -2612,8 +2617,16 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
         * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page
         * may already be on some other mem_cgroup's LRU.  Take care of it.
         */
-       if (lrucare)
-               lock_page_lru(page, &isolated);
+       if (lrucare) {
+               lruvec = lock_page_lruvec_irq(page);
+               if (likely(PageLRU(page))) {
+                       ClearPageLRU(page);
+                       del_page_from_lru_list(page, lruvec, page_lru(page));
+               } else {
+                       unlock_page_lruvec_irq(lruvec);
+                       lruvec = NULL;
+               }
+       }

        /*
         * Nobody should be changing or seriously looking at
@@ -2631,8 +2644,15 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
         */
        page->mem_cgroup = memcg;

-       if (lrucare)
-               unlock_page_lru(page, isolated);
+       if (lrucare && lruvec) {
+               unlock_page_lruvec_irq(lruvec);
+               lruvec = lock_page_lruvec_irq(page);
+
+               VM_BUG_ON_PAGE(PageLRU(page), page);
+               SetPageLRU(page);
+               add_page_to_lru_list(page, lruvec, page_lru(page));
+               unlock_page_lruvec_irq(lruvec);
+       }
 }
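
lock_page_lruvec_irq()/unlock_page_lruvec_irq() come from earlier patches in
this series, which move lru_lock from the node into struct lruvec. A rough
sketch of their assumed shape (not the series' exact code):

static struct lruvec *lock_page_lruvec_irq(struct page *page)
{
	struct lruvec *lruvec;

	lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
	spin_lock_irq(&lruvec->lru_lock);
	return lruvec;
}

static void unlock_page_lruvec_irq(struct lruvec *lruvec)
{
	spin_unlock_irq(&lruvec->lru_lock);
}

Note that this lookup-then-lock is itself racy against the page changing
lruvec, which is exactly the concern raised in the next reply.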
Matthew Wilcox Jan. 13, 2020, 4:34 p.m. UTC | #5
On Mon, Jan 13, 2020 at 08:47:25PM +0800, Alex Shi wrote:
> On 2020/1/13 at 5:55 PM, Konstantin Khlebnikov wrote:
> >>> That's wrong. Here PageLRU must be checked again under lru_lock.
> >> Hi, Konstantin,
> >>
> >> To keep the original logic, we could take the lock and then release it for
> >> !PageLRU, but I still can't figure out the problem scenario. Would you like
> >> to give more hints?
> > 
> > That's a trivial race: the page could be isolated from the LRU between
> > 
> > if (PageLRU(page))
> > and
> > spin_lock_irq(&pgdat->lru_lock);
> 
> Yes, it could be a problem. I guess the following change could be helpful;
> I will update it in the new version.

> +       if (lrucare) {
> +               lruvec = lock_page_lruvec_irq(page);
> +               if (likely(PageLRU(page))) {
> +                       ClearPageLRU(page);
> +                       del_page_from_lru_list(page, lruvec, page_lru(page));
> +               } else {
> +                       unlock_page_lruvec_irq(lruvec);
> +                       lruvec = NULL;
> +               }

What about a harder race to hit like a page being on LRU list A when you
look up the lruvec, then it's removed and added to LRU list B by the
time you get the lock?  At that point, you are holding a lock on the
wrong LRU list.  I think you need to check not just that the page
is still PageLRU but also still on the same LRU list.
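
For the lruvec half of that concern, one hypothetical way to handle it (a
sketch, not code from this series) is to verify, after taking the lock, that
the page still belongs to the locked lruvec, and retry otherwise:

static struct lruvec *lock_page_lruvec_irq(struct page *page)
{
	struct lruvec *lruvec;

	for (;;) {
		lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
		spin_lock_irq(&lruvec->lru_lock);
		/* Retry if the page moved to another lruvec meanwhile. */
		if (lruvec == mem_cgroup_page_lruvec(page, page_pgdat(page)))
			return lruvec;
		spin_unlock_irq(&lruvec->lru_lock);
	}
}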
Alex Shi Jan. 14, 2020, 9:20 a.m. UTC | #6
On 2020/1/14 at 12:34 AM, Matthew Wilcox wrote:
> On Mon, Jan 13, 2020 at 08:47:25PM +0800, Alex Shi wrote:
>> On 2020/1/13 at 5:55 PM, Konstantin Khlebnikov wrote:
>>>>> That's wrong. Here PageLRU must be checked again under lru_lock.
>>>> Hi, Konstantin,
>>>>
>>>> To keep the original logic, we could take the lock and then release it for
>>>> !PageLRU, but I still can't figure out the problem scenario. Would you like
>>>> to give more hints?
>>>
>>> That's a trivial race: the page could be isolated from the LRU between
>>>
>>> if (PageLRU(page))
>>> and
>>> spin_lock_irq(&pgdat->lru_lock);
>>
>> Yes, it could be a problem. I guess the following change could be helpful;
>> I will update it in the new version.
> 
>> +       if (lrucare) {
>> +               lruvec = lock_page_lruvec_irq(page);
>> +               if (likely(PageLRU(page))) {
>> +                       ClearPageLRU(page);
>> +                       del_page_from_lru_list(page, lruvec, page_lru(page));
>> +               } else {
>> +                       unlock_page_lruvec_irq(lruvec);
>> +                       lruvec = NULL;
>> +               }
> 
> What about a harder race to hit like a page being on LRU list A when you
> look up the lruvec, then it's removed and added to LRU list B by the
> time you get the lock?  At that point, you are holding a lock on the
> wrong LRU list.  I think you need to check not just that the page
> is still PageLRU but also still on the same LRU list.
> 

Thanks for comments, Matthew!

We will check and lock the lruvec after lock_page_memcg, so if that works
well, a page won't be moved from one lruvec to another. A later debug patch
also does this check, to see whether the lruvec changed.

If you mean the LRU list rather than the lruvec, it seems there is no way to
figure out which LRU list a page is on from the page itself now.

Thanks
Alex
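
For illustration, such a debug check might look like this (hypothetical; the
actual debug patch in the series may differ):

	lruvec = lock_page_lruvec_irq(page);
	/* Assert the page did not switch lruvec between the lookup
	 * and taking the lock. */
	VM_BUG_ON_PAGE(lruvec != mem_cgroup_page_lruvec(page, page_pgdat(page)), page);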

Patch

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c5b5f74cfd4d..0ad10caabc3d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2572,12 +2572,11 @@  static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
 
 static void lock_page_lru(struct page *page, int *isolated)
 {
-	pg_data_t *pgdat = page_pgdat(page);
-
-	spin_lock_irq(&pgdat->lru_lock);
 	if (PageLRU(page)) {
+		pg_data_t *pgdat = page_pgdat(page);
 		struct lruvec *lruvec;
 
+		spin_lock_irq(&pgdat->lru_lock);
 		lruvec = mem_cgroup_page_lruvec(page, pgdat);
 		ClearPageLRU(page);
 		del_page_from_lru_list(page, lruvec, page_lru(page));
@@ -2588,17 +2587,17 @@  static void lock_page_lru(struct page *page, int *isolated)
 
 static void unlock_page_lru(struct page *page, int isolated)
 {
-	pg_data_t *pgdat = page_pgdat(page);
 
 	if (isolated) {
+		pg_data_t *pgdat = page_pgdat(page);
 		struct lruvec *lruvec;
 
 		lruvec = mem_cgroup_page_lruvec(page, pgdat);
 		VM_BUG_ON_PAGE(PageLRU(page), page);
 		SetPageLRU(page);
 		add_page_to_lru_list(page, lruvec, page_lru(page));
+		spin_unlock_irq(&pgdat->lru_lock);
 	}
-	spin_unlock_irq(&pgdat->lru_lock);
 }
 
 static void commit_charge(struct page *page, struct mem_cgroup *memcg,