
[7/7] mm/khugepaged: try to free transhuge swapcache when possible

Message ID 20220611084731.55155-8-linmiaohe@huawei.com (mailing list archive)
State New
Series A few cleanup patches for khugepaged

Commit Message

Miaohe Lin June 11, 2022, 8:47 a.m. UTC
Transhuge swapcache pages won't be freed in __collapse_huge_page_copy():
release_pte_page() is not called for these pages, and thus
free_page_and_swap_cache() can't grab the page lock. These pages won't be
freed from the swap cache, even if we are the only user, until the next
reclaim pass. That shouldn't really hurt, but we can try to free these
pages to save more memory for the system.
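
For context, the helper exported below looks roughly like this in
mm/swap_state.c at the time of this series (a simplified sketch, not part
of the diff): it only does anything if it can take the page lock without
blocking.

void free_swap_cache(struct page *page)
{
	/* Drop the swap cache's copy if no one else maps the page and
	 * the page lock can be taken without sleeping. */
	if (PageSwapCache(page) && !page_mapped(page) &&
	    trylock_page(page)) {
		try_to_free_swap(page);
		unlock_page(page);
	}
}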

Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
---
 include/linux/swap.h | 5 +++++
 mm/khugepaged.c      | 1 +
 mm/swap.h            | 5 -----
 3 files changed, 6 insertions(+), 5 deletions(-)

Comments

Zach O'Keefe June 15, 2022, 5:13 p.m. UTC | #1
On 11 Jun 16:47, Miaohe Lin wrote:
> Transhuge swapcaches won't be freed in __collapse_huge_page_copy().
> It's because release_pte_page() is not called for these pages and
> thus free_page_and_swap_cache can't grab the page lock. These pages
> won't be freed from swap cache even if we are the only user until
> next time reclaim. It shouldn't hurt indeed, but we could try to
> free these pages to save more memory for system.
> 
> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
> ---
>  include/linux/swap.h | 5 +++++
>  mm/khugepaged.c      | 1 +
>  mm/swap.h            | 5 -----
>  3 files changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 8672a7123ccd..ccb83b12b724 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -456,6 +456,7 @@ static inline unsigned long total_swapcache_pages(void)
>  	return global_node_page_state(NR_SWAPCACHE);
>  }
>  
> +extern void free_swap_cache(struct page *page);
>  extern void free_page_and_swap_cache(struct page *);
>  extern void free_pages_and_swap_cache(struct page **, int);
>  /* linux/mm/swapfile.c */
> @@ -540,6 +541,10 @@ static inline void put_swap_device(struct swap_info_struct *si)
>  /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
>  #define free_swap_and_cache(e) is_pfn_swap_entry(e)
>  
> +static inline void free_swap_cache(struct page *page)
> +{
> +}
> +
>  static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
>  {
>  	return 0;
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index ee0a719c8be9..52109ad13f78 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -756,6 +756,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
>  	list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
>  		list_del(&src_page->lru);
>  		release_pte_page(src_page);
> +		free_swap_cache(src_page);
>  	}
>  }

Aside: in __collapse_huge_page_isolate() (and also here), why can't we just check
PageCompound(page) && page == compound_head(page) so we only act on compound pages
once? AFAIK this would do away with the compound_pagelist business...

Anyway, as-is, free_page_and_swap_cache() won't be able to do
try_to_free_swap(), since it can't grab the page lock, but it will call put_page().
I think (?) the last page ref might be dropped in release_pte_page(), so should
free_swap_cache() come before it?
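
To sketch the hazard (assuming release_pte_page() still ends in
unlock_page() + putback_lru_page() as in this tree; illustration only,
not code from the patch):

	release_pte_page(src_page);	/* putback_lru_page() -> put_page() may drop the last ref */
	free_swap_cache(src_page);	/* would then touch a freed page */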

>  
> diff --git a/mm/swap.h b/mm/swap.h
> index 0193797b0c92..863f6086c916 100644
> --- a/mm/swap.h
> +++ b/mm/swap.h
> @@ -41,7 +41,6 @@ void __delete_from_swap_cache(struct page *page,
>  void delete_from_swap_cache(struct page *page);
>  void clear_shadow_from_swap_cache(int type, unsigned long begin,
>  				  unsigned long end);
> -void free_swap_cache(struct page *page);
>  struct page *lookup_swap_cache(swp_entry_t entry,
>  			       struct vm_area_struct *vma,
>  			       unsigned long addr);
> @@ -81,10 +80,6 @@ static inline struct address_space *swap_address_space(swp_entry_t entry)
>  	return NULL;
>  }
>  
> -static inline void free_swap_cache(struct page *page)
> -{
> -}
> -
>  static inline void show_swap_cache_info(void)
>  {
>  }
> -- 
> 2.23.0
> 
>
Yang Shi June 15, 2022, 11:58 p.m. UTC | #2
On Sat, Jun 11, 2022 at 1:47 AM Miaohe Lin <linmiaohe@huawei.com> wrote:
>
> Transhuge swapcaches won't be freed in __collapse_huge_page_copy().
> It's because release_pte_page() is not called for these pages and
> thus free_page_and_swap_cache can't grab the page lock. These pages
> won't be freed from swap cache even if we are the only user until
> next time reclaim. It shouldn't hurt indeed, but we could try to
> free these pages to save more memory for system.


>
> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
> ---
>  include/linux/swap.h | 5 +++++
>  mm/khugepaged.c      | 1 +
>  mm/swap.h            | 5 -----
>  3 files changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 8672a7123ccd..ccb83b12b724 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -456,6 +456,7 @@ static inline unsigned long total_swapcache_pages(void)
>         return global_node_page_state(NR_SWAPCACHE);
>  }
>
> +extern void free_swap_cache(struct page *page);
>  extern void free_page_and_swap_cache(struct page *);
>  extern void free_pages_and_swap_cache(struct page **, int);
>  /* linux/mm/swapfile.c */
> @@ -540,6 +541,10 @@ static inline void put_swap_device(struct swap_info_struct *si)
>  /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
>  #define free_swap_and_cache(e) is_pfn_swap_entry(e)
>
> +static inline void free_swap_cache(struct page *page)
> +{
> +}
> +
>  static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
>  {
>         return 0;
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index ee0a719c8be9..52109ad13f78 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -756,6 +756,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
>         list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
>                 list_del(&src_page->lru);
>                 release_pte_page(src_page);
> +               free_swap_cache(src_page);

Will this really work? free_swap_cache() will just decrement refcounts
without putting the page back to the buddy allocator. So the hugepage is
not actually freed at all. Am I missing something?
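
For reference, the sibling helper (a simplified sketch of mm/swap_state.c;
exact details hedged) shows the put_page() that free_swap_cache() alone
never does:

void free_page_and_swap_cache(struct page *page)
{
	free_swap_cache(page);		/* drops only the swap cache's reference */
	if (!is_huge_zero_page(page))
		put_page(page);		/* the caller's reference; this can actually free the page */
}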

>         }
>  }
>
> diff --git a/mm/swap.h b/mm/swap.h
> index 0193797b0c92..863f6086c916 100644
> --- a/mm/swap.h
> +++ b/mm/swap.h
> @@ -41,7 +41,6 @@ void __delete_from_swap_cache(struct page *page,
>  void delete_from_swap_cache(struct page *page);
>  void clear_shadow_from_swap_cache(int type, unsigned long begin,
>                                   unsigned long end);
> -void free_swap_cache(struct page *page);
>  struct page *lookup_swap_cache(swp_entry_t entry,
>                                struct vm_area_struct *vma,
>                                unsigned long addr);
> @@ -81,10 +80,6 @@ static inline struct address_space *swap_address_space(swp_entry_t entry)
>         return NULL;
>  }
>
> -static inline void free_swap_cache(struct page *page)
> -{
> -}
> -
>  static inline void show_swap_cache_info(void)
>  {
>  }
> --
> 2.23.0
>
>
Mika Penttilä June 16, 2022, 3:38 a.m. UTC | #3
On 15.6.2022 20.13, Zach O'Keefe wrote:
> On 11 Jun 16:47, Miaohe Lin wrote:
>> Transhuge swapcaches won't be freed in __collapse_huge_page_copy().
>> It's because release_pte_page() is not called for these pages and
>> thus free_page_and_swap_cache can't grab the page lock. These pages
>> won't be freed from swap cache even if we are the only user until
>> next time reclaim. It shouldn't hurt indeed, but we could try to
>> free these pages to save more memory for system.
>>
>> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
>> ---
>>   include/linux/swap.h | 5 +++++
>>   mm/khugepaged.c      | 1 +
>>   mm/swap.h            | 5 -----
>>   3 files changed, 6 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/swap.h b/include/linux/swap.h
>> index 8672a7123ccd..ccb83b12b724 100644
>> --- a/include/linux/swap.h
>> +++ b/include/linux/swap.h
>> @@ -456,6 +456,7 @@ static inline unsigned long total_swapcache_pages(void)
>>   	return global_node_page_state(NR_SWAPCACHE);
>>   }
>>   
>> +extern void free_swap_cache(struct page *page);
>>   extern void free_page_and_swap_cache(struct page *);
>>   extern void free_pages_and_swap_cache(struct page **, int);
>>   /* linux/mm/swapfile.c */
>> @@ -540,6 +541,10 @@ static inline void put_swap_device(struct swap_info_struct *si)
>>   /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
>>   #define free_swap_and_cache(e) is_pfn_swap_entry(e)
>>   
>> +static inline void free_swap_cache(struct page *page)
>> +{
>> +}
>> +
>>   static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
>>   {
>>   	return 0;
>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>> index ee0a719c8be9..52109ad13f78 100644
>> --- a/mm/khugepaged.c
>> +++ b/mm/khugepaged.c
>> @@ -756,6 +756,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
>>   	list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
>>   		list_del(&src_page->lru);
>>   		release_pte_page(src_page);
>> +		free_swap_cache(src_page);
>>   	}
>>   }
> 
> Aside: in __collapse_huge_page_isolate() (and also here) why can't we just check
> PageCompound(page) && page == compound_head(page) to only act on compound pages
> once? AFAIK this would alleviate this compound_pagelist business..



It is for pte-mapped compound pages. Things like page lock/unlock and LRU
isolate/putback have to operate on head pages, and you have to do
operations like copying from tail pages before releasing the heads. So
while you could maybe add head/tail tests here and there, work the pages
backwards, etc., it easily gets messier than the current solution with
compound_pagelist.



> 
> Anyways, as-is, free_page_and_swap_cache() won't be able to do
> try_to_free_swap(), since it can't grab page lock, put it will call put_page().
> I think (?) the last page ref might be dropped in release_pte_page(), so should
> free_swap_cache() come before it?
> 
>>   
>> diff --git a/mm/swap.h b/mm/swap.h
>> index 0193797b0c92..863f6086c916 100644
>> --- a/mm/swap.h
>> +++ b/mm/swap.h
>> @@ -41,7 +41,6 @@ void __delete_from_swap_cache(struct page *page,
>>   void delete_from_swap_cache(struct page *page);
>>   void clear_shadow_from_swap_cache(int type, unsigned long begin,
>>   				  unsigned long end);
>> -void free_swap_cache(struct page *page);
>>   struct page *lookup_swap_cache(swp_entry_t entry,
>>   			       struct vm_area_struct *vma,
>>   			       unsigned long addr);
>> @@ -81,10 +80,6 @@ static inline struct address_space *swap_address_space(swp_entry_t entry)
>>   	return NULL;
>>   }
>>   
>> -static inline void free_swap_cache(struct page *page)
>> -{
>> -}
>> -
>>   static inline void show_swap_cache_info(void)
>>   {
>>   }
>> -- 
>> 2.23.0
>>
>>
> 

--Mika
Miaohe Lin June 16, 2022, 7:33 a.m. UTC | #4
On 2022/6/16 1:13, Zach O'Keefe wrote:
> On 11 Jun 16:47, Miaohe Lin wrote:
>> Transhuge swapcaches won't be freed in __collapse_huge_page_copy().
>> It's because release_pte_page() is not called for these pages and
>> thus free_page_and_swap_cache can't grab the page lock. These pages
>> won't be freed from swap cache even if we are the only user until
>> next time reclaim. It shouldn't hurt indeed, but we could try to
>> free these pages to save more memory for system.
>>
>> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
>> ---
>>  include/linux/swap.h | 5 +++++
>>  mm/khugepaged.c      | 1 +
>>  mm/swap.h            | 5 -----
>>  3 files changed, 6 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/swap.h b/include/linux/swap.h
>> index 8672a7123ccd..ccb83b12b724 100644
>> --- a/include/linux/swap.h
>> +++ b/include/linux/swap.h
>> @@ -456,6 +456,7 @@ static inline unsigned long total_swapcache_pages(void)
>>  	return global_node_page_state(NR_SWAPCACHE);
>>  }
>>  
>> +extern void free_swap_cache(struct page *page);
>>  extern void free_page_and_swap_cache(struct page *);
>>  extern void free_pages_and_swap_cache(struct page **, int);
>>  /* linux/mm/swapfile.c */
>> @@ -540,6 +541,10 @@ static inline void put_swap_device(struct swap_info_struct *si)
>>  /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
>>  #define free_swap_and_cache(e) is_pfn_swap_entry(e)
>>  
>> +static inline void free_swap_cache(struct page *page)
>> +{
>> +}
>> +
>>  static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
>>  {
>>  	return 0;
>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>> index ee0a719c8be9..52109ad13f78 100644
>> --- a/mm/khugepaged.c
>> +++ b/mm/khugepaged.c
>> @@ -756,6 +756,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
>>  	list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
>>  		list_del(&src_page->lru);
>>  		release_pte_page(src_page);
>> +		free_swap_cache(src_page);
>>  	}
>>  }
> 
> Aside: in __collapse_huge_page_isolate() (and also here) why can't we just check
> PageCompound(page) && page == compound_head(page) to only act on compound pages
> once? AFAIK this would alleviate this compound_pagelist business..
> 
> Anyways, as-is, free_page_and_swap_cache() won't be able to do
> try_to_free_swap(), since it can't grab page lock, put it will call put_page().
> I think (?) the last page ref might be dropped in release_pte_page(), so should
> free_swap_cache() come before it?

Thanks for catching this! If the page is not in the swap cache, the last page ref might
be dropped there. So it's bad to call free_swap_cache() after it. Thanks!

> 
>>  
>> diff --git a/mm/swap.h b/mm/swap.h
>> index 0193797b0c92..863f6086c916 100644
>> --- a/mm/swap.h
>> +++ b/mm/swap.h
>> @@ -41,7 +41,6 @@ void __delete_from_swap_cache(struct page *page,
>>  void delete_from_swap_cache(struct page *page);
>>  void clear_shadow_from_swap_cache(int type, unsigned long begin,
>>  				  unsigned long end);
>> -void free_swap_cache(struct page *page);
>>  struct page *lookup_swap_cache(swp_entry_t entry,
>>  			       struct vm_area_struct *vma,
>>  			       unsigned long addr);
>> @@ -81,10 +80,6 @@ static inline struct address_space *swap_address_space(swp_entry_t entry)
>>  	return NULL;
>>  }
>>  
>> -static inline void free_swap_cache(struct page *page)
>> -{
>> -}
>> -
>>  static inline void show_swap_cache_info(void)
>>  {
>>  }
>> -- 
>> 2.23.0
>>
>>
> .
>
Miaohe Lin June 16, 2022, 7:42 a.m. UTC | #5
On 2022/6/16 7:58, Yang Shi wrote:
> On Sat, Jun 11, 2022 at 1:47 AM Miaohe Lin <linmiaohe@huawei.com> wrote:
>>
>> Transhuge swapcaches won't be freed in __collapse_huge_page_copy().
>> It's because release_pte_page() is not called for these pages and
>> thus free_page_and_swap_cache can't grab the page lock. These pages
>> won't be freed from swap cache even if we are the only user until
>> next time reclaim. It shouldn't hurt indeed, but we could try to
>> free these pages to save more memory for system.
> 
> 
>>
>> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
>> ---
>>  include/linux/swap.h | 5 +++++
>>  mm/khugepaged.c      | 1 +
>>  mm/swap.h            | 5 -----
>>  3 files changed, 6 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/swap.h b/include/linux/swap.h
>> index 8672a7123ccd..ccb83b12b724 100644
>> --- a/include/linux/swap.h
>> +++ b/include/linux/swap.h
>> @@ -456,6 +456,7 @@ static inline unsigned long total_swapcache_pages(void)
>>         return global_node_page_state(NR_SWAPCACHE);
>>  }
>>
>> +extern void free_swap_cache(struct page *page);
>>  extern void free_page_and_swap_cache(struct page *);
>>  extern void free_pages_and_swap_cache(struct page **, int);
>>  /* linux/mm/swapfile.c */
>> @@ -540,6 +541,10 @@ static inline void put_swap_device(struct swap_info_struct *si)
>>  /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
>>  #define free_swap_and_cache(e) is_pfn_swap_entry(e)
>>
>> +static inline void free_swap_cache(struct page *page)
>> +{
>> +}
>> +
>>  static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
>>  {
>>         return 0;
>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>> index ee0a719c8be9..52109ad13f78 100644
>> --- a/mm/khugepaged.c
>> +++ b/mm/khugepaged.c
>> @@ -756,6 +756,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
>>         list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
>>                 list_del(&src_page->lru);
>>                 release_pte_page(src_page);
>> +               free_swap_cache(src_page);
> 
> Will this really work? The free_swap_cache() will just dec refcounts
> without putting the page back to buddy. So the hugepage is not
> actually freed at all. Am I missing something?

Thanks for catching this! If the page is on the percpu lru_pvecs cache, it will
be released when the lru_pvecs are drained. But if not, free_swap_cache() won't
free the page, as it assumes the caller holds a reference on the page and thus
only does page_ref_sub(). Does the change below make sense to you?

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 52109ad13f78..b8c96e33591d 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -755,8 +755,12 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,

        list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
                list_del(&src_page->lru);
-               release_pte_page(src_page);
+               mod_node_page_state(page_pgdat(src_page),
+                                   NR_ISOLATED_ANON + page_is_file_lru(src_page),
+                                   -compound_nr(src_page));
+               unlock_page(src_page);
                free_swap_cache(src_page);
+               putback_lru_page(src_page);
        }
 }

Thanks!
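
To spell out the intended ordering (my reading of the diff above, hedged):
free_swap_cache() uses trylock_page(), so the page must be unlocked first,
while the isolation reference taken at collapse time keeps it alive until
the putback:

	unlock_page(src_page);		/* let trylock_page() in free_swap_cache() succeed */
	free_swap_cache(src_page);	/* drop the swap cache's reference if otherwise unused */
	putback_lru_page(src_page);	/* drop our reference; the page can now be freed */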

> 
>>         }
>>  }
>>
>> diff --git a/mm/swap.h b/mm/swap.h
>> index 0193797b0c92..863f6086c916 100644
>> --- a/mm/swap.h
>> +++ b/mm/swap.h
>> @@ -41,7 +41,6 @@ void __delete_from_swap_cache(struct page *page,
>>  void delete_from_swap_cache(struct page *page);
>>  void clear_shadow_from_swap_cache(int type, unsigned long begin,
>>                                   unsigned long end);
>> -void free_swap_cache(struct page *page);
>>  struct page *lookup_swap_cache(swp_entry_t entry,
>>                                struct vm_area_struct *vma,
>>                                unsigned long addr);
>> @@ -81,10 +80,6 @@ static inline struct address_space *swap_address_space(swp_entry_t entry)
>>         return NULL;
>>  }
>>
>> -static inline void free_swap_cache(struct page *page)
>> -{
>> -}
>> -
>>  static inline void show_swap_cache_info(void)
>>  {
>>  }
>> --
>> 2.23.0
>>
>>
> .
>
Yang Shi June 16, 2022, 3:53 p.m. UTC | #6
On Thu, Jun 16, 2022 at 12:42 AM Miaohe Lin <linmiaohe@huawei.com> wrote:
>
> On 2022/6/16 7:58, Yang Shi wrote:
> > On Sat, Jun 11, 2022 at 1:47 AM Miaohe Lin <linmiaohe@huawei.com> wrote:
> >>
> >> Transhuge swapcaches won't be freed in __collapse_huge_page_copy().
> >> It's because release_pte_page() is not called for these pages and
> >> thus free_page_and_swap_cache can't grab the page lock. These pages
> >> won't be freed from swap cache even if we are the only user until
> >> next time reclaim. It shouldn't hurt indeed, but we could try to
> >> free these pages to save more memory for system.
> >
> >
> >>
> >> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
> >> ---
> >>  include/linux/swap.h | 5 +++++
> >>  mm/khugepaged.c      | 1 +
> >>  mm/swap.h            | 5 -----
> >>  3 files changed, 6 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/include/linux/swap.h b/include/linux/swap.h
> >> index 8672a7123ccd..ccb83b12b724 100644
> >> --- a/include/linux/swap.h
> >> +++ b/include/linux/swap.h
> >> @@ -456,6 +456,7 @@ static inline unsigned long total_swapcache_pages(void)
> >>         return global_node_page_state(NR_SWAPCACHE);
> >>  }
> >>
> >> +extern void free_swap_cache(struct page *page);
> >>  extern void free_page_and_swap_cache(struct page *);
> >>  extern void free_pages_and_swap_cache(struct page **, int);
> >>  /* linux/mm/swapfile.c */
> >> @@ -540,6 +541,10 @@ static inline void put_swap_device(struct swap_info_struct *si)
> >>  /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
> >>  #define free_swap_and_cache(e) is_pfn_swap_entry(e)
> >>
> >> +static inline void free_swap_cache(struct page *page)
> >> +{
> >> +}
> >> +
> >>  static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
> >>  {
> >>         return 0;
> >> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> >> index ee0a719c8be9..52109ad13f78 100644
> >> --- a/mm/khugepaged.c
> >> +++ b/mm/khugepaged.c
> >> @@ -756,6 +756,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
> >>         list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
> >>                 list_del(&src_page->lru);
> >>                 release_pte_page(src_page);
> >> +               free_swap_cache(src_page);
> >
> > Will this really work? The free_swap_cache() will just dec refcounts
> > without putting the page back to buddy. So the hugepage is not
> > actually freed at all. Am I missing something?
>
> Thanks for catching this! If page is on percpu lru_pvecs cache, page will
> be released when lru_pvecs are drained. But if not, free_swap_cache() won't
> free the page as it assumes the caller has a reference on the page and thus
> only does page_ref_sub(). Does the below change looks sense for you?

THPs get drained immediately, so they won't stay in pagevecs.

>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 52109ad13f78..b8c96e33591d 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -755,8 +755,12 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
>
>         list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
>                 list_del(&src_page->lru);
> -               release_pte_page(src_page);
> +               mod_node_page_state(page_pgdat(src_page),
> +                                   NR_ISOLATED_ANON + page_is_file_lru(src_page),
> +                                   -compound_nr(src_page));
> +               unlock_page(src_page);
>                 free_swap_cache(src_page);
> +               putback_lru_page(src_page);

I'm not sure whether it is worth it for a rare corner case, since a THP
should not stay in the swap cache unless try_to_unmap() in vmscan fails,
IIUC. And it is not guaranteed that free_swap_cache() will get the page
lock.

>         }
>  }
>
> Thanks!
>
> >
> >>         }
> >>  }
> >>
> >> diff --git a/mm/swap.h b/mm/swap.h
> >> index 0193797b0c92..863f6086c916 100644
> >> --- a/mm/swap.h
> >> +++ b/mm/swap.h
> >> @@ -41,7 +41,6 @@ void __delete_from_swap_cache(struct page *page,
> >>  void delete_from_swap_cache(struct page *page);
> >>  void clear_shadow_from_swap_cache(int type, unsigned long begin,
> >>                                   unsigned long end);
> >> -void free_swap_cache(struct page *page);
> >>  struct page *lookup_swap_cache(swp_entry_t entry,
> >>                                struct vm_area_struct *vma,
> >>                                unsigned long addr);
> >> @@ -81,10 +80,6 @@ static inline struct address_space *swap_address_space(swp_entry_t entry)
> >>         return NULL;
> >>  }
> >>
> >> -static inline void free_swap_cache(struct page *page)
> >> -{
> >> -}
> >> -
> >>  static inline void show_swap_cache_info(void)
> >>  {
> >>  }
> >> --
> >> 2.23.0
> >>
> >>
> > .
> >
>
Miaohe Lin June 17, 2022, 2:26 a.m. UTC | #7
On 2022/6/16 23:53, Yang Shi wrote:
> On Thu, Jun 16, 2022 at 12:42 AM Miaohe Lin <linmiaohe@huawei.com> wrote:
>>
>> On 2022/6/16 7:58, Yang Shi wrote:
>>> On Sat, Jun 11, 2022 at 1:47 AM Miaohe Lin <linmiaohe@huawei.com> wrote:
>>>>
>>>> Transhuge swapcaches won't be freed in __collapse_huge_page_copy().
>>>> It's because release_pte_page() is not called for these pages and
>>>> thus free_page_and_swap_cache can't grab the page lock. These pages
>>>> won't be freed from swap cache even if we are the only user until
>>>> next time reclaim. It shouldn't hurt indeed, but we could try to
>>>> free these pages to save more memory for system.
>>>
>>>
>>>>
>>>> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
>>>> ---
>>>>  include/linux/swap.h | 5 +++++
>>>>  mm/khugepaged.c      | 1 +
>>>>  mm/swap.h            | 5 -----
>>>>  3 files changed, 6 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/include/linux/swap.h b/include/linux/swap.h
>>>> index 8672a7123ccd..ccb83b12b724 100644
>>>> --- a/include/linux/swap.h
>>>> +++ b/include/linux/swap.h
>>>> @@ -456,6 +456,7 @@ static inline unsigned long total_swapcache_pages(void)
>>>>         return global_node_page_state(NR_SWAPCACHE);
>>>>  }
>>>>
>>>> +extern void free_swap_cache(struct page *page);
>>>>  extern void free_page_and_swap_cache(struct page *);
>>>>  extern void free_pages_and_swap_cache(struct page **, int);
>>>>  /* linux/mm/swapfile.c */
>>>> @@ -540,6 +541,10 @@ static inline void put_swap_device(struct swap_info_struct *si)
>>>>  /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
>>>>  #define free_swap_and_cache(e) is_pfn_swap_entry(e)
>>>>
>>>> +static inline void free_swap_cache(struct page *page)
>>>> +{
>>>> +}
>>>> +
>>>>  static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
>>>>  {
>>>>         return 0;
>>>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>>>> index ee0a719c8be9..52109ad13f78 100644
>>>> --- a/mm/khugepaged.c
>>>> +++ b/mm/khugepaged.c
>>>> @@ -756,6 +756,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
>>>>         list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
>>>>                 list_del(&src_page->lru);
>>>>                 release_pte_page(src_page);
>>>> +               free_swap_cache(src_page);
>>>
>>> Will this really work? The free_swap_cache() will just dec refcounts
>>> without putting the page back to buddy. So the hugepage is not
>>> actually freed at all. Am I missing something?
>>
>> Thanks for catching this! If page is on percpu lru_pvecs cache, page will
>> be released when lru_pvecs are drained. But if not, free_swap_cache() won't
>> free the page as it assumes the caller has a reference on the page and thus
>> only does page_ref_sub(). Does the below change looks sense for you?
> 
> THP gets drained immediately so they won't stay in pagevecs.

Yes, you're right. I missed this.

> 
>>
>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>> index 52109ad13f78..b8c96e33591d 100644
>> --- a/mm/khugepaged.c
>> +++ b/mm/khugepaged.c
>> @@ -755,8 +755,12 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
>>
>>         list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
>>                 list_del(&src_page->lru);
>> -               release_pte_page(src_page);
>> +               mod_node_page_state(page_pgdat(src_page),
>> +                                   NR_ISOLATED_ANON + page_is_file_lru(src_page),
>> +                                   -compound_nr(src_page));
>> +               unlock_page(src_page);
>>                 free_swap_cache(src_page);
>> +               putback_lru_page(src_page);
> 
> I'm not sure if it is worth it or not for a rare corner case since THP
> should not stay in swapcache unless try_to_unmap() in vmscan fails

IIUC, even if try_to_unmap() in vmscan succeeds, the THP might still be in the
swapcache if shrink_page_list() is not called for this THP again after writeback
is done, e.g. when shrink_page_list() is called from madvise so there is no
memory pressure, or when do_swap_page() puts the THP into the page table again.
Also the THP might not be split if deferred_split_shrinker is not invoked, e.g.
because memory is not scarce. Even if there is memory pressure, the THP will stay
in the swapcache until the next round of page reclaim for this THP is done. So
there should be a non-negligible window during which the THP stays in the
swapcache. Or am I missing something?

> IIUC. And it is not guaranteed that free_swap_cache() will get the
> page lock.

IMHO, we're not guaranteed that free_swap_cache() will get the page lock for a
normal page either.

Thanks!

> 
>>         }
>>  }
>>
>> Thanks!
>>
>>>
>>>>         }
>>>>  }
>>>>
>>>> diff --git a/mm/swap.h b/mm/swap.h
>>>> index 0193797b0c92..863f6086c916 100644
>>>> --- a/mm/swap.h
>>>> +++ b/mm/swap.h
>>>> @@ -41,7 +41,6 @@ void __delete_from_swap_cache(struct page *page,
>>>>  void delete_from_swap_cache(struct page *page);
>>>>  void clear_shadow_from_swap_cache(int type, unsigned long begin,
>>>>                                   unsigned long end);
>>>> -void free_swap_cache(struct page *page);
>>>>  struct page *lookup_swap_cache(swp_entry_t entry,
>>>>                                struct vm_area_struct *vma,
>>>>                                unsigned long addr);
>>>> @@ -81,10 +80,6 @@ static inline struct address_space *swap_address_space(swp_entry_t entry)
>>>>         return NULL;
>>>>  }
>>>>
>>>> -static inline void free_swap_cache(struct page *page)
>>>> -{
>>>> -}
>>>> -
>>>>  static inline void show_swap_cache_info(void)
>>>>  {
>>>>  }
>>>> --
>>>> 2.23.0
>>>>
>>>>
>>> .
>>>
>>
> .
>
Yang Shi June 17, 2022, 4:35 p.m. UTC | #8
On Thu, Jun 16, 2022 at 7:27 PM Miaohe Lin <linmiaohe@huawei.com> wrote:
>
> On 2022/6/16 23:53, Yang Shi wrote:
> > On Thu, Jun 16, 2022 at 12:42 AM Miaohe Lin <linmiaohe@huawei.com> wrote:
> >>
> >> On 2022/6/16 7:58, Yang Shi wrote:
> >>> On Sat, Jun 11, 2022 at 1:47 AM Miaohe Lin <linmiaohe@huawei.com> wrote:
> >>>>
> >>>> Transhuge swapcaches won't be freed in __collapse_huge_page_copy().
> >>>> It's because release_pte_page() is not called for these pages and
> >>>> thus free_page_and_swap_cache can't grab the page lock. These pages
> >>>> won't be freed from swap cache even if we are the only user until
> >>>> next time reclaim. It shouldn't hurt indeed, but we could try to
> >>>> free these pages to save more memory for system.
> >>>
> >>>
> >>>>
> >>>> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
> >>>> ---
> >>>>  include/linux/swap.h | 5 +++++
> >>>>  mm/khugepaged.c      | 1 +
> >>>>  mm/swap.h            | 5 -----
> >>>>  3 files changed, 6 insertions(+), 5 deletions(-)
> >>>>
> >>>> diff --git a/include/linux/swap.h b/include/linux/swap.h
> >>>> index 8672a7123ccd..ccb83b12b724 100644
> >>>> --- a/include/linux/swap.h
> >>>> +++ b/include/linux/swap.h
> >>>> @@ -456,6 +456,7 @@ static inline unsigned long total_swapcache_pages(void)
> >>>>         return global_node_page_state(NR_SWAPCACHE);
> >>>>  }
> >>>>
> >>>> +extern void free_swap_cache(struct page *page);
> >>>>  extern void free_page_and_swap_cache(struct page *);
> >>>>  extern void free_pages_and_swap_cache(struct page **, int);
> >>>>  /* linux/mm/swapfile.c */
> >>>> @@ -540,6 +541,10 @@ static inline void put_swap_device(struct swap_info_struct *si)
> >>>>  /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
> >>>>  #define free_swap_and_cache(e) is_pfn_swap_entry(e)
> >>>>
> >>>> +static inline void free_swap_cache(struct page *page)
> >>>> +{
> >>>> +}
> >>>> +
> >>>>  static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
> >>>>  {
> >>>>         return 0;
> >>>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> >>>> index ee0a719c8be9..52109ad13f78 100644
> >>>> --- a/mm/khugepaged.c
> >>>> +++ b/mm/khugepaged.c
> >>>> @@ -756,6 +756,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
> >>>>         list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
> >>>>                 list_del(&src_page->lru);
> >>>>                 release_pte_page(src_page);
> >>>> +               free_swap_cache(src_page);
> >>>
> >>> Will this really work? The free_swap_cache() will just dec refcounts
> >>> without putting the page back to buddy. So the hugepage is not
> >>> actually freed at all. Am I missing something?
> >>
> >> Thanks for catching this! If page is on percpu lru_pvecs cache, page will
> >> be released when lru_pvecs are drained. But if not, free_swap_cache() won't
> >> free the page as it assumes the caller has a reference on the page and thus
> >> only does page_ref_sub(). Does the below change looks sense for you?
> >
> > THP gets drained immediately so they won't stay in pagevecs.
>
> Yes, you're right. I missed this.
>
> >
> >>
> >> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> >> index 52109ad13f78..b8c96e33591d 100644
> >> --- a/mm/khugepaged.c
> >> +++ b/mm/khugepaged.c
> >> @@ -755,8 +755,12 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
> >>
> >>         list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
> >>                 list_del(&src_page->lru);
> >> -               release_pte_page(src_page);
> >> +               mod_node_page_state(page_pgdat(src_page),
> >> +                                   NR_ISOLATED_ANON + page_is_file_lru(src_page),
> >> +                                   -compound_nr(src_page));
> >> +               unlock_page(src_page);
> >>                 free_swap_cache(src_page);
> >> +               putback_lru_page(src_page);
> >
> > I'm not sure if it is worth it or not for a rare corner case since THP
> > should not stay in swapcache unless try_to_unmap() in vmscan fails
>
> IIUC, even if try_to_unmap() in vmscan succeeds, THP might be still in the
> swapcache if shrink_page_list is not called for this THP again after writeback
> is done, e.g. when shrink_page_list is called from madvise, so there might be

I don't get it; doesn't __remove_mapping() delete the page from the swap cache?

> no memory pressure, or do_swap_page puts the THP into page table again. Also THP

do_swap_page() just swaps in base pages, never a THP.

> might not be splited when deferred_split_shrinker is not called, e.g. due to

I don't see how deferred split is related to this.

> not lacking of memory. Even if there is memory pressure, the THP will stay in
> swapcache until next round page reclaim for this THP is done. So there should
> be a non-negligible window that THP will stay in the swapcache.
> Or am I miss something?

I guess you may have misunderstood what I meant. This patch is trying to
optimize freeing a THP in the swapcache. But it should be very rare that
khugepaged sees a THP from the swap cache. The only case I can think of is
try_to_unmap() in vmscan failing. That might leave the THP in the swap
cache so that khugepaged could see it.


>
> > IIUC. And it is not guaranteed that free_swap_cache() will get the
> > page lock.
>
> IMHO, we're not guaranteed that free_swap_cache() will get the page lock for the normal
> page anyway.
>
> Thanks!
>
> >
> >>         }
> >>  }
> >>
> >> Thanks!
> >>
> >>>
> >>>>         }
> >>>>  }
> >>>>
> >>>> diff --git a/mm/swap.h b/mm/swap.h
> >>>> index 0193797b0c92..863f6086c916 100644
> >>>> --- a/mm/swap.h
> >>>> +++ b/mm/swap.h
> >>>> @@ -41,7 +41,6 @@ void __delete_from_swap_cache(struct page *page,
> >>>>  void delete_from_swap_cache(struct page *page);
> >>>>  void clear_shadow_from_swap_cache(int type, unsigned long begin,
> >>>>                                   unsigned long end);
> >>>> -void free_swap_cache(struct page *page);
> >>>>  struct page *lookup_swap_cache(swp_entry_t entry,
> >>>>                                struct vm_area_struct *vma,
> >>>>                                unsigned long addr);
> >>>> @@ -81,10 +80,6 @@ static inline struct address_space *swap_address_space(swp_entry_t entry)
> >>>>         return NULL;
> >>>>  }
> >>>>
> >>>> -static inline void free_swap_cache(struct page *page)
> >>>> -{
> >>>> -}
> >>>> -
> >>>>  static inline void show_swap_cache_info(void)
> >>>>  {
> >>>>  }
> >>>> --
> >>>> 2.23.0
> >>>>
> >>>>
> >>> .
> >>>
> >>
> > .
> >
>
Miaohe Lin June 18, 2022, 3:13 a.m. UTC | #9
On 2022/6/18 0:35, Yang Shi wrote:
> On Thu, Jun 16, 2022 at 7:27 PM Miaohe Lin <linmiaohe@huawei.com> wrote:
>>
>> On 2022/6/16 23:53, Yang Shi wrote:
>>> On Thu, Jun 16, 2022 at 12:42 AM Miaohe Lin <linmiaohe@huawei.com> wrote:
>>>>
>>>> On 2022/6/16 7:58, Yang Shi wrote:
>>>>> On Sat, Jun 11, 2022 at 1:47 AM Miaohe Lin <linmiaohe@huawei.com> wrote:
>>>>>>
>>>>>> Transhuge swapcaches won't be freed in __collapse_huge_page_copy().
>>>>>> It's because release_pte_page() is not called for these pages and
>>>>>> thus free_page_and_swap_cache can't grab the page lock. These pages
>>>>>> won't be freed from swap cache even if we are the only user until
>>>>>> next time reclaim. It shouldn't hurt indeed, but we could try to
>>>>>> free these pages to save more memory for system.
>>>>>
>>>>>
>>>>>>
>>>>>> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
>>>>>> ---
>>>>>>  include/linux/swap.h | 5 +++++
>>>>>>  mm/khugepaged.c      | 1 +
>>>>>>  mm/swap.h            | 5 -----
>>>>>>  3 files changed, 6 insertions(+), 5 deletions(-)
>>>>>>
>>>>>> diff --git a/include/linux/swap.h b/include/linux/swap.h
>>>>>> index 8672a7123ccd..ccb83b12b724 100644
>>>>>> --- a/include/linux/swap.h
>>>>>> +++ b/include/linux/swap.h
>>>>>> @@ -456,6 +456,7 @@ static inline unsigned long total_swapcache_pages(void)
>>>>>>         return global_node_page_state(NR_SWAPCACHE);
>>>>>>  }
>>>>>>
>>>>>> +extern void free_swap_cache(struct page *page);
>>>>>>  extern void free_page_and_swap_cache(struct page *);
>>>>>>  extern void free_pages_and_swap_cache(struct page **, int);
>>>>>>  /* linux/mm/swapfile.c */
>>>>>> @@ -540,6 +541,10 @@ static inline void put_swap_device(struct swap_info_struct *si)
>>>>>>  /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
>>>>>>  #define free_swap_and_cache(e) is_pfn_swap_entry(e)
>>>>>>
>>>>>> +static inline void free_swap_cache(struct page *page)
>>>>>> +{
>>>>>> +}
>>>>>> +
>>>>>>  static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
>>>>>>  {
>>>>>>         return 0;
>>>>>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>>>>>> index ee0a719c8be9..52109ad13f78 100644
>>>>>> --- a/mm/khugepaged.c
>>>>>> +++ b/mm/khugepaged.c
>>>>>> @@ -756,6 +756,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
>>>>>>         list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
>>>>>>                 list_del(&src_page->lru);
>>>>>>                 release_pte_page(src_page);
>>>>>> +               free_swap_cache(src_page);
>>>>>
>>>>> Will this really work? The free_swap_cache() will just dec refcounts
>>>>> without putting the page back to buddy. So the hugepage is not
>>>>> actually freed at all. Am I missing something?
>>>>
>>>> Thanks for catching this! If page is on percpu lru_pvecs cache, page will
>>>> be released when lru_pvecs are drained. But if not, free_swap_cache() won't
>>>> free the page as it assumes the caller has a reference on the page and thus
>>>> only does page_ref_sub(). Does the below change looks sense for you?
>>>
>>> THP gets drained immediately so they won't stay in pagevecs.
>>
>> Yes, you're right. I missed this.
>>
>>>
>>>>
>>>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>>>> index 52109ad13f78..b8c96e33591d 100644
>>>> --- a/mm/khugepaged.c
>>>> +++ b/mm/khugepaged.c
>>>> @@ -755,8 +755,12 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
>>>>
>>>>         list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
>>>>                 list_del(&src_page->lru);
>>>> -               release_pte_page(src_page);
>>>> +               mod_node_page_state(page_pgdat(src_page),
>>>> +                                   NR_ISOLATED_ANON + page_is_file_lru(src_page),
>>>> +                                   -compound_nr(src_page));
>>>> +               unlock_page(src_page);
>>>>                 free_swap_cache(src_page);
>>>> +               putback_lru_page(src_page);
>>>
>>> I'm not sure if it is worth it or not for a rare corner case since THP
>>> should not stay in swapcache unless try_to_unmap() in vmscan fails
>>
>> IIUC, even if try_to_unmap() in vmscan succeeds, THP might be still in the
>> swapcache if shrink_page_list is not called for this THP again after writeback
>> is done, e.g. when shrink_page_list is called from madvise, so there might be
> 
> I don't get, doesn't __remove_mapping() delete the page from swap cache?

Sorry for the confusion. :(
IIUC, __remove_mapping() is only called in shrink_page_list() when the page is clean and
writeback is done. So in the first round of shrink_page_list(), the THP is under writeback
and __remove_mapping() won't be called. The THP will be removed from the swapcache via
__remove_mapping() in the next round of shrink_page_list(), if the THP is clean and not
under writeback by then. So the THP should stay in the swapcache until that next round of
shrink_page_list().
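
Roughly the vmscan shape I have in mind (mm/vmscan.c, heavily simplified
and hedged; not the literal code):

	if (PageWriteback(page))
		goto keep_locked;	/* page stays in the swap cache this round */
	/* ... writeback completed, page now clean ... */
	if (!__remove_mapping(mapping, page, true, sc->target_mem_cgroup))
		goto keep_locked;
	/* only here is the page deleted from the swap cache and freeable */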

And if shrink_page_list() is called from madvise, the next round of shrink_page_list()
for this THP won't happen as long as there is no memory pressure, because madvise can't
shrink pages that are already in the swapcache (the !pte_present case is ignored in
madvise_cold_or_pageout_pte_range()). So the THP might stay in the swapcache for a long
time.
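
The check I mean looks roughly like this in madvise_cold_or_pageout_pte_range()
(mm/madvise.c, simplified sketch):

	for (; addr < end; pte++, addr += PAGE_SIZE) {
		ptent = *pte;
		if (pte_none(ptent))
			continue;
		/* Swap entries are skipped, so pages already sitting in the
		 * swap cache are never fed into shrink_page_list() from here. */
		if (!pte_present(ptent))
			continue;
		/* ... */
	}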

Does this make sense to you?

> 
>> no memory pressure, or do_swap_page puts the THP into page table again. Also THP
> 
> do_swap_page() just swaps in base page, never THP.

If the THP is not removed from the swap cache, do_swap_page() can find it via
lookup_swap_cache(). So we "swap in" the THP.

> 
>> might not be splited when deferred_split_shrinker is not called, e.g. due to
> 
> I don't see how deferred split is related to this.

What I meant is that if the THP is split, khugepaged won't find that THP. So deferred
split should be considered?

> 
>> not lacking of memory. Even if there is memory pressure, the THP will stay in
>> swapcache until next round page reclaim for this THP is done. So there should
>> be a non-negligible window that THP will stay in the swapcache.
>> Or am I miss something?
> 
> I guess you may misunderstand what I meant. This patch is trying to
> optimize freeing THP in swapcache. But it should be very rare that
> khugepaged sees THP from swap cache. The only case I could think of is
> try_to_unmap() in vmscan fails. That might leave THP in swap cache so
> that khugepaged could see it.

I was trying to show you how a THP can stay in the swap cache. If it's not removed
from the swap cache via __remove_mapping(), not split, and not swapped in before
it's removed, it will stay in the swap cache. Or am I missing something?

I hope I made my point clear this time. ;)

> 
> 
>>
>>> IIUC. And it is not guaranteed that free_swap_cache() will get the
>>> page lock.
>>
>> IMHO, we're not guaranteed that free_swap_cache() will get the page lock for the normal
>> page anyway.
>>
>> Thanks!
>>
>>>
>>>>         }
>>>>  }
>>>>
>>>> Thanks!
>>>>
>>>>>
>>>>>>         }
>>>>>>  }
>>>>>>
>>>>>> diff --git a/mm/swap.h b/mm/swap.h
>>>>>> index 0193797b0c92..863f6086c916 100644
>>>>>> --- a/mm/swap.h
>>>>>> +++ b/mm/swap.h
>>>>>> @@ -41,7 +41,6 @@ void __delete_from_swap_cache(struct page *page,
>>>>>>  void delete_from_swap_cache(struct page *page);
>>>>>>  void clear_shadow_from_swap_cache(int type, unsigned long begin,
>>>>>>                                   unsigned long end);
>>>>>> -void free_swap_cache(struct page *page);
>>>>>>  struct page *lookup_swap_cache(swp_entry_t entry,
>>>>>>                                struct vm_area_struct *vma,
>>>>>>                                unsigned long addr);
>>>>>> @@ -81,10 +80,6 @@ static inline struct address_space *swap_address_space(swp_entry_t entry)
>>>>>>         return NULL;
>>>>>>  }
>>>>>>
>>>>>> -static inline void free_swap_cache(struct page *page)
>>>>>> -{
>>>>>> -}
>>>>>> -
>>>>>>  static inline void show_swap_cache_info(void)
>>>>>>  {
>>>>>>  }
>>>>>> --
>>>>>> 2.23.0
>>>>>>
>>>>>>
>>>>> .
>>>>>
>>>>
>>> .
>>>
>>
> .
>

Patch

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 8672a7123ccd..ccb83b12b724 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -456,6 +456,7 @@  static inline unsigned long total_swapcache_pages(void)
 	return global_node_page_state(NR_SWAPCACHE);
 }
 
+extern void free_swap_cache(struct page *page);
 extern void free_page_and_swap_cache(struct page *);
 extern void free_pages_and_swap_cache(struct page **, int);
 /* linux/mm/swapfile.c */
@@ -540,6 +541,10 @@  static inline void put_swap_device(struct swap_info_struct *si)
 /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
 #define free_swap_and_cache(e) is_pfn_swap_entry(e)
 
+static inline void free_swap_cache(struct page *page)
+{
+}
+
 static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
 {
 	return 0;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index ee0a719c8be9..52109ad13f78 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -756,6 +756,7 @@  static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 	list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
 		list_del(&src_page->lru);
 		release_pte_page(src_page);
+		free_swap_cache(src_page);
 	}
 }
 
diff --git a/mm/swap.h b/mm/swap.h
index 0193797b0c92..863f6086c916 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -41,7 +41,6 @@  void __delete_from_swap_cache(struct page *page,
 void delete_from_swap_cache(struct page *page);
 void clear_shadow_from_swap_cache(int type, unsigned long begin,
 				  unsigned long end);
-void free_swap_cache(struct page *page);
 struct page *lookup_swap_cache(swp_entry_t entry,
 			       struct vm_area_struct *vma,
 			       unsigned long addr);
@@ -81,10 +80,6 @@  static inline struct address_space *swap_address_space(swp_entry_t entry)
 	return NULL;
 }
 
-static inline void free_swap_cache(struct page *page)
-{
-}
-
 static inline void show_swap_cache_info(void)
 {
 }