diff mbox series

[mm-unstable,v1,1/4] mm/swap: stop using page->private on tail pages for THP_SWAP

Message ID 20230821160849.531668-2-david@redhat.com (mailing list archive)
State New
Headers show
Series mm/swap: stop using page->private on tail pages for THP_SWAP + cleanups | expand

Commit Message

David Hildenbrand Aug. 21, 2023, 4:08 p.m. UTC
Let's stop using page->private on tail pages, making it possible to
just unconditionally reuse that field in the tail pages of large folios.

The remaining usage of the private field for THP_SWAP is in the THP
splitting code (mm/huge_memory.c), which we'll handle separately later.

Update the THP_SWAP documentation and sanity checks in mm_types.h and
__split_huge_page_tail().

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 arch/arm64/mm/mteswap.c  |  5 +++--
 include/linux/mm_types.h | 12 +-----------
 include/linux/swap.h     |  9 +++++++++
 mm/huge_memory.c         | 15 ++++++---------
 mm/memory.c              |  2 +-
 mm/rmap.c                |  2 +-
 mm/swap_state.c          |  5 +++--
 mm/swapfile.c            |  4 ++--
 8 files changed, 26 insertions(+), 28 deletions(-)

Comments

Catalin Marinas Aug. 22, 2023, 4:24 p.m. UTC | #1
On Mon, Aug 21, 2023 at 06:08:46PM +0200, David Hildenbrand wrote:
> diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
> index cd508ba80ab1..a31833e3ddc5 100644
> --- a/arch/arm64/mm/mteswap.c
> +++ b/arch/arm64/mm/mteswap.c
> @@ -33,8 +33,9 @@ int mte_save_tags(struct page *page)
>  
>  	mte_save_page_tags(page_address(page), tag_storage);
>  
> -	/* page_private contains the swap entry.val set in do_swap_page */
> -	ret = xa_store(&mte_pages, page_private(page), tag_storage, GFP_KERNEL);
> +	/* lookup the swap entry.val from the page */
> +	ret = xa_store(&mte_pages, page_swap_entry(page).val, tag_storage,
> +		       GFP_KERNEL);
>  	if (WARN(xa_is_err(ret), "Failed to store MTE tags")) {
>  		mte_free_tag_storage(tag_storage);
>  		return xa_err(ret);

For arm64:

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Yosry Ahmed Aug. 22, 2023, 5 p.m. UTC | #2
On Mon, Aug 21, 2023 at 9:09 AM David Hildenbrand <david@redhat.com> wrote:
>
> Let's stop using page->private on tail pages, making it possible to
> just unconditionally reuse that field in the tail pages of large folios.
>
> The remaining usage of the private field for THP_SWAP is in the THP
> splitting code (mm/huge_memory.c), that we'll handle separately later.
>
> Update the THP_SWAP documentation and sanity checks in mm_types.h and
> __split_huge_page_tail().
>
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  arch/arm64/mm/mteswap.c  |  5 +++--
>  include/linux/mm_types.h | 12 +-----------
>  include/linux/swap.h     |  9 +++++++++
>  mm/huge_memory.c         | 15 ++++++---------
>  mm/memory.c              |  2 +-
>  mm/rmap.c                |  2 +-
>  mm/swap_state.c          |  5 +++--
>  mm/swapfile.c            |  4 ++--
>  8 files changed, 26 insertions(+), 28 deletions(-)
>
> diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
> index cd508ba80ab1..a31833e3ddc5 100644
> --- a/arch/arm64/mm/mteswap.c
> +++ b/arch/arm64/mm/mteswap.c
> @@ -33,8 +33,9 @@ int mte_save_tags(struct page *page)
>
>         mte_save_page_tags(page_address(page), tag_storage);
>
> -       /* page_private contains the swap entry.val set in do_swap_page */
> -       ret = xa_store(&mte_pages, page_private(page), tag_storage, GFP_KERNEL);
> +       /* lookup the swap entry.val from the page */
> +       ret = xa_store(&mte_pages, page_swap_entry(page).val, tag_storage,
> +                      GFP_KERNEL);
>         if (WARN(xa_is_err(ret), "Failed to store MTE tags")) {
>                 mte_free_tag_storage(tag_storage);
>                 return xa_err(ret);
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index b9b6c88875b9..61361f1750c3 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -333,11 +333,8 @@ struct folio {
>                         atomic_t _pincount;
>  #ifdef CONFIG_64BIT
>                         unsigned int _folio_nr_pages;
> -                       /* 4 byte gap here */
> -       /* private: the union with struct page is transitional */
> -                       /* Fix THP_SWAP to not use tail->private */
> -                       unsigned long _private_1;
>  #endif
> +       /* private: the union with struct page is transitional */
>                 };
>                 struct page __page_1;
>         };
> @@ -358,9 +355,6 @@ struct folio {
>         /* public: */
>                         struct list_head _deferred_list;
>         /* private: the union with struct page is transitional */
> -                       unsigned long _avail_2a;
> -                       /* Fix THP_SWAP to not use tail->private */
> -                       unsigned long _private_2a;
>                 };
>                 struct page __page_2;
>         };
> @@ -385,9 +379,6 @@ FOLIO_MATCH(memcg_data, memcg_data);
>                         offsetof(struct page, pg) + sizeof(struct page))
>  FOLIO_MATCH(flags, _flags_1);
>  FOLIO_MATCH(compound_head, _head_1);
> -#ifdef CONFIG_64BIT
> -FOLIO_MATCH(private, _private_1);
> -#endif
>  #undef FOLIO_MATCH
>  #define FOLIO_MATCH(pg, fl)                                            \
>         static_assert(offsetof(struct folio, fl) ==                     \
> @@ -396,7 +387,6 @@ FOLIO_MATCH(flags, _flags_2);
>  FOLIO_MATCH(compound_head, _head_2);
>  FOLIO_MATCH(flags, _flags_2a);
>  FOLIO_MATCH(compound_head, _head_2a);
> -FOLIO_MATCH(private, _private_2a);
>  #undef FOLIO_MATCH
>
>  /**
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index bb5adc604144..84fe0e94f5cd 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -339,6 +339,15 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio)
>         return entry;
>  }
>
> +static inline swp_entry_t page_swap_entry(struct page *page)
> +{
> +       struct folio *folio = page_folio(page);
> +       swp_entry_t entry = folio_swap_entry(folio);
> +
> +       entry.val += page - &folio->page;

Would it be better to use folio_page_idx() here?

> +       return entry;
> +}
> +
>  static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry)
>  {
>         folio->private = (void *)entry.val;
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index cc2f65f8cc62..c04702ae71d2 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2446,18 +2446,15 @@ static void __split_huge_page_tail(struct page *head, int tail,
>         page_tail->index = head->index + tail;
>
>         /*
> -        * page->private should not be set in tail pages with the exception
> -        * of swap cache pages that store the swp_entry_t in tail pages.
> -        * Fix up and warn once if private is unexpectedly set.
> -        *
> -        * What of 32-bit systems, on which folio->_pincount overlays
> -        * head[1].private?  No problem: THP_SWAP is not enabled on 32-bit, and
> -        * pincount must be 0 for folio_ref_freeze() to have succeeded.
> +        * page->private should not be set in tail pages. Fix up and warn once
> +        * if private is unexpectedly set.
>          */
> -       if (!folio_test_swapcache(page_folio(head))) {
> -               VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
> +       if (unlikely(page_tail->private)) {
> +               VM_WARN_ON_ONCE_PAGE(true, page_tail);
>                 page_tail->private = 0;
>         }
> +       if (PageSwapCache(head))
> +               set_page_private(page_tail, (unsigned long)head->private + tail);
>
>         /* Page flags must be visible before we make the page non-compound. */
>         smp_wmb();
> diff --git a/mm/memory.c b/mm/memory.c
> index d003076b218d..ff13242c1589 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3882,7 +3882,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>                  * changed.
>                  */
>                 if (unlikely(!folio_test_swapcache(folio) ||
> -                            page_private(page) != entry.val))
> +                            page_swap_entry(page).val != entry.val))
>                         goto out_page;
>
>                 /*
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 1f04debdc87a..ec7f8e6c9e48 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1647,7 +1647,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
>                          */
>                         dec_mm_counter(mm, mm_counter(&folio->page));
>                 } else if (folio_test_anon(folio)) {
> -                       swp_entry_t entry = { .val = page_private(subpage) };
> +                       swp_entry_t entry = page_swap_entry(subpage);
>                         pte_t swp_pte;
>                         /*
>                          * Store the swap location in the pte.
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index 01f15139b7d9..2f2417810052 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -100,6 +100,7 @@ int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
>
>         folio_ref_add(folio, nr);
>         folio_set_swapcache(folio);
> +       folio_set_swap_entry(folio, entry);
>
>         do {
>                 xas_lock_irq(&xas);
> @@ -113,7 +114,6 @@ int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
>                                 if (shadowp)
>                                         *shadowp = old;
>                         }
> -                       set_page_private(folio_page(folio, i), entry.val + i);
>                         xas_store(&xas, folio);
>                         xas_next(&xas);
>                 }
> @@ -154,9 +154,10 @@ void __delete_from_swap_cache(struct folio *folio,
>         for (i = 0; i < nr; i++) {
>                 void *entry = xas_store(&xas, shadow);
>                 VM_BUG_ON_PAGE(entry != folio, entry);
> -               set_page_private(folio_page(folio, i), 0);
>                 xas_next(&xas);
>         }
> +       entry.val = 0;
> +       folio_set_swap_entry(folio, entry);
>         folio_clear_swapcache(folio);
>         address_space->nrpages -= nr;
>         __node_stat_mod_folio(folio, NR_FILE_PAGES, -nr);
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index d46933adf789..bd9d904671b9 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -3369,7 +3369,7 @@ struct swap_info_struct *swp_swap_info(swp_entry_t entry)
>
>  struct swap_info_struct *page_swap_info(struct page *page)
>  {
> -       swp_entry_t entry = { .val = page_private(page) };
> +       swp_entry_t entry = page_swap_entry(page);
>         return swp_swap_info(entry);
>  }
>
> @@ -3384,7 +3384,7 @@ EXPORT_SYMBOL_GPL(swapcache_mapping);
>
>  pgoff_t __page_file_index(struct page *page)
>  {
> -       swp_entry_t swap = { .val = page_private(page) };
> +       swp_entry_t swap = page_swap_entry(page);
>         return swp_offset(swap);
>  }
>  EXPORT_SYMBOL_GPL(__page_file_index);
> --
> 2.41.0
>
>
David Hildenbrand Aug. 22, 2023, 5:14 p.m. UTC | #3
>>
>> +static inline swp_entry_t page_swap_entry(struct page *page)
>> +{
>> +       struct folio *folio = page_folio(page);
>> +       swp_entry_t entry = folio_swap_entry(folio);
>> +
>> +       entry.val += page - &folio->page;
> 
> Would it be better to use folio_page_idx() here?

Sounds reasonable!
David Hildenbrand Aug. 23, 2023, 12:15 p.m. UTC | #4
On 21.08.23 18:08, David Hildenbrand wrote:
> Let's stop using page->private on tail pages, making it possible to
> just unconditionally reuse that field in the tail pages of large folios.
> 
> The remaining usage of the private field for THP_SWAP is in the THP
> splitting code (mm/huge_memory.c), that we'll handle separately later.
> 
> Update the THP_SWAP documentation and sanity checks in mm_types.h and
> __split_huge_page_tail().
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---

@Andrew, the following change on top


 From 2b1fd10ef07c6d47aa9cd0ce10445ab1e1b97361 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 22 Aug 2023 19:16:55 +0200
Subject: [PATCH] fixup: mm/swap: stop using page->private on tail pages for
  THP_SWAP

Per Yosry, use folio_page_idx(). It shouldn't make a difference for
our (THP) use case, but it's certainly cleaner.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
  include/linux/swap.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 84fe0e94f5cd..e5cf58a1cf9e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -344,7 +344,7 @@ static inline swp_entry_t page_swap_entry(struct page *page)
  	struct folio *folio = page_folio(page);
  	swp_entry_t entry = folio_swap_entry(folio);
  
-	entry.val += page - &folio->page;
+	entry.val += folio_page_idx(folio, page);
  	return entry;
  }
Yosry Ahmed Aug. 23, 2023, 3:12 p.m. UTC | #5
On Mon, Aug 21, 2023 at 9:09 AM David Hildenbrand <david@redhat.com> wrote:
>
> Let's stop using page->private on tail pages, making it possible to
> just unconditionally reuse that field in the tail pages of large folios.
>
> The remaining usage of the private field for THP_SWAP is in the THP
> splitting code (mm/huge_memory.c), that we'll handle separately later.
>
> Update the THP_SWAP documentation and sanity checks in mm_types.h and
> __split_huge_page_tail().
>
> Signed-off-by: David Hildenbrand <david@redhat.com>

The mm part looks good to me (with the added fixup):

Reviewed-by: Yosry Ahmed <yosryahmed@google.com>

Minor nit below, not worth a respin, but perhaps if you respin anyway
for something else.
> ---
>  arch/arm64/mm/mteswap.c  |  5 +++--
>  include/linux/mm_types.h | 12 +-----------
>  include/linux/swap.h     |  9 +++++++++
>  mm/huge_memory.c         | 15 ++++++---------
>  mm/memory.c              |  2 +-
>  mm/rmap.c                |  2 +-
>  mm/swap_state.c          |  5 +++--
>  mm/swapfile.c            |  4 ++--
>  8 files changed, 26 insertions(+), 28 deletions(-)
>
> diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
> index cd508ba80ab1..a31833e3ddc5 100644
> --- a/arch/arm64/mm/mteswap.c
> +++ b/arch/arm64/mm/mteswap.c
> @@ -33,8 +33,9 @@ int mte_save_tags(struct page *page)
>
>         mte_save_page_tags(page_address(page), tag_storage);
>
> -       /* page_private contains the swap entry.val set in do_swap_page */
> -       ret = xa_store(&mte_pages, page_private(page), tag_storage, GFP_KERNEL);
> +       /* lookup the swap entry.val from the page */
> +       ret = xa_store(&mte_pages, page_swap_entry(page).val, tag_storage,
> +                      GFP_KERNEL);
>         if (WARN(xa_is_err(ret), "Failed to store MTE tags")) {
>                 mte_free_tag_storage(tag_storage);
>                 return xa_err(ret);
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index b9b6c88875b9..61361f1750c3 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -333,11 +333,8 @@ struct folio {
>                         atomic_t _pincount;
>  #ifdef CONFIG_64BIT
>                         unsigned int _folio_nr_pages;
> -                       /* 4 byte gap here */
> -       /* private: the union with struct page is transitional */
> -                       /* Fix THP_SWAP to not use tail->private */
> -                       unsigned long _private_1;
>  #endif
> +       /* private: the union with struct page is transitional */
>                 };
>                 struct page __page_1;
>         };
> @@ -358,9 +355,6 @@ struct folio {
>         /* public: */
>                         struct list_head _deferred_list;
>         /* private: the union with struct page is transitional */
> -                       unsigned long _avail_2a;
> -                       /* Fix THP_SWAP to not use tail->private */
> -                       unsigned long _private_2a;
>                 };
>                 struct page __page_2;
>         };
> @@ -385,9 +379,6 @@ FOLIO_MATCH(memcg_data, memcg_data);
>                         offsetof(struct page, pg) + sizeof(struct page))
>  FOLIO_MATCH(flags, _flags_1);
>  FOLIO_MATCH(compound_head, _head_1);
> -#ifdef CONFIG_64BIT
> -FOLIO_MATCH(private, _private_1);
> -#endif
>  #undef FOLIO_MATCH
>  #define FOLIO_MATCH(pg, fl)                                            \
>         static_assert(offsetof(struct folio, fl) ==                     \
> @@ -396,7 +387,6 @@ FOLIO_MATCH(flags, _flags_2);
>  FOLIO_MATCH(compound_head, _head_2);
>  FOLIO_MATCH(flags, _flags_2a);
>  FOLIO_MATCH(compound_head, _head_2a);
> -FOLIO_MATCH(private, _private_2a);
>  #undef FOLIO_MATCH
>
>  /**
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index bb5adc604144..84fe0e94f5cd 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -339,6 +339,15 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio)
>         return entry;
>  }
>
> +static inline swp_entry_t page_swap_entry(struct page *page)
> +{
> +       struct folio *folio = page_folio(page);
> +       swp_entry_t entry = folio_swap_entry(folio);
> +
> +       entry.val += page - &folio->page;
> +       return entry;
> +}
> +
>  static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry)
>  {
>         folio->private = (void *)entry.val;
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index cc2f65f8cc62..c04702ae71d2 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2446,18 +2446,15 @@ static void __split_huge_page_tail(struct page *head, int tail,
>         page_tail->index = head->index + tail;
>
>         /*
> -        * page->private should not be set in tail pages with the exception
> -        * of swap cache pages that store the swp_entry_t in tail pages.
> -        * Fix up and warn once if private is unexpectedly set.
> -        *
> -        * What of 32-bit systems, on which folio->_pincount overlays
> -        * head[1].private?  No problem: THP_SWAP is not enabled on 32-bit, and
> -        * pincount must be 0 for folio_ref_freeze() to have succeeded.
> +        * page->private should not be set in tail pages. Fix up and warn once
> +        * if private is unexpectedly set.
>          */
> -       if (!folio_test_swapcache(page_folio(head))) {
> -               VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
> +       if (unlikely(page_tail->private)) {
> +               VM_WARN_ON_ONCE_PAGE(true, page_tail);
>                 page_tail->private = 0;
>         }

Could probably save a couple of lines here:

if (VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail))

       page_tail->private = 0;

> +       if (PageSwapCache(head))
> +               set_page_private(page_tail, (unsigned long)head->private + tail);
>
>         /* Page flags must be visible before we make the page non-compound. */
>         smp_wmb();
> diff --git a/mm/memory.c b/mm/memory.c
> index d003076b218d..ff13242c1589 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3882,7 +3882,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>                  * changed.
>                  */
>                 if (unlikely(!folio_test_swapcache(folio) ||
> -                            page_private(page) != entry.val))
> +                            page_swap_entry(page).val != entry.val))
>                         goto out_page;
>
>                 /*
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 1f04debdc87a..ec7f8e6c9e48 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1647,7 +1647,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
>                          */
>                         dec_mm_counter(mm, mm_counter(&folio->page));
>                 } else if (folio_test_anon(folio)) {
> -                       swp_entry_t entry = { .val = page_private(subpage) };
> +                       swp_entry_t entry = page_swap_entry(subpage);
>                         pte_t swp_pte;
>                         /*
>                          * Store the swap location in the pte.
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index 01f15139b7d9..2f2417810052 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -100,6 +100,7 @@ int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
>
>         folio_ref_add(folio, nr);
>         folio_set_swapcache(folio);
> +       folio_set_swap_entry(folio, entry);
>
>         do {
>                 xas_lock_irq(&xas);
> @@ -113,7 +114,6 @@ int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
>                                 if (shadowp)
>                                         *shadowp = old;
>                         }
> -                       set_page_private(folio_page(folio, i), entry.val + i);
>                         xas_store(&xas, folio);
>                         xas_next(&xas);
>                 }
> @@ -154,9 +154,10 @@ void __delete_from_swap_cache(struct folio *folio,
>         for (i = 0; i < nr; i++) {
>                 void *entry = xas_store(&xas, shadow);
>                 VM_BUG_ON_PAGE(entry != folio, entry);
> -               set_page_private(folio_page(folio, i), 0);
>                 xas_next(&xas);
>         }
> +       entry.val = 0;
> +       folio_set_swap_entry(folio, entry);
>         folio_clear_swapcache(folio);
>         address_space->nrpages -= nr;
>         __node_stat_mod_folio(folio, NR_FILE_PAGES, -nr);
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index d46933adf789..bd9d904671b9 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -3369,7 +3369,7 @@ struct swap_info_struct *swp_swap_info(swp_entry_t entry)
>
>  struct swap_info_struct *page_swap_info(struct page *page)
>  {
> -       swp_entry_t entry = { .val = page_private(page) };
> +       swp_entry_t entry = page_swap_entry(page);
>         return swp_swap_info(entry);
>  }
>
> @@ -3384,7 +3384,7 @@ EXPORT_SYMBOL_GPL(swapcache_mapping);
>
>  pgoff_t __page_file_index(struct page *page)
>  {
> -       swp_entry_t swap = { .val = page_private(page) };
> +       swp_entry_t swap = page_swap_entry(page);
>         return swp_offset(swap);
>  }
>  EXPORT_SYMBOL_GPL(__page_file_index);
> --
> 2.41.0
>
>
David Hildenbrand Aug. 23, 2023, 3:17 p.m. UTC | #6
On 23.08.23 17:12, Yosry Ahmed wrote:
> On Mon, Aug 21, 2023 at 9:09 AM David Hildenbrand <david@redhat.com> wrote:
>>
>> Let's stop using page->private on tail pages, making it possible to
>> just unconditionally reuse that field in the tail pages of large folios.
>>
>> The remaining usage of the private field for THP_SWAP is in the THP
>> splitting code (mm/huge_memory.c), that we'll handle separately later.
>>
>> Update the THP_SWAP documentation and sanity checks in mm_types.h and
>> __split_huge_page_tail().
>>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
> 
> The mm part looks good to me (with the added fixup):
> 
> Reviewed-by: Yosry Ahmed <yosryahmed@google.com>

Thanks!

>>   /**
>> diff --git a/include/linux/swap.h b/include/linux/swap.h
>> index bb5adc604144..84fe0e94f5cd 100644
>> --- a/include/linux/swap.h
>> +++ b/include/linux/swap.h
>> @@ -339,6 +339,15 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio)
>>          return entry;
>>   }
>>
>> +static inline swp_entry_t page_swap_entry(struct page *page)
>> +{
>> +       struct folio *folio = page_folio(page);
>> +       swp_entry_t entry = folio_swap_entry(folio);
>> +
>> +       entry.val += page - &folio->page;
>> +       return entry;
>> +}
>> +
>>   static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry)
>>   {
>>          folio->private = (void *)entry.val;
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index cc2f65f8cc62..c04702ae71d2 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -2446,18 +2446,15 @@ static void __split_huge_page_tail(struct page *head, int tail,
>>          page_tail->index = head->index + tail;
>>
>>          /*
>> -        * page->private should not be set in tail pages with the exception
>> -        * of swap cache pages that store the swp_entry_t in tail pages.
>> -        * Fix up and warn once if private is unexpectedly set.
>> -        *
>> -        * What of 32-bit systems, on which folio->_pincount overlays
>> -        * head[1].private?  No problem: THP_SWAP is not enabled on 32-bit, and
>> -        * pincount must be 0 for folio_ref_freeze() to have succeeded.
>> +        * page->private should not be set in tail pages. Fix up and warn once
>> +        * if private is unexpectedly set.
>>           */
>> -       if (!folio_test_swapcache(page_folio(head))) {
>> -               VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
>> +       if (unlikely(page_tail->private)) {
>> +               VM_WARN_ON_ONCE_PAGE(true, page_tail);
>>                  page_tail->private = 0;
>>          }
> 
> Could probably save a couple of lines here:
> 
> if (VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail))
> 
>         page_tail->private = 0;
> 

That would mean that we eventually compile out the runtime check when
CONFIG_DEBUG_VM is disabled:

#define VM_WARN_ON_ONCE_PAGE(cond, page)  BUILD_BUG_ON_INVALID(cond)
Yosry Ahmed Aug. 23, 2023, 3:21 p.m. UTC | #7
On Wed, Aug 23, 2023 at 8:17 AM David Hildenbrand <david@redhat.com> wrote:
>
> On 23.08.23 17:12, Yosry Ahmed wrote:
> > On Mon, Aug 21, 2023 at 9:09 AM David Hildenbrand <david@redhat.com> wrote:
> >>
> >> Let's stop using page->private on tail pages, making it possible to
> >> just unconditionally reuse that field in the tail pages of large folios.
> >>
> >> The remaining usage of the private field for THP_SWAP is in the THP
> >> splitting code (mm/huge_memory.c), that we'll handle separately later.
> >>
> >> Update the THP_SWAP documentation and sanity checks in mm_types.h and
> >> __split_huge_page_tail().
> >>
> >> Signed-off-by: David Hildenbrand <david@redhat.com>
> >
> > The mm part looks good to me (with the added fixup):
> >
> > Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
>
> Thanks!
>
> >>   /**
> >> diff --git a/include/linux/swap.h b/include/linux/swap.h
> >> index bb5adc604144..84fe0e94f5cd 100644
> >> --- a/include/linux/swap.h
> >> +++ b/include/linux/swap.h
> >> @@ -339,6 +339,15 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio)
> >>          return entry;
> >>   }
> >>
> >> +static inline swp_entry_t page_swap_entry(struct page *page)
> >> +{
> >> +       struct folio *folio = page_folio(page);
> >> +       swp_entry_t entry = folio_swap_entry(folio);
> >> +
> >> +       entry.val += page - &folio->page;
> >> +       return entry;
> >> +}
> >> +
> >>   static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry)
> >>   {
> >>          folio->private = (void *)entry.val;
> >> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> >> index cc2f65f8cc62..c04702ae71d2 100644
> >> --- a/mm/huge_memory.c
> >> +++ b/mm/huge_memory.c
> >> @@ -2446,18 +2446,15 @@ static void __split_huge_page_tail(struct page *head, int tail,
> >>          page_tail->index = head->index + tail;
> >>
> >>          /*
> >> -        * page->private should not be set in tail pages with the exception
> >> -        * of swap cache pages that store the swp_entry_t in tail pages.
> >> -        * Fix up and warn once if private is unexpectedly set.
> >> -        *
> >> -        * What of 32-bit systems, on which folio->_pincount overlays
> >> -        * head[1].private?  No problem: THP_SWAP is not enabled on 32-bit, and
> >> -        * pincount must be 0 for folio_ref_freeze() to have succeeded.
> >> +        * page->private should not be set in tail pages. Fix up and warn once
> >> +        * if private is unexpectedly set.
> >>           */
> >> -       if (!folio_test_swapcache(page_folio(head))) {
> >> -               VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
> >> +       if (unlikely(page_tail->private)) {
> >> +               VM_WARN_ON_ONCE_PAGE(true, page_tail);
> >>                  page_tail->private = 0;
> >>          }
> >
> > Could probably save a couple of lines here:
> >
> > if (VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail))
> >
> >         page_tail->private = 0;
> >
>
> That would mean that we eventually compile out the runtime check
>
> #define VM_WARN_ON_ONCE_PAGE(cond, page)  BUILD_BUG_ON_INVALID(cond)

I thought the warning would be compiled out but not the check, my bad.

>
> --
> Cheers,
>
> David / dhildenb
>
David Hildenbrand Aug. 23, 2023, 3:26 p.m. UTC | #8
On 23.08.23 17:21, Yosry Ahmed wrote:
> On Wed, Aug 23, 2023 at 8:17 AM David Hildenbrand <david@redhat.com> wrote:
>>
>> On 23.08.23 17:12, Yosry Ahmed wrote:
>>> On Mon, Aug 21, 2023 at 9:09 AM David Hildenbrand <david@redhat.com> wrote:
>>>>
>>>> Let's stop using page->private on tail pages, making it possible to
>>>> just unconditionally reuse that field in the tail pages of large folios.
>>>>
>>>> The remaining usage of the private field for THP_SWAP is in the THP
>>>> splitting code (mm/huge_memory.c), that we'll handle separately later.
>>>>
>>>> Update the THP_SWAP documentation and sanity checks in mm_types.h and
>>>> __split_huge_page_tail().
>>>>
>>>> Signed-off-by: David Hildenbrand <david@redhat.com>
>>>
>>> The mm part looks good to me (with the added fixup):
>>>
>>> Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
>>
>> Thanks!
>>
>>>>    /**
>>>> diff --git a/include/linux/swap.h b/include/linux/swap.h
>>>> index bb5adc604144..84fe0e94f5cd 100644
>>>> --- a/include/linux/swap.h
>>>> +++ b/include/linux/swap.h
>>>> @@ -339,6 +339,15 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio)
>>>>           return entry;
>>>>    }
>>>>
>>>> +static inline swp_entry_t page_swap_entry(struct page *page)
>>>> +{
>>>> +       struct folio *folio = page_folio(page);
>>>> +       swp_entry_t entry = folio_swap_entry(folio);
>>>> +
>>>> +       entry.val += page - &folio->page;
>>>> +       return entry;
>>>> +}
>>>> +
>>>>    static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry)
>>>>    {
>>>>           folio->private = (void *)entry.val;
>>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>>> index cc2f65f8cc62..c04702ae71d2 100644
>>>> --- a/mm/huge_memory.c
>>>> +++ b/mm/huge_memory.c
>>>> @@ -2446,18 +2446,15 @@ static void __split_huge_page_tail(struct page *head, int tail,
>>>>           page_tail->index = head->index + tail;
>>>>
>>>>           /*
>>>> -        * page->private should not be set in tail pages with the exception
>>>> -        * of swap cache pages that store the swp_entry_t in tail pages.
>>>> -        * Fix up and warn once if private is unexpectedly set.
>>>> -        *
>>>> -        * What of 32-bit systems, on which folio->_pincount overlays
>>>> -        * head[1].private?  No problem: THP_SWAP is not enabled on 32-bit, and
>>>> -        * pincount must be 0 for folio_ref_freeze() to have succeeded.
>>>> +        * page->private should not be set in tail pages. Fix up and warn once
>>>> +        * if private is unexpectedly set.
>>>>            */
>>>> -       if (!folio_test_swapcache(page_folio(head))) {
>>>> -               VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
>>>> +       if (unlikely(page_tail->private)) {
>>>> +               VM_WARN_ON_ONCE_PAGE(true, page_tail);
>>>>                   page_tail->private = 0;
>>>>           }
>>>
>>> Could probably save a couple of lines here:
>>>
>>> if (VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail))
>>>
>>>          page_tail->private = 0;
>>>
>>
>> That would mean that we eventually compile out the runtime check
>>
>> #define VM_WARN_ON_ONCE_PAGE(cond, page)  BUILD_BUG_ON_INVALID(cond)
> 
> I thought the warning would be compiled out but not the check, my bad.

I even remembered that VM_WARN_ON_ONCE and friends could/should not be
used in conditionals.

But we do seem to have two users now:

  $ git grep "if (VM_WARN_ON"
mm/mmap.c:              if (VM_WARN_ON_ONCE_MM(vma->vm_end != vmi_end, mm))
mm/mmap.c:              if (VM_WARN_ON_ONCE_MM(vma->vm_start != vmi_start, mm))

But they only perform warning-related actions: dumping the stack, the vma, ...

So if the warnings get compiled out, all the other stuff gets compiled out as well,
which makes sense here.
Yosry Ahmed Aug. 23, 2023, 3:31 p.m. UTC | #9
On Wed, Aug 23, 2023 at 8:26 AM David Hildenbrand <david@redhat.com> wrote:
>
> On 23.08.23 17:21, Yosry Ahmed wrote:
> > On Wed, Aug 23, 2023 at 8:17 AM David Hildenbrand <david@redhat.com> wrote:
> >>
> >> On 23.08.23 17:12, Yosry Ahmed wrote:
> >>> On Mon, Aug 21, 2023 at 9:09 AM David Hildenbrand <david@redhat.com> wrote:
> >>>>
> >>>> Let's stop using page->private on tail pages, making it possible to
> >>>> just unconditionally reuse that field in the tail pages of large folios.
> >>>>
> >>>> The remaining usage of the private field for THP_SWAP is in the THP
> >>>> splitting code (mm/huge_memory.c), that we'll handle separately later.
> >>>>
> >>>> Update the THP_SWAP documentation and sanity checks in mm_types.h and
> >>>> __split_huge_page_tail().
> >>>>
> >>>> Signed-off-by: David Hildenbrand <david@redhat.com>
> >>>
> >>> The mm part looks good to me (with the added fixup):
> >>>
> >>> Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
> >>
> >> Thanks!
> >>
> >>>>    /**
> >>>> diff --git a/include/linux/swap.h b/include/linux/swap.h
> >>>> index bb5adc604144..84fe0e94f5cd 100644
> >>>> --- a/include/linux/swap.h
> >>>> +++ b/include/linux/swap.h
> >>>> @@ -339,6 +339,15 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio)
> >>>>           return entry;
> >>>>    }
> >>>>
> >>>> +static inline swp_entry_t page_swap_entry(struct page *page)
> >>>> +{
> >>>> +       struct folio *folio = page_folio(page);
> >>>> +       swp_entry_t entry = folio_swap_entry(folio);
> >>>> +
> >>>> +       entry.val += page - &folio->page;
> >>>> +       return entry;
> >>>> +}
> >>>> +
> >>>>    static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry)
> >>>>    {
> >>>>           folio->private = (void *)entry.val;
> >>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> >>>> index cc2f65f8cc62..c04702ae71d2 100644
> >>>> --- a/mm/huge_memory.c
> >>>> +++ b/mm/huge_memory.c
> >>>> @@ -2446,18 +2446,15 @@ static void __split_huge_page_tail(struct page *head, int tail,
> >>>>           page_tail->index = head->index + tail;
> >>>>
> >>>>           /*
> >>>> -        * page->private should not be set in tail pages with the exception
> >>>> -        * of swap cache pages that store the swp_entry_t in tail pages.
> >>>> -        * Fix up and warn once if private is unexpectedly set.
> >>>> -        *
> >>>> -        * What of 32-bit systems, on which folio->_pincount overlays
> >>>> -        * head[1].private?  No problem: THP_SWAP is not enabled on 32-bit, and
> >>>> -        * pincount must be 0 for folio_ref_freeze() to have succeeded.
> >>>> +        * page->private should not be set in tail pages. Fix up and warn once
> >>>> +        * if private is unexpectedly set.
> >>>>            */
> >>>> -       if (!folio_test_swapcache(page_folio(head))) {
> >>>> -               VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
> >>>> +       if (unlikely(page_tail->private)) {
> >>>> +               VM_WARN_ON_ONCE_PAGE(true, page_tail);
> >>>>                   page_tail->private = 0;
> >>>>           }
> >>>
> >>> Could probably save a couple of lines here:
> >>>
> >>> if (VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail))
> >>>
> >>>          page_tail->private = 0;
> >>>
> >>
> >> That would mean that we might compile out the runtime check, because
> >> without CONFIG_DEBUG_VM the macro is defined as:
> >>
> >> #define VM_WARN_ON_ONCE_PAGE(cond, page)  BUILD_BUG_ON_INVALID(cond)
> >
> > I thought the warning would be compiled out but not the check, my bad.
>
> I even remembered that VM_WARN_ON_ONCE and friends could/should not be
> used in conditionals.
>
> But we do seem to have two users now:
>
>   $ git grep "if (VM_WARN_ON"
> mm/mmap.c:              if (VM_WARN_ON_ONCE_MM(vma->vm_end != vmi_end, mm))
> mm/mmap.c:              if (VM_WARN_ON_ONCE_MM(vma->vm_start != vmi_start, mm))
>
> But they only perform warning-related actions: dumping the stack, the vma, ...
>
> So if the warnings get compiled out, all the other stuff gets compiled out as well,
> which makes sense here.

Funnily enough, I did the same grep and immediately thought that, since
we have users of that, it's okay (i.e., the check wouldn't be
compiled out). I wasn't thorough enough to actually check what they
are doing :)

>
> --
> Cheers,
>
> David / dhildenb
>
diff mbox series

Patch

diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
index cd508ba80ab1..a31833e3ddc5 100644
--- a/arch/arm64/mm/mteswap.c
+++ b/arch/arm64/mm/mteswap.c
@@ -33,8 +33,9 @@  int mte_save_tags(struct page *page)
 
 	mte_save_page_tags(page_address(page), tag_storage);
 
-	/* page_private contains the swap entry.val set in do_swap_page */
-	ret = xa_store(&mte_pages, page_private(page), tag_storage, GFP_KERNEL);
+	/* lookup the swap entry.val from the page */
+	ret = xa_store(&mte_pages, page_swap_entry(page).val, tag_storage,
+		       GFP_KERNEL);
 	if (WARN(xa_is_err(ret), "Failed to store MTE tags")) {
 		mte_free_tag_storage(tag_storage);
 		return xa_err(ret);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index b9b6c88875b9..61361f1750c3 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -333,11 +333,8 @@  struct folio {
 			atomic_t _pincount;
 #ifdef CONFIG_64BIT
 			unsigned int _folio_nr_pages;
-			/* 4 byte gap here */
-	/* private: the union with struct page is transitional */
-			/* Fix THP_SWAP to not use tail->private */
-			unsigned long _private_1;
 #endif
+	/* private: the union with struct page is transitional */
 		};
 		struct page __page_1;
 	};
@@ -358,9 +355,6 @@  struct folio {
 	/* public: */
 			struct list_head _deferred_list;
 	/* private: the union with struct page is transitional */
-			unsigned long _avail_2a;
-			/* Fix THP_SWAP to not use tail->private */
-			unsigned long _private_2a;
 		};
 		struct page __page_2;
 	};
@@ -385,9 +379,6 @@  FOLIO_MATCH(memcg_data, memcg_data);
 			offsetof(struct page, pg) + sizeof(struct page))
 FOLIO_MATCH(flags, _flags_1);
 FOLIO_MATCH(compound_head, _head_1);
-#ifdef CONFIG_64BIT
-FOLIO_MATCH(private, _private_1);
-#endif
 #undef FOLIO_MATCH
 #define FOLIO_MATCH(pg, fl)						\
 	static_assert(offsetof(struct folio, fl) ==			\
@@ -396,7 +387,6 @@  FOLIO_MATCH(flags, _flags_2);
 FOLIO_MATCH(compound_head, _head_2);
 FOLIO_MATCH(flags, _flags_2a);
 FOLIO_MATCH(compound_head, _head_2a);
-FOLIO_MATCH(private, _private_2a);
 #undef FOLIO_MATCH
 
 /**
diff --git a/include/linux/swap.h b/include/linux/swap.h
index bb5adc604144..84fe0e94f5cd 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -339,6 +339,15 @@  static inline swp_entry_t folio_swap_entry(struct folio *folio)
 	return entry;
 }
 
+static inline swp_entry_t page_swap_entry(struct page *page)
+{
+	struct folio *folio = page_folio(page);
+	swp_entry_t entry = folio_swap_entry(folio);
+
+	entry.val += page - &folio->page;
+	return entry;
+}
+
 static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry)
 {
 	folio->private = (void *)entry.val;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cc2f65f8cc62..c04702ae71d2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2446,18 +2446,15 @@  static void __split_huge_page_tail(struct page *head, int tail,
 	page_tail->index = head->index + tail;
 
 	/*
-	 * page->private should not be set in tail pages with the exception
-	 * of swap cache pages that store the swp_entry_t in tail pages.
-	 * Fix up and warn once if private is unexpectedly set.
-	 *
-	 * What of 32-bit systems, on which folio->_pincount overlays
-	 * head[1].private?  No problem: THP_SWAP is not enabled on 32-bit, and
-	 * pincount must be 0 for folio_ref_freeze() to have succeeded.
+	 * page->private should not be set in tail pages. Fix up and warn once
+	 * if private is unexpectedly set.
 	 */
-	if (!folio_test_swapcache(page_folio(head))) {
-		VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
+	if (unlikely(page_tail->private)) {
+		VM_WARN_ON_ONCE_PAGE(true, page_tail);
 		page_tail->private = 0;
 	}
+	if (PageSwapCache(head))
+		set_page_private(page_tail, (unsigned long)head->private + tail);
 
 	/* Page flags must be visible before we make the page non-compound. */
 	smp_wmb();
diff --git a/mm/memory.c b/mm/memory.c
index d003076b218d..ff13242c1589 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3882,7 +3882,7 @@  vm_fault_t do_swap_page(struct vm_fault *vmf)
 		 * changed.
 		 */
 		if (unlikely(!folio_test_swapcache(folio) ||
-			     page_private(page) != entry.val))
+			     page_swap_entry(page).val != entry.val))
 			goto out_page;
 
 		/*
diff --git a/mm/rmap.c b/mm/rmap.c
index 1f04debdc87a..ec7f8e6c9e48 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1647,7 +1647,7 @@  static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			 */
 			dec_mm_counter(mm, mm_counter(&folio->page));
 		} else if (folio_test_anon(folio)) {
-			swp_entry_t entry = { .val = page_private(subpage) };
+			swp_entry_t entry = page_swap_entry(subpage);
 			pte_t swp_pte;
 			/*
 			 * Store the swap location in the pte.
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 01f15139b7d9..2f2417810052 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -100,6 +100,7 @@  int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
 
 	folio_ref_add(folio, nr);
 	folio_set_swapcache(folio);
+	folio_set_swap_entry(folio, entry);
 
 	do {
 		xas_lock_irq(&xas);
@@ -113,7 +114,6 @@  int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
 				if (shadowp)
 					*shadowp = old;
 			}
-			set_page_private(folio_page(folio, i), entry.val + i);
 			xas_store(&xas, folio);
 			xas_next(&xas);
 		}
@@ -154,9 +154,10 @@  void __delete_from_swap_cache(struct folio *folio,
 	for (i = 0; i < nr; i++) {
 		void *entry = xas_store(&xas, shadow);
 		VM_BUG_ON_PAGE(entry != folio, entry);
-		set_page_private(folio_page(folio, i), 0);
 		xas_next(&xas);
 	}
+	entry.val = 0;
+	folio_set_swap_entry(folio, entry);
 	folio_clear_swapcache(folio);
 	address_space->nrpages -= nr;
 	__node_stat_mod_folio(folio, NR_FILE_PAGES, -nr);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index d46933adf789..bd9d904671b9 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3369,7 +3369,7 @@  struct swap_info_struct *swp_swap_info(swp_entry_t entry)
 
 struct swap_info_struct *page_swap_info(struct page *page)
 {
-	swp_entry_t entry = { .val = page_private(page) };
+	swp_entry_t entry = page_swap_entry(page);
 	return swp_swap_info(entry);
 }
 
@@ -3384,7 +3384,7 @@  EXPORT_SYMBOL_GPL(swapcache_mapping);
 
 pgoff_t __page_file_index(struct page *page)
 {
-	swp_entry_t swap = { .val = page_private(page) };
+	swp_entry_t swap = page_swap_entry(page);
 	return swp_offset(swap);
 }
 EXPORT_SYMBOL_GPL(__page_file_index);