Message ID | 20230821160849.531668-2-david@redhat.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm/swap: stop using page->private on tail pages for THP_SWAP + cleanups | expand |
On Mon, Aug 21, 2023 at 06:08:46PM +0200, David Hildenbrand wrote: > diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c > index cd508ba80ab1..a31833e3ddc5 100644 > --- a/arch/arm64/mm/mteswap.c > +++ b/arch/arm64/mm/mteswap.c > @@ -33,8 +33,9 @@ int mte_save_tags(struct page *page) > > mte_save_page_tags(page_address(page), tag_storage); > > - /* page_private contains the swap entry.val set in do_swap_page */ > - ret = xa_store(&mte_pages, page_private(page), tag_storage, GFP_KERNEL); > + /* lookup the swap entry.val from the page */ > + ret = xa_store(&mte_pages, page_swap_entry(page).val, tag_storage, > + GFP_KERNEL); > if (WARN(xa_is_err(ret), "Failed to store MTE tags")) { > mte_free_tag_storage(tag_storage); > return xa_err(ret); For arm64: Acked-by: Catalin Marinas <catalin.marinas@arm.com>
On Mon, Aug 21, 2023 at 9:09 AM David Hildenbrand <david@redhat.com> wrote: > > Let's stop using page->private on tail pages, making it possible to > just unconditionally reuse that field in the tail pages of large folios. > > The remaining usage of the private field for THP_SWAP is in the THP > splitting code (mm/huge_memory.c), that we'll handle separately later. > > Update the THP_SWAP documentation and sanity checks in mm_types.h and > __split_huge_page_tail(). > > Signed-off-by: David Hildenbrand <david@redhat.com> > --- > arch/arm64/mm/mteswap.c | 5 +++-- > include/linux/mm_types.h | 12 +----------- > include/linux/swap.h | 9 +++++++++ > mm/huge_memory.c | 15 ++++++--------- > mm/memory.c | 2 +- > mm/rmap.c | 2 +- > mm/swap_state.c | 5 +++-- > mm/swapfile.c | 4 ++-- > 8 files changed, 26 insertions(+), 28 deletions(-) > > diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c > index cd508ba80ab1..a31833e3ddc5 100644 > --- a/arch/arm64/mm/mteswap.c > +++ b/arch/arm64/mm/mteswap.c > @@ -33,8 +33,9 @@ int mte_save_tags(struct page *page) > > mte_save_page_tags(page_address(page), tag_storage); > > - /* page_private contains the swap entry.val set in do_swap_page */ > - ret = xa_store(&mte_pages, page_private(page), tag_storage, GFP_KERNEL); > + /* lookup the swap entry.val from the page */ > + ret = xa_store(&mte_pages, page_swap_entry(page).val, tag_storage, > + GFP_KERNEL); > if (WARN(xa_is_err(ret), "Failed to store MTE tags")) { > mte_free_tag_storage(tag_storage); > return xa_err(ret); > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h > index b9b6c88875b9..61361f1750c3 100644 > --- a/include/linux/mm_types.h > +++ b/include/linux/mm_types.h > @@ -333,11 +333,8 @@ struct folio { > atomic_t _pincount; > #ifdef CONFIG_64BIT > unsigned int _folio_nr_pages; > - /* 4 byte gap here */ > - /* private: the union with struct page is transitional */ > - /* Fix THP_SWAP to not use tail->private */ > - unsigned long _private_1; > #endif > + /* 
private: the union with struct page is transitional */ > }; > struct page __page_1; > }; > @@ -358,9 +355,6 @@ struct folio { > /* public: */ > struct list_head _deferred_list; > /* private: the union with struct page is transitional */ > - unsigned long _avail_2a; > - /* Fix THP_SWAP to not use tail->private */ > - unsigned long _private_2a; > }; > struct page __page_2; > }; > @@ -385,9 +379,6 @@ FOLIO_MATCH(memcg_data, memcg_data); > offsetof(struct page, pg) + sizeof(struct page)) > FOLIO_MATCH(flags, _flags_1); > FOLIO_MATCH(compound_head, _head_1); > -#ifdef CONFIG_64BIT > -FOLIO_MATCH(private, _private_1); > -#endif > #undef FOLIO_MATCH > #define FOLIO_MATCH(pg, fl) \ > static_assert(offsetof(struct folio, fl) == \ > @@ -396,7 +387,6 @@ FOLIO_MATCH(flags, _flags_2); > FOLIO_MATCH(compound_head, _head_2); > FOLIO_MATCH(flags, _flags_2a); > FOLIO_MATCH(compound_head, _head_2a); > -FOLIO_MATCH(private, _private_2a); > #undef FOLIO_MATCH > > /** > diff --git a/include/linux/swap.h b/include/linux/swap.h > index bb5adc604144..84fe0e94f5cd 100644 > --- a/include/linux/swap.h > +++ b/include/linux/swap.h > @@ -339,6 +339,15 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio) > return entry; > } > > +static inline swp_entry_t page_swap_entry(struct page *page) > +{ > + struct folio *folio = page_folio(page); > + swp_entry_t entry = folio_swap_entry(folio); > + > + entry.val += page - &folio->page; Would it be better to use folio_page_idx() here? 
> + return entry; > +} > + > static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry) > { > folio->private = (void *)entry.val; > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index cc2f65f8cc62..c04702ae71d2 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -2446,18 +2446,15 @@ static void __split_huge_page_tail(struct page *head, int tail, > page_tail->index = head->index + tail; > > /* > - * page->private should not be set in tail pages with the exception > - * of swap cache pages that store the swp_entry_t in tail pages. > - * Fix up and warn once if private is unexpectedly set. > - * > - * What of 32-bit systems, on which folio->_pincount overlays > - * head[1].private? No problem: THP_SWAP is not enabled on 32-bit, and > - * pincount must be 0 for folio_ref_freeze() to have succeeded. > + * page->private should not be set in tail pages. Fix up and warn once > + * if private is unexpectedly set. > */ > - if (!folio_test_swapcache(page_folio(head))) { > - VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail); > + if (unlikely(page_tail->private)) { > + VM_WARN_ON_ONCE_PAGE(true, page_tail); > page_tail->private = 0; > } > + if (PageSwapCache(head)) > + set_page_private(page_tail, (unsigned long)head->private + tail); > > /* Page flags must be visible before we make the page non-compound. */ > smp_wmb(); > diff --git a/mm/memory.c b/mm/memory.c > index d003076b218d..ff13242c1589 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -3882,7 +3882,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) > * changed. 
> */ > if (unlikely(!folio_test_swapcache(folio) || > - page_private(page) != entry.val)) > + page_swap_entry(page).val != entry.val)) > goto out_page; > > /* > diff --git a/mm/rmap.c b/mm/rmap.c > index 1f04debdc87a..ec7f8e6c9e48 100644 > --- a/mm/rmap.c > +++ b/mm/rmap.c > @@ -1647,7 +1647,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, > */ > dec_mm_counter(mm, mm_counter(&folio->page)); > } else if (folio_test_anon(folio)) { > - swp_entry_t entry = { .val = page_private(subpage) }; > + swp_entry_t entry = page_swap_entry(subpage); > pte_t swp_pte; > /* > * Store the swap location in the pte. > diff --git a/mm/swap_state.c b/mm/swap_state.c > index 01f15139b7d9..2f2417810052 100644 > --- a/mm/swap_state.c > +++ b/mm/swap_state.c > @@ -100,6 +100,7 @@ int add_to_swap_cache(struct folio *folio, swp_entry_t entry, > > folio_ref_add(folio, nr); > folio_set_swapcache(folio); > + folio_set_swap_entry(folio, entry); > > do { > xas_lock_irq(&xas); > @@ -113,7 +114,6 @@ int add_to_swap_cache(struct folio *folio, swp_entry_t entry, > if (shadowp) > *shadowp = old; > } > - set_page_private(folio_page(folio, i), entry.val + i); > xas_store(&xas, folio); > xas_next(&xas); > } > @@ -154,9 +154,10 @@ void __delete_from_swap_cache(struct folio *folio, > for (i = 0; i < nr; i++) { > void *entry = xas_store(&xas, shadow); > VM_BUG_ON_PAGE(entry != folio, entry); > - set_page_private(folio_page(folio, i), 0); > xas_next(&xas); > } > + entry.val = 0; > + folio_set_swap_entry(folio, entry); > folio_clear_swapcache(folio); > address_space->nrpages -= nr; > __node_stat_mod_folio(folio, NR_FILE_PAGES, -nr); > diff --git a/mm/swapfile.c b/mm/swapfile.c > index d46933adf789..bd9d904671b9 100644 > --- a/mm/swapfile.c > +++ b/mm/swapfile.c > @@ -3369,7 +3369,7 @@ struct swap_info_struct *swp_swap_info(swp_entry_t entry) > > struct swap_info_struct *page_swap_info(struct page *page) > { > - swp_entry_t entry = { .val = page_private(page) }; > + swp_entry_t 
entry = page_swap_entry(page); > return swp_swap_info(entry); > } > > @@ -3384,7 +3384,7 @@ EXPORT_SYMBOL_GPL(swapcache_mapping); > > pgoff_t __page_file_index(struct page *page) > { > - swp_entry_t swap = { .val = page_private(page) }; > + swp_entry_t swap = page_swap_entry(page); > return swp_offset(swap); > } > EXPORT_SYMBOL_GPL(__page_file_index); > -- > 2.41.0 > >
>> >> +static inline swp_entry_t page_swap_entry(struct page *page) >> +{ >> + struct folio *folio = page_folio(page); >> + swp_entry_t entry = folio_swap_entry(folio); >> + >> + entry.val += page - &folio->page; > > Would it be better to use folio_page_idx() here? Sounds reasonable!
On 21.08.23 18:08, David Hildenbrand wrote: > Let's stop using page->private on tail pages, making it possible to > just unconditionally reuse that field in the tail pages of large folios. > > The remaining usage of the private field for THP_SWAP is in the THP > splitting code (mm/huge_memory.c), that we'll handle separately later. > > Update the THP_SWAP documentation and sanity checks in mm_types.h and > __split_huge_page_tail(). > > Signed-off-by: David Hildenbrand <david@redhat.com> > --- @Andrew, the following change on top: From 2b1fd10ef07c6d47aa9cd0ce10445ab1e1b97361 Mon Sep 17 00:00:00 2001 From: David Hildenbrand <david@redhat.com> Date: Tue, 22 Aug 2023 19:16:55 +0200 Subject: [PATCH] fixup: mm/swap: stop using page->private on tail pages for THP_SWAP Per Yosry, use folio_page_idx(). It shouldn't make a difference for our (THP) use case, but it's certainly cleaner. Signed-off-by: David Hildenbrand <david@redhat.com> --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 84fe0e94f5cd..e5cf58a1cf9e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -344,7 +344,7 @@ static inline swp_entry_t page_swap_entry(struct page *page) struct folio *folio = page_folio(page); swp_entry_t entry = folio_swap_entry(folio); - entry.val += page - &folio->page; + entry.val += folio_page_idx(folio, page); return entry; }
On Mon, Aug 21, 2023 at 9:09 AM David Hildenbrand <david@redhat.com> wrote: > > Let's stop using page->private on tail pages, making it possible to > just unconditionally reuse that field in the tail pages of large folios. > > The remaining usage of the private field for THP_SWAP is in the THP > splitting code (mm/huge_memory.c), that we'll handle separately later. > > Update the THP_SWAP documentation and sanity checks in mm_types.h and > __split_huge_page_tail(). > > Signed-off-by: David Hildenbrand <david@redhat.com> The mm part looks good to me (with the added fixup): Reviewed-by: Yosry Ahmed <yosryahmed@google.com> Minor nit below, not worth a respin, but perhaps if you respin anyway for something else. > --- > arch/arm64/mm/mteswap.c | 5 +++-- > include/linux/mm_types.h | 12 +----------- > include/linux/swap.h | 9 +++++++++ > mm/huge_memory.c | 15 ++++++--------- > mm/memory.c | 2 +- > mm/rmap.c | 2 +- > mm/swap_state.c | 5 +++-- > mm/swapfile.c | 4 ++-- > 8 files changed, 26 insertions(+), 28 deletions(-) > > diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c > index cd508ba80ab1..a31833e3ddc5 100644 > --- a/arch/arm64/mm/mteswap.c > +++ b/arch/arm64/mm/mteswap.c > @@ -33,8 +33,9 @@ int mte_save_tags(struct page *page) > > mte_save_page_tags(page_address(page), tag_storage); > > - /* page_private contains the swap entry.val set in do_swap_page */ > - ret = xa_store(&mte_pages, page_private(page), tag_storage, GFP_KERNEL); > + /* lookup the swap entry.val from the page */ > + ret = xa_store(&mte_pages, page_swap_entry(page).val, tag_storage, > + GFP_KERNEL); > if (WARN(xa_is_err(ret), "Failed to store MTE tags")) { > mte_free_tag_storage(tag_storage); > return xa_err(ret); > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h > index b9b6c88875b9..61361f1750c3 100644 > --- a/include/linux/mm_types.h > +++ b/include/linux/mm_types.h > @@ -333,11 +333,8 @@ struct folio { > atomic_t _pincount; > #ifdef CONFIG_64BIT > unsigned int 
_folio_nr_pages; > - /* 4 byte gap here */ > - /* private: the union with struct page is transitional */ > - /* Fix THP_SWAP to not use tail->private */ > - unsigned long _private_1; > #endif > + /* private: the union with struct page is transitional */ > }; > struct page __page_1; > }; > @@ -358,9 +355,6 @@ struct folio { > /* public: */ > struct list_head _deferred_list; > /* private: the union with struct page is transitional */ > - unsigned long _avail_2a; > - /* Fix THP_SWAP to not use tail->private */ > - unsigned long _private_2a; > }; > struct page __page_2; > }; > @@ -385,9 +379,6 @@ FOLIO_MATCH(memcg_data, memcg_data); > offsetof(struct page, pg) + sizeof(struct page)) > FOLIO_MATCH(flags, _flags_1); > FOLIO_MATCH(compound_head, _head_1); > -#ifdef CONFIG_64BIT > -FOLIO_MATCH(private, _private_1); > -#endif > #undef FOLIO_MATCH > #define FOLIO_MATCH(pg, fl) \ > static_assert(offsetof(struct folio, fl) == \ > @@ -396,7 +387,6 @@ FOLIO_MATCH(flags, _flags_2); > FOLIO_MATCH(compound_head, _head_2); > FOLIO_MATCH(flags, _flags_2a); > FOLIO_MATCH(compound_head, _head_2a); > -FOLIO_MATCH(private, _private_2a); > #undef FOLIO_MATCH > > /** > diff --git a/include/linux/swap.h b/include/linux/swap.h > index bb5adc604144..84fe0e94f5cd 100644 > --- a/include/linux/swap.h > +++ b/include/linux/swap.h > @@ -339,6 +339,15 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio) > return entry; > } > > +static inline swp_entry_t page_swap_entry(struct page *page) > +{ > + struct folio *folio = page_folio(page); > + swp_entry_t entry = folio_swap_entry(folio); > + > + entry.val += page - &folio->page; > + return entry; > +} > + > static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry) > { > folio->private = (void *)entry.val; > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index cc2f65f8cc62..c04702ae71d2 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -2446,18 +2446,15 @@ static void __split_huge_page_tail(struct page 
*head, int tail, > page_tail->index = head->index + tail; > > /* > - * page->private should not be set in tail pages with the exception > - * of swap cache pages that store the swp_entry_t in tail pages. > - * Fix up and warn once if private is unexpectedly set. > - * > - * What of 32-bit systems, on which folio->_pincount overlays > - * head[1].private? No problem: THP_SWAP is not enabled on 32-bit, and > - * pincount must be 0 for folio_ref_freeze() to have succeeded. > + * page->private should not be set in tail pages. Fix up and warn once > + * if private is unexpectedly set. > */ > - if (!folio_test_swapcache(page_folio(head))) { > - VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail); > + if (unlikely(page_tail->private)) { > + VM_WARN_ON_ONCE_PAGE(true, page_tail); > page_tail->private = 0; > } Could probably save a couple of lines here: if (VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail)) page_tail->private = 0; > + if (PageSwapCache(head)) > + set_page_private(page_tail, (unsigned long)head->private + tail); > > /* Page flags must be visible before we make the page non-compound. */ > smp_wmb(); > diff --git a/mm/memory.c b/mm/memory.c > index d003076b218d..ff13242c1589 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -3882,7 +3882,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) > * changed. > */ > if (unlikely(!folio_test_swapcache(folio) || > - page_private(page) != entry.val)) > + page_swap_entry(page).val != entry.val)) > goto out_page; > > /* > diff --git a/mm/rmap.c b/mm/rmap.c > index 1f04debdc87a..ec7f8e6c9e48 100644 > --- a/mm/rmap.c > +++ b/mm/rmap.c > @@ -1647,7 +1647,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, > */ > dec_mm_counter(mm, mm_counter(&folio->page)); > } else if (folio_test_anon(folio)) { > - swp_entry_t entry = { .val = page_private(subpage) }; > + swp_entry_t entry = page_swap_entry(subpage); > pte_t swp_pte; > /* > * Store the swap location in the pte. 
> diff --git a/mm/swap_state.c b/mm/swap_state.c > index 01f15139b7d9..2f2417810052 100644 > --- a/mm/swap_state.c > +++ b/mm/swap_state.c > @@ -100,6 +100,7 @@ int add_to_swap_cache(struct folio *folio, swp_entry_t entry, > > folio_ref_add(folio, nr); > folio_set_swapcache(folio); > + folio_set_swap_entry(folio, entry); > > do { > xas_lock_irq(&xas); > @@ -113,7 +114,6 @@ int add_to_swap_cache(struct folio *folio, swp_entry_t entry, > if (shadowp) > *shadowp = old; > } > - set_page_private(folio_page(folio, i), entry.val + i); > xas_store(&xas, folio); > xas_next(&xas); > } > @@ -154,9 +154,10 @@ void __delete_from_swap_cache(struct folio *folio, > for (i = 0; i < nr; i++) { > void *entry = xas_store(&xas, shadow); > VM_BUG_ON_PAGE(entry != folio, entry); > - set_page_private(folio_page(folio, i), 0); > xas_next(&xas); > } > + entry.val = 0; > + folio_set_swap_entry(folio, entry); > folio_clear_swapcache(folio); > address_space->nrpages -= nr; > __node_stat_mod_folio(folio, NR_FILE_PAGES, -nr); > diff --git a/mm/swapfile.c b/mm/swapfile.c > index d46933adf789..bd9d904671b9 100644 > --- a/mm/swapfile.c > +++ b/mm/swapfile.c > @@ -3369,7 +3369,7 @@ struct swap_info_struct *swp_swap_info(swp_entry_t entry) > > struct swap_info_struct *page_swap_info(struct page *page) > { > - swp_entry_t entry = { .val = page_private(page) }; > + swp_entry_t entry = page_swap_entry(page); > return swp_swap_info(entry); > } > > @@ -3384,7 +3384,7 @@ EXPORT_SYMBOL_GPL(swapcache_mapping); > > pgoff_t __page_file_index(struct page *page) > { > - swp_entry_t swap = { .val = page_private(page) }; > + swp_entry_t swap = page_swap_entry(page); > return swp_offset(swap); > } > EXPORT_SYMBOL_GPL(__page_file_index); > -- > 2.41.0 > >
On 23.08.23 17:12, Yosry Ahmed wrote: > On Mon, Aug 21, 2023 at 9:09 AM David Hildenbrand <david@redhat.com> wrote: >> >> Let's stop using page->private on tail pages, making it possible to >> just unconditionally reuse that field in the tail pages of large folios. >> >> The remaining usage of the private field for THP_SWAP is in the THP >> splitting code (mm/huge_memory.c), that we'll handle separately later. >> >> Update the THP_SWAP documentation and sanity checks in mm_types.h and >> __split_huge_page_tail(). >> >> Signed-off-by: David Hildenbrand <david@redhat.com> > > The mm part looks good to me (with the added fixup): > > Reviewed-by: Yosry Ahmed <yosryahmed@google.com> Thanks! >> /** >> diff --git a/include/linux/swap.h b/include/linux/swap.h >> index bb5adc604144..84fe0e94f5cd 100644 >> --- a/include/linux/swap.h >> +++ b/include/linux/swap.h >> @@ -339,6 +339,15 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio) >> return entry; >> } >> >> +static inline swp_entry_t page_swap_entry(struct page *page) >> +{ >> + struct folio *folio = page_folio(page); >> + swp_entry_t entry = folio_swap_entry(folio); >> + >> + entry.val += page - &folio->page; >> + return entry; >> +} >> + >> static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry) >> { >> folio->private = (void *)entry.val; >> diff --git a/mm/huge_memory.c b/mm/huge_memory.c >> index cc2f65f8cc62..c04702ae71d2 100644 >> --- a/mm/huge_memory.c >> +++ b/mm/huge_memory.c >> @@ -2446,18 +2446,15 @@ static void __split_huge_page_tail(struct page *head, int tail, >> page_tail->index = head->index + tail; >> >> /* >> - * page->private should not be set in tail pages with the exception >> - * of swap cache pages that store the swp_entry_t in tail pages. >> - * Fix up and warn once if private is unexpectedly set. >> - * >> - * What of 32-bit systems, on which folio->_pincount overlays >> - * head[1].private? 
No problem: THP_SWAP is not enabled on 32-bit, and >> - * pincount must be 0 for folio_ref_freeze() to have succeeded. >> + * page->private should not be set in tail pages. Fix up and warn once >> + * if private is unexpectedly set. >> */ >> - if (!folio_test_swapcache(page_folio(head))) { >> - VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail); >> + if (unlikely(page_tail->private)) { >> + VM_WARN_ON_ONCE_PAGE(true, page_tail); >> page_tail->private = 0; >> } > > Could probably save a couple of lines here: > > if (VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail)) > > page_tail->private = 0; > That would mean that we eventually compile out the runtime check #define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond)
On Wed, Aug 23, 2023 at 8:17 AM David Hildenbrand <david@redhat.com> wrote: > > On 23.08.23 17:12, Yosry Ahmed wrote: > > On Mon, Aug 21, 2023 at 9:09 AM David Hildenbrand <david@redhat.com> wrote: > >> > >> Let's stop using page->private on tail pages, making it possible to > >> just unconditionally reuse that field in the tail pages of large folios. > >> > >> The remaining usage of the private field for THP_SWAP is in the THP > >> splitting code (mm/huge_memory.c), that we'll handle separately later. > >> > >> Update the THP_SWAP documentation and sanity checks in mm_types.h and > >> __split_huge_page_tail(). > >> > >> Signed-off-by: David Hildenbrand <david@redhat.com> > > > > The mm part looks good to me (with the added fixup): > > > > Reviewed-by: Yosry Ahmed <yosryahmed@google.com> > > Thanks! > > >> /** > >> diff --git a/include/linux/swap.h b/include/linux/swap.h > >> index bb5adc604144..84fe0e94f5cd 100644 > >> --- a/include/linux/swap.h > >> +++ b/include/linux/swap.h > >> @@ -339,6 +339,15 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio) > >> return entry; > >> } > >> > >> +static inline swp_entry_t page_swap_entry(struct page *page) > >> +{ > >> + struct folio *folio = page_folio(page); > >> + swp_entry_t entry = folio_swap_entry(folio); > >> + > >> + entry.val += page - &folio->page; > >> + return entry; > >> +} > >> + > >> static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry) > >> { > >> folio->private = (void *)entry.val; > >> diff --git a/mm/huge_memory.c b/mm/huge_memory.c > >> index cc2f65f8cc62..c04702ae71d2 100644 > >> --- a/mm/huge_memory.c > >> +++ b/mm/huge_memory.c > >> @@ -2446,18 +2446,15 @@ static void __split_huge_page_tail(struct page *head, int tail, > >> page_tail->index = head->index + tail; > >> > >> /* > >> - * page->private should not be set in tail pages with the exception > >> - * of swap cache pages that store the swp_entry_t in tail pages. 
> >> - * Fix up and warn once if private is unexpectedly set. > >> - * > >> - * What of 32-bit systems, on which folio->_pincount overlays > >> - * head[1].private? No problem: THP_SWAP is not enabled on 32-bit, and > >> - * pincount must be 0 for folio_ref_freeze() to have succeeded. > >> + * page->private should not be set in tail pages. Fix up and warn once > >> + * if private is unexpectedly set. > >> */ > >> - if (!folio_test_swapcache(page_folio(head))) { > >> - VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail); > >> + if (unlikely(page_tail->private)) { > >> + VM_WARN_ON_ONCE_PAGE(true, page_tail); > >> page_tail->private = 0; > >> } > > > > Could probably save a couple of lines here: > > > > if (VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail)) > > > > page_tail->private = 0; > > > > That would mean that we eventually compile out the runtime check > > #define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond) I thought the warning would be compiled out but not the check, my bad. > > -- > Cheers, > > David / dhildenb >
On 23.08.23 17:21, Yosry Ahmed wrote: > On Wed, Aug 23, 2023 at 8:17 AM David Hildenbrand <david@redhat.com> wrote: >> >> On 23.08.23 17:12, Yosry Ahmed wrote: >>> On Mon, Aug 21, 2023 at 9:09 AM David Hildenbrand <david@redhat.com> wrote: >>>> >>>> Let's stop using page->private on tail pages, making it possible to >>>> just unconditionally reuse that field in the tail pages of large folios. >>>> >>>> The remaining usage of the private field for THP_SWAP is in the THP >>>> splitting code (mm/huge_memory.c), that we'll handle separately later. >>>> >>>> Update the THP_SWAP documentation and sanity checks in mm_types.h and >>>> __split_huge_page_tail(). >>>> >>>> Signed-off-by: David Hildenbrand <david@redhat.com> >>> >>> The mm part looks good to me (with the added fixup): >>> >>> Reviewed-by: Yosry Ahmed <yosryahmed@google.com> >> >> Thanks! >> >>>> /** >>>> diff --git a/include/linux/swap.h b/include/linux/swap.h >>>> index bb5adc604144..84fe0e94f5cd 100644 >>>> --- a/include/linux/swap.h >>>> +++ b/include/linux/swap.h >>>> @@ -339,6 +339,15 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio) >>>> return entry; >>>> } >>>> >>>> +static inline swp_entry_t page_swap_entry(struct page *page) >>>> +{ >>>> + struct folio *folio = page_folio(page); >>>> + swp_entry_t entry = folio_swap_entry(folio); >>>> + >>>> + entry.val += page - &folio->page; >>>> + return entry; >>>> +} >>>> + >>>> static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry) >>>> { >>>> folio->private = (void *)entry.val; >>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c >>>> index cc2f65f8cc62..c04702ae71d2 100644 >>>> --- a/mm/huge_memory.c >>>> +++ b/mm/huge_memory.c >>>> @@ -2446,18 +2446,15 @@ static void __split_huge_page_tail(struct page *head, int tail, >>>> page_tail->index = head->index + tail; >>>> >>>> /* >>>> - * page->private should not be set in tail pages with the exception >>>> - * of swap cache pages that store the swp_entry_t in tail pages. 
>>>> - * Fix up and warn once if private is unexpectedly set. >>>> - * >>>> - * What of 32-bit systems, on which folio->_pincount overlays >>>> - * head[1].private? No problem: THP_SWAP is not enabled on 32-bit, and >>>> - * pincount must be 0 for folio_ref_freeze() to have succeeded. >>>> + * page->private should not be set in tail pages. Fix up and warn once >>>> + * if private is unexpectedly set. >>>> */ >>>> - if (!folio_test_swapcache(page_folio(head))) { >>>> - VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail); >>>> + if (unlikely(page_tail->private)) { >>>> + VM_WARN_ON_ONCE_PAGE(true, page_tail); >>>> page_tail->private = 0; >>>> } >>> >>> Could probably save a couple of lines here: >>> >>> if (VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail)) >>> >>> page_tail->private = 0; >>> >> >> That would mean that we eventually compile out the runtime check >> >> #define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond) > > I thought the warning would be compiled out but not the check, my bad. I even remembered that VM_WARN_ON_ONCE and friends could/should not be used in conditionals. But we do seem to have two users now: $ git grep "if (VM_WARN_ON" mm/mmap.c: if (VM_WARN_ON_ONCE_MM(vma->vm_end != vmi_end, mm)) mm/mmap.c: if (VM_WARN_ON_ONCE_MM(vma->vm_start != vmi_start, mm)) But they only do warning-related action, to dump the stack, the vma, ... So if the warnings get compiled out, also all the other stuff gets compiled out as well, which makes sense here.
On Wed, Aug 23, 2023 at 8:26 AM David Hildenbrand <david@redhat.com> wrote: > > On 23.08.23 17:21, Yosry Ahmed wrote: > > On Wed, Aug 23, 2023 at 8:17 AM David Hildenbrand <david@redhat.com> wrote: > >> > >> On 23.08.23 17:12, Yosry Ahmed wrote: > >>> On Mon, Aug 21, 2023 at 9:09 AM David Hildenbrand <david@redhat.com> wrote: > >>>> > >>>> Let's stop using page->private on tail pages, making it possible to > >>>> just unconditionally reuse that field in the tail pages of large folios. > >>>> > >>>> The remaining usage of the private field for THP_SWAP is in the THP > >>>> splitting code (mm/huge_memory.c), that we'll handle separately later. > >>>> > >>>> Update the THP_SWAP documentation and sanity checks in mm_types.h and > >>>> __split_huge_page_tail(). > >>>> > >>>> Signed-off-by: David Hildenbrand <david@redhat.com> > >>> > >>> The mm part looks good to me (with the added fixup): > >>> > >>> Reviewed-by: Yosry Ahmed <yosryahmed@google.com> > >> > >> Thanks! > >> > >>>> /** > >>>> diff --git a/include/linux/swap.h b/include/linux/swap.h > >>>> index bb5adc604144..84fe0e94f5cd 100644 > >>>> --- a/include/linux/swap.h > >>>> +++ b/include/linux/swap.h > >>>> @@ -339,6 +339,15 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio) > >>>> return entry; > >>>> } > >>>> > >>>> +static inline swp_entry_t page_swap_entry(struct page *page) > >>>> +{ > >>>> + struct folio *folio = page_folio(page); > >>>> + swp_entry_t entry = folio_swap_entry(folio); > >>>> + > >>>> + entry.val += page - &folio->page; > >>>> + return entry; > >>>> +} > >>>> + > >>>> static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry) > >>>> { > >>>> folio->private = (void *)entry.val; > >>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c > >>>> index cc2f65f8cc62..c04702ae71d2 100644 > >>>> --- a/mm/huge_memory.c > >>>> +++ b/mm/huge_memory.c > >>>> @@ -2446,18 +2446,15 @@ static void __split_huge_page_tail(struct page *head, int tail, > >>>> page_tail->index = 
head->index + tail; > >>>> > >>>> /* > >>>> - * page->private should not be set in tail pages with the exception > >>>> - * of swap cache pages that store the swp_entry_t in tail pages. > >>>> - * Fix up and warn once if private is unexpectedly set. > >>>> - * > >>>> - * What of 32-bit systems, on which folio->_pincount overlays > >>>> - * head[1].private? No problem: THP_SWAP is not enabled on 32-bit, and > >>>> - * pincount must be 0 for folio_ref_freeze() to have succeeded. > >>>> + * page->private should not be set in tail pages. Fix up and warn once > >>>> + * if private is unexpectedly set. > >>>> */ > >>>> - if (!folio_test_swapcache(page_folio(head))) { > >>>> - VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail); > >>>> + if (unlikely(page_tail->private)) { > >>>> + VM_WARN_ON_ONCE_PAGE(true, page_tail); > >>>> page_tail->private = 0; > >>>> } > >>> > >>> Could probably save a couple of lines here: > >>> > >>> if (VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail)) > >>> > >>> page_tail->private = 0; > >>> > >> > >> That would mean that we eventually compile out the runtime check > >> > >> #define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond) > > > > I thought the warning would be compiled out but not the check, my bad. > > I even remembered that VM_WARN_ON_ONCE and friends could/should not be > used in conditionals. > > But we do seem to have two users now: > > $ git grep "if (VM_WARN_ON" > mm/mmap.c: if (VM_WARN_ON_ONCE_MM(vma->vm_end != vmi_end, mm)) > mm/mmap.c: if (VM_WARN_ON_ONCE_MM(vma->vm_start != vmi_start, mm)) > > But they only do warning-related action, to dump the stack, the vma, ... > > So if the warnings get compiled out, also all the other stuff gets compiled out as well, > which makes sense here. Funny enough, I did the same grep and immediately thought that since we have users of that, then it's okay (i.e the check wouldn't be compiled out). 
I wasn't thorough enough to actually check what they are doing :) > > -- > Cheers, > > David / dhildenb >
diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index cd508ba80ab1..a31833e3ddc5 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -33,8 +33,9 @@ int mte_save_tags(struct page *page) mte_save_page_tags(page_address(page), tag_storage); - /* page_private contains the swap entry.val set in do_swap_page */ - ret = xa_store(&mte_pages, page_private(page), tag_storage, GFP_KERNEL); + /* lookup the swap entry.val from the page */ + ret = xa_store(&mte_pages, page_swap_entry(page).val, tag_storage, + GFP_KERNEL); if (WARN(xa_is_err(ret), "Failed to store MTE tags")) { mte_free_tag_storage(tag_storage); return xa_err(ret); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b9b6c88875b9..61361f1750c3 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -333,11 +333,8 @@ struct folio { atomic_t _pincount; #ifdef CONFIG_64BIT unsigned int _folio_nr_pages; - /* 4 byte gap here */ - /* private: the union with struct page is transitional */ - /* Fix THP_SWAP to not use tail->private */ - unsigned long _private_1; #endif + /* private: the union with struct page is transitional */ }; struct page __page_1; }; @@ -358,9 +355,6 @@ struct folio { /* public: */ struct list_head _deferred_list; /* private: the union with struct page is transitional */ - unsigned long _avail_2a; - /* Fix THP_SWAP to not use tail->private */ - unsigned long _private_2a; }; struct page __page_2; }; @@ -385,9 +379,6 @@ FOLIO_MATCH(memcg_data, memcg_data); offsetof(struct page, pg) + sizeof(struct page)) FOLIO_MATCH(flags, _flags_1); FOLIO_MATCH(compound_head, _head_1); -#ifdef CONFIG_64BIT -FOLIO_MATCH(private, _private_1); -#endif #undef FOLIO_MATCH #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct folio, fl) == \ @@ -396,7 +387,6 @@ FOLIO_MATCH(flags, _flags_2); FOLIO_MATCH(compound_head, _head_2); FOLIO_MATCH(flags, _flags_2a); FOLIO_MATCH(compound_head, _head_2a); -FOLIO_MATCH(private, _private_2a); #undef FOLIO_MATCH 
/** diff --git a/include/linux/swap.h b/include/linux/swap.h index bb5adc604144..84fe0e94f5cd 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -339,6 +339,15 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio) return entry; } +static inline swp_entry_t page_swap_entry(struct page *page) +{ + struct folio *folio = page_folio(page); + swp_entry_t entry = folio_swap_entry(folio); + + entry.val += page - &folio->page; + return entry; +} + static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry) { folio->private = (void *)entry.val; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index cc2f65f8cc62..c04702ae71d2 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2446,18 +2446,15 @@ static void __split_huge_page_tail(struct page *head, int tail, page_tail->index = head->index + tail; /* - * page->private should not be set in tail pages with the exception - * of swap cache pages that store the swp_entry_t in tail pages. - * Fix up and warn once if private is unexpectedly set. - * - * What of 32-bit systems, on which folio->_pincount overlays - * head[1].private? No problem: THP_SWAP is not enabled on 32-bit, and - * pincount must be 0 for folio_ref_freeze() to have succeeded. + * page->private should not be set in tail pages. Fix up and warn once + * if private is unexpectedly set. */ - if (!folio_test_swapcache(page_folio(head))) { - VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail); + if (unlikely(page_tail->private)) { + VM_WARN_ON_ONCE_PAGE(true, page_tail); page_tail->private = 0; } + if (PageSwapCache(head)) + set_page_private(page_tail, (unsigned long)head->private + tail); /* Page flags must be visible before we make the page non-compound. */ smp_wmb(); diff --git a/mm/memory.c b/mm/memory.c index d003076b218d..ff13242c1589 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3882,7 +3882,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) * changed. 
*/ if (unlikely(!folio_test_swapcache(folio) || - page_private(page) != entry.val)) + page_swap_entry(page).val != entry.val)) goto out_page; /* diff --git a/mm/rmap.c b/mm/rmap.c index 1f04debdc87a..ec7f8e6c9e48 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1647,7 +1647,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, */ dec_mm_counter(mm, mm_counter(&folio->page)); } else if (folio_test_anon(folio)) { - swp_entry_t entry = { .val = page_private(subpage) }; + swp_entry_t entry = page_swap_entry(subpage); pte_t swp_pte; /* * Store the swap location in the pte. diff --git a/mm/swap_state.c b/mm/swap_state.c index 01f15139b7d9..2f2417810052 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -100,6 +100,7 @@ int add_to_swap_cache(struct folio *folio, swp_entry_t entry, folio_ref_add(folio, nr); folio_set_swapcache(folio); + folio_set_swap_entry(folio, entry); do { xas_lock_irq(&xas); @@ -113,7 +114,6 @@ int add_to_swap_cache(struct folio *folio, swp_entry_t entry, if (shadowp) *shadowp = old; } - set_page_private(folio_page(folio, i), entry.val + i); xas_store(&xas, folio); xas_next(&xas); } @@ -154,9 +154,10 @@ void __delete_from_swap_cache(struct folio *folio, for (i = 0; i < nr; i++) { void *entry = xas_store(&xas, shadow); VM_BUG_ON_PAGE(entry != folio, entry); - set_page_private(folio_page(folio, i), 0); xas_next(&xas); } + entry.val = 0; + folio_set_swap_entry(folio, entry); folio_clear_swapcache(folio); address_space->nrpages -= nr; __node_stat_mod_folio(folio, NR_FILE_PAGES, -nr); diff --git a/mm/swapfile.c b/mm/swapfile.c index d46933adf789..bd9d904671b9 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3369,7 +3369,7 @@ struct swap_info_struct *swp_swap_info(swp_entry_t entry) struct swap_info_struct *page_swap_info(struct page *page) { - swp_entry_t entry = { .val = page_private(page) }; + swp_entry_t entry = page_swap_entry(page); return swp_swap_info(entry); } @@ -3384,7 +3384,7 @@ EXPORT_SYMBOL_GPL(swapcache_mapping); 
pgoff_t __page_file_index(struct page *page) { - swp_entry_t swap = { .val = page_private(page) }; + swp_entry_t swap = page_swap_entry(page); return swp_offset(swap); } EXPORT_SYMBOL_GPL(__page_file_index);
Let's stop using page->private on tail pages, making it possible to just unconditionally reuse that field in the tail pages of large folios. The remaining usage of the private field for THP_SWAP is in the THP splitting code (mm/huge_memory.c), which we'll handle separately later. Update the THP_SWAP documentation and sanity checks in mm_types.h and __split_huge_page_tail(). Signed-off-by: David Hildenbrand <david@redhat.com> --- arch/arm64/mm/mteswap.c | 5 +++-- include/linux/mm_types.h | 12 +----------- include/linux/swap.h | 9 +++++++++ mm/huge_memory.c | 15 ++++++--------- mm/memory.c | 2 +- mm/rmap.c | 2 +- mm/swap_state.c | 5 +++-- mm/swapfile.c | 4 ++-- 8 files changed, 26 insertions(+), 28 deletions(-)