diff mbox series

[5/9] mm: Turn folio_test_hugetlb into a PageType

Message ID 20240321142448.1645400-6-willy@infradead.org (mailing list archive)
State New
Headers show
Series Various significant MM patches | expand

Commit Message

Matthew Wilcox March 21, 2024, 2:24 p.m. UTC
The current folio_test_hugetlb() can be fooled by a concurrent folio split
into returning true for a folio which has never belonged to hugetlbfs.
This can't happen if the caller holds a refcount on it, but we have a
few places (memory-failure, compaction, procfs) which do not and should
not take a speculative reference.

Since hugetlb pages do not use individual page mapcounts (they are always
fully mapped and use the entire_mapcount field to record the number
of mappings), the PageType field is available now that page_mapcount()
ignores the value in this field.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
---
 include/linux/page-flags.h     | 70 ++++++++++++++++------------------
 include/trace/events/mmflags.h |  1 +
 mm/hugetlb.c                   | 22 ++---------
 3 files changed, 37 insertions(+), 56 deletions(-)

Comments

Vlastimil Babka March 22, 2024, 10:19 a.m. UTC | #1
On 3/21/24 15:24, Matthew Wilcox (Oracle) wrote:
> The current folio_test_hugetlb() can be fooled by a concurrent folio split
> into returning true for a folio which has never belonged to hugetlbfs.
> This can't happen if the caller holds a refcount on it, but we have a
> few places (memory-failure, compaction, procfs) which do not and should
> not take a speculative reference.

Should we add metadata wrt closing the bug report from Luis?

https://lore.kernel.org/all/8fa1c95c-4749-33dd-42ba-243e492ab109@suse.cz/

I assume this wouldn't be fun wrt stable...

> Since hugetlb pages do not use individual page mapcounts (they are always
> fully mapped and use the entire_mapcount field to record the number

Wasn't there some discussions to allow partial mappings of hugetlb? What
would be the implications?

> of mappings), the PageType field is available now that page_mapcount()
> ignores the value in this field.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Reviewed-by: David Hildenbrand <david@redhat.com>

Other than that,
Acked-by: Vlastimil Babka <vbabka@suse.cz>

> ---
>  include/linux/page-flags.h     | 70 ++++++++++++++++------------------
>  include/trace/events/mmflags.h |  1 +
>  mm/hugetlb.c                   | 22 ++---------
>  3 files changed, 37 insertions(+), 56 deletions(-)
> 
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index 5852f967c640..6fb3cd42ee59 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -190,7 +190,6 @@ enum pageflags {
>  
>  	/* At least one page in this folio has the hwpoison flag set */
>  	PG_has_hwpoisoned = PG_error,
> -	PG_hugetlb = PG_active,
>  	PG_large_rmappable = PG_workingset, /* anon or file-backed */
>  };
>  
> @@ -876,29 +875,6 @@ FOLIO_FLAG_FALSE(large_rmappable)
>  
>  #define PG_head_mask ((1UL << PG_head))
>  
> -#ifdef CONFIG_HUGETLB_PAGE
> -int PageHuge(const struct page *page);
> -SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
> -CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
> -
> -/**
> - * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs
> - * @folio: The folio to test.
> - *
> - * Context: Any context.  Caller should have a reference on the folio to
> - * prevent it from being turned into a tail page.
> - * Return: True for hugetlbfs folios, false for anon folios or folios
> - * belonging to other filesystems.
> - */
> -static inline bool folio_test_hugetlb(const struct folio *folio)
> -{
> -	return folio_test_large(folio) &&
> -		test_bit(PG_hugetlb, const_folio_flags(folio, 1));
> -}
> -#else
> -TESTPAGEFLAG_FALSE(Huge, hugetlb)
> -#endif
> -
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  /*
>   * PageHuge() only returns true for hugetlbfs pages, but not for
> @@ -954,18 +930,6 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
>  	TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
>  #endif
>  
> -/*
> - * Check if a page is currently marked HWPoisoned. Note that this check is
> - * best effort only and inherently racy: there is no way to synchronize with
> - * failing hardware.
> - */
> -static inline bool is_page_hwpoison(struct page *page)
> -{
> -	if (PageHWPoison(page))
> -		return true;
> -	return PageHuge(page) && PageHWPoison(compound_head(page));
> -}
> -
>  /*
>   * For pages that are never mapped to userspace (and aren't PageSlab),
>   * page_type may be used.  Because it is initialised to -1, we invert the
> @@ -982,6 +946,7 @@ static inline bool is_page_hwpoison(struct page *page)
>  #define PG_offline	0x00000100
>  #define PG_table	0x00000200
>  #define PG_guard	0x00000400
> +#define PG_hugetlb	0x00000800
>  
>  #define PageType(page, flag)						\
>  	((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
> @@ -1076,6 +1041,37 @@ PAGE_TYPE_OPS(Table, table, pgtable)
>   */
>  PAGE_TYPE_OPS(Guard, guard, guard)
>  
> +#ifdef CONFIG_HUGETLB_PAGE
> +FOLIO_TYPE_OPS(hugetlb, hugetlb)
> +#else
> +FOLIO_TEST_FLAG_FALSE(hugetlb)
> +#endif
> +
> +/**
> + * PageHuge - Determine if the page belongs to hugetlbfs
> + * @page: The page to test.
> + *
> + * Context: Any context.
> + * Return: True for hugetlbfs pages, false for anon pages or pages
> + * belonging to other filesystems.
> + */
> +static inline bool PageHuge(const struct page *page)
> +{
> +	return folio_test_hugetlb(page_folio(page));
> +}
> +
> +/*
> + * Check if a page is currently marked HWPoisoned. Note that this check is
> + * best effort only and inherently racy: there is no way to synchronize with
> + * failing hardware.
> + */
> +static inline bool is_page_hwpoison(struct page *page)
> +{
> +	if (PageHWPoison(page))
> +		return true;
> +	return PageHuge(page) && PageHWPoison(compound_head(page));
> +}
> +
>  extern bool is_free_buddy_page(struct page *page);
>  
>  PAGEFLAG(Isolated, isolated, PF_ANY);
> @@ -1142,7 +1138,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
>   */
>  #define PAGE_FLAGS_SECOND						\
>  	(0xffUL /* order */		| 1UL << PG_has_hwpoisoned |	\
> -	 1UL << PG_hugetlb		| 1UL << PG_large_rmappable)
> +	 1UL << PG_large_rmappable)
>  
>  #define PAGE_FLAGS_PRIVATE				\
>  	(1UL << PG_private | 1UL << PG_private_2)
> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
> index d801409b33cf..d55e53ac91bd 100644
> --- a/include/trace/events/mmflags.h
> +++ b/include/trace/events/mmflags.h
> @@ -135,6 +135,7 @@ IF_HAVE_PG_ARCH_X(arch_3)
>  #define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) }
>  
>  #define __def_pagetype_names						\
> +	DEF_PAGETYPE_NAME(hugetlb),					\
>  	DEF_PAGETYPE_NAME(offline),					\
>  	DEF_PAGETYPE_NAME(guard),					\
>  	DEF_PAGETYPE_NAME(table),					\
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 7e9a766059aa..bdcbb62096cf 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1624,7 +1624,7 @@ static inline void __clear_hugetlb_destructor(struct hstate *h,
>  {
>  	lockdep_assert_held(&hugetlb_lock);
>  
> -	folio_clear_hugetlb(folio);
> +	__folio_clear_hugetlb(folio);
>  }
>  
>  /*
> @@ -1711,7 +1711,7 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
>  		h->surplus_huge_pages_node[nid]++;
>  	}
>  
> -	folio_set_hugetlb(folio);
> +	__folio_set_hugetlb(folio);
>  	folio_change_private(folio, NULL);
>  	/*
>  	 * We have to set hugetlb_vmemmap_optimized again as above
> @@ -2050,7 +2050,7 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid)
>  
>  static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
>  {
> -	folio_set_hugetlb(folio);
> +	__folio_set_hugetlb(folio);
>  	INIT_LIST_HEAD(&folio->lru);
>  	hugetlb_set_folio_subpool(folio, NULL);
>  	set_hugetlb_cgroup(folio, NULL);
> @@ -2160,22 +2160,6 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
>  	return __prep_compound_gigantic_folio(folio, order, true);
>  }
>  
> -/*
> - * PageHuge() only returns true for hugetlbfs pages, but not for normal or
> - * transparent huge pages.  See the PageTransHuge() documentation for more
> - * details.
> - */
> -int PageHuge(const struct page *page)
> -{
> -	const struct folio *folio;
> -
> -	if (!PageCompound(page))
> -		return 0;
> -	folio = page_folio(page);
> -	return folio_test_hugetlb(folio);
> -}
> -EXPORT_SYMBOL_GPL(PageHuge);
> -
>  /*
>   * Find and lock address space (mapping) in write mode.
>   *
David Hildenbrand March 22, 2024, 3:06 p.m. UTC | #2
On 22.03.24 11:19, Vlastimil Babka wrote:
> On 3/21/24 15:24, Matthew Wilcox (Oracle) wrote:
>> The current folio_test_hugetlb() can be fooled by a concurrent folio split
>> into returning true for a folio which has never belonged to hugetlbfs.
>> This can't happen if the caller holds a refcount on it, but we have a
>> few places (memory-failure, compaction, procfs) which do not and should
>> not take a speculative reference.
> 
> Should we add metadata wrt closing the bug report from Luis?
> 
> https://lore.kernel.org/all/8fa1c95c-4749-33dd-42ba-243e492ab109@suse.cz/
> 
> I assume this wouldn't be fun wrt stable...
> 
>> Since hugetlb pages do not use individual page mapcounts (they are always
>> fully mapped and use the entire_mapcount field to record the number
> 
> Wasn't there some discussions to allow partial mappings of hugetlb? What
> would be the implications?

If we ever go that path, we really should avoid messing with any 
subpages right from the start. We should make it work using a single 
total mapcount per folio.

Anyhow, that's stuff for the future.
Matthew Wilcox March 23, 2024, 3:24 a.m. UTC | #3
On Fri, Mar 22, 2024 at 11:19:34AM +0100, Vlastimil Babka wrote:
> Should we add metadata wrt closing the bug report from Luis?
> 
> https://lore.kernel.org/all/8fa1c95c-4749-33dd-42ba-243e492ab109@suse.cz/

Probably a good idea.

> I assume this wouldn't be fun wrt stable...

I don't think it should be too bad?  I think we only need to backport it
as far as v6.6 when I got rid of folio->dtor.  Yes, it's unreliable
before that, but it doesn't cause crashes, just bad decisions.

> > Since hugetlb pages do not use individual page mapcounts (they are always
> > fully mapped and use the entire_mapcount field to record the number
> 
> Wasn't there some discussions to allow partial mappings of hugetlb? What
> would be the implications?

I think I'm hammering another nail into that coffin.  As I understand
it, everyone has given up on that proposal and they're looking to make
THP more reliable so they can use THP.  See Yu Zhao's recent proposals.
Vlastimil Babka March 25, 2024, 7:57 a.m. UTC | #4
On 3/21/24 15:24, Matthew Wilcox (Oracle) wrote:
> The current folio_test_hugetlb() can be fooled by a concurrent folio split
> into returning true for a folio which has never belonged to hugetlbfs.
> This can't happen if the caller holds a refcount on it, but we have a
> few places (memory-failure, compaction, procfs) which do not and should
> not take a speculative reference.

In compaction and with CONFIG_DEBUG_VM enabled, the current implementation
can result in an oops, as reported by Luis. This happens since 9c5ccf2db04b
("mm: remove HUGETLB_PAGE_DTOR") effectively added some VM_BUG_ON() checks
in the PageHuge() testing path.

> Since hugetlb pages do not use individual page mapcounts (they are always
> fully mapped and use the entire_mapcount field to record the number
> of mappings), the PageType field is available now that page_mapcount()
> ignores the value in this field.

Reported-by: Luis Chamberlain <mcgrof@kernel.org>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218227
Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR")
Cc: <stable@vger.kernel.org>

> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Reviewed-by: David Hildenbrand <david@redhat.com>
> ---
>  include/linux/page-flags.h     | 70 ++++++++++++++++------------------
>  include/trace/events/mmflags.h |  1 +
>  mm/hugetlb.c                   | 22 ++---------
>  3 files changed, 37 insertions(+), 56 deletions(-)
> 
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index 5852f967c640..6fb3cd42ee59 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -190,7 +190,6 @@ enum pageflags {
>  
>  	/* At least one page in this folio has the hwpoison flag set */
>  	PG_has_hwpoisoned = PG_error,
> -	PG_hugetlb = PG_active,
>  	PG_large_rmappable = PG_workingset, /* anon or file-backed */
>  };
>  
> @@ -876,29 +875,6 @@ FOLIO_FLAG_FALSE(large_rmappable)
>  
>  #define PG_head_mask ((1UL << PG_head))
>  
> -#ifdef CONFIG_HUGETLB_PAGE
> -int PageHuge(const struct page *page);
> -SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
> -CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
> -
> -/**
> - * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs
> - * @folio: The folio to test.
> - *
> - * Context: Any context.  Caller should have a reference on the folio to
> - * prevent it from being turned into a tail page.
> - * Return: True for hugetlbfs folios, false for anon folios or folios
> - * belonging to other filesystems.
> - */
> -static inline bool folio_test_hugetlb(const struct folio *folio)
> -{
> -	return folio_test_large(folio) &&
> -		test_bit(PG_hugetlb, const_folio_flags(folio, 1));
> -}
> -#else
> -TESTPAGEFLAG_FALSE(Huge, hugetlb)
> -#endif
> -
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  /*
>   * PageHuge() only returns true for hugetlbfs pages, but not for
> @@ -954,18 +930,6 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
>  	TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
>  #endif
>  
> -/*
> - * Check if a page is currently marked HWPoisoned. Note that this check is
> - * best effort only and inherently racy: there is no way to synchronize with
> - * failing hardware.
> - */
> -static inline bool is_page_hwpoison(struct page *page)
> -{
> -	if (PageHWPoison(page))
> -		return true;
> -	return PageHuge(page) && PageHWPoison(compound_head(page));
> -}
> -
>  /*
>   * For pages that are never mapped to userspace (and aren't PageSlab),
>   * page_type may be used.  Because it is initialised to -1, we invert the
> @@ -982,6 +946,7 @@ static inline bool is_page_hwpoison(struct page *page)
>  #define PG_offline	0x00000100
>  #define PG_table	0x00000200
>  #define PG_guard	0x00000400
> +#define PG_hugetlb	0x00000800
>  
>  #define PageType(page, flag)						\
>  	((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
> @@ -1076,6 +1041,37 @@ PAGE_TYPE_OPS(Table, table, pgtable)
>   */
>  PAGE_TYPE_OPS(Guard, guard, guard)
>  
> +#ifdef CONFIG_HUGETLB_PAGE
> +FOLIO_TYPE_OPS(hugetlb, hugetlb)
> +#else
> +FOLIO_TEST_FLAG_FALSE(hugetlb)
> +#endif
> +
> +/**
> + * PageHuge - Determine if the page belongs to hugetlbfs
> + * @page: The page to test.
> + *
> + * Context: Any context.
> + * Return: True for hugetlbfs pages, false for anon pages or pages
> + * belonging to other filesystems.
> + */
> +static inline bool PageHuge(const struct page *page)
> +{
> +	return folio_test_hugetlb(page_folio(page));
> +}
> +
> +/*
> + * Check if a page is currently marked HWPoisoned. Note that this check is
> + * best effort only and inherently racy: there is no way to synchronize with
> + * failing hardware.
> + */
> +static inline bool is_page_hwpoison(struct page *page)
> +{
> +	if (PageHWPoison(page))
> +		return true;
> +	return PageHuge(page) && PageHWPoison(compound_head(page));
> +}
> +
>  extern bool is_free_buddy_page(struct page *page);
>  
>  PAGEFLAG(Isolated, isolated, PF_ANY);
> @@ -1142,7 +1138,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
>   */
>  #define PAGE_FLAGS_SECOND						\
>  	(0xffUL /* order */		| 1UL << PG_has_hwpoisoned |	\
> -	 1UL << PG_hugetlb		| 1UL << PG_large_rmappable)
> +	 1UL << PG_large_rmappable)
>  
>  #define PAGE_FLAGS_PRIVATE				\
>  	(1UL << PG_private | 1UL << PG_private_2)
> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
> index d801409b33cf..d55e53ac91bd 100644
> --- a/include/trace/events/mmflags.h
> +++ b/include/trace/events/mmflags.h
> @@ -135,6 +135,7 @@ IF_HAVE_PG_ARCH_X(arch_3)
>  #define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) }
>  
>  #define __def_pagetype_names						\
> +	DEF_PAGETYPE_NAME(hugetlb),					\
>  	DEF_PAGETYPE_NAME(offline),					\
>  	DEF_PAGETYPE_NAME(guard),					\
>  	DEF_PAGETYPE_NAME(table),					\
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 7e9a766059aa..bdcbb62096cf 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1624,7 +1624,7 @@ static inline void __clear_hugetlb_destructor(struct hstate *h,
>  {
>  	lockdep_assert_held(&hugetlb_lock);
>  
> -	folio_clear_hugetlb(folio);
> +	__folio_clear_hugetlb(folio);
>  }
>  
>  /*
> @@ -1711,7 +1711,7 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
>  		h->surplus_huge_pages_node[nid]++;
>  	}
>  
> -	folio_set_hugetlb(folio);
> +	__folio_set_hugetlb(folio);
>  	folio_change_private(folio, NULL);
>  	/*
>  	 * We have to set hugetlb_vmemmap_optimized again as above
> @@ -2050,7 +2050,7 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid)
>  
>  static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
>  {
> -	folio_set_hugetlb(folio);
> +	__folio_set_hugetlb(folio);
>  	INIT_LIST_HEAD(&folio->lru);
>  	hugetlb_set_folio_subpool(folio, NULL);
>  	set_hugetlb_cgroup(folio, NULL);
> @@ -2160,22 +2160,6 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
>  	return __prep_compound_gigantic_folio(folio, order, true);
>  }
>  
> -/*
> - * PageHuge() only returns true for hugetlbfs pages, but not for normal or
> - * transparent huge pages.  See the PageTransHuge() documentation for more
> - * details.
> - */
> -int PageHuge(const struct page *page)
> -{
> -	const struct folio *folio;
> -
> -	if (!PageCompound(page))
> -		return 0;
> -	folio = page_folio(page);
> -	return folio_test_hugetlb(folio);
> -}
> -EXPORT_SYMBOL_GPL(PageHuge);
> -
>  /*
>   * Find and lock address space (mapping) in write mode.
>   *
Matthew Wilcox March 25, 2024, 3:14 p.m. UTC | #5
On Thu, Mar 21, 2024 at 02:24:43PM +0000, Matthew Wilcox (Oracle) wrote:
> The current folio_test_hugetlb() can be fooled by a concurrent folio split
> into returning true for a folio which has never belonged to hugetlbfs.
> This can't happen if the caller holds a refcount on it, but we have a
> few places (memory-failure, compaction, procfs) which do not and should
> not take a speculative reference.
> 
> Since hugetlb pages do not use individual page mapcounts (they are always
> fully mapped and use the entire_mapcount field to record the number
> of mappings), the PageType field is available now that page_mapcount()
> ignores the value in this field.

Update vmcoreinfo:

diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c
index f95516cd45bb..41372f5d5c19 100644
--- a/kernel/vmcore_info.c
+++ b/kernel/vmcore_info.c
@@ -205,11 +205,10 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_NUMBER(PG_head_mask);
 #define PAGE_BUDDY_MAPCOUNT_VALUE	(~PG_buddy)
 	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
-#ifdef CONFIG_HUGETLB_PAGE
-	VMCOREINFO_NUMBER(PG_hugetlb);
+#define PAGE_HUGETLB_MAPCOUNT_VALUE(	(~PG_hugetlb)
+	VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE);
 #define PAGE_OFFLINE_MAPCOUNT_VALUE	(~PG_offline)
 	VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
-#endif
 
 #ifdef CONFIG_KALLSYMS
 	VMCOREINFO_SYMBOL(kallsyms_names);
Matthew Wilcox March 25, 2024, 3:18 p.m. UTC | #6
On Mon, Mar 25, 2024 at 03:14:53PM +0000, Matthew Wilcox wrote:
> Update vmcoreinfo:

Urgh, a typo slipped in.  Use this instead:

diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c
index f95516cd45bb..41372f5d5c19 100644
--- a/kernel/vmcore_info.c
+++ b/kernel/vmcore_info.c
@@ -205,11 +205,10 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_NUMBER(PG_head_mask);
 #define PAGE_BUDDY_MAPCOUNT_VALUE	(~PG_buddy)
 	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
-#ifdef CONFIG_HUGETLB_PAGE
-	VMCOREINFO_NUMBER(PG_hugetlb);
+#define PAGE_HUGETLB_MAPCOUNT_VALUE(	(~PG_hugetlb)
+	VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE);
 #define PAGE_OFFLINE_MAPCOUNT_VALUE	(~PG_offline)
 	VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
-#endif
 
 #ifdef CONFIG_KALLSYMS
 	VMCOREINFO_SYMBOL(kallsyms_names);
Matthew Wilcox March 25, 2024, 3:33 p.m. UTC | #7
On Mon, Mar 25, 2024 at 03:18:39PM +0000, Matthew Wilcox wrote:
> On Mon, Mar 25, 2024 at 03:14:53PM +0000, Matthew Wilcox wrote:
> > Update vmcoreinfo:
> 
> Urgh, a typo slipped in.  use this instead:

*sigh*.  Can I go back to bed now please?

diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c
index f95516cd45bb..23c125c2e243 100644
--- a/kernel/vmcore_info.c
+++ b/kernel/vmcore_info.c
@@ -205,11 +205,10 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_NUMBER(PG_head_mask);
 #define PAGE_BUDDY_MAPCOUNT_VALUE	(~PG_buddy)
 	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
-#ifdef CONFIG_HUGETLB_PAGE
-	VMCOREINFO_NUMBER(PG_hugetlb);
+#define PAGE_HUGETLB_MAPCOUNT_VALUE	(~PG_hugetlb)
+	VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE);
 #define PAGE_OFFLINE_MAPCOUNT_VALUE	(~PG_offline)
 	VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
-#endif
 
 #ifdef CONFIG_KALLSYMS
 	VMCOREINFO_SYMBOL(kallsyms_names);
Andrew Morton March 25, 2024, 6:48 p.m. UTC | #8
On Mon, 25 Mar 2024 08:57:52 +0100 Vlastimil Babka <vbabka@suse.cz> wrote:

> On 3/21/24 15:24, Matthew Wilcox (Oracle) wrote:
> > The current folio_test_hugetlb() can be fooled by a concurrent folio split
> > into returning true for a folio which has never belonged to hugetlbfs.
> > This can't happen if the caller holds a refcount on it, but we have a
> > few places (memory-failure, compaction, procfs) which do not and should
> > not take a speculative reference.
> 
> In compaction and with CONFIG_DEBUG_VM enabled, the current implementation
> can result in an oops, as reported by Luis. This happens since 9c5ccf2db04b
> ("mm: remove HUGETLB_PAGE_DTOR") effectively added some VM_BUG_ON() checks
> in the PageHuge() testing path.
> 
> > Since hugetlb pages do not use individual page mapcounts (they are always
> > fully mapped and use the entire_mapcount field to record the number
> > of mappings), the PageType field is available now that page_mapcount()
> > ignores the value in this field.
> 
> Reported-by: Luis Chamberlain <mcgrof@kernel.org>
> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218227
> Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR")
> Cc: <stable@vger.kernel.org>

Thanks.

The patch doesn't work as a standalone thing.

In file included from ./include/linux/mmzone.h:23,
                 from ./include/linux/gfp.h:7,
                 from ./include/linux/slab.h:16,
                 from ./include/linux/crypto.h:17,
                 from arch/x86/kernel/asm-offsets.c:9:
./include/linux/page-flags.h:1021:1: error: return type defaults to 'int' [-Werror=implicit-int]
 1021 | FOLIO_TYPE_OPS(hugetlb, hugetlb)
      | ^~~~~~~~~~~~~~
./include/linux/page-flags.h:1021:1: error: function declaration isn't a prototype [-Werror=strict-prototypes]
./include/linux/page-flags.h: In function 'FOLIO_TYPE_OPS':
./include/linux/page-flags.h:1035:1: error: expected '=', ',', ';', 'asm' or '__attribute__' before '{' token

<a million more>

Matthew, could you please redo this patch (and its vmcore fix) and send
as a standalone -stable patch?  It could be that the "Various
significant MM patches" will need a redo afterwards.
Matthew Wilcox March 25, 2024, 8:41 p.m. UTC | #9
On Mon, Mar 25, 2024 at 11:48:13AM -0700, Andrew Morton wrote:
> On Mon, 25 Mar 2024 08:57:52 +0100 Vlastimil Babka <vbabka@suse.cz> wrote:
> > Reported-by: Luis Chamberlain <mcgrof@kernel.org>
> > Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218227
> > Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR")
> > Cc: <stable@vger.kernel.org>
> 
> Thanks.
> 
> The patch doesn't work as a standalone thing.

No, it depends on both
    mm: support page_mapcount() on page_has_type() pages
    mm: create FOLIO_FLAG_FALSE and FOLIO_TYPE_OPS macros

I was assuming both would get backported as dependencies.  If you want a
standalone patch, something like this would do the trick.

> Matthew, could you please redo this patch (and its vmcore fix) and send
> as a standalone -stable patch?  It could be that the "Various
> significant MM patches" will need a redo afterwards.

I'd rather keep the mapcount patch separate for upstream purposes.
I've build-tested against 6.6.22 with allmodconfig and then with
HUGETLB=n (but otherwise allmodconfig)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index bf5d0b1b16f4..5e15004eab8c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1192,6 +1192,9 @@ static inline int page_mapcount(struct page *page)
 {
 	int mapcount = atomic_read(&page->_mapcount) + 1;
 
+	/* Handle page_has_type() pages */
+	if (mapcount < 0)
+		mapcount = 0;
 	if (unlikely(PageCompound(page)))
 		mapcount += folio_entire_mapcount(page_folio(page));
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5c02720c53a5..4d5f750551c5 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -190,7 +190,6 @@ enum pageflags {
 
 	/* At least one page in this folio has the hwpoison flag set */
 	PG_has_hwpoisoned = PG_error,
-	PG_hugetlb = PG_active,
 	PG_large_rmappable = PG_workingset, /* anon or file-backed */
 };
 
@@ -815,29 +814,6 @@ TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable)
 
 #define PG_head_mask ((1UL << PG_head))
 
-#ifdef CONFIG_HUGETLB_PAGE
-int PageHuge(struct page *page);
-SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
-CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
-
-/**
- * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs
- * @folio: The folio to test.
- *
- * Context: Any context.  Caller should have a reference on the folio to
- * prevent it from being turned into a tail page.
- * Return: True for hugetlbfs folios, false for anon folios or folios
- * belonging to other filesystems.
- */
-static inline bool folio_test_hugetlb(struct folio *folio)
-{
-	return folio_test_large(folio) &&
-		test_bit(PG_hugetlb, folio_flags(folio, 1));
-}
-#else
-TESTPAGEFLAG_FALSE(Huge, hugetlb)
-#endif
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
  * PageHuge() only returns true for hugetlbfs pages, but not for
@@ -893,18 +869,6 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
 	TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
 #endif
 
-/*
- * Check if a page is currently marked HWPoisoned. Note that this check is
- * best effort only and inherently racy: there is no way to synchronize with
- * failing hardware.
- */
-static inline bool is_page_hwpoison(struct page *page)
-{
-	if (PageHWPoison(page))
-		return true;
-	return PageHuge(page) && PageHWPoison(compound_head(page));
-}
-
 /*
  * For pages that are never mapped to userspace (and aren't PageSlab),
  * page_type may be used.  Because it is initialised to -1, we invert the
@@ -921,6 +885,7 @@ static inline bool is_page_hwpoison(struct page *page)
 #define PG_offline	0x00000100
 #define PG_table	0x00000200
 #define PG_guard	0x00000400
+#define PG_hugetlb	0x00000800
 
 #define PageType(page, flag)						\
 	((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
@@ -1012,6 +977,37 @@ PAGE_TYPE_OPS(Table, table, pgtable)
  */
 PAGE_TYPE_OPS(Guard, guard, guard)
 
+#ifdef CONFIG_HUGETLB_PAGE
+PAGE_TYPE_OPS(HeadHugeTLB, hugetlb, hugetlb)
+
+/**
+ * PageHuge - Determine if the folio belongs to hugetlbfs.
+ * @page: The page to test.
+ *
+ * Context: Any context.
+ * Return: True for hugetlbfs folios, false for anon folios or folios
+ * belonging to other filesystems.
+ */
+static inline bool PageHuge(const struct page *page)
+{
+	return folio_test_hugetlb(page_folio(page));
+}
+#else
+TESTPAGEFLAG_FALSE(Huge, hugetlb)
+#endif
+
+/*
+ * Check if a page is currently marked HWPoisoned. Note that this check is
+ * best effort only and inherently racy: there is no way to synchronize with
+ * failing hardware.
+ */
+static inline bool is_page_hwpoison(struct page *page)
+{
+	if (PageHWPoison(page))
+		return true;
+	return PageHuge(page) && PageHWPoison(compound_head(page));
+}
+
 extern bool is_free_buddy_page(struct page *page);
 
 PAGEFLAG(Isolated, isolated, PF_ANY);
@@ -1078,7 +1074,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
  */
 #define PAGE_FLAGS_SECOND						\
 	(0xffUL /* order */		| 1UL << PG_has_hwpoisoned |	\
-	 1UL << PG_hugetlb		| 1UL << PG_large_rmappable)
+	 1UL << PG_large_rmappable)
 
 #define PAGE_FLAGS_PRIVATE				\
 	(1UL << PG_private | 1UL << PG_private_2)
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 1478b9dd05fa..e010618f9326 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -135,6 +135,7 @@ IF_HAVE_PG_ARCH_X(arch_3)
 #define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) }
 
 #define __def_pagetype_names						\
+	DEF_PAGETYPE_NAME(hugetlb),					\
 	DEF_PAGETYPE_NAME(offline),					\
 	DEF_PAGETYPE_NAME(guard),					\
 	DEF_PAGETYPE_NAME(table),					\
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 2f675ef045d4..a2face7fbef8 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -675,8 +675,8 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_NUMBER(PG_head_mask);
 #define PAGE_BUDDY_MAPCOUNT_VALUE	(~PG_buddy)
 	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
-#ifdef CONFIG_HUGETLB_PAGE
-	VMCOREINFO_NUMBER(PG_hugetlb);
+#define PAGE_HUGETLB_MAPCOUNT_VALUE	(~PG_hugetlb)
+	VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE);
 #define PAGE_OFFLINE_MAPCOUNT_VALUE	(~PG_offline)
 	VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
 #endif
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5e6c4d367d33..30b713d330ca 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1630,7 +1630,7 @@ static inline void __clear_hugetlb_destructor(struct hstate *h,
 {
 	lockdep_assert_held(&hugetlb_lock);
 
-	folio_clear_hugetlb(folio);
+	__folio_clear_hugetlb(folio);
 }
 
 /*
@@ -1717,7 +1717,7 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
 		h->surplus_huge_pages_node[nid]++;
 	}
 
-	folio_set_hugetlb(folio);
+	__folio_set_hugetlb(folio);
 	folio_change_private(folio, NULL);
 	/*
 	 * We have to set hugetlb_vmemmap_optimized again as above
@@ -1971,7 +1971,7 @@ static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
 {
 	hugetlb_vmemmap_optimize(h, &folio->page);
 	INIT_LIST_HEAD(&folio->lru);
-	folio_set_hugetlb(folio);
+	__folio_set_hugetlb(folio);
 	hugetlb_set_folio_subpool(folio, NULL);
 	set_hugetlb_cgroup(folio, NULL);
 	set_hugetlb_cgroup_rsvd(folio, NULL);
@@ -2074,22 +2074,6 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
 	return __prep_compound_gigantic_folio(folio, order, true);
 }
 
-/*
- * PageHuge() only returns true for hugetlbfs pages, but not for normal or
- * transparent huge pages.  See the PageTransHuge() documentation for more
- * details.
- */
-int PageHuge(struct page *page)
-{
-	struct folio *folio;
-
-	if (!PageCompound(page))
-		return 0;
-	folio = page_folio(page);
-	return folio_test_hugetlb(folio);
-}
-EXPORT_SYMBOL_GPL(PageHuge);
-
 /*
  * Find and lock address space (mapping) in write mode.
  *
Vlastimil Babka March 25, 2024, 8:47 p.m. UTC | #10
On 3/25/24 9:41 PM, Matthew Wilcox wrote:
> On Mon, Mar 25, 2024 at 11:48:13AM -0700, Andrew Morton wrote:
>> On Mon, 25 Mar 2024 08:57:52 +0100 Vlastimil Babka <vbabka@suse.cz> wrote:
>> > Reported-by: Luis Chamberlain <mcgrof@kernel.org>
>> > Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218227
>> > Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR")
>> > Cc: <stable@vger.kernel.org>
>> 
>> Thanks.
>> 
>> The patch doesn't work as a standalone thing.
> 
> No, it depends on both
>     mm: support page_mapcount() on page_has_type() pages
>     mm: create FOLIO_FLAG_FALSE and FOLIO_TYPE_OPS macros

Stable maintainers are usually fine with dependency patches and these are
not hugely intrusive and risky? We should just order and mark it in a way to
make it all obvious.
diff mbox series

Patch

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5852f967c640..6fb3cd42ee59 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -190,7 +190,6 @@  enum pageflags {
 
 	/* At least one page in this folio has the hwpoison flag set */
 	PG_has_hwpoisoned = PG_error,
-	PG_hugetlb = PG_active,
 	PG_large_rmappable = PG_workingset, /* anon or file-backed */
 };
 
@@ -876,29 +875,6 @@  FOLIO_FLAG_FALSE(large_rmappable)
 
 #define PG_head_mask ((1UL << PG_head))
 
-#ifdef CONFIG_HUGETLB_PAGE
-int PageHuge(const struct page *page);
-SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
-CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
-
-/**
- * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs
- * @folio: The folio to test.
- *
- * Context: Any context.  Caller should have a reference on the folio to
- * prevent it from being turned into a tail page.
- * Return: True for hugetlbfs folios, false for anon folios or folios
- * belonging to other filesystems.
- */
-static inline bool folio_test_hugetlb(const struct folio *folio)
-{
-	return folio_test_large(folio) &&
-		test_bit(PG_hugetlb, const_folio_flags(folio, 1));
-}
-#else
-TESTPAGEFLAG_FALSE(Huge, hugetlb)
-#endif
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
  * PageHuge() only returns true for hugetlbfs pages, but not for
@@ -954,18 +930,6 @@  PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
 	TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
 #endif
 
-/*
- * Check if a page is currently marked HWPoisoned. Note that this check is
- * best effort only and inherently racy: there is no way to synchronize with
- * failing hardware.
- */
-static inline bool is_page_hwpoison(struct page *page)
-{
-	if (PageHWPoison(page))
-		return true;
-	return PageHuge(page) && PageHWPoison(compound_head(page));
-}
-
 /*
  * For pages that are never mapped to userspace (and aren't PageSlab),
  * page_type may be used.  Because it is initialised to -1, we invert the
@@ -982,6 +946,7 @@  static inline bool is_page_hwpoison(struct page *page)
 #define PG_offline	0x00000100
 #define PG_table	0x00000200
 #define PG_guard	0x00000400
+#define PG_hugetlb	0x00000800
 
 #define PageType(page, flag)						\
 	((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
@@ -1076,6 +1041,37 @@  PAGE_TYPE_OPS(Table, table, pgtable)
  */
 PAGE_TYPE_OPS(Guard, guard, guard)
 
+#ifdef CONFIG_HUGETLB_PAGE
+FOLIO_TYPE_OPS(hugetlb, hugetlb)
+#else
+FOLIO_TEST_FLAG_FALSE(hugetlb)
+#endif
+
+/**
+ * PageHuge - Determine if the page belongs to hugetlbfs
+ * @page: The page to test.
+ *
+ * Context: Any context.
+ * Return: True for hugetlbfs pages, false for anon pages or pages
+ * belonging to other filesystems.
+ */
+static inline bool PageHuge(const struct page *page)
+{
+	return folio_test_hugetlb(page_folio(page));
+}
+
+/*
+ * Check if a page is currently marked HWPoisoned. Note that this check is
+ * best effort only and inherently racy: there is no way to synchronize with
+ * failing hardware.
+ */
+static inline bool is_page_hwpoison(struct page *page)
+{
+	if (PageHWPoison(page))
+		return true;
+	return PageHuge(page) && PageHWPoison(compound_head(page));
+}
+
 extern bool is_free_buddy_page(struct page *page);
 
 PAGEFLAG(Isolated, isolated, PF_ANY);
@@ -1142,7 +1138,7 @@  static __always_inline void __ClearPageAnonExclusive(struct page *page)
  */
 #define PAGE_FLAGS_SECOND						\
 	(0xffUL /* order */		| 1UL << PG_has_hwpoisoned |	\
-	 1UL << PG_hugetlb		| 1UL << PG_large_rmappable)
+	 1UL << PG_large_rmappable)
 
 #define PAGE_FLAGS_PRIVATE				\
 	(1UL << PG_private | 1UL << PG_private_2)
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index d801409b33cf..d55e53ac91bd 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -135,6 +135,7 @@  IF_HAVE_PG_ARCH_X(arch_3)
 #define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) }
 
 #define __def_pagetype_names						\
+	DEF_PAGETYPE_NAME(hugetlb),					\
 	DEF_PAGETYPE_NAME(offline),					\
 	DEF_PAGETYPE_NAME(guard),					\
 	DEF_PAGETYPE_NAME(table),					\
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7e9a766059aa..bdcbb62096cf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1624,7 +1624,7 @@  static inline void __clear_hugetlb_destructor(struct hstate *h,
 {
 	lockdep_assert_held(&hugetlb_lock);
 
-	folio_clear_hugetlb(folio);
+	__folio_clear_hugetlb(folio);
 }
 
 /*
@@ -1711,7 +1711,7 @@  static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
 		h->surplus_huge_pages_node[nid]++;
 	}
 
-	folio_set_hugetlb(folio);
+	__folio_set_hugetlb(folio);
 	folio_change_private(folio, NULL);
 	/*
 	 * We have to set hugetlb_vmemmap_optimized again as above
@@ -2050,7 +2050,7 @@  static void __prep_account_new_huge_page(struct hstate *h, int nid)
 
 static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
 {
-	folio_set_hugetlb(folio);
+	__folio_set_hugetlb(folio);
 	INIT_LIST_HEAD(&folio->lru);
 	hugetlb_set_folio_subpool(folio, NULL);
 	set_hugetlb_cgroup(folio, NULL);
@@ -2160,22 +2160,6 @@  static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
 	return __prep_compound_gigantic_folio(folio, order, true);
 }
 
-/*
- * PageHuge() only returns true for hugetlbfs pages, but not for normal or
- * transparent huge pages.  See the PageTransHuge() documentation for more
- * details.
- */
-int PageHuge(const struct page *page)
-{
-	const struct folio *folio;
-
-	if (!PageCompound(page))
-		return 0;
-	folio = page_folio(page);
-	return folio_test_hugetlb(folio);
-}
-EXPORT_SYMBOL_GPL(PageHuge);
-
 /*
  * Find and lock address space (mapping) in write mode.
  *