| Field | Value |
|---|---|
| Message ID | 20240321142448.1645400-6-willy@infradead.org (mailing list archive) |
| State | New |
| Headers | show |
| Series | Various significant MM patches \| expand |
On 3/21/24 15:24, Matthew Wilcox (Oracle) wrote: > The current folio_test_hugetlb() can be fooled by a concurrent folio split > into returning true for a folio which has never belonged to hugetlbfs. > This can't happen if the caller holds a refcount on it, but we have a > few places (memory-failure, compaction, procfs) which do not and should > not take a speculative reference. Should we add metadata wrt closing the bug report from Luis? https://lore.kernel.org/all/8fa1c95c-4749-33dd-42ba-243e492ab109@suse.cz/ I assume this wouldn't be fun wrt stable... > Since hugetlb pages do not use individual page mapcounts (they are always > fully mapped and use the entire_mapcount field to record the number Wasn't there some discussions to allow partial mappings of hugetlb? What would be the implications? > of mappings), the PageType field is available now that page_mapcount() > ignores the value in this field. > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > Reviewed-by: David Hildenbrand <david@redhat.com> Other than that, Acked-by: Vlastimil Babka <vbabka@suse.cz> > --- > include/linux/page-flags.h | 70 ++++++++++++++++------------------ > include/trace/events/mmflags.h | 1 + > mm/hugetlb.c | 22 ++--------- > 3 files changed, 37 insertions(+), 56 deletions(-) > > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h > index 5852f967c640..6fb3cd42ee59 100644 > --- a/include/linux/page-flags.h > +++ b/include/linux/page-flags.h > @@ -190,7 +190,6 @@ enum pageflags { > > /* At least one page in this folio has the hwpoison flag set */ > PG_has_hwpoisoned = PG_error, > - PG_hugetlb = PG_active, > PG_large_rmappable = PG_workingset, /* anon or file-backed */ > }; > > @@ -876,29 +875,6 @@ FOLIO_FLAG_FALSE(large_rmappable) > > #define PG_head_mask ((1UL << PG_head)) > > -#ifdef CONFIG_HUGETLB_PAGE > -int PageHuge(const struct page *page); > -SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) > -CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) > - > -/** > - * 
folio_test_hugetlb - Determine if the folio belongs to hugetlbfs > - * @folio: The folio to test. > - * > - * Context: Any context. Caller should have a reference on the folio to > - * prevent it from being turned into a tail page. > - * Return: True for hugetlbfs folios, false for anon folios or folios > - * belonging to other filesystems. > - */ > -static inline bool folio_test_hugetlb(const struct folio *folio) > -{ > - return folio_test_large(folio) && > - test_bit(PG_hugetlb, const_folio_flags(folio, 1)); > -} > -#else > -TESTPAGEFLAG_FALSE(Huge, hugetlb) > -#endif > - > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > /* > * PageHuge() only returns true for hugetlbfs pages, but not for > @@ -954,18 +930,6 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) > TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) > #endif > > -/* > - * Check if a page is currently marked HWPoisoned. Note that this check is > - * best effort only and inherently racy: there is no way to synchronize with > - * failing hardware. > - */ > -static inline bool is_page_hwpoison(struct page *page) > -{ > - if (PageHWPoison(page)) > - return true; > - return PageHuge(page) && PageHWPoison(compound_head(page)); > -} > - > /* > * For pages that are never mapped to userspace (and aren't PageSlab), > * page_type may be used. Because it is initialised to -1, we invert the > @@ -982,6 +946,7 @@ static inline bool is_page_hwpoison(struct page *page) > #define PG_offline 0x00000100 > #define PG_table 0x00000200 > #define PG_guard 0x00000400 > +#define PG_hugetlb 0x00000800 > > #define PageType(page, flag) \ > ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) > @@ -1076,6 +1041,37 @@ PAGE_TYPE_OPS(Table, table, pgtable) > */ > PAGE_TYPE_OPS(Guard, guard, guard) > > +#ifdef CONFIG_HUGETLB_PAGE > +FOLIO_TYPE_OPS(hugetlb, hugetlb) > +#else > +FOLIO_TEST_FLAG_FALSE(hugetlb) > +#endif > + > +/** > + * PageHuge - Determine if the page belongs to hugetlbfs > + * @page: The page to test. 
> + * > + * Context: Any context. > + * Return: True for hugetlbfs pages, false for anon pages or pages > + * belonging to other filesystems. > + */ > +static inline bool PageHuge(const struct page *page) > +{ > + return folio_test_hugetlb(page_folio(page)); > +} > + > +/* > + * Check if a page is currently marked HWPoisoned. Note that this check is > + * best effort only and inherently racy: there is no way to synchronize with > + * failing hardware. > + */ > +static inline bool is_page_hwpoison(struct page *page) > +{ > + if (PageHWPoison(page)) > + return true; > + return PageHuge(page) && PageHWPoison(compound_head(page)); > +} > + > extern bool is_free_buddy_page(struct page *page); > > PAGEFLAG(Isolated, isolated, PF_ANY); > @@ -1142,7 +1138,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) > */ > #define PAGE_FLAGS_SECOND \ > (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ > - 1UL << PG_hugetlb | 1UL << PG_large_rmappable) > + 1UL << PG_large_rmappable) > > #define PAGE_FLAGS_PRIVATE \ > (1UL << PG_private | 1UL << PG_private_2) > diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h > index d801409b33cf..d55e53ac91bd 100644 > --- a/include/trace/events/mmflags.h > +++ b/include/trace/events/mmflags.h > @@ -135,6 +135,7 @@ IF_HAVE_PG_ARCH_X(arch_3) > #define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) } > > #define __def_pagetype_names \ > + DEF_PAGETYPE_NAME(hugetlb), \ > DEF_PAGETYPE_NAME(offline), \ > DEF_PAGETYPE_NAME(guard), \ > DEF_PAGETYPE_NAME(table), \ > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index 7e9a766059aa..bdcbb62096cf 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -1624,7 +1624,7 @@ static inline void __clear_hugetlb_destructor(struct hstate *h, > { > lockdep_assert_held(&hugetlb_lock); > > - folio_clear_hugetlb(folio); > + __folio_clear_hugetlb(folio); > } > > /* > @@ -1711,7 +1711,7 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio, > 
h->surplus_huge_pages_node[nid]++; > } > > - folio_set_hugetlb(folio); > + __folio_set_hugetlb(folio); > folio_change_private(folio, NULL); > /* > * We have to set hugetlb_vmemmap_optimized again as above > @@ -2050,7 +2050,7 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid) > > static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio) > { > - folio_set_hugetlb(folio); > + __folio_set_hugetlb(folio); > INIT_LIST_HEAD(&folio->lru); > hugetlb_set_folio_subpool(folio, NULL); > set_hugetlb_cgroup(folio, NULL); > @@ -2160,22 +2160,6 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio, > return __prep_compound_gigantic_folio(folio, order, true); > } > > -/* > - * PageHuge() only returns true for hugetlbfs pages, but not for normal or > - * transparent huge pages. See the PageTransHuge() documentation for more > - * details. > - */ > -int PageHuge(const struct page *page) > -{ > - const struct folio *folio; > - > - if (!PageCompound(page)) > - return 0; > - folio = page_folio(page); > - return folio_test_hugetlb(folio); > -} > -EXPORT_SYMBOL_GPL(PageHuge); > - > /* > * Find and lock address space (mapping) in write mode. > *
On 22.03.24 11:19, Vlastimil Babka wrote: > On 3/21/24 15:24, Matthew Wilcox (Oracle) wrote: >> The current folio_test_hugetlb() can be fooled by a concurrent folio split >> into returning true for a folio which has never belonged to hugetlbfs. >> This can't happen if the caller holds a refcount on it, but we have a >> few places (memory-failure, compaction, procfs) which do not and should >> not take a speculative reference. > > Should we add metadata wrt closing the bug report from Luis? > > https://lore.kernel.org/all/8fa1c95c-4749-33dd-42ba-243e492ab109@suse.cz/ > > I assume this wouldn't be fun wrt stable... > >> Since hugetlb pages do not use individual page mapcounts (they are always >> fully mapped and use the entire_mapcount field to record the number > > Wasn't there some discussions to allow partial mappings of hugetlb? What > would be the implications? If we ever go that path, we really should avoid messing with any subpages right from the start. We should make it work using a single total mapcount per folio. Anyhow, that's stuff for the future.
On Fri, Mar 22, 2024 at 11:19:34AM +0100, Vlastimil Babka wrote: > Should we add metadata wrt closing the bug report from Luis? > > https://lore.kernel.org/all/8fa1c95c-4749-33dd-42ba-243e492ab109@suse.cz/ Probably a good idea. > I assume this wouldn't be fun wrt stable... I don't think it should be too bad? I think we only need to backport it as far as v6.6 when I got rid of folio->dtor. Yes, it's unreliable before that, but it doesn't cause crashes, just bad decisions. > > Since hugetlb pages do not use individual page mapcounts (they are always > > fully mapped and use the entire_mapcount field to record the number > > Wasn't there some discussions to allow partial mappings of hugetlb? What > would be the implications? I think I'm hammering another nail into that coffin. As I understand it, everyone has given up on that proposal and they're looking to make THP more reliable so they can use THP. See Yu Zhao's recent proposals.
On 3/21/24 15:24, Matthew Wilcox (Oracle) wrote: > The current folio_test_hugetlb() can be fooled by a concurrent folio split > into returning true for a folio which has never belonged to hugetlbfs. > This can't happen if the caller holds a refcount on it, but we have a > few places (memory-failure, compaction, procfs) which do not and should > not take a speculative reference. In compaction and with CONFIG_DEBUG_VM enabled, the current implementation can result in an oops, as reported by Luis. This happens since 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") effectively added some VM_BUG_ON() checks in the PageHuge() testing path. > Since hugetlb pages do not use individual page mapcounts (they are always > fully mapped and use the entire_mapcount field to record the number > of mappings), the PageType field is available now that page_mapcount() > ignores the value in this field. Reported-by: Luis Chamberlain <mcgrof@kernel.org> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218227 Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") Cc: <stable@vger.kernel.org> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > Reviewed-by: David Hildenbrand <david@redhat.com> > --- > include/linux/page-flags.h | 70 ++++++++++++++++------------------ > include/trace/events/mmflags.h | 1 + > mm/hugetlb.c | 22 ++--------- > 3 files changed, 37 insertions(+), 56 deletions(-) > > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h > index 5852f967c640..6fb3cd42ee59 100644 > --- a/include/linux/page-flags.h > +++ b/include/linux/page-flags.h > @@ -190,7 +190,6 @@ enum pageflags { > > /* At least one page in this folio has the hwpoison flag set */ > PG_has_hwpoisoned = PG_error, > - PG_hugetlb = PG_active, > PG_large_rmappable = PG_workingset, /* anon or file-backed */ > }; > > @@ -876,29 +875,6 @@ FOLIO_FLAG_FALSE(large_rmappable) > > #define PG_head_mask ((1UL << PG_head)) > > -#ifdef CONFIG_HUGETLB_PAGE > -int PageHuge(const struct page *page); > 
-SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) > -CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) > - > -/** > - * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs > - * @folio: The folio to test. > - * > - * Context: Any context. Caller should have a reference on the folio to > - * prevent it from being turned into a tail page. > - * Return: True for hugetlbfs folios, false for anon folios or folios > - * belonging to other filesystems. > - */ > -static inline bool folio_test_hugetlb(const struct folio *folio) > -{ > - return folio_test_large(folio) && > - test_bit(PG_hugetlb, const_folio_flags(folio, 1)); > -} > -#else > -TESTPAGEFLAG_FALSE(Huge, hugetlb) > -#endif > - > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > /* > * PageHuge() only returns true for hugetlbfs pages, but not for > @@ -954,18 +930,6 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) > TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) > #endif > > -/* > - * Check if a page is currently marked HWPoisoned. Note that this check is > - * best effort only and inherently racy: there is no way to synchronize with > - * failing hardware. > - */ > -static inline bool is_page_hwpoison(struct page *page) > -{ > - if (PageHWPoison(page)) > - return true; > - return PageHuge(page) && PageHWPoison(compound_head(page)); > -} > - > /* > * For pages that are never mapped to userspace (and aren't PageSlab), > * page_type may be used. 
Because it is initialised to -1, we invert the > @@ -982,6 +946,7 @@ static inline bool is_page_hwpoison(struct page *page) > #define PG_offline 0x00000100 > #define PG_table 0x00000200 > #define PG_guard 0x00000400 > +#define PG_hugetlb 0x00000800 > > #define PageType(page, flag) \ > ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) > @@ -1076,6 +1041,37 @@ PAGE_TYPE_OPS(Table, table, pgtable) > */ > PAGE_TYPE_OPS(Guard, guard, guard) > > +#ifdef CONFIG_HUGETLB_PAGE > +FOLIO_TYPE_OPS(hugetlb, hugetlb) > +#else > +FOLIO_TEST_FLAG_FALSE(hugetlb) > +#endif > + > +/** > + * PageHuge - Determine if the page belongs to hugetlbfs > + * @page: The page to test. > + * > + * Context: Any context. > + * Return: True for hugetlbfs pages, false for anon pages or pages > + * belonging to other filesystems. > + */ > +static inline bool PageHuge(const struct page *page) > +{ > + return folio_test_hugetlb(page_folio(page)); > +} > + > +/* > + * Check if a page is currently marked HWPoisoned. Note that this check is > + * best effort only and inherently racy: there is no way to synchronize with > + * failing hardware. 
> + */ > +static inline bool is_page_hwpoison(struct page *page) > +{ > + if (PageHWPoison(page)) > + return true; > + return PageHuge(page) && PageHWPoison(compound_head(page)); > +} > + > extern bool is_free_buddy_page(struct page *page); > > PAGEFLAG(Isolated, isolated, PF_ANY); > @@ -1142,7 +1138,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) > */ > #define PAGE_FLAGS_SECOND \ > (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ > - 1UL << PG_hugetlb | 1UL << PG_large_rmappable) > + 1UL << PG_large_rmappable) > > #define PAGE_FLAGS_PRIVATE \ > (1UL << PG_private | 1UL << PG_private_2) > diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h > index d801409b33cf..d55e53ac91bd 100644 > --- a/include/trace/events/mmflags.h > +++ b/include/trace/events/mmflags.h > @@ -135,6 +135,7 @@ IF_HAVE_PG_ARCH_X(arch_3) > #define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) } > > #define __def_pagetype_names \ > + DEF_PAGETYPE_NAME(hugetlb), \ > DEF_PAGETYPE_NAME(offline), \ > DEF_PAGETYPE_NAME(guard), \ > DEF_PAGETYPE_NAME(table), \ > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index 7e9a766059aa..bdcbb62096cf 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -1624,7 +1624,7 @@ static inline void __clear_hugetlb_destructor(struct hstate *h, > { > lockdep_assert_held(&hugetlb_lock); > > - folio_clear_hugetlb(folio); > + __folio_clear_hugetlb(folio); > } > > /* > @@ -1711,7 +1711,7 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio, > h->surplus_huge_pages_node[nid]++; > } > > - folio_set_hugetlb(folio); > + __folio_set_hugetlb(folio); > folio_change_private(folio, NULL); > /* > * We have to set hugetlb_vmemmap_optimized again as above > @@ -2050,7 +2050,7 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid) > > static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio) > { > - folio_set_hugetlb(folio); > + __folio_set_hugetlb(folio); > 
INIT_LIST_HEAD(&folio->lru); > hugetlb_set_folio_subpool(folio, NULL); > set_hugetlb_cgroup(folio, NULL); > @@ -2160,22 +2160,6 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio, > return __prep_compound_gigantic_folio(folio, order, true); > } > > -/* > - * PageHuge() only returns true for hugetlbfs pages, but not for normal or > - * transparent huge pages. See the PageTransHuge() documentation for more > - * details. > - */ > -int PageHuge(const struct page *page) > -{ > - const struct folio *folio; > - > - if (!PageCompound(page)) > - return 0; > - folio = page_folio(page); > - return folio_test_hugetlb(folio); > -} > -EXPORT_SYMBOL_GPL(PageHuge); > - > /* > * Find and lock address space (mapping) in write mode. > *
On Thu, Mar 21, 2024 at 02:24:43PM +0000, Matthew Wilcox (Oracle) wrote: > The current folio_test_hugetlb() can be fooled by a concurrent folio split > into returning true for a folio which has never belonged to hugetlbfs. > This can't happen if the caller holds a refcount on it, but we have a > few places (memory-failure, compaction, procfs) which do not and should > not take a speculative reference. > > Since hugetlb pages do not use individual page mapcounts (they are always > fully mapped and use the entire_mapcount field to record the number > of mappings), the PageType field is available now that page_mapcount() > ignores the value in this field. Update vmcoreinfo: diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c index f95516cd45bb..41372f5d5c19 100644 --- a/kernel/vmcore_info.c +++ b/kernel/vmcore_info.c @@ -205,11 +205,10 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_NUMBER(PG_head_mask); #define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy) VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); -#ifdef CONFIG_HUGETLB_PAGE - VMCOREINFO_NUMBER(PG_hugetlb); +#define PAGE_HUGETLB_MAPCOUNT_VALUE( (~PG_hugetlb) + VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE); #define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline) VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); -#endif #ifdef CONFIG_KALLSYMS VMCOREINFO_SYMBOL(kallsyms_names);
On Mon, Mar 25, 2024 at 03:14:53PM +0000, Matthew Wilcox wrote:
> Update vmcoreinfo:
Urgh, a typo slipped in. use this instead:
diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c
index f95516cd45bb..41372f5d5c19 100644
--- a/kernel/vmcore_info.c
+++ b/kernel/vmcore_info.c
@@ -205,11 +205,10 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_NUMBER(PG_head_mask);
#define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy)
VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
-#ifdef CONFIG_HUGETLB_PAGE
- VMCOREINFO_NUMBER(PG_hugetlb);
+#define PAGE_HUGETLB_MAPCOUNT_VALUE( (~PG_hugetlb)
+ VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE);
#define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline)
VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
-#endif
#ifdef CONFIG_KALLSYMS
VMCOREINFO_SYMBOL(kallsyms_names);
On Mon, Mar 25, 2024 at 03:18:39PM +0000, Matthew Wilcox wrote: > On Mon, Mar 25, 2024 at 03:14:53PM +0000, Matthew Wilcox wrote: > > Update vmcoreinfo: > > Urgh, a typo slipped in. use this instead: *sigh*. Can I go back to bed now please? diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c index f95516cd45bb..23c125c2e243 100644 --- a/kernel/vmcore_info.c +++ b/kernel/vmcore_info.c @@ -205,11 +205,10 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_NUMBER(PG_head_mask); #define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy) VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); -#ifdef CONFIG_HUGETLB_PAGE - VMCOREINFO_NUMBER(PG_hugetlb); +#define PAGE_HUGETLB_MAPCOUNT_VALUE (~PG_hugetlb) + VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE); #define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline) VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); -#endif #ifdef CONFIG_KALLSYMS VMCOREINFO_SYMBOL(kallsyms_names);
On Mon, 25 Mar 2024 08:57:52 +0100 Vlastimil Babka <vbabka@suse.cz> wrote: > On 3/21/24 15:24, Matthew Wilcox (Oracle) wrote: > > The current folio_test_hugetlb() can be fooled by a concurrent folio split > > into returning true for a folio which has never belonged to hugetlbfs. > > This can't happen if the caller holds a refcount on it, but we have a > > few places (memory-failure, compaction, procfs) which do not and should > > not take a speculative reference. > > In compaction and with CONFIG_DEBUG_VM enabled, the current implementation > can result in an oops, as reported by Luis. This happens since 9c5ccf2db04b > ("mm: remove HUGETLB_PAGE_DTOR") effectively added some VM_BUG_ON() checks > in the PageHuge() testing path. > > > Since hugetlb pages do not use individual page mapcounts (they are always > > fully mapped and use the entire_mapcount field to record the number > > of mappings), the PageType field is available now that page_mapcount() > > ignores the value in this field. > > Reported-by: Luis Chamberlain <mcgrof@kernel.org> > Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218227 > Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") > Cc: <stable@vger.kernel.org> Thanks. The patch doesn't work as a standalone thing. In file included from ./include/linux/mmzone.h:23, from ./include/linux/gfp.h:7, from ./include/linux/slab.h:16, from ./include/linux/crypto.h:17, from arch/x86/kernel/asm-offsets.c:9: ./include/linux/page-flags.h:1021:1: error: return type defaults to 'int' [-Werror=implicit-int] 1021 | FOLIO_TYPE_OPS(hugetlb, hugetlb) | ^~~~~~~~~~~~~~ ./include/linux/page-flags.h:1021:1: error: function declaration isn't a prototype [-Werror=strict-prototypes] ./include/linux/page-flags.h: In function 'FOLIO_TYPE_OPS': ./include/linux/page-flags.h:1035:1: error: expected '=', ',', ';', 'asm' or '__attribute__' before '{' token <a million more> Matthew, could you please redo this patch (and its vmcore fix) and send as a standalone -stable patch? 
It could be that the "Various significant MM patches" will need a redo afterwards.
On Mon, Mar 25, 2024 at 11:48:13AM -0700, Andrew Morton wrote: > On Mon, 25 Mar 2024 08:57:52 +0100 Vlastimil Babka <vbabka@suse.cz> wrote: > > Reported-by: Luis Chamberlain <mcgrof@kernel.org> > > Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218227 > > Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") > > Cc: <stable@vger.kernel.org> > > Thanks. > > The patch doesn't work as a standalone thing. No, it depends on both mm: support page_mapcount() on page_has_type() pages mm: create FOLIO_FLAG_FALSE and FOLIO_TYPE_OPS macros I was assuming both would get backported as dependencies. If you want a standalone patch, something like this would do the trick. > Matthew, could you please redo this patch (and its vmcore fix) and send > as a standalone -stable patch? It could be that the "Various > significant MM patches" will need a redo afterwards. I'd rather keep the mapcount patch separate for upstream purposes. I've build-tested against 6.6.22 with allmodconfig and then with HUGETLB=n (but otherwise allmodconfig) diff --git a/include/linux/mm.h b/include/linux/mm.h index bf5d0b1b16f4..5e15004eab8c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1192,6 +1192,9 @@ static inline int page_mapcount(struct page *page) { int mapcount = atomic_read(&page->_mapcount) + 1; + /* Handle page_has_type() pages */ + if (mapcount < 0) + mapcount = 0; if (unlikely(PageCompound(page))) mapcount += folio_entire_mapcount(page_folio(page)); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5c02720c53a5..4d5f750551c5 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -190,7 +190,6 @@ enum pageflags { /* At least one page in this folio has the hwpoison flag set */ PG_has_hwpoisoned = PG_error, - PG_hugetlb = PG_active, PG_large_rmappable = PG_workingset, /* anon or file-backed */ }; @@ -815,29 +814,6 @@ TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable) #define PG_head_mask ((1UL << PG_head)) -#ifdef CONFIG_HUGETLB_PAGE 
-int PageHuge(struct page *page); -SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) -CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) - -/** - * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs - * @folio: The folio to test. - * - * Context: Any context. Caller should have a reference on the folio to - * prevent it from being turned into a tail page. - * Return: True for hugetlbfs folios, false for anon folios or folios - * belonging to other filesystems. - */ -static inline bool folio_test_hugetlb(struct folio *folio) -{ - return folio_test_large(folio) && - test_bit(PG_hugetlb, folio_flags(folio, 1)); -} -#else -TESTPAGEFLAG_FALSE(Huge, hugetlb) -#endif - #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageHuge() only returns true for hugetlbfs pages, but not for @@ -893,18 +869,6 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #endif -/* - * Check if a page is currently marked HWPoisoned. Note that this check is - * best effort only and inherently racy: there is no way to synchronize with - * failing hardware. - */ -static inline bool is_page_hwpoison(struct page *page) -{ - if (PageHWPoison(page)) - return true; - return PageHuge(page) && PageHWPoison(compound_head(page)); -} - /* * For pages that are never mapped to userspace (and aren't PageSlab), * page_type may be used. Because it is initialised to -1, we invert the @@ -921,6 +885,7 @@ static inline bool is_page_hwpoison(struct page *page) #define PG_offline 0x00000100 #define PG_table 0x00000200 #define PG_guard 0x00000400 +#define PG_hugetlb 0x00000800 #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) @@ -1012,6 +977,37 @@ PAGE_TYPE_OPS(Table, table, pgtable) */ PAGE_TYPE_OPS(Guard, guard, guard) +#ifdef CONFIG_HUGETLB_PAGE +PAGE_TYPE_OPS(HeadHugeTLB, hugetlb, hugetlb) + +/** + * PageHuge - Determine if the folio belongs to hugetlbfs. + * @page: The page to test. + * + * Context: Any context. 
+ * Return: True for hugetlbfs folios, false for anon folios or folios + * belonging to other filesystems. + */ +static inline bool PageHuge(const struct page *page) +{ + return folio_test_hugetlb(page_folio(page)); +} +#else +TESTPAGEFLAG_FALSE(Huge, hugetlb) +#endif + +/* + * Check if a page is currently marked HWPoisoned. Note that this check is + * best effort only and inherently racy: there is no way to synchronize with + * failing hardware. + */ +static inline bool is_page_hwpoison(struct page *page) +{ + if (PageHWPoison(page)) + return true; + return PageHuge(page) && PageHWPoison(compound_head(page)); +} + extern bool is_free_buddy_page(struct page *page); PAGEFLAG(Isolated, isolated, PF_ANY); @@ -1078,7 +1074,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) */ #define PAGE_FLAGS_SECOND \ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ - 1UL << PG_hugetlb | 1UL << PG_large_rmappable) + 1UL << PG_large_rmappable) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 1478b9dd05fa..e010618f9326 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -135,6 +135,7 @@ IF_HAVE_PG_ARCH_X(arch_3) #define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) } #define __def_pagetype_names \ + DEF_PAGETYPE_NAME(hugetlb), \ DEF_PAGETYPE_NAME(offline), \ DEF_PAGETYPE_NAME(guard), \ DEF_PAGETYPE_NAME(table), \ diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 2f675ef045d4..a2face7fbef8 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -675,8 +675,8 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_NUMBER(PG_head_mask); #define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy) VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); -#ifdef CONFIG_HUGETLB_PAGE - VMCOREINFO_NUMBER(PG_hugetlb); +#define PAGE_HUGETLB_MAPCOUNT_VALUE (~PG_hugetlb) + VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE); 
#define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline) VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); #endif diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5e6c4d367d33..30b713d330ca 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1630,7 +1630,7 @@ static inline void __clear_hugetlb_destructor(struct hstate *h, { lockdep_assert_held(&hugetlb_lock); - folio_clear_hugetlb(folio); + __folio_clear_hugetlb(folio); } /* @@ -1717,7 +1717,7 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio, h->surplus_huge_pages_node[nid]++; } - folio_set_hugetlb(folio); + __folio_set_hugetlb(folio); folio_change_private(folio, NULL); /* * We have to set hugetlb_vmemmap_optimized again as above @@ -1971,7 +1971,7 @@ static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio) { hugetlb_vmemmap_optimize(h, &folio->page); INIT_LIST_HEAD(&folio->lru); - folio_set_hugetlb(folio); + __folio_set_hugetlb(folio); hugetlb_set_folio_subpool(folio, NULL); set_hugetlb_cgroup(folio, NULL); set_hugetlb_cgroup_rsvd(folio, NULL); @@ -2074,22 +2074,6 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio, return __prep_compound_gigantic_folio(folio, order, true); } -/* - * PageHuge() only returns true for hugetlbfs pages, but not for normal or - * transparent huge pages. See the PageTransHuge() documentation for more - * details. - */ -int PageHuge(struct page *page) -{ - struct folio *folio; - - if (!PageCompound(page)) - return 0; - folio = page_folio(page); - return folio_test_hugetlb(folio); -} -EXPORT_SYMBOL_GPL(PageHuge); - /* * Find and lock address space (mapping) in write mode. *
On 3/25/24 9:41 PM, Matthew Wilcox wrote: > On Mon, Mar 25, 2024 at 11:48:13AM -0700, Andrew Morton wrote: >> On Mon, 25 Mar 2024 08:57:52 +0100 Vlastimil Babka <vbabka@suse.cz> wrote: >> > Reported-by: Luis Chamberlain <mcgrof@kernel.org> >> > Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218227 >> > Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") >> > Cc: <stable@vger.kernel.org> >> >> Thanks. >> >> The patch doesn't work as a standalone thing. > > No, it depends on both > mm: support page_mapcount() on page_has_type() pages > mm: create FOLIO_FLAG_FALSE and FOLIO_TYPE_OPS macros Stable maintainers are usually fine with dependency patches and these are not hugely intrusive and risky? We should just order and mark it in a way to make it all obvious.
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5852f967c640..6fb3cd42ee59 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -190,7 +190,6 @@ enum pageflags { /* At least one page in this folio has the hwpoison flag set */ PG_has_hwpoisoned = PG_error, - PG_hugetlb = PG_active, PG_large_rmappable = PG_workingset, /* anon or file-backed */ }; @@ -876,29 +875,6 @@ FOLIO_FLAG_FALSE(large_rmappable) #define PG_head_mask ((1UL << PG_head)) -#ifdef CONFIG_HUGETLB_PAGE -int PageHuge(const struct page *page); -SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) -CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) - -/** - * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs - * @folio: The folio to test. - * - * Context: Any context. Caller should have a reference on the folio to - * prevent it from being turned into a tail page. - * Return: True for hugetlbfs folios, false for anon folios or folios - * belonging to other filesystems. - */ -static inline bool folio_test_hugetlb(const struct folio *folio) -{ - return folio_test_large(folio) && - test_bit(PG_hugetlb, const_folio_flags(folio, 1)); -} -#else -TESTPAGEFLAG_FALSE(Huge, hugetlb) -#endif - #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageHuge() only returns true for hugetlbfs pages, but not for @@ -954,18 +930,6 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #endif -/* - * Check if a page is currently marked HWPoisoned. Note that this check is - * best effort only and inherently racy: there is no way to synchronize with - * failing hardware. - */ -static inline bool is_page_hwpoison(struct page *page) -{ - if (PageHWPoison(page)) - return true; - return PageHuge(page) && PageHWPoison(compound_head(page)); -} - /* * For pages that are never mapped to userspace (and aren't PageSlab), * page_type may be used. 
Because it is initialised to -1, we invert the @@ -982,6 +946,7 @@ static inline bool is_page_hwpoison(struct page *page) #define PG_offline 0x00000100 #define PG_table 0x00000200 #define PG_guard 0x00000400 +#define PG_hugetlb 0x00000800 #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) @@ -1076,6 +1041,37 @@ PAGE_TYPE_OPS(Table, table, pgtable) */ PAGE_TYPE_OPS(Guard, guard, guard) +#ifdef CONFIG_HUGETLB_PAGE +FOLIO_TYPE_OPS(hugetlb, hugetlb) +#else +FOLIO_TEST_FLAG_FALSE(hugetlb) +#endif + +/** + * PageHuge - Determine if the page belongs to hugetlbfs + * @page: The page to test. + * + * Context: Any context. + * Return: True for hugetlbfs pages, false for anon pages or pages + * belonging to other filesystems. + */ +static inline bool PageHuge(const struct page *page) +{ + return folio_test_hugetlb(page_folio(page)); +} + +/* + * Check if a page is currently marked HWPoisoned. Note that this check is + * best effort only and inherently racy: there is no way to synchronize with + * failing hardware. 
+ */ +static inline bool is_page_hwpoison(struct page *page) +{ + if (PageHWPoison(page)) + return true; + return PageHuge(page) && PageHWPoison(compound_head(page)); +} + extern bool is_free_buddy_page(struct page *page); PAGEFLAG(Isolated, isolated, PF_ANY); @@ -1142,7 +1138,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) */ #define PAGE_FLAGS_SECOND \ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ - 1UL << PG_hugetlb | 1UL << PG_large_rmappable) + 1UL << PG_large_rmappable) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index d801409b33cf..d55e53ac91bd 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -135,6 +135,7 @@ IF_HAVE_PG_ARCH_X(arch_3) #define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) } #define __def_pagetype_names \ + DEF_PAGETYPE_NAME(hugetlb), \ DEF_PAGETYPE_NAME(offline), \ DEF_PAGETYPE_NAME(guard), \ DEF_PAGETYPE_NAME(table), \ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7e9a766059aa..bdcbb62096cf 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1624,7 +1624,7 @@ static inline void __clear_hugetlb_destructor(struct hstate *h, { lockdep_assert_held(&hugetlb_lock); - folio_clear_hugetlb(folio); + __folio_clear_hugetlb(folio); } /* @@ -1711,7 +1711,7 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio, h->surplus_huge_pages_node[nid]++; } - folio_set_hugetlb(folio); + __folio_set_hugetlb(folio); folio_change_private(folio, NULL); /* * We have to set hugetlb_vmemmap_optimized again as above @@ -2050,7 +2050,7 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid) static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio) { - folio_set_hugetlb(folio); + __folio_set_hugetlb(folio); INIT_LIST_HEAD(&folio->lru); hugetlb_set_folio_subpool(folio, NULL); set_hugetlb_cgroup(folio, NULL); @@ -2160,22 +2160,6 @@ static bool 
prep_compound_gigantic_folio_for_demote(struct folio *folio, return __prep_compound_gigantic_folio(folio, order, true); } -/* - * PageHuge() only returns true for hugetlbfs pages, but not for normal or - * transparent huge pages. See the PageTransHuge() documentation for more - * details. - */ -int PageHuge(const struct page *page) -{ - const struct folio *folio; - - if (!PageCompound(page)) - return 0; - folio = page_folio(page); - return folio_test_hugetlb(folio); -} -EXPORT_SYMBOL_GPL(PageHuge); - /* * Find and lock address space (mapping) in write mode. *