
[mm-unstable,v2,3/3] mm/hugetlb: use __GFP_COMP for gigantic folios

Message ID 20240814035451.773331-4-yuzhao@google.com (mailing list archive)
State New
Series mm/hugetlb: alloc/free gigantic folios

Commit Message

Yu Zhao Aug. 14, 2024, 3:54 a.m. UTC
Use __GFP_COMP for gigantic folios to greatly reduce not only the
amount of code but also the allocation and free time.

LOC (approximately): +60, -240

Allocate and free 500 1GB hugeTLB pages without HVO by:
  time echo 500 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
  time echo 0 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages

       Before  After
Alloc  ~13s    ~10s
Free   ~15s    <1s

The above magnitude generally holds for multiple x86 and arm64 CPU
models.

Signed-off-by: Yu Zhao <yuzhao@google.com>
Reported-by: Frank van der Linden <fvdl@google.com>
---
 include/linux/hugetlb.h |   9 +-
 mm/hugetlb.c            | 293 ++++++++--------------------------------
 2 files changed, 62 insertions(+), 240 deletions(-)

Comments

Zi Yan Aug. 16, 2024, 3:23 p.m. UTC | #1
On 13 Aug 2024, at 23:54, Yu Zhao wrote:

> Use __GFP_COMP for gigantic folios to greatly reduce not only the
> amount of code but also the allocation and free time.
>
> LOC (approximately): +60, -240
>
> Allocate and free 500 1GB hugeTLB pages without HVO by:
>   time echo 500 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
>   time echo 0 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
>
>        Before  After
> Alloc  ~13s    ~10s
> Free   ~15s    <1s
>
> The above magnitude generally holds for multiple x86 and arm64 CPU
> models.
>
> Signed-off-by: Yu Zhao <yuzhao@google.com>
> Reported-by: Frank van der Linden <fvdl@google.com>
> ---
>  include/linux/hugetlb.h |   9 +-
>  mm/hugetlb.c            | 293 ++++++++--------------------------------
>  2 files changed, 62 insertions(+), 240 deletions(-)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 3100a52ceb73..98c47c394b89 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -896,10 +896,11 @@ static inline bool hugepage_movable_supported(struct hstate *h)
>  /* Movability of hugepages depends on migration support. */
>  static inline gfp_t htlb_alloc_mask(struct hstate *h)
>  {
> -	if (hugepage_movable_supported(h))
> -		return GFP_HIGHUSER_MOVABLE;
> -	else
> -		return GFP_HIGHUSER;
> +	gfp_t gfp = __GFP_COMP | __GFP_NOWARN;
> +
> +	gfp |= hugepage_movable_supported(h) ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER;
> +
> +	return gfp;
>  }
>
>  static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 71d469c8e711..efa77ce87dcc 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -56,16 +56,6 @@ struct hstate hstates[HUGE_MAX_HSTATE];
>  #ifdef CONFIG_CMA
>  static struct cma *hugetlb_cma[MAX_NUMNODES];
>  static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
> -static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
> -{
> -	return cma_pages_valid(hugetlb_cma[folio_nid(folio)], &folio->page,
> -				1 << order);
> -}
> -#else
> -static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
> -{
> -	return false;
> -}
>  #endif
>  static unsigned long hugetlb_cma_size __initdata;
>
> @@ -100,6 +90,17 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
>  		unsigned long start, unsigned long end);
>  static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
>
> +static void hugetlb_free_folio(struct folio *folio)
> +{
> +#ifdef CONFIG_CMA
> +	int nid = folio_nid(folio);
> +
> +	if (cma_free_folio(hugetlb_cma[nid], folio))
> +		return;
> +#endif
> +	folio_put(folio);
> +}
> +

It seems that we no longer use free_contig_range() to free gigantic
folios from alloc_contig_range(). Will it work? Or did I miss anything?


Best Regards,
Yan, Zi
Yu Zhao Aug. 16, 2024, 4:02 p.m. UTC | #2
On Fri, Aug 16, 2024 at 9:23 AM Zi Yan <ziy@nvidia.com> wrote:
>
> On 13 Aug 2024, at 23:54, Yu Zhao wrote:
>
> > Use __GFP_COMP for gigantic folios to greatly reduce not only the
> > amount of code but also the allocation and free time.
> >
> > LOC (approximately): +60, -240
> >
> > Allocate and free 500 1GB hugeTLB pages without HVO by:
> >   time echo 500 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
> >   time echo 0 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
> >
> >        Before  After
> > Alloc  ~13s    ~10s
> > Free   ~15s    <1s
> >
> > The above magnitude generally holds for multiple x86 and arm64 CPU
> > models.
> >
> > Signed-off-by: Yu Zhao <yuzhao@google.com>
> > Reported-by: Frank van der Linden <fvdl@google.com>
> > ---
> >  include/linux/hugetlb.h |   9 +-
> >  mm/hugetlb.c            | 293 ++++++++--------------------------------
> >  2 files changed, 62 insertions(+), 240 deletions(-)
> >
> > diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> > index 3100a52ceb73..98c47c394b89 100644
> > --- a/include/linux/hugetlb.h
> > +++ b/include/linux/hugetlb.h
> > @@ -896,10 +896,11 @@ static inline bool hugepage_movable_supported(struct hstate *h)
> >  /* Movability of hugepages depends on migration support. */
> >  static inline gfp_t htlb_alloc_mask(struct hstate *h)
> >  {
> > -     if (hugepage_movable_supported(h))
> > -             return GFP_HIGHUSER_MOVABLE;
> > -     else
> > -             return GFP_HIGHUSER;
> > +     gfp_t gfp = __GFP_COMP | __GFP_NOWARN;
> > +
> > +     gfp |= hugepage_movable_supported(h) ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER;
> > +
> > +     return gfp;
> >  }
> >
> >  static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index 71d469c8e711..efa77ce87dcc 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -56,16 +56,6 @@ struct hstate hstates[HUGE_MAX_HSTATE];
> >  #ifdef CONFIG_CMA
> >  static struct cma *hugetlb_cma[MAX_NUMNODES];
> >  static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
> > -static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
> > -{
> > -     return cma_pages_valid(hugetlb_cma[folio_nid(folio)], &folio->page,
> > -                             1 << order);
> > -}
> > -#else
> > -static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
> > -{
> > -     return false;
> > -}
> >  #endif
> >  static unsigned long hugetlb_cma_size __initdata;
> >
> > @@ -100,6 +90,17 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
> >               unsigned long start, unsigned long end);
> >  static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
> >
> > +static void hugetlb_free_folio(struct folio *folio)
> > +{
> > +#ifdef CONFIG_CMA
> > +     int nid = folio_nid(folio);
> > +
> > +     if (cma_free_folio(hugetlb_cma[nid], folio))
> > +             return;
> > +#endif
> > +     folio_put(folio);
> > +}
> > +
>
> It seems that we no longer use free_contig_range() to free gigantic
> folios from alloc_contig_range().

We switched to two pairs of external (to the allocator) APIs in this patch:
  folio_alloc_gigantic()
  folio_put()
and
  cma_alloc_folio()
  cma_free_folio()

> Will it work? Or did I miss anything?

alloc_contig_range() and free_contig_range() also work with __GFP_COMP
/ large folios, but this pair is internal (to the allocator) and
shouldn't be used directly except to implement external APIs like the
ones above.
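
For illustration, a caller outside the allocator would pair them
roughly as below. This is a minimal sketch, not code from this series:
grab_gigantic() and drop_gigantic() are hypothetical names, it assumes
the helpers added earlier in the series, and error handling is omitted.

#include <linux/cma.h>
#include <linux/gfp.h>

static struct folio *grab_gigantic(struct cma *cma, int order, gfp_t gfp,
				   int nid, nodemask_t *nmask)
{
	struct folio *folio = NULL;

	/* Try the CMA-backed pool first, if one exists. */
	if (cma)
		folio = cma_alloc_folio(cma, order, gfp);

	/* Otherwise fall back to the contiguous page allocator. */
	if (!folio)
		folio = folio_alloc_gigantic(order, gfp, nid, nmask);

	return folio;
}

static void drop_gigantic(struct cma *cma, struct folio *folio)
{
	/* cma_free_folio() returns false if the folio isn't from this CMA. */
	if (cma && cma_free_folio(cma, folio))
		return;

	/* Buddy-allocated: dropping the last reference frees the folio. */
	folio_put(folio);
}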
Zi Yan Aug. 16, 2024, 4:30 p.m. UTC | #3
On 16 Aug 2024, at 12:02, Yu Zhao wrote:

> On Fri, Aug 16, 2024 at 9:23 AM Zi Yan <ziy@nvidia.com> wrote:
>>
>> On 13 Aug 2024, at 23:54, Yu Zhao wrote:
>>
>>> Use __GFP_COMP for gigantic folios to greatly reduce not only the
>>> amount of code but also the allocation and free time.
>>>
>>> LOC (approximately): +60, -240
>>>
>>> Allocate and free 500 1GB hugeTLB pages without HVO by:
>>>   time echo 500 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
>>>   time echo 0 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
>>>
>>>        Before  After
>>> Alloc  ~13s    ~10s
>>> Free   ~15s    <1s
>>>
>>> The above magnitude generally holds for multiple x86 and arm64 CPU
>>> models.
>>>
>>> Signed-off-by: Yu Zhao <yuzhao@google.com>
>>> Reported-by: Frank van der Linden <fvdl@google.com>
>>> ---
>>>  include/linux/hugetlb.h |   9 +-
>>>  mm/hugetlb.c            | 293 ++++++++--------------------------------
>>>  2 files changed, 62 insertions(+), 240 deletions(-)
>>>
>>> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
>>> index 3100a52ceb73..98c47c394b89 100644
>>> --- a/include/linux/hugetlb.h
>>> +++ b/include/linux/hugetlb.h
>>> @@ -896,10 +896,11 @@ static inline bool hugepage_movable_supported(struct hstate *h)
>>>  /* Movability of hugepages depends on migration support. */
>>>  static inline gfp_t htlb_alloc_mask(struct hstate *h)
>>>  {
>>> -     if (hugepage_movable_supported(h))
>>> -             return GFP_HIGHUSER_MOVABLE;
>>> -     else
>>> -             return GFP_HIGHUSER;
>>> +     gfp_t gfp = __GFP_COMP | __GFP_NOWARN;
>>> +
>>> +     gfp |= hugepage_movable_supported(h) ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER;
>>> +
>>> +     return gfp;
>>>  }
>>>
>>>  static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
>>> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
>>> index 71d469c8e711..efa77ce87dcc 100644
>>> --- a/mm/hugetlb.c
>>> +++ b/mm/hugetlb.c
>>> @@ -56,16 +56,6 @@ struct hstate hstates[HUGE_MAX_HSTATE];
>>>  #ifdef CONFIG_CMA
>>>  static struct cma *hugetlb_cma[MAX_NUMNODES];
>>>  static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
>>> -static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
>>> -{
>>> -     return cma_pages_valid(hugetlb_cma[folio_nid(folio)], &folio->page,
>>> -                             1 << order);
>>> -}
>>> -#else
>>> -static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
>>> -{
>>> -     return false;
>>> -}
>>>  #endif
>>>  static unsigned long hugetlb_cma_size __initdata;
>>>
>>> @@ -100,6 +90,17 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
>>>               unsigned long start, unsigned long end);
>>>  static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
>>>
>>> +static void hugetlb_free_folio(struct folio *folio)
>>> +{
>>> +#ifdef CONFIG_CMA
>>> +     int nid = folio_nid(folio);
>>> +
>>> +     if (cma_free_folio(hugetlb_cma[nid], folio))
>>> +             return;
>>> +#endif
>>> +     folio_put(folio);
>>> +}
>>> +
>>
>> It seems that we no longer use free_contig_range() to free gigantic
>> folios from alloc_contig_range().
>
> We switched to two pairs of external (to the allocator) APIs in this patch:
>   folio_alloc_gigantic()
>   folio_put()
> and
>   cma_alloc_folio()
>   cma_free_folio()
>
>> Will it work? Or did I miss anything?
>
> alloc_contig_range() and free_contig_range() also work with __GFP_COMP
> / large folios, but this pair is internal (to the allocator) and
> shouldn't be used directly except to implement external APIs like the
> ones above.

Oh, I missed the split_large_buddy() addition to free_one_page() in
patch 1. That handles freeing pages above MAX_ORDER. It also makes sure
the pageblocks of a freed cross-pageblock gigantic hugetlb page go back
to the right free lists when the pageblocks have different
migratetypes, since the page is freed at pageblock granularity.
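
In sketch form, the idea is roughly the following. This is an
approximation built on internal page_alloc.c helpers
(__free_one_page(), get_pfnblock_migratetype()), not the actual code
from patch 1:

/*
 * Rough sketch of the split_large_buddy() idea: feed a free page that
 * spans several pageblocks to the buddy allocator one pageblock at a
 * time, so each block lands on the free list of its own migratetype.
 */
static void split_large_buddy(struct zone *zone, struct page *page,
			      unsigned long pfn, int order, fpi_t fpi)
{
	unsigned long end = pfn + (1UL << order);

	/* At or below pageblock granularity the migratetype is uniform. */
	if (order <= pageblock_order) {
		__free_one_page(page, pfn, zone, order,
				get_pfnblock_migratetype(page, pfn), fpi);
		return;
	}

	/* Free one pageblock at a time, each to its own migratetype list. */
	while (pfn < end) {
		int mt = get_pfnblock_migratetype(page, pfn);

		__free_one_page(page, pfn, zone, pageblock_order, mt, fpi);
		pfn += pageblock_nr_pages;
		page += pageblock_nr_pages;
	}
}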

The whole series looks good to me.

Acked-by: Zi Yan <ziy@nvidia.com>

Best Regards,
Yan, Zi

Patch

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3100a52ceb73..98c47c394b89 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -896,10 +896,11 @@  static inline bool hugepage_movable_supported(struct hstate *h)
 /* Movability of hugepages depends on migration support. */
 static inline gfp_t htlb_alloc_mask(struct hstate *h)
 {
-	if (hugepage_movable_supported(h))
-		return GFP_HIGHUSER_MOVABLE;
-	else
-		return GFP_HIGHUSER;
+	gfp_t gfp = __GFP_COMP | __GFP_NOWARN;
+
+	gfp |= hugepage_movable_supported(h) ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER;
+
+	return gfp;
 }
 
 static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 71d469c8e711..efa77ce87dcc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -56,16 +56,6 @@  struct hstate hstates[HUGE_MAX_HSTATE];
 #ifdef CONFIG_CMA
 static struct cma *hugetlb_cma[MAX_NUMNODES];
 static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
-static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
-{
-	return cma_pages_valid(hugetlb_cma[folio_nid(folio)], &folio->page,
-				1 << order);
-}
-#else
-static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
-{
-	return false;
-}
 #endif
 static unsigned long hugetlb_cma_size __initdata;
 
@@ -100,6 +90,17 @@  static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end);
 static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
 
+static void hugetlb_free_folio(struct folio *folio)
+{
+#ifdef CONFIG_CMA
+	int nid = folio_nid(folio);
+
+	if (cma_free_folio(hugetlb_cma[nid], folio))
+		return;
+#endif
+	folio_put(folio);
+}
+
 static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
 	if (spool->count)
@@ -1512,95 +1513,54 @@  static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 		((node = hstate_next_node_to_free(hs, mask)) || 1);	\
 		nr_nodes--)
 
-/* used to demote non-gigantic_huge pages as well */
-static void __destroy_compound_gigantic_folio(struct folio *folio,
-					unsigned int order, bool demote)
-{
-	int i;
-	int nr_pages = 1 << order;
-	struct page *p;
-
-	atomic_set(&folio->_entire_mapcount, 0);
-	atomic_set(&folio->_large_mapcount, 0);
-	atomic_set(&folio->_pincount, 0);
-
-	for (i = 1; i < nr_pages; i++) {
-		p = folio_page(folio, i);
-		p->flags &= ~PAGE_FLAGS_CHECK_AT_FREE;
-		p->mapping = NULL;
-		clear_compound_head(p);
-		if (!demote)
-			set_page_refcounted(p);
-	}
-
-	__folio_clear_head(folio);
-}
-
-static void destroy_compound_hugetlb_folio_for_demote(struct folio *folio,
-					unsigned int order)
-{
-	__destroy_compound_gigantic_folio(folio, order, true);
-}
-
 #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
-static void destroy_compound_gigantic_folio(struct folio *folio,
-					unsigned int order)
-{
-	__destroy_compound_gigantic_folio(folio, order, false);
-}
-
-static void free_gigantic_folio(struct folio *folio, unsigned int order)
-{
-	/*
-	 * If the page isn't allocated using the cma allocator,
-	 * cma_release() returns false.
-	 */
-#ifdef CONFIG_CMA
-	int nid = folio_nid(folio);
-
-	if (cma_release(hugetlb_cma[nid], &folio->page, 1 << order))
-		return;
-#endif
-
-	free_contig_range(folio_pfn(folio), 1 << order);
-}
-
 #ifdef CONFIG_CONTIG_ALLOC
 static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
 		int nid, nodemask_t *nodemask)
 {
-	struct page *page;
-	unsigned long nr_pages = pages_per_huge_page(h);
+	struct folio *folio;
+	int order = huge_page_order(h);
+	bool retried = false;
+
 	if (nid == NUMA_NO_NODE)
 		nid = numa_mem_id();
-
+retry:
+	folio = NULL;
 #ifdef CONFIG_CMA
 	{
 		int node;
 
-		if (hugetlb_cma[nid]) {
-			page = cma_alloc(hugetlb_cma[nid], nr_pages,
-					huge_page_order(h), true);
-			if (page)
-				return page_folio(page);
-		}
+		if (hugetlb_cma[nid])
+			folio = cma_alloc_folio(hugetlb_cma[nid], order, gfp_mask);
 
-		if (!(gfp_mask & __GFP_THISNODE)) {
+		if (!folio && !(gfp_mask & __GFP_THISNODE)) {
 			for_each_node_mask(node, *nodemask) {
 				if (node == nid || !hugetlb_cma[node])
 					continue;
 
-				page = cma_alloc(hugetlb_cma[node], nr_pages,
-						huge_page_order(h), true);
-				if (page)
-					return page_folio(page);
+				folio = cma_alloc_folio(hugetlb_cma[node], order, gfp_mask);
+				if (folio)
+					break;
 			}
 		}
 	}
 #endif
+	if (!folio) {
+		folio = folio_alloc_gigantic(order, gfp_mask, nid, nodemask);
+		if (!folio)
+			return NULL;
+	}
 
-	page = alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
-	return page ? page_folio(page) : NULL;
+	if (folio_ref_freeze(folio, 1))
+		return folio;
+
+	pr_warn("HugeTLB: unexpected refcount on PFN %lu\n", folio_pfn(folio));
+	hugetlb_free_folio(folio);
+	if (!retried) {
+		retried = true;
+		goto retry;
+	}
+	return NULL;
 }
 
 #else /* !CONFIG_CONTIG_ALLOC */
@@ -1617,10 +1577,6 @@  static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
 {
 	return NULL;
 }
-static inline void free_gigantic_folio(struct folio *folio,
-						unsigned int order) { }
-static inline void destroy_compound_gigantic_folio(struct folio *folio,
-						unsigned int order) { }
 #endif
 
 /*
@@ -1747,20 +1703,9 @@  static void __update_and_free_hugetlb_folio(struct hstate *h,
 		folio_clear_hugetlb_hwpoison(folio);
 
 	folio_ref_unfreeze(folio, 1);
-
-	/*
-	 * Non-gigantic pages demoted from CMA allocated gigantic pages
-	 * need to be given back to CMA in free_gigantic_folio.
-	 */
-	if (hstate_is_gigantic(h) ||
-	    hugetlb_cma_folio(folio, huge_page_order(h))) {
-		destroy_compound_gigantic_folio(folio, huge_page_order(h));
-		free_gigantic_folio(folio, huge_page_order(h));
-	} else {
-		INIT_LIST_HEAD(&folio->_deferred_list);
-		folio_clear_partially_mapped(folio);
-		folio_put(folio);
-	}
+	INIT_LIST_HEAD(&folio->_deferred_list);
+	folio_clear_partially_mapped(folio);
+	hugetlb_free_folio(folio);
 }
 
 /*
@@ -2033,95 +1978,6 @@  static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int ni
 	spin_unlock_irq(&hugetlb_lock);
 }
 
-static bool __prep_compound_gigantic_folio(struct folio *folio,
-					unsigned int order, bool demote)
-{
-	int i, j;
-	int nr_pages = 1 << order;
-	struct page *p;
-
-	__folio_clear_reserved(folio);
-	for (i = 0; i < nr_pages; i++) {
-		p = folio_page(folio, i);
-
-		/*
-		 * For gigantic hugepages allocated through bootmem at
-		 * boot, it's safer to be consistent with the not-gigantic
-		 * hugepages and clear the PG_reserved bit from all tail pages
-		 * too.  Otherwise drivers using get_user_pages() to access tail
-		 * pages may get the reference counting wrong if they see
-		 * PG_reserved set on a tail page (despite the head page not
-		 * having PG_reserved set).  Enforcing this consistency between
-		 * head and tail pages allows drivers to optimize away a check
-		 * on the head page when they need know if put_page() is needed
-		 * after get_user_pages().
-		 */
-		if (i != 0)	/* head page cleared above */
-			__ClearPageReserved(p);
-		/*
-		 * Subtle and very unlikely
-		 *
-		 * Gigantic 'page allocators' such as memblock or cma will
-		 * return a set of pages with each page ref counted.  We need
-		 * to turn this set of pages into a compound page with tail
-		 * page ref counts set to zero.  Code such as speculative page
-		 * cache adding could take a ref on a 'to be' tail page.
-		 * We need to respect any increased ref count, and only set
-		 * the ref count to zero if count is currently 1.  If count
-		 * is not 1, we return an error.  An error return indicates
-		 * the set of pages can not be converted to a gigantic page.
-		 * The caller who allocated the pages should then discard the
-		 * pages using the appropriate free interface.
-		 *
-		 * In the case of demote, the ref count will be zero.
-		 */
-		if (!demote) {
-			if (!page_ref_freeze(p, 1)) {
-				pr_warn("HugeTLB page can not be used due to unexpected inflated ref count\n");
-				goto out_error;
-			}
-		} else {
-			VM_BUG_ON_PAGE(page_count(p), p);
-		}
-		if (i != 0)
-			set_compound_head(p, &folio->page);
-	}
-	__folio_set_head(folio);
-	/* we rely on prep_new_hugetlb_folio to set the hugetlb flag */
-	folio_set_order(folio, order);
-	atomic_set(&folio->_entire_mapcount, -1);
-	atomic_set(&folio->_large_mapcount, -1);
-	atomic_set(&folio->_pincount, 0);
-	return true;
-
-out_error:
-	/* undo page modifications made above */
-	for (j = 0; j < i; j++) {
-		p = folio_page(folio, j);
-		if (j != 0)
-			clear_compound_head(p);
-		set_page_refcounted(p);
-	}
-	/* need to clear PG_reserved on remaining tail pages  */
-	for (; j < nr_pages; j++) {
-		p = folio_page(folio, j);
-		__ClearPageReserved(p);
-	}
-	return false;
-}
-
-static bool prep_compound_gigantic_folio(struct folio *folio,
-							unsigned int order)
-{
-	return __prep_compound_gigantic_folio(folio, order, false);
-}
-
-static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
-							unsigned int order)
-{
-	return __prep_compound_gigantic_folio(folio, order, true);
-}
-
 /*
  * Find and lock address space (mapping) in write mode.
  *
@@ -2160,7 +2016,6 @@  static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
 	 */
 	if (node_alloc_noretry && node_isset(nid, *node_alloc_noretry))
 		alloc_try_hard = false;
-	gfp_mask |= __GFP_COMP|__GFP_NOWARN;
 	if (alloc_try_hard)
 		gfp_mask |= __GFP_RETRY_MAYFAIL;
 	if (nid == NUMA_NO_NODE)
@@ -2207,48 +2062,16 @@  static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
 	return folio;
 }
 
-static struct folio *__alloc_fresh_hugetlb_folio(struct hstate *h,
-				gfp_t gfp_mask, int nid, nodemask_t *nmask,
-				nodemask_t *node_alloc_noretry)
-{
-	struct folio *folio;
-	bool retry = false;
-
-retry:
-	if (hstate_is_gigantic(h))
-		folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
-	else
-		folio = alloc_buddy_hugetlb_folio(h, gfp_mask,
-				nid, nmask, node_alloc_noretry);
-	if (!folio)
-		return NULL;
-
-	if (hstate_is_gigantic(h)) {
-		if (!prep_compound_gigantic_folio(folio, huge_page_order(h))) {
-			/*
-			 * Rare failure to convert pages to compound page.
-			 * Free pages and try again - ONCE!
-			 */
-			free_gigantic_folio(folio, huge_page_order(h));
-			if (!retry) {
-				retry = true;
-				goto retry;
-			}
-			return NULL;
-		}
-	}
-
-	return folio;
-}
-
 static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
 		gfp_t gfp_mask, int nid, nodemask_t *nmask,
 		nodemask_t *node_alloc_noretry)
 {
 	struct folio *folio;
 
-	folio = __alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask,
-						node_alloc_noretry);
+	if (hstate_is_gigantic(h))
+		folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
+	else
+		folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, node_alloc_noretry);
 	if (folio)
 		init_new_hugetlb_folio(h, folio);
 	return folio;
@@ -2266,7 +2089,10 @@  static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
 {
 	struct folio *folio;
 
-	folio = __alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+	if (hstate_is_gigantic(h))
+		folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
+	else
+		folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
 	if (!folio)
 		return NULL;
 
@@ -2550,9 +2376,8 @@  struct folio *alloc_buddy_hugetlb_folio_with_mpol(struct hstate *h,
 
 	nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
 	if (mpol_is_preferred_many(mpol)) {
-		gfp_t gfp = gfp_mask | __GFP_NOWARN;
+		gfp_t gfp = gfp_mask & ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
 
-		gfp &=  ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
 		folio = alloc_surplus_hugetlb_folio(h, gfp, nid, nodemask);
 
 		/* Fallback to all nodes if page==NULL */
@@ -3334,6 +3159,7 @@  static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio,
 	for (pfn = head_pfn + start_page_number; pfn < end_pfn; pfn++) {
 		struct page *page = pfn_to_page(pfn);
 
+		__ClearPageReserved(folio_page(folio, pfn - head_pfn));
 		__init_single_page(page, pfn, zone, nid);
 		prep_compound_tail((struct page *)folio, pfn - head_pfn);
 		ret = page_ref_freeze(page, 1);
@@ -3950,21 +3776,16 @@  static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
 			continue;
 
 		list_del(&folio->lru);
-		/*
-		 * Use destroy_compound_hugetlb_folio_for_demote for all huge page
-		 * sizes as it will not ref count folios.
-		 */
-		destroy_compound_hugetlb_folio_for_demote(folio, huge_page_order(src));
+
+		split_page_owner(&folio->page, huge_page_order(src), huge_page_order(dst));
+		pgalloc_tag_split(&folio->page, 1 <<  huge_page_order(src));
 
 		for (i = 0; i < pages_per_huge_page(src); i += pages_per_huge_page(dst)) {
 			struct page *page = folio_page(folio, i);
 
-			if (hstate_is_gigantic(dst))
-				prep_compound_gigantic_folio_for_demote(page_folio(page),
-									dst->order);
-			else
-				prep_compound_page(page, dst->order);
-			set_page_private(page, 0);
+			page->mapping = NULL;
+			clear_compound_head(page);
+			prep_compound_page(page, dst->order);
 
 			init_new_hugetlb_folio(dst, page_folio(page));
 			list_add(&page->lru, &dst_list);