[v4,3/8] hugetlb: perform vmemmap optimization on a list of pages

Message ID: 20230918230202.254631-4-mike.kravetz@oracle.com (mailing list archive)
State: New
Series: Batch hugetlb vmemmap modification operations

Commit Message

Mike Kravetz Sept. 18, 2023, 11:01 p.m. UTC
When adding hugetlb pages to the pool, we first gather the newly
allocated pages into a list before adding them to the pool.  Pass this
list of pages to a new routine, hugetlb_vmemmap_optimize_folios(), for
vmemmap optimization.

Due to significant differences in vmemmap initialization for
bootmem-allocated hugetlb pages, a new routine,
prep_and_add_bootmem_folios(), is created.

We also modify the routine vmemmap_should_optimize() to check for pages
that are already optimized.  There are code paths that might request
vmemmap optimization twice, and we want to make sure a second
optimization pass is not attempted.

Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
---
 mm/hugetlb.c         | 50 +++++++++++++++++++++++++++++++++++++-------
 mm/hugetlb_vmemmap.c | 11 ++++++++++
 mm/hugetlb_vmemmap.h |  5 +++++
 3 files changed, 58 insertions(+), 8 deletions(-)

Comments

Muchun Song Sept. 19, 2023, 3:10 a.m. UTC | #1
On 2023/9/19 07:01, Mike Kravetz wrote:
> When adding hugetlb pages to the pool, we first gather the newly
> allocated pages into a list before adding them to the pool.  Pass this
> list of pages to a new routine, hugetlb_vmemmap_optimize_folios(), for
> vmemmap optimization.
>
> Due to significant differences in vmemmap initialization for
> bootmem-allocated hugetlb pages, a new routine,
> prep_and_add_bootmem_folios(), is created.
>
> We also modify the routine vmemmap_should_optimize() to check for pages
> that are already optimized.  There are code paths that might request
> vmemmap optimization twice, and we want to make sure a second
> optimization pass is not attempted.
>
> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
> ---
>   mm/hugetlb.c         | 50 +++++++++++++++++++++++++++++++++++++-------
>   mm/hugetlb_vmemmap.c | 11 ++++++++++
>   mm/hugetlb_vmemmap.h |  5 +++++
>   3 files changed, 58 insertions(+), 8 deletions(-)
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 8624286be273..d6f3db3c1313 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -2269,6 +2269,11 @@ static void prep_and_add_allocated_folios(struct hstate *h,
>   {
>   	struct folio *folio, *tmp_f;
>   
> +	/*
> +	 * Send list for bulk vmemmap optimization processing
> +	 */

From the kernel coding-style documentation, the one-line comment format is "/* ... */".
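
For example, the comment added above would then be written as:

	/* Send list for bulk vmemmap optimization processing */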

> +	hugetlb_vmemmap_optimize_folios(h, folio_list);
> +
>   	/*
>   	 * Add all new pool pages to free lists in one lock cycle
>   	 */
> @@ -3309,6 +3314,40 @@ static void __init hugetlb_folio_init_vmemmap(struct folio *folio,
>   	prep_compound_head((struct page *)folio, huge_page_order(h));
>   }
>   
> +static void __init prep_and_add_bootmem_folios(struct hstate *h,
> +					struct list_head *folio_list)
> +{
> +	struct folio *folio, *tmp_f;
> +
> +	/*
> +	 * Send list for bulk vmemmap optimization processing
> +	 */
> +	hugetlb_vmemmap_optimize_folios(h, folio_list);
> +
> +	/*
> +	 * Add all new pool pages to free lists in one lock cycle
> +	 */
> +	spin_lock_irq(&hugetlb_lock);
> +	list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
> +		if (!folio_test_hugetlb_vmemmap_optimized(folio)) {
> +			/*
> +			 * If HVO fails, initialize all tail struct pages
> +			 * We do not worry about potential long lock hold
> +			 * time as this is early in boot and there should
> +			 * be no contention.
> +			 */
> +			hugetlb_folio_init_tail_vmemmap(folio,
> +					HUGETLB_VMEMMAP_RESERVE_PAGES,
> +					pages_per_huge_page(h));
> +		}
> +		__prep_account_new_huge_page(h, folio_nid(folio));
> +		enqueue_hugetlb_folio(h, folio);
> +	}
> +	spin_unlock_irq(&hugetlb_lock);
> +
> +	INIT_LIST_HEAD(folio_list);

I'm not sure what the purpose of reinitializing the list head is.

> +}
> +
>   /*
>    * Put bootmem huge pages into the standard lists after mem_map is up.
>    * Note: This only applies to gigantic (order > MAX_ORDER) pages.
> @@ -3329,7 +3368,7 @@ static void __init gather_bootmem_prealloc(void)
>   		 * in this list.  If so, process each size separately.
>   		 */
>   		if (h != prev_h && prev_h != NULL)
> -			prep_and_add_allocated_folios(prev_h, &folio_list);
> +			prep_and_add_bootmem_folios(prev_h, &folio_list);
>   		prev_h = h;
>   
>   		VM_BUG_ON(!hstate_is_gigantic(h));
> @@ -3337,12 +3376,7 @@ static void __init gather_bootmem_prealloc(void)
>   
>   		hugetlb_folio_init_vmemmap(folio, h,
>   					   HUGETLB_VMEMMAP_RESERVE_PAGES);
> -		__prep_new_hugetlb_folio(h, folio);
> -		/* If HVO fails, initialize all tail struct pages */
> -		if (!HPageVmemmapOptimized(&folio->page))
> -			hugetlb_folio_init_tail_vmemmap(folio,
> -						HUGETLB_VMEMMAP_RESERVE_PAGES,
> -						pages_per_huge_page(h));
> +		init_new_hugetlb_folio(h, folio);
>   		list_add(&folio->lru, &folio_list);
>   
>   		/*
> @@ -3354,7 +3388,7 @@ static void __init gather_bootmem_prealloc(void)
>   		cond_resched();
>   	}
>   
> -	prep_and_add_allocated_folios(h, &folio_list);
> +	prep_and_add_bootmem_folios(h, &folio_list);
>   }
>   
>   static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index 76682d1d79a7..4558b814ffab 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -483,6 +483,9 @@ int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head)
>   /* Return true iff a HugeTLB whose vmemmap should and can be optimized. */
>   static bool vmemmap_should_optimize(const struct hstate *h, const struct page *head)
>   {
> +	if (HPageVmemmapOptimized((struct page *)head))
> +		return false;
> +
>   	if (!READ_ONCE(vmemmap_optimize_enabled))
>   		return false;
>   
> @@ -572,6 +575,14 @@ void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head)
>   		SetHPageVmemmapOptimized(head);
>   }
>   
> +void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
> +{
> +	struct folio *folio;
> +
> +	list_for_each_entry(folio, folio_list, lru)
> +		hugetlb_vmemmap_optimize(h, &folio->page);
> +}
> +
>   static struct ctl_table hugetlb_vmemmap_sysctls[] = {
>   	{
>   		.procname	= "hugetlb_optimize_vmemmap",
> diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
> index 4573899855d7..c512e388dbb4 100644
> --- a/mm/hugetlb_vmemmap.h
> +++ b/mm/hugetlb_vmemmap.h
> @@ -20,6 +20,7 @@
>   #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
>   int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
>   void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);
> +void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list);
>   
>   static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
>   {
> @@ -48,6 +49,10 @@ static inline void hugetlb_vmemmap_optimize(const struct hstate *h, struct page
>   {
>   }
>   
> +static inline void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
> +{
> +}
> +
>   static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
>   {
>   	return 0;
Mike Kravetz Sept. 19, 2023, 8:49 p.m. UTC | #2
On 09/19/23 11:10, Muchun Song wrote:
> 
> 
> On 2023/9/19 07:01, Mike Kravetz wrote:
> > When adding hugetlb pages to the pool, we first gather the newly
> > allocated pages into a list before adding them to the pool.  Pass this
> > list of pages to a new routine, hugetlb_vmemmap_optimize_folios(), for
> > vmemmap optimization.
> >
> > Due to significant differences in vmemmap initialization for
> > bootmem-allocated hugetlb pages, a new routine,
> > prep_and_add_bootmem_folios(), is created.
> >
> > We also modify the routine vmemmap_should_optimize() to check for pages
> > that are already optimized.  There are code paths that might request
> > vmemmap optimization twice, and we want to make sure a second
> > optimization pass is not attempted.
> > 
> > Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
> > ---
> >   mm/hugetlb.c         | 50 +++++++++++++++++++++++++++++++++++++-------
> >   mm/hugetlb_vmemmap.c | 11 ++++++++++
> >   mm/hugetlb_vmemmap.h |  5 +++++
> >   3 files changed, 58 insertions(+), 8 deletions(-)
> > 
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index 8624286be273..d6f3db3c1313 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -2269,6 +2269,11 @@ static void prep_and_add_allocated_folios(struct hstate *h,
> >   {
> >   	struct folio *folio, *tmp_f;
> > +	/*
> > +	 * Send list for bulk vmemmap optimization processing
> > +	 */
> 
> From the kernel coding-style documentation, the one-line comment format is "/* ... */".
> 

Will change the comments introduced here.

> > +	hugetlb_vmemmap_optimize_folios(h, folio_list);
> > +
> >   	/*
> >   	 * Add all new pool pages to free lists in one lock cycle
> >   	 */
> > @@ -3309,6 +3314,40 @@ static void __init hugetlb_folio_init_vmemmap(struct folio *folio,
> >   	prep_compound_head((struct page *)folio, huge_page_order(h));
> >   }
> > +static void __init prep_and_add_bootmem_folios(struct hstate *h,
> > +					struct list_head *folio_list)
> > +{
> > +	struct folio *folio, *tmp_f;
> > +
> > +	/*
> > +	 * Send list for bulk vmemmap optimization processing
> > +	 */
> > +	hugetlb_vmemmap_optimize_folios(h, folio_list);
> > +
> > +	/*
> > +	 * Add all new pool pages to free lists in one lock cycle
> > +	 */
> > +	spin_lock_irq(&hugetlb_lock);
> > +	list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
> > +		if (!folio_test_hugetlb_vmemmap_optimized(folio)) {
> > +			/*
> > +			 * If HVO fails, initialize all tail struct pages
> > +			 * We do not worry about potential long lock hold
> > +			 * time as this is early in boot and there should
> > +			 * be no contention.
> > +			 */
> > +			hugetlb_folio_init_tail_vmemmap(folio,
> > +					HUGETLB_VMEMMAP_RESERVE_PAGES,
> > +					pages_per_huge_page(h));
> > +		}
> > +		__prep_account_new_huge_page(h, folio_nid(folio));
> > +		enqueue_hugetlb_folio(h, folio);
> > +	}
> > +	spin_unlock_irq(&hugetlb_lock);
> > +
> > +	INIT_LIST_HEAD(folio_list);
> 
> I'm not sure what the purpose of reinitializing the list head is.
> 

There really is no purpose.  This was copied from
prep_and_add_allocated_folios(), which also has this unnecessary call.
It is unnecessary because enqueue_hugetlb_folio() does a list_move()
for each folio on the list, so at the end of the loop we KNOW the list
is empty.
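
To see in user space why the list must be empty afterwards, here is a
minimal sketch.  The list helpers below are simplified stand-ins for
the kernel's <linux/list.h> (not the real implementations), and the
while loop stands in for the list_for_each_entry_safe() traversal:

#include <stdio.h>

/* Simplified stand-ins for the kernel's struct list_head helpers. */
struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

static void list_add(struct list_head *e, struct list_head *h)
{
	e->next = h->next;
	e->prev = h;
	h->next->prev = e;
	h->next = e;
}

static void list_del(struct list_head *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
}

/* As in the kernel: unlink from the current list, add to another. */
static void list_move(struct list_head *e, struct list_head *h)
{
	list_del(e);
	list_add(e, h);
}

static int list_empty(const struct list_head *h) { return h->next == h; }

int main(void)
{
	struct list_head folio_list, free_list, folios[3];

	INIT_LIST_HEAD(&folio_list);
	INIT_LIST_HEAD(&free_list);
	for (int i = 0; i < 3; i++)
		list_add(&folios[i], &folio_list);

	/* "enqueue" each entry; list_move() unlinks it from folio_list */
	while (!list_empty(&folio_list))
		list_move(folio_list.next, &free_list);

	/* prints 1: the source list drained itself, no reinit needed */
	printf("folio_list empty: %d\n", list_empty(&folio_list));
	return 0;
}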

I will remove it here and in prep_and_add_allocated_folios().

Thanks,
Muchun Song Sept. 20, 2023, 3:05 a.m. UTC | #3
> On Sep 20, 2023, at 04:49, Mike Kravetz <mike.kravetz@oracle.com> wrote:
> 
> On 09/19/23 11:10, Muchun Song wrote:
>> 
>> 
>> On 2023/9/19 07:01, Mike Kravetz wrote:
>>> When adding hugetlb pages to the pool, we first gather the newly
>>> allocated pages into a list before adding them to the pool.  Pass this
>>> list of pages to a new routine, hugetlb_vmemmap_optimize_folios(), for
>>> vmemmap optimization.
>>>
>>> Due to significant differences in vmemmap initialization for
>>> bootmem-allocated hugetlb pages, a new routine,
>>> prep_and_add_bootmem_folios(), is created.
>>>
>>> We also modify the routine vmemmap_should_optimize() to check for pages
>>> that are already optimized.  There are code paths that might request
>>> vmemmap optimization twice, and we want to make sure a second
>>> optimization pass is not attempted.
>>> 
>>> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
>>> ---
>>>  mm/hugetlb.c         | 50 +++++++++++++++++++++++++++++++++++++-------
>>>  mm/hugetlb_vmemmap.c | 11 ++++++++++
>>>  mm/hugetlb_vmemmap.h |  5 +++++
>>>  3 files changed, 58 insertions(+), 8 deletions(-)
>>> 
>>> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
>>> index 8624286be273..d6f3db3c1313 100644
>>> --- a/mm/hugetlb.c
>>> +++ b/mm/hugetlb.c
>>> @@ -2269,6 +2269,11 @@ static void prep_and_add_allocated_folios(struct hstate *h,
>>>  {
>>>   struct folio *folio, *tmp_f;
>>> + /*
>>> +  * Send list for bulk vmemmap optimization processing
>>> +  */
>> 
>> From the kernel coding-style documentation, the one-line comment format is "/* ... */".
>> 
> 
> Will change the comments introduced here.

BTW, there are some other places like this as well; please update them all, thanks.

> 
>>> + hugetlb_vmemmap_optimize_folios(h, folio_list);
>>> +
>>>   /*
>>>    * Add all new pool pages to free lists in one lock cycle
>>>    */
>>> @@ -3309,6 +3314,40 @@ static void __init hugetlb_folio_init_vmemmap(struct folio *folio,
>>>   prep_compound_head((struct page *)folio, huge_page_order(h));
>>>  }
>>> +static void __init prep_and_add_bootmem_folios(struct hstate *h,
>>> + struct list_head *folio_list)
>>> +{
>>> + struct folio *folio, *tmp_f;
>>> +
>>> + /*
>>> +  * Send list for bulk vmemmap optimization processing
>>> +  */
>>> + hugetlb_vmemmap_optimize_folios(h, folio_list);
>>> +
>>> + /*
>>> +  * Add all new pool pages to free lists in one lock cycle
>>> +  */
>>> + spin_lock_irq(&hugetlb_lock);
>>> + list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
>>> + if (!folio_test_hugetlb_vmemmap_optimized(folio)) {
>>> + /*
>>> +  * If HVO fails, initialize all tail struct pages
>>> +  * We do not worry about potential long lock hold
>>> +  * time as this is early in boot and there should
>>> +  * be no contention.
>>> +  */
>>> + hugetlb_folio_init_tail_vmemmap(folio,
>>> + HUGETLB_VMEMMAP_RESERVE_PAGES,
>>> + pages_per_huge_page(h));
>>> + }
>>> + __prep_account_new_huge_page(h, folio_nid(folio));
>>> + enqueue_hugetlb_folio(h, folio);
>>> + }
>>> + spin_unlock_irq(&hugetlb_lock);
>>> +
>>> + INIT_LIST_HEAD(folio_list);
>> 
>> I'm not sure what the purpose of reinitializing the list head is.
>> 
> 
> There really is no purpose.  This was copied from
> prep_and_add_allocated_folios(), which also has this unnecessary call.
> It is unnecessary because enqueue_hugetlb_folio() does a list_move()
> for each folio on the list, so at the end of the loop we KNOW the list
> is empty.

Right.

> 
> I will remove it here and in prep_and_add_allocated_folios().

Thanks.

> 
> Thanks,
> -- 
> Mike Kravetz

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8624286be273..d6f3db3c1313 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2269,6 +2269,11 @@  static void prep_and_add_allocated_folios(struct hstate *h,
 {
 	struct folio *folio, *tmp_f;
 
+	/*
+	 * Send list for bulk vmemmap optimization processing
+	 */
+	hugetlb_vmemmap_optimize_folios(h, folio_list);
+
 	/*
 	 * Add all new pool pages to free lists in one lock cycle
 	 */
@@ -3309,6 +3314,40 @@  static void __init hugetlb_folio_init_vmemmap(struct folio *folio,
 	prep_compound_head((struct page *)folio, huge_page_order(h));
 }
 
+static void __init prep_and_add_bootmem_folios(struct hstate *h,
+					struct list_head *folio_list)
+{
+	struct folio *folio, *tmp_f;
+
+	/*
+	 * Send list for bulk vmemmap optimization processing
+	 */
+	hugetlb_vmemmap_optimize_folios(h, folio_list);
+
+	/*
+	 * Add all new pool pages to free lists in one lock cycle
+	 */
+	spin_lock_irq(&hugetlb_lock);
+	list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
+		if (!folio_test_hugetlb_vmemmap_optimized(folio)) {
+			/*
+			 * If HVO fails, initialize all tail struct pages
+			 * We do not worry about potential long lock hold
+			 * time as this is early in boot and there should
+			 * be no contention.
+			 */
+			hugetlb_folio_init_tail_vmemmap(folio,
+					HUGETLB_VMEMMAP_RESERVE_PAGES,
+					pages_per_huge_page(h));
+		}
+		__prep_account_new_huge_page(h, folio_nid(folio));
+		enqueue_hugetlb_folio(h, folio);
+	}
+	spin_unlock_irq(&hugetlb_lock);
+
+	INIT_LIST_HEAD(folio_list);
+}
+
 /*
  * Put bootmem huge pages into the standard lists after mem_map is up.
  * Note: This only applies to gigantic (order > MAX_ORDER) pages.
@@ -3329,7 +3368,7 @@  static void __init gather_bootmem_prealloc(void)
 		 * in this list.  If so, process each size separately.
 		 */
 		if (h != prev_h && prev_h != NULL)
-			prep_and_add_allocated_folios(prev_h, &folio_list);
+			prep_and_add_bootmem_folios(prev_h, &folio_list);
 		prev_h = h;
 
 		VM_BUG_ON(!hstate_is_gigantic(h));
@@ -3337,12 +3376,7 @@  static void __init gather_bootmem_prealloc(void)
 
 		hugetlb_folio_init_vmemmap(folio, h,
 					   HUGETLB_VMEMMAP_RESERVE_PAGES);
-		__prep_new_hugetlb_folio(h, folio);
-		/* If HVO fails, initialize all tail struct pages */
-		if (!HPageVmemmapOptimized(&folio->page))
-			hugetlb_folio_init_tail_vmemmap(folio,
-						HUGETLB_VMEMMAP_RESERVE_PAGES,
-						pages_per_huge_page(h));
+		init_new_hugetlb_folio(h, folio);
 		list_add(&folio->lru, &folio_list);
 
 		/*
@@ -3354,7 +3388,7 @@  static void __init gather_bootmem_prealloc(void)
 		cond_resched();
 	}
 
-	prep_and_add_allocated_folios(h, &folio_list);
+	prep_and_add_bootmem_folios(h, &folio_list);
 }
 
 static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 76682d1d79a7..4558b814ffab 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -483,6 +483,9 @@  int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head)
 /* Return true iff a HugeTLB whose vmemmap should and can be optimized. */
 static bool vmemmap_should_optimize(const struct hstate *h, const struct page *head)
 {
+	if (HPageVmemmapOptimized((struct page *)head))
+		return false;
+
 	if (!READ_ONCE(vmemmap_optimize_enabled))
 		return false;
 
@@ -572,6 +575,14 @@  void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head)
 		SetHPageVmemmapOptimized(head);
 }
 
+void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
+{
+	struct folio *folio;
+
+	list_for_each_entry(folio, folio_list, lru)
+		hugetlb_vmemmap_optimize(h, &folio->page);
+}
+
 static struct ctl_table hugetlb_vmemmap_sysctls[] = {
 	{
 		.procname	= "hugetlb_optimize_vmemmap",
diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
index 4573899855d7..c512e388dbb4 100644
--- a/mm/hugetlb_vmemmap.h
+++ b/mm/hugetlb_vmemmap.h
@@ -20,6 +20,7 @@ 
 #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
 void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);
+void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list);
 
 static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
 {
@@ -48,6 +49,10 @@  static inline void hugetlb_vmemmap_optimize(const struct hstate *h, struct page
 {
 }
 
+static inline void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
+{
+}
+
 static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
 {
 	return 0;
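
For a sense of what each successfully optimized folio saves: on a
typical x86-64 configuration (4KB base pages, 64-byte struct page),
the vmemmap for a 2MB hugetlb page spans 8 pages, and HVO leaves one
of them in place.  A quick sketch of that arithmetic (the constants
are typical-configuration assumptions, not values taken from this
patch):

#include <stdio.h>

/*
 * Back-of-the-envelope HVO savings for one 2MB hugetlb page.
 * Assumes x86-64 with 4KB base pages and a 64-byte struct page;
 * these are typical-configuration assumptions, not values taken
 * from this patch.
 */
int main(void)
{
	unsigned long base_page = 4096;
	unsigned long huge_page = 2UL << 20;		/* 2MB */
	unsigned long struct_page_size = 64;

	unsigned long nr_struct_pages = huge_page / base_page;	/* 512 */
	unsigned long vmemmap_bytes = nr_struct_pages * struct_page_size;
	unsigned long vmemmap_pages = vmemmap_bytes / base_page;	/* 8 */
	unsigned long kept = 1;		/* HVO keeps one vmemmap page */

	printf("vmemmap pages: %lu, freed by HVO: %lu (%lu KB per 2MB folio)\n",
	       vmemmap_pages, vmemmap_pages - kept,
	       (vmemmap_pages - kept) * base_page / 1024);
	return 0;
}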