diff mbox series

[3/3] mm/mm_init.c: not always search next deferred_init_pfn from very beginning

Message ID 20240531002613.5231-3-richard.weiyang@gmail.com (mailing list archive)
State New
Headers show
Series [1/3] mm/mm_init.c: get the highest zone directly | expand

Commit Message

Wei Yang May 31, 2024, 12:26 a.m. UTC
In deferred_init_memmap(), we call
deferred_init_mem_pfn_range_in_zone() to get the next deferred_init_pfn,
but every call restarts the search from the very beginning.

Since the index is already saved in i, we can use it to resume the search
from i on the next call.

Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
---
 include/linux/memblock.h | 19 -------------------
 mm/mm_init.c             | 21 ++++++++++++---------
 2 files changed, 12 insertions(+), 28 deletions(-)

Comments

Mike Rapoport June 3, 2024, 6:27 a.m. UTC | #1
On Fri, May 31, 2024 at 12:26:13AM +0000, Wei Yang wrote:
> In function deferred_init_memmap(), we call
> deferred_init_mem_pfn_range_in_zone() to get the next deferred_init_pfn.
> But we always search it from the very beginning.
> 
> Since we save the index in i, we can leverage this to search from i next
> time.
> 
> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
> ---
>  include/linux/memblock.h | 19 -------------------
>  mm/mm_init.c             | 21 ++++++++++++---------
>  2 files changed, 12 insertions(+), 28 deletions(-)
> 
> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> index 6cf18dc2b4d0..45cac33334c8 100644
> --- a/include/linux/memblock.h
> +++ b/include/linux/memblock.h
> @@ -299,25 +299,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
>  void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
>  				  unsigned long *out_spfn,
>  				  unsigned long *out_epfn);
> -/**
> - * for_each_free_mem_pfn_range_in_zone - iterate through zone specific free
> - * memblock areas
> - * @i: u64 used as loop variable
> - * @zone: zone in which all of the memory blocks reside
> - * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
> - * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
> - *
> - * Walks over free (memory && !reserved) areas of memblock in a specific
> - * zone. Available once memblock and an empty zone is initialized. The main
> - * assumption is that the zone start, end, and pgdat have been associated.
> - * This way we can use the zone to determine NUMA node, and if a given part
> - * of the memblock is valid for the zone.
> - */
> -#define for_each_free_mem_pfn_range_in_zone(i, zone, p_start, p_end)	\
> -	for (i = 0,							\
> -	     __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end);	\
> -	     i != U64_MAX;					\
> -	     __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
>  
>  /**
>   * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index d05a4c38310f..ec17bf93f946 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -2023,18 +2023,21 @@ static unsigned long  __init deferred_init_pages(struct zone *zone,
>   * return false indicating there are no valid ranges left.
>   */
>  static bool __init
> -deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone,
> +deferred_init_mem_pfn_range_in_zone_from(u64 *i, struct zone *zone,

I don't think renaming this function is useful.
The comment above it, on the other hand, needs update.

>  				    unsigned long *spfn, unsigned long *epfn,
>  				    unsigned long first_init_pfn)
>  {
> -	u64 j;
> +	u64 j = *i;
> +
> +	if (j == 0)
> +		__next_mem_pfn_range_in_zone(&j, zone, spfn, epfn);
>  
>  	/*
>  	 * Start out by walking through the ranges in this zone that have
>  	 * already been initialized. We don't need to do anything with them
>  	 * so we just need to flush them out of the system.
>  	 */
> -	for_each_free_mem_pfn_range_in_zone(j, zone, spfn, epfn) {
> +	for_each_free_mem_pfn_range_in_zone_from(j, zone, spfn, epfn) {
>  		if (*epfn <= first_init_pfn)
>  			continue;
>  		if (*spfn < first_init_pfn)
> @@ -2106,9 +2109,9 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
>  {
>  	unsigned long spfn, epfn;
>  	struct zone *zone = arg;
> -	u64 i;
> +	u64 i = 0;
>  
> -	deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);
> +	deferred_init_mem_pfn_range_in_zone_from(&i, zone, &spfn, &epfn, start_pfn);
>  
>  	/*
>  	 * Initialize and free pages in MAX_PAGE_ORDER sized increments so that
> @@ -2137,7 +2140,7 @@ static int __init deferred_init_memmap(void *data)
>  	unsigned long start = jiffies;
>  	struct zone *zone;
>  	int max_threads;
> -	u64 i;
> +	u64 i = 0;
>  
>  	/* Bind memory initialisation thread to a local node if possible */
>  	if (!cpumask_empty(cpumask))
> @@ -2168,7 +2171,7 @@ static int __init deferred_init_memmap(void *data)
>  
>  	max_threads = deferred_page_init_max_threads(cpumask);
>  
> -	while (deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, first_init_pfn)) {
> +	while (deferred_init_mem_pfn_range_in_zone_from(&i, zone, &spfn, &epfn, first_init_pfn)) {
>  		first_init_pfn = ALIGN(epfn, PAGES_PER_SECTION);
>  		struct padata_mt_job job = {
>  			.thread_fn   = deferred_init_memmap_chunk,
> @@ -2212,7 +2215,7 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
>  	unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
>  	unsigned long spfn, epfn, flags;
>  	unsigned long nr_pages = 0;
> -	u64 i;
> +	u64 i = 0;
>  
>  	/* Only the last zone may have deferred pages */
>  	if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
> @@ -2230,7 +2233,7 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
>  	}
>  
>  	/* If the zone is empty somebody else may have cleared out the zone */
> -	if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
> +	if (!deferred_init_mem_pfn_range_in_zone_from(&i, zone, &spfn, &epfn,
>  						 first_deferred_pfn)) {
>  		pgdat->first_deferred_pfn = ULONG_MAX;
>  		pgdat_resize_unlock(pgdat, &flags);
> -- 
> 2.34.1
>
Wei Yang June 3, 2024, 8:16 p.m. UTC | #2
On Mon, Jun 03, 2024 at 09:27:10AM +0300, Mike Rapoport wrote:
>On Fri, May 31, 2024 at 12:26:13AM +0000, Wei Yang wrote:
>> In function deferred_init_memmap(), we call
>> deferred_init_mem_pfn_range_in_zone() to get the next deferred_init_pfn.
>> But we always search it from the very beginning.
>> 
>> Since we save the index in i, we can leverage this to search from i next
>> time.
>> 
>> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
>> ---
>>  include/linux/memblock.h | 19 -------------------
>>  mm/mm_init.c             | 21 ++++++++++++---------
>>  2 files changed, 12 insertions(+), 28 deletions(-)
>> 
>> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
>> index 6cf18dc2b4d0..45cac33334c8 100644
>> --- a/include/linux/memblock.h
>> +++ b/include/linux/memblock.h
>> @@ -299,25 +299,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
>>  void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
>>  				  unsigned long *out_spfn,
>>  				  unsigned long *out_epfn);
>> -/**
>> - * for_each_free_mem_pfn_range_in_zone - iterate through zone specific free
>> - * memblock areas
>> - * @i: u64 used as loop variable
>> - * @zone: zone in which all of the memory blocks reside
>> - * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
>> - * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
>> - *
>> - * Walks over free (memory && !reserved) areas of memblock in a specific
>> - * zone. Available once memblock and an empty zone is initialized. The main
>> - * assumption is that the zone start, end, and pgdat have been associated.
>> - * This way we can use the zone to determine NUMA node, and if a given part
>> - * of the memblock is valid for the zone.
>> - */
>> -#define for_each_free_mem_pfn_range_in_zone(i, zone, p_start, p_end)	\
>> -	for (i = 0,							\
>> -	     __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end);	\
>> -	     i != U64_MAX;					\
>> -	     __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
>>  
>>  /**
>>   * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific
>> diff --git a/mm/mm_init.c b/mm/mm_init.c
>> index d05a4c38310f..ec17bf93f946 100644
>> --- a/mm/mm_init.c
>> +++ b/mm/mm_init.c
>> @@ -2023,18 +2023,21 @@ static unsigned long  __init deferred_init_pages(struct zone *zone,
>>   * return false indicating there are no valid ranges left.
>>   */
>>  static bool __init
>> -deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone,
>> +deferred_init_mem_pfn_range_in_zone_from(u64 *i, struct zone *zone,
>
>I don't think renaming this function is useful.
>The comment above it, on the other hand, needs update.
>

I'm not good at writing descriptions; below is my adjustment. Does it look
good to you?

diff --git a/mm/mm_init.c b/mm/mm_init.c
index 1780567b5f6b..bc065728261d 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2003,13 +2003,14 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
 }
 
 /*
- * This function is meant to pre-load the iterator for the zone init.
+ * This function is meant to pre-load the iterator for the zone init from a
+ * given point. If it specifies to start from 0, do the initial iteration.
  * Specifically it walks through the ranges until we are caught up to the
  * first_init_pfn value and exits there. If we never encounter the value we
  * return false indicating there are no valid ranges left.
  */
Mike Rapoport June 5, 2024, 5:29 a.m. UTC | #3
On Mon, Jun 03, 2024 at 08:16:11PM +0000, Wei Yang wrote:
> On Mon, Jun 03, 2024 at 09:27:10AM +0300, Mike Rapoport wrote:
> >On Fri, May 31, 2024 at 12:26:13AM +0000, Wei Yang wrote:
> >> In function deferred_init_memmap(), we call
> >> deferred_init_mem_pfn_range_in_zone() to get the next deferred_init_pfn.
> >> But we always search it from the very beginning.
> >> 
> >> Since we save the index in i, we can leverage this to search from i next
> >> time.
> >> 
> >> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
> >> ---
> >>  include/linux/memblock.h | 19 -------------------
> >>  mm/mm_init.c             | 21 ++++++++++++---------
> >>  2 files changed, 12 insertions(+), 28 deletions(-)
> >> 
> >> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> >> index 6cf18dc2b4d0..45cac33334c8 100644
> >> --- a/include/linux/memblock.h
> >> +++ b/include/linux/memblock.h
> >> @@ -299,25 +299,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
> >>  void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
> >>  				  unsigned long *out_spfn,
> >>  				  unsigned long *out_epfn);
> >> -/**
> >> - * for_each_free_mem_pfn_range_in_zone - iterate through zone specific free
> >> - * memblock areas
> >> - * @i: u64 used as loop variable
> >> - * @zone: zone in which all of the memory blocks reside
> >> - * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
> >> - * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
> >> - *
> >> - * Walks over free (memory && !reserved) areas of memblock in a specific
> >> - * zone. Available once memblock and an empty zone is initialized. The main
> >> - * assumption is that the zone start, end, and pgdat have been associated.
> >> - * This way we can use the zone to determine NUMA node, and if a given part
> >> - * of the memblock is valid for the zone.
> >> - */
> >> -#define for_each_free_mem_pfn_range_in_zone(i, zone, p_start, p_end)	\
> >> -	for (i = 0,							\
> >> -	     __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end);	\
> >> -	     i != U64_MAX;					\
> >> -	     __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
> >>  
> >>  /**
> >>   * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific
> >> diff --git a/mm/mm_init.c b/mm/mm_init.c
> >> index d05a4c38310f..ec17bf93f946 100644
> >> --- a/mm/mm_init.c
> >> +++ b/mm/mm_init.c
> >> @@ -2023,18 +2023,21 @@ static unsigned long  __init deferred_init_pages(struct zone *zone,
> >>   * return false indicating there are no valid ranges left.
> >>   */
> >>  static bool __init
> >> -deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone,
> >> +deferred_init_mem_pfn_range_in_zone_from(u64 *i, struct zone *zone,
> >
> >I don't think renaming this function is useful.
> >The comment above it, on the other hand, needs update.
> >
> 
> Not good at the description, below is my adjustment. Does it looks good to
> you?
> 
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 1780567b5f6b..bc065728261d 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -2003,13 +2003,14 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
>  }
>  
>  /*
> - * This function is meant to pre-load the iterator for the zone init.
> + * This function is meant to pre-load the iterator for the zone init from a
> + * given point. If it specifies to start from 0, do the initial iteration.
>   * Specifically it walks through the ranges until we are caught up to the
>   * first_init_pfn value and exits there. If we never encounter the value we
>   * return false indicating there are no valid ranges left.
>   */

I've slightly updated it:

/*
 * This function is meant to pre-load the iterator for the zone init from
 * a given point.
 * Specifically it walks through the ranges starting with initial index
 * passed to it until we are caught up to the first_init_pfn value and
 * exits there. If we never encounter the value we return false indicating
 * there are no valid ranges left.
 */

> -- 
> Wei Yang
> Help you, Help me
diff mbox series

Patch

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 6cf18dc2b4d0..45cac33334c8 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -299,25 +299,6 @@  void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
 				  unsigned long *out_spfn,
 				  unsigned long *out_epfn);
-/**
- * for_each_free_mem_pfn_range_in_zone - iterate through zone specific free
- * memblock areas
- * @i: u64 used as loop variable
- * @zone: zone in which all of the memory blocks reside
- * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
- * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
- *
- * Walks over free (memory && !reserved) areas of memblock in a specific
- * zone. Available once memblock and an empty zone is initialized. The main
- * assumption is that the zone start, end, and pgdat have been associated.
- * This way we can use the zone to determine NUMA node, and if a given part
- * of the memblock is valid for the zone.
- */
-#define for_each_free_mem_pfn_range_in_zone(i, zone, p_start, p_end)	\
-	for (i = 0,							\
-	     __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end);	\
-	     i != U64_MAX;					\
-	     __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
 
 /**
  * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific
diff --git a/mm/mm_init.c b/mm/mm_init.c
index d05a4c38310f..ec17bf93f946 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2023,18 +2023,21 @@  static unsigned long  __init deferred_init_pages(struct zone *zone,
  * return false indicating there are no valid ranges left.
  */
 static bool __init
-deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone,
+deferred_init_mem_pfn_range_in_zone_from(u64 *i, struct zone *zone,
 				    unsigned long *spfn, unsigned long *epfn,
 				    unsigned long first_init_pfn)
 {
-	u64 j;
+	u64 j = *i;
+
+	if (j == 0)
+		__next_mem_pfn_range_in_zone(&j, zone, spfn, epfn);
 
 	/*
 	 * Start out by walking through the ranges in this zone that have
 	 * already been initialized. We don't need to do anything with them
 	 * so we just need to flush them out of the system.
 	 */
-	for_each_free_mem_pfn_range_in_zone(j, zone, spfn, epfn) {
+	for_each_free_mem_pfn_range_in_zone_from(j, zone, spfn, epfn) {
 		if (*epfn <= first_init_pfn)
 			continue;
 		if (*spfn < first_init_pfn)
@@ -2106,9 +2109,9 @@  deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
 {
 	unsigned long spfn, epfn;
 	struct zone *zone = arg;
-	u64 i;
+	u64 i = 0;
 
-	deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);
+	deferred_init_mem_pfn_range_in_zone_from(&i, zone, &spfn, &epfn, start_pfn);
 
 	/*
 	 * Initialize and free pages in MAX_PAGE_ORDER sized increments so that
@@ -2137,7 +2140,7 @@  static int __init deferred_init_memmap(void *data)
 	unsigned long start = jiffies;
 	struct zone *zone;
 	int max_threads;
-	u64 i;
+	u64 i = 0;
 
 	/* Bind memory initialisation thread to a local node if possible */
 	if (!cpumask_empty(cpumask))
@@ -2168,7 +2171,7 @@  static int __init deferred_init_memmap(void *data)
 
 	max_threads = deferred_page_init_max_threads(cpumask);
 
-	while (deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, first_init_pfn)) {
+	while (deferred_init_mem_pfn_range_in_zone_from(&i, zone, &spfn, &epfn, first_init_pfn)) {
 		first_init_pfn = ALIGN(epfn, PAGES_PER_SECTION);
 		struct padata_mt_job job = {
 			.thread_fn   = deferred_init_memmap_chunk,
@@ -2212,7 +2215,7 @@  bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
 	unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
 	unsigned long spfn, epfn, flags;
 	unsigned long nr_pages = 0;
-	u64 i;
+	u64 i = 0;
 
 	/* Only the last zone may have deferred pages */
 	if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
@@ -2230,7 +2233,7 @@  bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
 	}
 
 	/* If the zone is empty somebody else may have cleared out the zone */
-	if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
+	if (!deferred_init_mem_pfn_range_in_zone_from(&i, zone, &spfn, &epfn,
 						 first_deferred_pfn)) {
 		pgdat->first_deferred_pfn = ULONG_MAX;
 		pgdat_resize_unlock(pgdat, &flags);