diff mbox series

[RFC] page_ext: create page extension for all memblock memory regions

Message ID 20220509074330.4822-1-jaewon31.kim@samsung.com (mailing list archive)
State New
Headers show
Series [RFC] page_ext: create page extension for all memblock memory regions | expand

Commit Message

Jaewon Kim May 9, 2022, 7:43 a.m. UTC
The page extension can be prepared for each section. But if the first
page of a section is not valid, the page extension for that section is not
initialized, even though there are many other valid pages within the section.

To support the page extension for all sections, refer to memblock memory
regions. If the page is valid use the nid from pfn_to_nid, otherwise use
the previous nid.

Also, this patch changes the log message to include the total number of
sections and the section size.

i.e.
allocated 100663296 bytes of page_ext for 64 sections (1 section : 0x8000000)

Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com>
---
 mm/page_ext.c | 42 ++++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

Comments

Jaewon Kim May 10, 2022, midnight UTC | #1
let me add Joonsoo Kim

2022년 5월 9일 (월) 오후 4:39, Jaewon Kim <jaewon31.kim@samsung.com>님이 작성:
>
> The page extension can be prepared for each section. But if the first
> page is not valid, the page extension for the section was not
> initialized though there were many other valid pages within the section.
>
> To support the page extension for all sections, refer to memblock memory
> regions. If the page is valid use the nid from pfn_to_nid, otherwise use
> the previous nid.
>
> Also this pagech changed log to include total sections and a section
> size.
>
> i.e.
> allocated 100663296 bytes of page_ext for 64 sections (1 section : 0x8000000)
>
> Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com>
> ---
>  mm/page_ext.c | 42 ++++++++++++++++++++++--------------------
>  1 file changed, 22 insertions(+), 20 deletions(-)
>
> diff --git a/mm/page_ext.c b/mm/page_ext.c
> index 2e66d934d63f..506d58b36a1d 100644
> --- a/mm/page_ext.c
> +++ b/mm/page_ext.c
> @@ -381,41 +381,43 @@ static int __meminit page_ext_callback(struct notifier_block *self,
>  void __init page_ext_init(void)
>  {
>         unsigned long pfn;
> -       int nid;
> +       int nid = 0;
> +       struct memblock_region *rgn;
> +       int nr_section = 0;
> +       unsigned long next_section_pfn = 0;
>
>         if (!invoke_need_callbacks())
>                 return;
>
> -       for_each_node_state(nid, N_MEMORY) {
> +       /*
> +        * iterate each memblock memory region and do not skip a section having
> +        * !pfn_valid(pfn)
> +        */
> +       for_each_mem_region(rgn) {
>                 unsigned long start_pfn, end_pfn;
>
> -               start_pfn = node_start_pfn(nid);
> -               end_pfn = node_end_pfn(nid);
> -               /*
> -                * start_pfn and end_pfn may not be aligned to SECTION and the
> -                * page->flags of out of node pages are not initialized.  So we
> -                * scan [start_pfn, the biggest section's pfn < end_pfn) here.
> -                */
> +               start_pfn = (unsigned long)(rgn->base >> PAGE_SHIFT);
> +               end_pfn = start_pfn + (unsigned long)(rgn->size >> PAGE_SHIFT);
> +
> +               if (start_pfn < next_section_pfn)
> +                       start_pfn = next_section_pfn;
> +
>                 for (pfn = start_pfn; pfn < end_pfn;
>                         pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
>
> -                       if (!pfn_valid(pfn))
> -                               continue;
> -                       /*
> -                        * Nodes's pfns can be overlapping.
> -                        * We know some arch can have a nodes layout such as
> -                        * -------------pfn-------------->
> -                        * N0 | N1 | N2 | N0 | N1 | N2|....
> -                        */
> -                       if (pfn_to_nid(pfn) != nid)
> -                               continue;
> +                       if (pfn_valid(pfn))
> +                               nid = pfn_to_nid(pfn);
> +                       nr_section++;
>                         if (init_section_page_ext(pfn, nid))
>                                 goto oom;
>                         cond_resched();
>                 }
> +               next_section_pfn = pfn;
>         }
> +
>         hotplug_memory_notifier(page_ext_callback, 0);
> -       pr_info("allocated %ld bytes of page_ext\n", total_usage);
> +       pr_info("allocated %ld bytes of page_ext for %d sections (1 section : 0x%x)\n",
> +               total_usage, nr_section, (1 << SECTION_SIZE_BITS));
>         invoke_init_callbacks();
>         return;
>
> --
> 2.17.1
>
Jaewon Kim May 17, 2022, 12:01 a.m. UTC | #2
Hello guys, could you look into this patch?

2022년 5월 10일 (화) 오전 9:00, Jaewon Kim <jaewon31.kim@gmail.com>님이 작성:
>
> let me add Joonsoo Kim
>
> 2022년 5월 9일 (월) 오후 4:39, Jaewon Kim <jaewon31.kim@samsung.com>님이 작성:
> >
> > The page extension can be prepared for each section. But if the first
> > page is not valid, the page extension for the section was not
> > initialized though there were many other valid pages within the section.
> >
> > To support the page extension for all sections, refer to memblock memory
> > regions. If the page is valid use the nid from pfn_to_nid, otherwise use
> > the previous nid.
> >
> > Also this pagech changed log to include total sections and a section
> > size.
> >
> > i.e.
> > allocated 100663296 bytes of page_ext for 64 sections (1 section : 0x8000000)
> >
> > Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com>
> > ---
> >  mm/page_ext.c | 42 ++++++++++++++++++++++--------------------
> >  1 file changed, 22 insertions(+), 20 deletions(-)
> >
> > diff --git a/mm/page_ext.c b/mm/page_ext.c
> > index 2e66d934d63f..506d58b36a1d 100644
> > --- a/mm/page_ext.c
> > +++ b/mm/page_ext.c
> > @@ -381,41 +381,43 @@ static int __meminit page_ext_callback(struct notifier_block *self,
> >  void __init page_ext_init(void)
> >  {
> >         unsigned long pfn;
> > -       int nid;
> > +       int nid = 0;
> > +       struct memblock_region *rgn;
> > +       int nr_section = 0;
> > +       unsigned long next_section_pfn = 0;
> >
> >         if (!invoke_need_callbacks())
> >                 return;
> >
> > -       for_each_node_state(nid, N_MEMORY) {
> > +       /*
> > +        * iterate each memblock memory region and do not skip a section having
> > +        * !pfn_valid(pfn)
> > +        */
> > +       for_each_mem_region(rgn) {
> >                 unsigned long start_pfn, end_pfn;
> >
> > -               start_pfn = node_start_pfn(nid);
> > -               end_pfn = node_end_pfn(nid);
> > -               /*
> > -                * start_pfn and end_pfn may not be aligned to SECTION and the
> > -                * page->flags of out of node pages are not initialized.  So we
> > -                * scan [start_pfn, the biggest section's pfn < end_pfn) here.
> > -                */
> > +               start_pfn = (unsigned long)(rgn->base >> PAGE_SHIFT);
> > +               end_pfn = start_pfn + (unsigned long)(rgn->size >> PAGE_SHIFT);
> > +
> > +               if (start_pfn < next_section_pfn)
> > +                       start_pfn = next_section_pfn;
> > +
> >                 for (pfn = start_pfn; pfn < end_pfn;
> >                         pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
> >
> > -                       if (!pfn_valid(pfn))
> > -                               continue;
> > -                       /*
> > -                        * Nodes's pfns can be overlapping.
> > -                        * We know some arch can have a nodes layout such as
> > -                        * -------------pfn-------------->
> > -                        * N0 | N1 | N2 | N0 | N1 | N2|....
> > -                        */
> > -                       if (pfn_to_nid(pfn) != nid)
> > -                               continue;
> > +                       if (pfn_valid(pfn))
> > +                               nid = pfn_to_nid(pfn);
> > +                       nr_section++;
> >                         if (init_section_page_ext(pfn, nid))
> >                                 goto oom;
> >                         cond_resched();
> >                 }
> > +               next_section_pfn = pfn;
> >         }
> > +
> >         hotplug_memory_notifier(page_ext_callback, 0);
> > -       pr_info("allocated %ld bytes of page_ext\n", total_usage);
> > +       pr_info("allocated %ld bytes of page_ext for %d sections (1 section : 0x%x)\n",
> > +               total_usage, nr_section, (1 << SECTION_SIZE_BITS));
> >         invoke_init_callbacks();
> >         return;
> >
> > --
> > 2.17.1
> >
Andrew Morton May 17, 2022, 12:33 a.m. UTC | #3
On Mon,  9 May 2022 16:43:30 +0900 Jaewon Kim <jaewon31.kim@samsung.com> wrote:

> The page extension can be prepared for each section. But if the first
> page is not valid, the page extension for the section was not
> initialized though there were many other valid pages within the section.
> 
> To support the page extension for all sections, refer to memblock memory
> regions. If the page is valid use the nid from pfn_to_nid, otherwise use
> the previous nid.
> 
> Also this pagech changed log to include total sections and a section
> size.
> 
> i.e.
> allocated 100663296 bytes of page_ext for 64 sections (1 section : 0x8000000)

Cc Joonsoo, who wrote this code.
Cc Mike, for memblock.

Thanks.

> 
> diff --git a/mm/page_ext.c b/mm/page_ext.c
> index 2e66d934d63f..506d58b36a1d 100644
> --- a/mm/page_ext.c
> +++ b/mm/page_ext.c
> @@ -381,41 +381,43 @@ static int __meminit page_ext_callback(struct notifier_block *self,
>  void __init page_ext_init(void)
>  {
>  	unsigned long pfn;
> -	int nid;
> +	int nid = 0;
> +	struct memblock_region *rgn;
> +	int nr_section = 0;
> +	unsigned long next_section_pfn = 0;
>  
>  	if (!invoke_need_callbacks())
>  		return;
>  
> -	for_each_node_state(nid, N_MEMORY) {
> +	/*
> +	 * iterate each memblock memory region and do not skip a section having
> +	 * !pfn_valid(pfn)
> +	 */
> +	for_each_mem_region(rgn) {
>  		unsigned long start_pfn, end_pfn;
>  
> -		start_pfn = node_start_pfn(nid);
> -		end_pfn = node_end_pfn(nid);
> -		/*
> -		 * start_pfn and end_pfn may not be aligned to SECTION and the
> -		 * page->flags of out of node pages are not initialized.  So we
> -		 * scan [start_pfn, the biggest section's pfn < end_pfn) here.
> -		 */
> +		start_pfn = (unsigned long)(rgn->base >> PAGE_SHIFT);
> +		end_pfn = start_pfn + (unsigned long)(rgn->size >> PAGE_SHIFT);
> +
> +		if (start_pfn < next_section_pfn)
> +			start_pfn = next_section_pfn;
> +
>  		for (pfn = start_pfn; pfn < end_pfn;
>  			pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
>  
> -			if (!pfn_valid(pfn))
> -				continue;
> -			/*
> -			 * Nodes's pfns can be overlapping.
> -			 * We know some arch can have a nodes layout such as
> -			 * -------------pfn-------------->
> -			 * N0 | N1 | N2 | N0 | N1 | N2|....
> -			 */
> -			if (pfn_to_nid(pfn) != nid)
> -				continue;
> +			if (pfn_valid(pfn))
> +				nid = pfn_to_nid(pfn);
> +			nr_section++;
>  			if (init_section_page_ext(pfn, nid))
>  				goto oom;
>  			cond_resched();
>  		}
> +		next_section_pfn = pfn;
>  	}
> +
>  	hotplug_memory_notifier(page_ext_callback, 0);
> -	pr_info("allocated %ld bytes of page_ext\n", total_usage);
> +	pr_info("allocated %ld bytes of page_ext for %d sections (1 section : 0x%x)\n",
> +		total_usage, nr_section, (1 << SECTION_SIZE_BITS));
>  	invoke_init_callbacks();
>  	return;
>  
> -- 
> 2.17.1
>
Mike Rapoport May 17, 2022, 8:25 a.m. UTC | #4
On Mon, May 16, 2022 at 05:33:21PM -0700, Andrew Morton wrote:
> On Mon,  9 May 2022 16:43:30 +0900 Jaewon Kim <jaewon31.kim@samsung.com> wrote:
> 
> > The page extension can be prepared for each section. But if the first
> > page is not valid, the page extension for the section was not
> > initialized though there were many other valid pages within the section.

What do you mean by "first page [in a section] is not valid"?
In recent kernels all struct pages in any section should be valid and
properly initialized.
 
> > To support the page extension for all sections, refer to memblock memory
> > regions. If the page is valid use the nid from pfn_to_nid, otherwise use
> > the previous nid.
> > 
> > Also this pagech changed log to include total sections and a section
> > size.
> > 
> > i.e.
> > allocated 100663296 bytes of page_ext for 64 sections (1 section : 0x8000000)
> 
> Cc Joonsoo, who wrote this code.
> Cc Mike, for memblock.
> 
> Thanks.
> 
> > 
> > diff --git a/mm/page_ext.c b/mm/page_ext.c
> > index 2e66d934d63f..506d58b36a1d 100644
> > --- a/mm/page_ext.c
> > +++ b/mm/page_ext.c
> > @@ -381,41 +381,43 @@ static int __meminit page_ext_callback(struct notifier_block *self,
> >  void __init page_ext_init(void)
> >  {
> >  	unsigned long pfn;
> > -	int nid;
> > +	int nid = 0;
> > +	struct memblock_region *rgn;
> > +	int nr_section = 0;
> > +	unsigned long next_section_pfn = 0;
> >  
> >  	if (!invoke_need_callbacks())
> >  		return;
> >  
> > -	for_each_node_state(nid, N_MEMORY) {
> > +	/*
> > +	 * iterate each memblock memory region and do not skip a section having
> > +	 * !pfn_valid(pfn)
> > +	 */
> > +	for_each_mem_region(rgn) {
> >  		unsigned long start_pfn, end_pfn;
> >  
> > -		start_pfn = node_start_pfn(nid);
> > -		end_pfn = node_end_pfn(nid);
> > -		/*
> > -		 * start_pfn and end_pfn may not be aligned to SECTION and the
> > -		 * page->flags of out of node pages are not initialized.  So we
> > -		 * scan [start_pfn, the biggest section's pfn < end_pfn) here.
> > -		 */
> > +		start_pfn = (unsigned long)(rgn->base >> PAGE_SHIFT);
> > +		end_pfn = start_pfn + (unsigned long)(rgn->size >> PAGE_SHIFT);
> > +
> > +		if (start_pfn < next_section_pfn)
> > +			start_pfn = next_section_pfn;
> > +
> >  		for (pfn = start_pfn; pfn < end_pfn;
> >  			pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
> >  
> > -			if (!pfn_valid(pfn))
> > -				continue;
> > -			/*
> > -			 * Nodes's pfns can be overlapping.
> > -			 * We know some arch can have a nodes layout such as
> > -			 * -------------pfn-------------->
> > -			 * N0 | N1 | N2 | N0 | N1 | N2|....
> > -			 */
> > -			if (pfn_to_nid(pfn) != nid)
> > -				continue;
> > +			if (pfn_valid(pfn))
> > +				nid = pfn_to_nid(pfn);
> > +			nr_section++;
> >  			if (init_section_page_ext(pfn, nid))
> >  				goto oom;
> >  			cond_resched();
> >  		}
> > +		next_section_pfn = pfn;
> >  	}
> > +
> >  	hotplug_memory_notifier(page_ext_callback, 0);
> > -	pr_info("allocated %ld bytes of page_ext\n", total_usage);
> > +	pr_info("allocated %ld bytes of page_ext for %d sections (1 section : 0x%x)\n",
> > +		total_usage, nr_section, (1 << SECTION_SIZE_BITS));
> >  	invoke_init_callbacks();
> >  	return;
> >  
> > -- 
> > 2.17.1
> >
Jaewon Kim May 17, 2022, 11:38 a.m. UTC | #5
Hello Mike Rapoport
Thank you for your comment.

Oh really? Could you point out the code or the commit regarding 'all
struct pages in any section should be valid and
properly initialized' ?

Actually I am using v5.10 based source tree on an arm64 device.
I tried to look up and found the following commit in v5.16-rc1, did
you mean this?
3de360c3fdb3 arm64/mm: drop HAVE_ARCH_PFN_VALID

I guess the memblock_is_memory() check in pfn_valid() in arch/arm64/mm/init.c
(v5.10) might affect page_ext_init().

Thank you

2022년 5월 17일 (화) 오후 5:25, Mike Rapoport <rppt@linux.ibm.com>님이 작성:
>
> On Mon, May 16, 2022 at 05:33:21PM -0700, Andrew Morton wrote:
> > On Mon,  9 May 2022 16:43:30 +0900 Jaewon Kim <jaewon31.kim@samsung.com> wrote:
> >
> > > The page extension can be prepared for each section. But if the first
> > > page is not valid, the page extension for the section was not
> > > initialized though there were many other valid pages within the section.
>
> What do you mean by "first page [in a section] is not valid"?
> In recent kernels all struct pages in any section should be valid and
> properly initialized.
>
> > > To support the page extension for all sections, refer to memblock memory
> > > regions. If the page is valid use the nid from pfn_to_nid, otherwise use
> > > the previous nid.
> > >
> > > Also this pagech changed log to include total sections and a section
> > > size.
> > >
> > > i.e.
> > > allocated 100663296 bytes of page_ext for 64 sections (1 section : 0x8000000)
> >
> > Cc Joonsoo, who wrote this code.
> > Cc Mike, for memblock.
> >
> > Thanks.
> >
> > >
> > > diff --git a/mm/page_ext.c b/mm/page_ext.c
> > > index 2e66d934d63f..506d58b36a1d 100644
> > > --- a/mm/page_ext.c
> > > +++ b/mm/page_ext.c
> > > @@ -381,41 +381,43 @@ static int __meminit page_ext_callback(struct notifier_block *self,
> > >  void __init page_ext_init(void)
> > >  {
> > >     unsigned long pfn;
> > > -   int nid;
> > > +   int nid = 0;
> > > +   struct memblock_region *rgn;
> > > +   int nr_section = 0;
> > > +   unsigned long next_section_pfn = 0;
> > >
> > >     if (!invoke_need_callbacks())
> > >             return;
> > >
> > > -   for_each_node_state(nid, N_MEMORY) {
> > > +   /*
> > > +    * iterate each memblock memory region and do not skip a section having
> > > +    * !pfn_valid(pfn)
> > > +    */
> > > +   for_each_mem_region(rgn) {
> > >             unsigned long start_pfn, end_pfn;
> > >
> > > -           start_pfn = node_start_pfn(nid);
> > > -           end_pfn = node_end_pfn(nid);
> > > -           /*
> > > -            * start_pfn and end_pfn may not be aligned to SECTION and the
> > > -            * page->flags of out of node pages are not initialized.  So we
> > > -            * scan [start_pfn, the biggest section's pfn < end_pfn) here.
> > > -            */
> > > +           start_pfn = (unsigned long)(rgn->base >> PAGE_SHIFT);
> > > +           end_pfn = start_pfn + (unsigned long)(rgn->size >> PAGE_SHIFT);
> > > +
> > > +           if (start_pfn < next_section_pfn)
> > > +                   start_pfn = next_section_pfn;
> > > +
> > >             for (pfn = start_pfn; pfn < end_pfn;
> > >                     pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
> > >
> > > -                   if (!pfn_valid(pfn))
> > > -                           continue;
> > > -                   /*
> > > -                    * Nodes's pfns can be overlapping.
> > > -                    * We know some arch can have a nodes layout such as
> > > -                    * -------------pfn-------------->
> > > -                    * N0 | N1 | N2 | N0 | N1 | N2|....
> > > -                    */
> > > -                   if (pfn_to_nid(pfn) != nid)
> > > -                           continue;
> > > +                   if (pfn_valid(pfn))
> > > +                           nid = pfn_to_nid(pfn);
> > > +                   nr_section++;
> > >                     if (init_section_page_ext(pfn, nid))
> > >                             goto oom;
> > >                     cond_resched();
> > >             }
> > > +           next_section_pfn = pfn;
> > >     }
> > > +
> > >     hotplug_memory_notifier(page_ext_callback, 0);
> > > -   pr_info("allocated %ld bytes of page_ext\n", total_usage);
> > > +   pr_info("allocated %ld bytes of page_ext for %d sections (1 section : 0x%x)\n",
> > > +           total_usage, nr_section, (1 << SECTION_SIZE_BITS));
> > >     invoke_init_callbacks();
> > >     return;
> > >
> > > --
> > > 2.17.1
> > >
>
> --
> Sincerely yours,
> Mike.
Mike Rapoport May 17, 2022, 12:55 p.m. UTC | #6
On Tue, May 17, 2022 at 08:38:18PM +0900, Jaewon Kim wrote:
> Hello Mike Rapoport
> Thank you for your comment.
> 
> Oh really? Could you point out the code or the commit regarding 'all
> struct pages in any section should be valid and
> properly initialized' ?

There were several commits that refactored the memory map initialization,
freeing of the unused memory map and abuse of pfn_valid() as a substitute
of "is memory valid" semantics.

> Actually I am using v5.10 based source tree on an arm64 device.

Then most probably your change is not relevant for the upstream kernel.
Did you observe any issues with page_ext initialization on v5.18-rcN
kernels?

> I tried to look up and found the following commit in v5.16-rc1, did
> you mean this?
> 3de360c3fdb3 arm64/mm: drop HAVE_ARCH_PFN_VALID

Yes, this is one of those commits.
 
> I guess memblock_is_memory code in pfn_valid in arch/arm64/mm/init.c, v5.10
> might affect the page_ext_init.

Yes. In 5.10 the pfn_valid() test in page_ext_init() will skip an entire
section if the first pfn in that section is not memory that can be mapped
in the linear map.

But again, this should be fixed in the latest kernels.
 
> Thank you
> 
> 2022년 5월 17일 (화) 오후 5:25, Mike Rapoport <rppt@linux.ibm.com>님이 작성:
> >
> > On Mon, May 16, 2022 at 05:33:21PM -0700, Andrew Morton wrote:
> > > On Mon,  9 May 2022 16:43:30 +0900 Jaewon Kim <jaewon31.kim@samsung.com> wrote:
> > >
> > > > The page extension can be prepared for each section. But if the first
> > > > page is not valid, the page extension for the section was not
> > > > initialized though there were many other valid pages within the section.
> >
> > What do you mean by "first page [in a section] is not valid"?
> > In recent kernels all struct pages in any section should be valid and
> > properly initialized.
> >
> > > > To support the page extension for all sections, refer to memblock memory
> > > > regions. If the page is valid use the nid from pfn_to_nid, otherwise use
> > > > the previous nid.
> > > >
> > > > Also this pagech changed log to include total sections and a section
> > > > size.
> > > >
> > > > i.e.
> > > > allocated 100663296 bytes of page_ext for 64 sections (1 section : 0x8000000)
> > >
> > > Cc Joonsoo, who wrote this code.
> > > Cc Mike, for memblock.
> > >
> > > Thanks.
> > >
> > > >
> > > > diff --git a/mm/page_ext.c b/mm/page_ext.c
> > > > index 2e66d934d63f..506d58b36a1d 100644
> > > > --- a/mm/page_ext.c
> > > > +++ b/mm/page_ext.c
> > > > @@ -381,41 +381,43 @@ static int __meminit page_ext_callback(struct notifier_block *self,
> > > >  void __init page_ext_init(void)
> > > >  {
> > > >     unsigned long pfn;
> > > > -   int nid;
> > > > +   int nid = 0;
> > > > +   struct memblock_region *rgn;
> > > > +   int nr_section = 0;
> > > > +   unsigned long next_section_pfn = 0;
> > > >
> > > >     if (!invoke_need_callbacks())
> > > >             return;
> > > >
> > > > -   for_each_node_state(nid, N_MEMORY) {
> > > > +   /*
> > > > +    * iterate each memblock memory region and do not skip a section having
> > > > +    * !pfn_valid(pfn)
> > > > +    */
> > > > +   for_each_mem_region(rgn) {
> > > >             unsigned long start_pfn, end_pfn;
> > > >
> > > > -           start_pfn = node_start_pfn(nid);
> > > > -           end_pfn = node_end_pfn(nid);
> > > > -           /*
> > > > -            * start_pfn and end_pfn may not be aligned to SECTION and the
> > > > -            * page->flags of out of node pages are not initialized.  So we
> > > > -            * scan [start_pfn, the biggest section's pfn < end_pfn) here.
> > > > -            */
> > > > +           start_pfn = (unsigned long)(rgn->base >> PAGE_SHIFT);
> > > > +           end_pfn = start_pfn + (unsigned long)(rgn->size >> PAGE_SHIFT);
> > > > +
> > > > +           if (start_pfn < next_section_pfn)
> > > > +                   start_pfn = next_section_pfn;
> > > > +
> > > >             for (pfn = start_pfn; pfn < end_pfn;
> > > >                     pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
> > > >
> > > > -                   if (!pfn_valid(pfn))
> > > > -                           continue;
> > > > -                   /*
> > > > -                    * Nodes's pfns can be overlapping.
> > > > -                    * We know some arch can have a nodes layout such as
> > > > -                    * -------------pfn-------------->
> > > > -                    * N0 | N1 | N2 | N0 | N1 | N2|....
> > > > -                    */
> > > > -                   if (pfn_to_nid(pfn) != nid)
> > > > -                           continue;
> > > > +                   if (pfn_valid(pfn))
> > > > +                           nid = pfn_to_nid(pfn);
> > > > +                   nr_section++;
> > > >                     if (init_section_page_ext(pfn, nid))
> > > >                             goto oom;
> > > >                     cond_resched();
> > > >             }
> > > > +           next_section_pfn = pfn;
> > > >     }
> > > > +
> > > >     hotplug_memory_notifier(page_ext_callback, 0);
> > > > -   pr_info("allocated %ld bytes of page_ext\n", total_usage);
> > > > +   pr_info("allocated %ld bytes of page_ext for %d sections (1 section : 0x%x)\n",
> > > > +           total_usage, nr_section, (1 << SECTION_SIZE_BITS));
> > > >     invoke_init_callbacks();
> > > >     return;
> > > >
> > > > --
> > > > 2.17.1
> > > >
> >
> > --
> > Sincerely yours,
> > Mike.
Jaewon Kim May 17, 2022, 1:10 p.m. UTC | #7
64
59

2022년 5월 17일 (화) 오후 9:55, Mike Rapoport <rppt@linux.ibm.com>님이 작성:
>
> On Tue, May 17, 2022 at 08:38:18PM +0900, Jaewon Kim wrote:
> > Hello Mike Rapoport
> > Thank you for your comment.
> >
> > Oh really? Could you point out the code or the commit regarding 'all
> > struct pages in any section should be valid and
> > properly initialized' ?
>
> There were several commits that refactored the memory map initialization,
> freeing of the unused memory map and abuse of pfn_valid() as a substitute
> of "is memory valid" semantics.
>
> > Actually I am using v5.10 based source tree on an arm64 device.
>
> Then most probably your change is not relevant for the upstream kernel.
> Did you observe any issues with page_ext initialization on v5.18-rcN
> kernels?

Actually, I observed that only 59 sections were initialized for page_ext, so
5 sections were missed.
There should be 64 sections in total: 64 sections * 128 MB = 8,192 MB.

>
> > I tried to look up and found the following commit in v5.16-rc1, did
> > you mean this?
> > 3de360c3fdb3 arm64/mm: drop HAVE_ARCH_PFN_VALID
>
> Yes, this is one of those commits.
>
> > I guess memblock_is_memory code in pfn_valid in arch/arm64/mm/init.c, v5.10
> > might affect the page_ext_init.
>
> Yes. In 5.10 the pfn_valid() test in page_ext_init() will skip an entire
> section if the first pfn in that section is not memory that can be mapped
> in the linear map.
>
> But again, this should be fixed in the latest kernels.

Great! Thank you for your explanation.
I will check it someday later when I use the latest kernel on our devices.
The next version on our devices seems to be v5.15 though.

Thank you
Jaewon Kim

>
> > Thank you
> >
> > 2022년 5월 17일 (화) 오후 5:25, Mike Rapoport <rppt@linux.ibm.com>님이 작성:
> > >
> > > On Mon, May 16, 2022 at 05:33:21PM -0700, Andrew Morton wrote:
> > > > On Mon,  9 May 2022 16:43:30 +0900 Jaewon Kim <jaewon31.kim@samsung.com> wrote:
> > > >
> > > > > The page extension can be prepared for each section. But if the first
> > > > > page is not valid, the page extension for the section was not
> > > > > initialized though there were many other valid pages within the section.
> > >
> > > What do you mean by "first page [in a section] is not valid"?
> > > In recent kernels all struct pages in any section should be valid and
> > > properly initialized.
> > >
> > > > > To support the page extension for all sections, refer to memblock memory
> > > > > regions. If the page is valid use the nid from pfn_to_nid, otherwise use
> > > > > the previous nid.
> > > > >
> > > > > Also this pagech changed log to include total sections and a section
> > > > > size.
> > > > >
> > > > > i.e.
> > > > > allocated 100663296 bytes of page_ext for 64 sections (1 section : 0x8000000)
> > > >
> > > > Cc Joonsoo, who wrote this code.
> > > > Cc Mike, for memblock.
> > > >
> > > > Thanks.
> > > >
> > > > >
> > > > > diff --git a/mm/page_ext.c b/mm/page_ext.c
> > > > > index 2e66d934d63f..506d58b36a1d 100644
> > > > > --- a/mm/page_ext.c
> > > > > +++ b/mm/page_ext.c
> > > > > @@ -381,41 +381,43 @@ static int __meminit page_ext_callback(struct notifier_block *self,
> > > > >  void __init page_ext_init(void)
> > > > >  {
> > > > >     unsigned long pfn;
> > > > > -   int nid;
> > > > > +   int nid = 0;
> > > > > +   struct memblock_region *rgn;
> > > > > +   int nr_section = 0;
> > > > > +   unsigned long next_section_pfn = 0;
> > > > >
> > > > >     if (!invoke_need_callbacks())
> > > > >             return;
> > > > >
> > > > > -   for_each_node_state(nid, N_MEMORY) {
> > > > > +   /*
> > > > > +    * iterate each memblock memory region and do not skip a section having
> > > > > +    * !pfn_valid(pfn)
> > > > > +    */
> > > > > +   for_each_mem_region(rgn) {
> > > > >             unsigned long start_pfn, end_pfn;
> > > > >
> > > > > -           start_pfn = node_start_pfn(nid);
> > > > > -           end_pfn = node_end_pfn(nid);
> > > > > -           /*
> > > > > -            * start_pfn and end_pfn may not be aligned to SECTION and the
> > > > > -            * page->flags of out of node pages are not initialized.  So we
> > > > > -            * scan [start_pfn, the biggest section's pfn < end_pfn) here.
> > > > > -            */
> > > > > +           start_pfn = (unsigned long)(rgn->base >> PAGE_SHIFT);
> > > > > +           end_pfn = start_pfn + (unsigned long)(rgn->size >> PAGE_SHIFT);
> > > > > +
> > > > > +           if (start_pfn < next_section_pfn)
> > > > > +                   start_pfn = next_section_pfn;
> > > > > +
> > > > >             for (pfn = start_pfn; pfn < end_pfn;
> > > > >                     pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
> > > > >
> > > > > -                   if (!pfn_valid(pfn))
> > > > > -                           continue;
> > > > > -                   /*
> > > > > -                    * Nodes's pfns can be overlapping.
> > > > > -                    * We know some arch can have a nodes layout such as
> > > > > -                    * -------------pfn-------------->
> > > > > -                    * N0 | N1 | N2 | N0 | N1 | N2|....
> > > > > -                    */
> > > > > -                   if (pfn_to_nid(pfn) != nid)
> > > > > -                           continue;
> > > > > +                   if (pfn_valid(pfn))
> > > > > +                           nid = pfn_to_nid(pfn);
> > > > > +                   nr_section++;
> > > > >                     if (init_section_page_ext(pfn, nid))
> > > > >                             goto oom;
> > > > >                     cond_resched();
> > > > >             }
> > > > > +           next_section_pfn = pfn;
> > > > >     }
> > > > > +
> > > > >     hotplug_memory_notifier(page_ext_callback, 0);
> > > > > -   pr_info("allocated %ld bytes of page_ext\n", total_usage);
> > > > > +   pr_info("allocated %ld bytes of page_ext for %d sections (1 section : 0x%x)\n",
> > > > > +           total_usage, nr_section, (1 << SECTION_SIZE_BITS));
> > > > >     invoke_init_callbacks();
> > > > >     return;
> > > > >
> > > > > --
> > > > > 2.17.1
> > > > >
> > >
> > > --
> > > Sincerely yours,
> > > Mike.
>
> --
> Sincerely yours,
> Mike.

2022년 5월 17일 (화) 오후 9:55, Mike Rapoport <rppt@linux.ibm.com>님이 작성:
>
> On Tue, May 17, 2022 at 08:38:18PM +0900, Jaewon Kim wrote:
> > Hello Mike Rapoport
> > Thank you for your comment.
> >
> > Oh really? Could you point out the code or the commit regarding 'all
> > struct pages in any section should be valid and
> > properly initialized' ?
>
> There were several commits that refactored the memory map initialization,
> freeing of the unused memory map and abuse of pfn_valid() as a substitute
> of "is memory valid" semantics.
>
> > Actually I am using v5.10 based source tree on an arm64 device.
>
> Then most probably your change is not relevant for the upstream kernel.
> Did you observe any issues with page_ext initialization on v5.18-rcN
> kernels?
>
> > I tried to look up and found the following commit in v5.16-rc1, did
> > you mean this?
> > 3de360c3fdb3 arm64/mm: drop HAVE_ARCH_PFN_VALID
>
> Yes, this is one of those commits.
>
> > I guess memblock_is_memory code in pfn_valid in arch/arm64/mm/init.c, v5.10
> > might affect the page_ext_init.
>
> Yes. In 5.10 the pfn_valid() test in page_ext_init() will skip an entire
> section if the first pfn in that section is not memory that can be mapped
> in the linear map.
>
> But again, this should be fixed in the latest kernels.
>
> > Thank you
> >
> > 2022년 5월 17일 (화) 오후 5:25, Mike Rapoport <rppt@linux.ibm.com>님이 작성:
> > >
> > > On Mon, May 16, 2022 at 05:33:21PM -0700, Andrew Morton wrote:
> > > > On Mon,  9 May 2022 16:43:30 +0900 Jaewon Kim <jaewon31.kim@samsung.com> wrote:
> > > >
> > > > > The page extension can be prepared for each section. But if the first
> > > > > page is not valid, the page extension for the section was not
> > > > > initialized though there were many other valid pages within the section.
> > >
> > > What do you mean by "first page [in a section] is not valid"?
> > > In recent kernels all struct pages in any section should be valid and
> > > properly initialized.
> > >
> > > > > To support the page extension for all sections, refer to memblock memory
> > > > > regions. If the page is valid use the nid from pfn_to_nid, otherwise use
> > > > > the previous nid.
> > > > >
> > > > > > Also this patch changed the log to include total sections and a section
> > > > > size.
> > > > >
> > > > > i.e.
> > > > > allocated 100663296 bytes of page_ext for 64 sections (1 section : 0x8000000)
> > > >
> > > > Cc Joonsoo, who wrote this code.
> > > > Cc Mike, for memblock.
> > > >
> > > > Thanks.
> > > >
> > > > >
> > > > > diff --git a/mm/page_ext.c b/mm/page_ext.c
> > > > > index 2e66d934d63f..506d58b36a1d 100644
> > > > > --- a/mm/page_ext.c
> > > > > +++ b/mm/page_ext.c
> > > > > @@ -381,41 +381,43 @@ static int __meminit page_ext_callback(struct notifier_block *self,
> > > > >  void __init page_ext_init(void)
> > > > >  {
> > > > >     unsigned long pfn;
> > > > > -   int nid;
> > > > > +   int nid = 0;
> > > > > +   struct memblock_region *rgn;
> > > > > +   int nr_section = 0;
> > > > > +   unsigned long next_section_pfn = 0;
> > > > >
> > > > >     if (!invoke_need_callbacks())
> > > > >             return;
> > > > >
> > > > > -   for_each_node_state(nid, N_MEMORY) {
> > > > > +   /*
> > > > > +    * iterate each memblock memory region and do not skip a section having
> > > > > +    * !pfn_valid(pfn)
> > > > > +    */
> > > > > +   for_each_mem_region(rgn) {
> > > > >             unsigned long start_pfn, end_pfn;
> > > > >
> > > > > -           start_pfn = node_start_pfn(nid);
> > > > > -           end_pfn = node_end_pfn(nid);
> > > > > -           /*
> > > > > -            * start_pfn and end_pfn may not be aligned to SECTION and the
> > > > > -            * page->flags of out of node pages are not initialized.  So we
> > > > > -            * scan [start_pfn, the biggest section's pfn < end_pfn) here.
> > > > > -            */
> > > > > +           start_pfn = (unsigned long)(rgn->base >> PAGE_SHIFT);
> > > > > +           end_pfn = start_pfn + (unsigned long)(rgn->size >> PAGE_SHIFT);
> > > > > +
> > > > > +           if (start_pfn < next_section_pfn)
> > > > > +                   start_pfn = next_section_pfn;
> > > > > +
> > > > >             for (pfn = start_pfn; pfn < end_pfn;
> > > > >                     pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
> > > > >
> > > > > -                   if (!pfn_valid(pfn))
> > > > > -                           continue;
> > > > > -                   /*
> > > > > -                    * Nodes's pfns can be overlapping.
> > > > > -                    * We know some arch can have a nodes layout such as
> > > > > -                    * -------------pfn-------------->
> > > > > -                    * N0 | N1 | N2 | N0 | N1 | N2|....
> > > > > -                    */
> > > > > -                   if (pfn_to_nid(pfn) != nid)
> > > > > -                           continue;
> > > > > +                   if (pfn_valid(pfn))
> > > > > +                           nid = pfn_to_nid(pfn);
> > > > > +                   nr_section++;
> > > > >                     if (init_section_page_ext(pfn, nid))
> > > > >                             goto oom;
> > > > >                     cond_resched();
> > > > >             }
> > > > > +           next_section_pfn = pfn;
> > > > >     }
> > > > > +
> > > > >     hotplug_memory_notifier(page_ext_callback, 0);
> > > > > -   pr_info("allocated %ld bytes of page_ext\n", total_usage);
> > > > > +   pr_info("allocated %ld bytes of page_ext for %d sections (1 section : 0x%x)\n",
> > > > > +           total_usage, nr_section, (1 << SECTION_SIZE_BITS));
> > > > >     invoke_init_callbacks();
> > > > >     return;
> > > > >
> > > > > --
> > > > > 2.17.1
> > > > >
> > >
> > > --
> > > Sincerely yours,
> > > Mike.
>
> --
> Sincerely yours,
> Mike.
Mike Rapoport May 18, 2022, 1:31 p.m. UTC | #8
On Tue, May 17, 2022 at 10:10:20PM +0900, Jaewon Kim wrote:
> 64
> 59
> 
> 2022년 5월 17일 (화) 오후 9:55, Mike Rapoport <rppt@linux.ibm.com>님이 작성:
> >
> > On Tue, May 17, 2022 at 08:38:18PM +0900, Jaewon Kim wrote:
> > > Hello Mike Rapoport
> > > Thank you for your comment.
> > >
> > > Oh really? Could you point out the code or the commit regarding 'all
> > > struct pages in any section should be valid and
> > > properly initialized' ?
> >
> > There were several commits that refactored the memory map initialization,
> > freeing of the unused memory map and abuse of pfn_valid() as a substitute
> > of "is memory valid" semantics.
> >
> > > Actually I am using v5.10 based source tree on an arm64 device.
> >
> > Then most probably your change is not relevant for the upstream kernel.
> > Did you observe any issues with page_ext initialization on v5.18-rcN
> > kernels?
> 
> Actually I observed only 59 sections were initialized for page_ext and
> missed 5 sections.
> In total it should be 64 sections * 128 MB = 8,192 MB

Does this happen with v5.10 based kernel or with v5.18-rcN based kernel? 

> > > I tried to look up and found the following commit in v5.16-rc1, did
> > > you mean this?
> > > 3de360c3fdb3 arm64/mm: drop HAVE_ARCH_PFN_VALID
> >
> > Yes, this is one of those commits.
> >
> > > I guess memblock_is_memory code in pfn_valid in arch/arm64/mm/init.c, v5.10
> > > might affect the page_ext_init.
> >
> > Yes. In 5.10 the pfn_valid() test in page_ext_init() will skip an entire
> > section if the first pfn in that section is not memory that can be mapped
> > in the linear map.
> >
> > But again, this should be fixed in the latest kernels.
> 
> Great! Thank you for your explanation.
> I will check it someday later when I use the latest kernel on our devices.
> The next version on our devices seems to be v5.15 though.
> 
> Thank you
> Jaewon Kim
Jaewon Kim May 19, 2022, 12:20 a.m. UTC | #9
As I said, it is a v5.10-based kernel.
 ; Actually I am using v5.10 based source tree on an arm64 device.

Thank you
Jaewon Kim

2022년 5월 18일 (수) 오후 10:31, Mike Rapoport <rppt@linux.ibm.com>님이 작성:
>
> On Tue, May 17, 2022 at 10:10:20PM +0900, Jaewon Kim wrote:
> > 64
> > 59
> >
> > 2022년 5월 17일 (화) 오후 9:55, Mike Rapoport <rppt@linux.ibm.com>님이 작성:
> > >
> > > On Tue, May 17, 2022 at 08:38:18PM +0900, Jaewon Kim wrote:
> > > > Hello Mike Rapoport
> > > > Thank you for your comment.
> > > >
> > > > Oh really? Could you point out the code or the commit regarding 'all
> > > > struct pages in any section should be valid and
> > > > properly initialized' ?
> > >
> > > There were several commits that refactored the memory map initialization,
> > > freeing of the unused memory map and abuse of pfn_valid() as a substitute
> > > of "is memory valid" semantics.
> > >
> > > > Actually I am using v5.10 based source tree on an arm64 device.
> > >
> > > Then most probably your change is not relevant for the upstream kernel.
> > > Did you observe any issues with page_ext initialization on v5.18-rcN
> > > kernels?
> >
> > Actually I observed only 59 sections were initialized for page_ext and
> > missed 5 sections.
> > In total it should be 64 sections * 128 MB = 8,192 MB
>
> Does this happen with v5.10 based kernel or with v5.18-rcN based kernel?
>
> > > > I tried to look up and found the following commit in v5.16-rc1, did
> > > > you mean this?
> > > > 3de360c3fdb3 arm64/mm: drop HAVE_ARCH_PFN_VALID
> > >
> > > Yes, this is one of those commits.
> > >
> > > > I guess memblock_is_memory code in pfn_valid in arch/arm64/mm/init.c, v5.10
> > > > might affect the page_ext_init.
> > >
> > > Yes. In 5.10 the pfn_valid() test in page_ext_init() will skip an entire
> > > section if the first pfn in that section is not memory that can be mapped
> > > in the linear map.
> > >
> > > But again, this should be fixed in the latest kernels.
> >
> > Great! Thank you for your explanation.
> > I will check it someday later when I use the latest kernel on our devices.
> > The next version on our devices seems to be v5.15 though.
> >
> > Thank you
> > Jaewon Kim
>
> --
> Sincerely yours,
> Mike.
diff mbox series

Patch

diff --git a/mm/page_ext.c b/mm/page_ext.c
index 2e66d934d63f..506d58b36a1d 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -381,41 +381,43 @@  static int __meminit page_ext_callback(struct notifier_block *self,
 void __init page_ext_init(void)
 {
 	unsigned long pfn;
-	int nid;
+	int nid = 0;
+	struct memblock_region *rgn;
+	int nr_section = 0;
+	unsigned long next_section_pfn = 0;
 
 	if (!invoke_need_callbacks())
 		return;
 
-	for_each_node_state(nid, N_MEMORY) {
+	/*
+	 * iterate each memblock memory region and do not skip a section having
+	 * !pfn_valid(pfn)
+	 */
+	for_each_mem_region(rgn) {
 		unsigned long start_pfn, end_pfn;
 
-		start_pfn = node_start_pfn(nid);
-		end_pfn = node_end_pfn(nid);
-		/*
-		 * start_pfn and end_pfn may not be aligned to SECTION and the
-		 * page->flags of out of node pages are not initialized.  So we
-		 * scan [start_pfn, the biggest section's pfn < end_pfn) here.
-		 */
+		start_pfn = (unsigned long)(rgn->base >> PAGE_SHIFT);
+		end_pfn = start_pfn + (unsigned long)(rgn->size >> PAGE_SHIFT);
+
+		if (start_pfn < next_section_pfn)
+			start_pfn = next_section_pfn;
+
 		for (pfn = start_pfn; pfn < end_pfn;
 			pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
 
-			if (!pfn_valid(pfn))
-				continue;
-			/*
-			 * Nodes's pfns can be overlapping.
-			 * We know some arch can have a nodes layout such as
-			 * -------------pfn-------------->
-			 * N0 | N1 | N2 | N0 | N1 | N2|....
-			 */
-			if (pfn_to_nid(pfn) != nid)
-				continue;
+			if (pfn_valid(pfn))
+				nid = pfn_to_nid(pfn);
+			nr_section++;
 			if (init_section_page_ext(pfn, nid))
 				goto oom;
 			cond_resched();
 		}
+		next_section_pfn = pfn;
 	}
+
 	hotplug_memory_notifier(page_ext_callback, 0);
-	pr_info("allocated %ld bytes of page_ext\n", total_usage);
+	pr_info("allocated %ld bytes of page_ext for %d sections (1 section : 0x%x)\n",
+		total_usage, nr_section, (1 << SECTION_SIZE_BITS));
 	invoke_init_callbacks();
 	return;