
[v2] mm: increase totalram_pages on freeing to buddy system

Message ID 20240726003612.5578-1-richard.weiyang@gmail.com (mailing list archive)
State New
Series [v2] mm: increase totalram_pages on freeing to buddy system

Commit Message

Wei Yang July 26, 2024, 12:36 a.m. UTC
Total memory represents the pages managed by the buddy system. Since the
introduction of DEFERRED_STRUCT_PAGE_INIT, it may also count pages before
they are actually managed.

free_low_memory_core_early() returns the number of all free pages, even
though at this point only the early-initialized pages have been freed to
the buddy system. This means the total memory reported at this moment is
not correct.

Let's increase it only when pages are actually freed to the buddy system.

Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
CC: David Hildenbrand <david@redhat.com>

---
v2:
  * rebase on current master
  * the places that would be affected are already merged
---
 mm/memblock.c   | 22 ++++++----------------
 mm/page_alloc.c |  4 +---
 2 files changed, 7 insertions(+), 19 deletions(-)
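
For context, adjust_managed_page_count() in mm/page_alloc.c looks roughly
like the sketch below (simplified, details may differ between kernel
versions). It bumps the per-zone managed_pages and the global totalram_pages
together, which is why calling it unconditionally in __free_pages_core()
makes totalram_pages grow only as pages are actually handed to the buddy
system:

void adjust_managed_page_count(struct page *page, long count)
{
        /* pages managed by the buddy allocator in this zone */
        atomic_long_add(count, &page_zone(page)->managed_pages);
        /* global total RAM, previously bumped in bulk by memblock_free_all() */
        totalram_pages_add(count);
#ifdef CONFIG_HIGHMEM
        if (PageHighMem(page))
                totalhigh_pages_add(count);
#endif
}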

Comments

David Hildenbrand July 26, 2024, 12:06 p.m. UTC | #1
On 26.07.24 02:36, Wei Yang wrote:
> Total memory represents pages managed by buddy system. After the
> introduction of DEFERRED_STRUCT_PAGE_INIT, it may count the pages before
> being managed.
> 
> free_low_memory_core_early() returns number of pages for all free pages,
> even at this moment only early initialized pages are freed to buddy
> system. This means the total memory at this moment is not correct.
> 
> Let's increase it when pages are freed to buddy system.
> 
> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
> CC: David Hildenbrand <david@redhat.com>

[...]

> index 71d2716a554f..4701bc442df6 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1248,16 +1248,14 @@ void __meminit __free_pages_core(struct page *page, unsigned int order,
>   		 * map it first.
>   		 */
>   		debug_pagealloc_map_pages(page, nr_pages);
> -		adjust_managed_page_count(page, nr_pages);
>   	} else {
>   		for (loop = 0; loop < nr_pages; loop++, p++) {
>   			__ClearPageReserved(p);
>   			set_page_count(p, 0);
>   		}
>   
> -		/* memblock adjusts totalram_pages() manually. */
> -		atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
>   	}
> +	adjust_managed_page_count(page, nr_pages);
>   
>   	if (page_contains_unaccepted(page, order)) {
>   		if (order == MAX_PAGE_ORDER && __free_unaccepted(page))

Nice!

Acked-by: David Hildenbrand <david@redhat.com>
Mike Rapoport July 28, 2024, 6:01 a.m. UTC | #2
On Fri, Jul 26, 2024 at 12:36:12AM +0000, Wei Yang wrote:
> Total memory represents pages managed by buddy system. After the
> introduction of DEFERRED_STRUCT_PAGE_INIT, it may count the pages before
> being managed.
> 
> free_low_memory_core_early() returns number of pages for all free pages,
> even at this moment only early initialized pages are freed to buddy
> system. This means the total memory at this moment is not correct.
> 
> Let's increase it when pages are freed to buddy system.
> 
> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
> CC: David Hildenbrand <david@redhat.com>

Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
 
> ---
> v2:
>   * rebase on current master
>   * those places would be affected are merged
> ---
>  mm/memblock.c   | 22 ++++++----------------
>  mm/page_alloc.c |  4 +---
>  2 files changed, 7 insertions(+), 19 deletions(-)
> 
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 213057603b65..592a22b64682 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -1711,10 +1711,8 @@ void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
>  	cursor = PFN_UP(base);
>  	end = PFN_DOWN(base + size);
>  
> -	for (; cursor < end; cursor++) {
> +	for (; cursor < end; cursor++)
>  		memblock_free_pages(pfn_to_page(cursor), cursor, 0);
> -		totalram_pages_inc();
> -	}
>  }
>  
>  /*
> @@ -2140,7 +2138,7 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
>  	}
>  }
>  
> -static unsigned long __init __free_memory_core(phys_addr_t start,
> +static void __init __free_memory_core(phys_addr_t start,
>  				 phys_addr_t end)
>  {
>  	unsigned long start_pfn = PFN_UP(start);
> @@ -2148,11 +2146,9 @@ static unsigned long __init __free_memory_core(phys_addr_t start,
>  				      PFN_DOWN(end), max_low_pfn);
>  
>  	if (start_pfn >= end_pfn)
> -		return 0;
> +		return;
>  
>  	__free_pages_memory(start_pfn, end_pfn);
> -
> -	return end_pfn - start_pfn;
>  }
>  
>  static void __init memmap_init_reserved_pages(void)
> @@ -2194,9 +2190,8 @@ static void __init memmap_init_reserved_pages(void)
>  	}
>  }
>  
> -static unsigned long __init free_low_memory_core_early(void)
> +static void __init free_low_memory_core_early(void)
>  {
> -	unsigned long count = 0;
>  	phys_addr_t start, end;
>  	u64 i;
>  
> @@ -2211,9 +2206,7 @@ static unsigned long __init free_low_memory_core_early(void)
>  	 */
>  	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end,
>  				NULL)
> -		count += __free_memory_core(start, end);
> -
> -	return count;
> +		__free_memory_core(start, end);
>  }
>  
>  static int reset_managed_pages_done __initdata;
> @@ -2244,13 +2237,10 @@ void __init reset_all_zones_managed_pages(void)
>   */
>  void __init memblock_free_all(void)
>  {
> -	unsigned long pages;
> -
>  	free_unused_memmap();
>  	reset_all_zones_managed_pages();
>  
> -	pages = free_low_memory_core_early();
> -	totalram_pages_add(pages);
> +	free_low_memory_core_early();
>  }
>  
>  /* Keep a table to reserve named memory */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 71d2716a554f..4701bc442df6 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1248,16 +1248,14 @@ void __meminit __free_pages_core(struct page *page, unsigned int order,
>  		 * map it first.
>  		 */
>  		debug_pagealloc_map_pages(page, nr_pages);
> -		adjust_managed_page_count(page, nr_pages);
>  	} else {
>  		for (loop = 0; loop < nr_pages; loop++, p++) {
>  			__ClearPageReserved(p);
>  			set_page_count(p, 0);
>  		}
>  
> -		/* memblock adjusts totalram_pages() manually. */
> -		atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
>  	}
> +	adjust_managed_page_count(page, nr_pages);
>  
>  	if (page_contains_unaccepted(page, order)) {
>  		if (order == MAX_PAGE_ORDER && __free_unaccepted(page))
> -- 
> 2.34.1
>
Oscar Salvador July 29, 2024, 4:43 a.m. UTC | #3
On Fri, Jul 26, 2024 at 12:36:12AM +0000, Wei Yang wrote:
> Total memory represents pages managed by buddy system. After the
> introduction of DEFERRED_STRUCT_PAGE_INIT, it may count the pages before
> being managed.
> 
> free_low_memory_core_early() returns number of pages for all free pages,
> even at this moment only early initialized pages are freed to buddy
> system. This means the total memory at this moment is not correct.
> 
> Let's increase it when pages are freed to buddy system.
> 
> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
> CC: David Hildenbrand <david@redhat.com>

Reviewed-by: Oscar Salvador <osalvador@suse.de>
Nathan Chancellor Aug. 3, 2024, 12:07 a.m. UTC | #4
Hi Wei,

On Fri, Jul 26, 2024 at 12:36:12AM +0000, Wei Yang wrote:
> Total memory represents pages managed by buddy system. After the
> introduction of DEFERRED_STRUCT_PAGE_INIT, it may count the pages before
> being managed.
> 
> free_low_memory_core_early() returns number of pages for all free pages,
> even at this moment only early initialized pages are freed to buddy
> system. This means the total memory at this moment is not correct.
> 
> Let's increase it when pages are freed to buddy system.
> 
> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
> CC: David Hildenbrand <david@redhat.com>
> 
> ---
> v2:
>   * rebase on current master
>   * those places would be affected are merged
> ---
>  mm/memblock.c   | 22 ++++++----------------
>  mm/page_alloc.c |  4 +---
>  2 files changed, 7 insertions(+), 19 deletions(-)
> 
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 213057603b65..592a22b64682 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -1711,10 +1711,8 @@ void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
>  	cursor = PFN_UP(base);
>  	end = PFN_DOWN(base + size);
>  
> -	for (; cursor < end; cursor++) {
> +	for (; cursor < end; cursor++)
>  		memblock_free_pages(pfn_to_page(cursor), cursor, 0);
> -		totalram_pages_inc();
> -	}
>  }
>  
>  /*
> @@ -2140,7 +2138,7 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
>  	}
>  }
>  
> -static unsigned long __init __free_memory_core(phys_addr_t start,
> +static void __init __free_memory_core(phys_addr_t start,
>  				 phys_addr_t end)
>  {
>  	unsigned long start_pfn = PFN_UP(start);
> @@ -2148,11 +2146,9 @@ static unsigned long __init __free_memory_core(phys_addr_t start,
>  				      PFN_DOWN(end), max_low_pfn);
>  
>  	if (start_pfn >= end_pfn)
> -		return 0;
> +		return;
>  
>  	__free_pages_memory(start_pfn, end_pfn);
> -
> -	return end_pfn - start_pfn;
>  }
>  
>  static void __init memmap_init_reserved_pages(void)
> @@ -2194,9 +2190,8 @@ static void __init memmap_init_reserved_pages(void)
>  	}
>  }
>  
> -static unsigned long __init free_low_memory_core_early(void)
> +static void __init free_low_memory_core_early(void)
>  {
> -	unsigned long count = 0;
>  	phys_addr_t start, end;
>  	u64 i;
>  
> @@ -2211,9 +2206,7 @@ static unsigned long __init free_low_memory_core_early(void)
>  	 */
>  	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end,
>  				NULL)
> -		count += __free_memory_core(start, end);
> -
> -	return count;
> +		__free_memory_core(start, end);
>  }
>  
>  static int reset_managed_pages_done __initdata;
> @@ -2244,13 +2237,10 @@ void __init reset_all_zones_managed_pages(void)
>   */
>  void __init memblock_free_all(void)
>  {
> -	unsigned long pages;
> -
>  	free_unused_memmap();
>  	reset_all_zones_managed_pages();
>  
> -	pages = free_low_memory_core_early();
> -	totalram_pages_add(pages);
> +	free_low_memory_core_early();
>  }
>  
>  /* Keep a table to reserve named memory */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 71d2716a554f..4701bc442df6 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1248,16 +1248,14 @@ void __meminit __free_pages_core(struct page *page, unsigned int order,
>  		 * map it first.
>  		 */
>  		debug_pagealloc_map_pages(page, nr_pages);
> -		adjust_managed_page_count(page, nr_pages);
>  	} else {
>  		for (loop = 0; loop < nr_pages; loop++, p++) {
>  			__ClearPageReserved(p);
>  			set_page_count(p, 0);
>  		}
>  
> -		/* memblock adjusts totalram_pages() manually. */
> -		atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
>  	}
> +	adjust_managed_page_count(page, nr_pages);
>  
>  	if (page_contains_unaccepted(page, order)) {
>  		if (order == MAX_PAGE_ORDER && __free_unaccepted(page))
> -- 
> 2.34.1
> 
> 

After this change landed as commit 0e690b558b53 ("mm: increase totalram_pages
on freeing to buddy system") in -next, I see an issue when booting
OpenSUSE's powerpc64le configuration in QEMU (I have not yet tried to see
whether a specific configuration option triggers this, but it does not
happen with all of my powerpc configurations):

$ curl -LSso .config https://github.com/openSUSE/kernel-source/raw/master/config/ppc64le/default

$ make -skj"$(nproc)" ARCH=powerpc CROSS_COMPILE=powerpc64-linux- olddefconfig zImage.epapr

$ qemu-system-ppc64 \
    -display none \
    -nodefaults \
    -device ipmi-bmc-sim,id=bmc0 \
    -device isa-ipmi-bt,bmc=bmc0,irq=10 \
    -machine powernv \
    -kernel arch/powerpc/boot/zImage.epapr \
    -initrd rootfs.cpio \
    -m 2G \
    -serial mon:stdio
...
[    0.000000][    T0] Linux version 6.11.0-rc1-default+ (nathan@thelio-3990X) (powerpc64-linux-gcc (GCC) 14.1.0, GNU ld (GNU Binutils) 2.42) #1 SMP Fri Aug  2 16:58:44 MST 2024
...
[    1.583547][    T1] Run /init as init process
/init: exec: line 15: /sbin/init: not found
[    1.810389][    T1] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00007f00
[    1.811467][    T1] CPU: 0 UID: 0 PID: 1 Comm: init Not tainted 6.11.0-rc1-default+ #1 531e1f40bec7af44d9126123987f957a9e2338c9
[    1.812238][    T1] Hardware name: IBM PowerNV (emulated by qemu) POWER10 0x801200 opal:v7.1 PowerNV
[    1.812801][    T1] Call Trace:
[    1.813011][    T1] [c0000000031f79b0] [c000000001036910] dump_stack_lvl+0x84/0xc0 (unreliable)
[    1.813977][    T1] [c0000000031f79e0] [c00000000015c1c0] panic+0x174/0x454
[    1.814298][    T1] [c0000000031f7a80] [c0000000001658c0] do_exit+0xb70/0xb80
[    1.814648][    T1] [c0000000031f7b50] [c000000000165b3c] do_group_exit+0x4c/0xc0
[    1.814975][    T1] [c0000000031f7b90] [c000000000165bd8] sys_exit_group+0x28/0x30
[    1.815307][    T1] [c0000000031f7bb0] [c00000000002e850] system_call_exception+0x120/0x240
[    1.815704][    T1] [c0000000031f7e50] [c00000000000cfdc] system_call_vectored_common+0x15c/0x2ec
[    1.816108][    T1] --- interrupt: 3000 at 0x7fff88e61bd8
[    1.816561][    T1] NIP:  00007fff88e61bd8 LR: 00007fff88e61bd8 CTR: 0000000000000000
[    1.816910][    T1] REGS: c0000000031f7e80 TRAP: 3000   Not tainted  (6.11.0-rc1-default+)
[    1.817301][    T1] MSR:  900000000280f033 <SF,HV,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE>  CR: 42002828  XER: 00000000
[    1.817866][    T1] IRQMASK: 0
[    1.817866][    T1] GPR00: 00000000000000ea 00007ffff22b6400 00007fff88f57300 000000000000007f
[    1.817866][    T1] GPR04: 0000000000000000 00007ffff22b6600 00007ffff22b6610 00007fff8900ad00
[    1.817866][    T1] GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[    1.817866][    T1] GPR12: 0000000000000000 00007fff8900ad00 0000000000000000 0000000000000000
[    1.817866][    T1] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[    1.817866][    T1] GPR20: 0000000000000000 0000000000000000 00007fff89000000 000000011cf103f8
[    1.817866][    T1] GPR24: 00007ffff22b6f70 0000000000000000 00007ffff22b6f58 00007fff890012a0
[    1.817866][    T1] GPR28: 0000000000000002 fffffffffffff000 000000000000007f 00007fff89003d10
[    1.821081][    T1] NIP [00007fff88e61bd8] 0x7fff88e61bd8
[    1.821327][    T1] LR [00007fff88e61bd8] 0x7fff88e61bd8
[    1.821577][    T1] --- interrupt: 3000
[    1.823172][    T1] Rebooting in 90 seconds.

The rootfs is available from [1] (arm-rootfs.cpio.zst, decompress it
with zstd first); it just shuts down the machine on boot.
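
For example (renaming the decompressed archive to match the -initrd argument
above):

$ zstd -d arm-rootfs.cpio.zst -o rootfs.cpio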

If there is any other information I can provide or patches I can test, I
am more than happy to do so.

Cheers,
Nathan

[1]: https://github.com/ClangBuiltLinux/boot-utils/releases/latest

# bad: [f524a5e4dfb75b277c9a5ad819ca5f035f490f14] Add linux-next specific files for 20240802
# good: [c0ecd6388360d930440cc5554026818895199923] Merge tag 'pci-v6.11-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci
git bisect start 'f524a5e4dfb75b277c9a5ad819ca5f035f490f14' 'c0ecd6388360d930440cc5554026818895199923'
# bad: [ef6591d6312c51ae1d10c5e89e6e1e76f06a464f] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
git bisect bad ef6591d6312c51ae1d10c5e89e6e1e76f06a464f
# bad: [5c8256684186fe9d766fa96c9f9de693beda6deb] Merge branch 'for-next' of https://github.com/sophgo/linux.git
git bisect bad 5c8256684186fe9d766fa96c9f9de693beda6deb
# bad: [a97441aa9432ebdceb6216d14553f88555d2f980] Merge branch 'mm-everything' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
git bisect bad a97441aa9432ebdceb6216d14553f88555d2f980
# good: [961f57fbf5386c7e04bb8e8d2467bca5ab96b11d] Merge branch 'clk-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git
git bisect good 961f57fbf5386c7e04bb8e8d2467bca5ab96b11d
# bad: [0e690b558b53293f236ebd80b73e90e510c624c9] mm: increase totalram_pages on freeing to buddy system
git bisect bad 0e690b558b53293f236ebd80b73e90e510c624c9
# good: [1e29ee6c9f6b04bf40cffdffb21853fd77ca0163] mm/swap: rename cpu_fbatches->activate
git bisect good 1e29ee6c9f6b04bf40cffdffb21853fd77ca0163
# good: [d88e021d3f5bd6708b786ca129b187d6e6e9de69] s390/uv: drop arch_make_page_accessible()
git bisect good d88e021d3f5bd6708b786ca129b187d6e6e9de69
# good: [dedad7f924e24c1ad124b4586f8b937614ddc1d8] mm/cma: change the addition of totalcma_pages in the cma_init_reserved_mem
git bisect good dedad7f924e24c1ad124b4586f8b937614ddc1d8
# good: [0e004bbadb9c350807ff253335152b516a6f0083] slub: introduce CONFIG_SLUB_RCU_DEBUG
git bisect good 0e004bbadb9c350807ff253335152b516a6f0083
# good: [726459950d9f530a7bc4410b88573bbfd9563560] mm: document __GFP_NOFAIL must be blockable
git bisect good 726459950d9f530a7bc4410b88573bbfd9563560
# good: [6284ca1e5dd29188a970b188b0da5bb9142f3e91] mm: prohibit NULL deference exposed for unsupported non-blockable __GFP_NOFAIL
git bisect good 6284ca1e5dd29188a970b188b0da5bb9142f3e91
# good: [9cd03c3e91270d0e9d0d7e57c81dc10c1de94f6a] mm/memory_hotplug: get rid of __ref
git bisect good 9cd03c3e91270d0e9d0d7e57c81dc10c1de94f6a
# first bad commit: [0e690b558b53293f236ebd80b73e90e510c624c9] mm: increase totalram_pages on freeing to buddy system
Wei Yang Aug. 5, 2024, 3:47 p.m. UTC | #5
On Fri, Aug 02, 2024 at 05:07:48PM -0700, Nathan Chancellor wrote:
>Hi Wei,
>
>On Fri, Jul 26, 2024 at 12:36:12AM +0000, Wei Yang wrote:
>> Total memory represents pages managed by buddy system. After the
>> introduction of DEFERRED_STRUCT_PAGE_INIT, it may count the pages before
>> being managed.
>> 
>> free_low_memory_core_early() returns number of pages for all free pages,
>> even at this moment only early initialized pages are freed to buddy
>> system. This means the total memory at this moment is not correct.
>> 
>> Let's increase it when pages are freed to buddy system.
>> 
>> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
>> CC: David Hildenbrand <david@redhat.com>
>> 
>> ---
>> v2:
>>   * rebase on current master
>>   * those places would be affected are merged
>> ---
>>  mm/memblock.c   | 22 ++++++----------------
>>  mm/page_alloc.c |  4 +---
>>  2 files changed, 7 insertions(+), 19 deletions(-)
>> 
>> diff --git a/mm/memblock.c b/mm/memblock.c
>> index 213057603b65..592a22b64682 100644
>> --- a/mm/memblock.c
>> +++ b/mm/memblock.c
>> @@ -1711,10 +1711,8 @@ void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
>>  	cursor = PFN_UP(base);
>>  	end = PFN_DOWN(base + size);
>>  
>> -	for (; cursor < end; cursor++) {
>> +	for (; cursor < end; cursor++)
>>  		memblock_free_pages(pfn_to_page(cursor), cursor, 0);
>> -		totalram_pages_inc();
>> -	}
>>  }
>>  
>>  /*
>> @@ -2140,7 +2138,7 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
>>  	}
>>  }
>>  
>> -static unsigned long __init __free_memory_core(phys_addr_t start,
>> +static void __init __free_memory_core(phys_addr_t start,
>>  				 phys_addr_t end)
>>  {
>>  	unsigned long start_pfn = PFN_UP(start);
>> @@ -2148,11 +2146,9 @@ static unsigned long __init __free_memory_core(phys_addr_t start,
>>  				      PFN_DOWN(end), max_low_pfn);
>>  
>>  	if (start_pfn >= end_pfn)
>> -		return 0;
>> +		return;
>>  
>>  	__free_pages_memory(start_pfn, end_pfn);
>> -
>> -	return end_pfn - start_pfn;
>>  }
>>  
>>  static void __init memmap_init_reserved_pages(void)
>> @@ -2194,9 +2190,8 @@ static void __init memmap_init_reserved_pages(void)
>>  	}
>>  }
>>  
>> -static unsigned long __init free_low_memory_core_early(void)
>> +static void __init free_low_memory_core_early(void)
>>  {
>> -	unsigned long count = 0;
>>  	phys_addr_t start, end;
>>  	u64 i;
>>  
>> @@ -2211,9 +2206,7 @@ static unsigned long __init free_low_memory_core_early(void)
>>  	 */
>>  	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end,
>>  				NULL)
>> -		count += __free_memory_core(start, end);
>> -
>> -	return count;
>> +		__free_memory_core(start, end);
>>  }
>>  
>>  static int reset_managed_pages_done __initdata;
>> @@ -2244,13 +2237,10 @@ void __init reset_all_zones_managed_pages(void)
>>   */
>>  void __init memblock_free_all(void)
>>  {
>> -	unsigned long pages;
>> -
>>  	free_unused_memmap();
>>  	reset_all_zones_managed_pages();
>>  
>> -	pages = free_low_memory_core_early();
>> -	totalram_pages_add(pages);
>> +	free_low_memory_core_early();
>>  }
>>  
>>  /* Keep a table to reserve named memory */
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 71d2716a554f..4701bc442df6 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -1248,16 +1248,14 @@ void __meminit __free_pages_core(struct page *page, unsigned int order,
>>  		 * map it first.
>>  		 */
>>  		debug_pagealloc_map_pages(page, nr_pages);
>> -		adjust_managed_page_count(page, nr_pages);
>>  	} else {
>>  		for (loop = 0; loop < nr_pages; loop++, p++) {
>>  			__ClearPageReserved(p);
>>  			set_page_count(p, 0);
>>  		}
>>  
>> -		/* memblock adjusts totalram_pages() manually. */
>> -		atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
>>  	}
>> +	adjust_managed_page_count(page, nr_pages);
>>  
>>  	if (page_contains_unaccepted(page, order)) {
>>  		if (order == MAX_PAGE_ORDER && __free_unaccepted(page))
>> -- 
>> 2.34.1
>> 
>> 
>
>After this change as commit 0e690b558b53 ("mm: increase totalram_pages
>on freeing to buddy system") in -next, I see an issue when booting
>OpenSUSE's powerpc64le configuration in QEMU (I have not tried to see if
>there is a specific configuration option triggers this yet but it does
>not happen with all of my powerpc configurations):
>
>$ curl -LSso .config https://github.com/openSUSE/kernel-source/raw/master/config/ppc64le/default
>
>$ make -skj"$(nproc)" ARCH=powerpc CROSS_COMPILE=powerpc64-linux- olddefconfig zImage.epapr
>
>$ qemu-system-ppc64 \
>    -display none \
>    -nodefaults \
>    -device ipmi-bmc-sim,id=bmc0 \
>    -device isa-ipmi-bt,bmc=bmc0,irq=10 \
>    -machine powernv \
>    -kernel arch/powerpc/boot/zImage.epapr \
>    -initrd rootfs.cpio \
>    -m 2G \
>    -serial mon:stdio

Hi, Nathan

Thanks for testing.

After some debugging, the breakage is in mm/shmem.c: the functions
shmem_default_max_blocks() / shmem_default_max_inodes() are called by
shmem_fill_super() during the early boot stage.

But I can't get the total number of free pages from memblock there, as those
functions will also be called later, after memblock has been discarded.

I may need to find another way to handle this.
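
For reference, the shmem defaults in question scale with totalram_pages();
roughly the following sketch (simplified, details vary between kernel
versions):

/* mm/shmem.c, approximately: tmpfs defaults derived from totalram_pages() */
static unsigned long shmem_default_max_blocks(void)
{
        return totalram_pages() / 2;
}

static unsigned long shmem_default_max_inodes(void)
{
        unsigned long nr_pages = totalram_pages();

        return min(nr_pages - totalhigh_pages(), nr_pages / 2);
}

With this change, totalram_pages() only reflects the early-initialized pages
when shmem_fill_super() runs, so a tmpfs mounted this early would be sized
from a much smaller value than before.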

Patch

diff --git a/mm/memblock.c b/mm/memblock.c
index 213057603b65..592a22b64682 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1711,10 +1711,8 @@  void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
 	cursor = PFN_UP(base);
 	end = PFN_DOWN(base + size);
 
-	for (; cursor < end; cursor++) {
+	for (; cursor < end; cursor++)
 		memblock_free_pages(pfn_to_page(cursor), cursor, 0);
-		totalram_pages_inc();
-	}
 }
 
 /*
@@ -2140,7 +2138,7 @@  static void __init __free_pages_memory(unsigned long start, unsigned long end)
 	}
 }
 
-static unsigned long __init __free_memory_core(phys_addr_t start,
+static void __init __free_memory_core(phys_addr_t start,
 				 phys_addr_t end)
 {
 	unsigned long start_pfn = PFN_UP(start);
@@ -2148,11 +2146,9 @@  static unsigned long __init __free_memory_core(phys_addr_t start,
 				      PFN_DOWN(end), max_low_pfn);
 
 	if (start_pfn >= end_pfn)
-		return 0;
+		return;
 
 	__free_pages_memory(start_pfn, end_pfn);
-
-	return end_pfn - start_pfn;
 }
 
 static void __init memmap_init_reserved_pages(void)
@@ -2194,9 +2190,8 @@  static void __init memmap_init_reserved_pages(void)
 	}
 }
 
-static unsigned long __init free_low_memory_core_early(void)
+static void __init free_low_memory_core_early(void)
 {
-	unsigned long count = 0;
 	phys_addr_t start, end;
 	u64 i;
 
@@ -2211,9 +2206,7 @@  static unsigned long __init free_low_memory_core_early(void)
 	 */
 	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end,
 				NULL)
-		count += __free_memory_core(start, end);
-
-	return count;
+		__free_memory_core(start, end);
 }
 
 static int reset_managed_pages_done __initdata;
@@ -2244,13 +2237,10 @@  void __init reset_all_zones_managed_pages(void)
  */
 void __init memblock_free_all(void)
 {
-	unsigned long pages;
-
 	free_unused_memmap();
 	reset_all_zones_managed_pages();
 
-	pages = free_low_memory_core_early();
-	totalram_pages_add(pages);
+	free_low_memory_core_early();
 }
 
 /* Keep a table to reserve named memory */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 71d2716a554f..4701bc442df6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1248,16 +1248,14 @@  void __meminit __free_pages_core(struct page *page, unsigned int order,
 		 * map it first.
 		 */
 		debug_pagealloc_map_pages(page, nr_pages);
-		adjust_managed_page_count(page, nr_pages);
 	} else {
 		for (loop = 0; loop < nr_pages; loop++, p++) {
 			__ClearPageReserved(p);
 			set_page_count(p, 0);
 		}
 
-		/* memblock adjusts totalram_pages() manually. */
-		atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
 	}
+	adjust_managed_page_count(page, nr_pages);
 
 	if (page_contains_unaccepted(page, order)) {
 		if (order == MAX_PAGE_ORDER && __free_unaccepted(page))