diff mbox

[v6,04/15] mm: discard memblock data later

Message ID 1502138329-123460-5-git-send-email-pasha.tatashin@oracle.com (mailing list archive)
State New, archived
Headers show

Commit Message

Pavel Tatashin Aug. 7, 2017, 8:38 p.m. UTC
There is existing use after free bug when deferred struct pages are
enabled:

The memblock_add() allocates memory for the memory array if more than
128 entries are needed.  See comment in e820__memblock_setup():

  * The bootstrap memblock region count maximum is 128 entries
  * (INIT_MEMBLOCK_REGIONS), but EFI might pass us more E820 entries
  * than that - so allow memblock resizing.

This memblock memory is freed here:
        free_low_memory_core_early()

We access the freed memblock.memory later in boot when deferred pages are
initialized in this path:

        deferred_init_memmap()
                for_each_mem_pfn_range()
                  __next_mem_pfn_range()
                    type = &memblock.memory;

One possible explanation for why this use-after-free hasn't been hit
before is that the limit of INIT_MEMBLOCK_REGIONS has never been exceeded
at least on systems where deferred struct pages were enabled.

Another reason why we want this problem fixed in this patch series is,
in the next patch, we will need to access memblock.reserved from
deferred_init_memmap().

Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: Steven Sistare <steven.sistare@oracle.com>
Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
---
 include/linux/memblock.h |  6 ++++--
 mm/memblock.c            | 38 +++++++++++++++++---------------------
 mm/nobootmem.c           | 16 ----------------
 mm/page_alloc.c          |  4 ++++
 4 files changed, 25 insertions(+), 39 deletions(-)

Comments

Michal Hocko Aug. 11, 2017, 9:32 a.m. UTC | #1
[CC Mel]

On Mon 07-08-17 16:38:38, Pavel Tatashin wrote:
> There is existing use after free bug when deferred struct pages are
> enabled:
> 
> The memblock_add() allocates memory for the memory array if more than
> 128 entries are needed.  See comment in e820__memblock_setup():
> 
>   * The bootstrap memblock region count maximum is 128 entries
>   * (INIT_MEMBLOCK_REGIONS), but EFI might pass us more E820 entries
>   * than that - so allow memblock resizing.
> 
> This memblock memory is freed here:
>         free_low_memory_core_early()
> 
> We access the freed memblock.memory later in boot when deferred pages are
> initialized in this path:
> 
>         deferred_init_memmap()
>                 for_each_mem_pfn_range()
>                   __next_mem_pfn_range()
>                     type = &memblock.memory;

Yes you seem to be right.
>
> One possible explanation for why this use-after-free hasn't been hit
> before is that the limit of INIT_MEMBLOCK_REGIONS has never been exceeded
> at least on systems where deferred struct pages were enabled.

Yeah this sounds like the case.
 
> Another reason why we want this problem fixed in this patch series is,
> in the next patch, we will need to access memblock.reserved from
> deferred_init_memmap().
> 

I guess this goes all the way down to 
Fixes: 7e18adb4f80b ("mm: meminit: initialise remaining struct pages in parallel with kswapd")
> Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
> Reviewed-by: Steven Sistare <steven.sistare@oracle.com>
> Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
> Reviewed-by: Bob Picco <bob.picco@oracle.com>

Considering that some HW might behave strangely and this would be rather
hard to debug I would be tempted to mark this for stable. It should also
be merged separately from the rest of the series.

I have just one nit below
Acked-by: Michal Hocko <mhocko@suse.com>

[...]
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 2cb25fe4452c..bf14aea6ab70 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -285,31 +285,27 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
>  }
>  
>  #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK

pull this ifdef inside memblock_discard and you do not have an another
one in page_alloc_init_late

[...]
> +/**
> + * Discard memory and reserved arrays if they were allocated
> + */
> +void __init memblock_discard(void)
>  {

here

> -	if (memblock.memory.regions == memblock_memory_init_regions)
> -		return 0;
> +	phys_addr_t addr, size;
>  
> -	*addr = __pa(memblock.memory.regions);
> +	if (memblock.reserved.regions != memblock_reserved_init_regions) {
> +		addr = __pa(memblock.reserved.regions);
> +		size = PAGE_ALIGN(sizeof(struct memblock_region) *
> +				  memblock.reserved.max);
> +		__memblock_free_late(addr, size);
> +	}
>  
> -	return PAGE_ALIGN(sizeof(struct memblock_region) *
> -			  memblock.memory.max);
> +	if (memblock.memory.regions == memblock_memory_init_regions) {
> +		addr = __pa(memblock.memory.regions);
> +		size = PAGE_ALIGN(sizeof(struct memblock_region) *
> +				  memblock.memory.max);
> +		__memblock_free_late(addr, size);
> +	}
>  }
> -
>  #endif
[...]
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index fc32aa81f359..63d16c185736 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1584,6 +1584,10 @@ void __init page_alloc_init_late(void)
>  	/* Reinit limits that are based on free pages after the kernel is up */
>  	files_maxfiles_init();
>  #endif
> +#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
> +	/* Discard memblock private memory */
> +	memblock_discard();
> +#endif
>  
>  	for_each_populated_zone(zone)
>  		set_zone_contiguous(zone);
> -- 
> 2.14.0
Mel Gorman Aug. 11, 2017, 9:50 a.m. UTC | #2
On Fri, Aug 11, 2017 at 11:32:49AM +0200, Michal Hocko wrote:
> > Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
> > Reviewed-by: Steven Sistare <steven.sistare@oracle.com>
> > Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
> > Reviewed-by: Bob Picco <bob.picco@oracle.com>
> 
> Considering that some HW might behave strangely and this would be rather
> hard to debug I would be tempted to mark this for stable. It should also
> be merged separately from the rest of the series.
> 
> I have just one nit below
> Acked-by: Michal Hocko <mhocko@suse.com>
> 

Agreed.
Pavel Tatashin Aug. 11, 2017, 3:49 p.m. UTC | #3
> I guess this goes all the way down to
> Fixes: 7e18adb4f80b ("mm: meminit: initialise remaining struct pages in parallel with kswapd")

I will add this to the patch.

>> Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
>> Reviewed-by: Steven Sistare <steven.sistare@oracle.com>
>> Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
>> Reviewed-by: Bob Picco <bob.picco@oracle.com>
> 
> Considering that some HW might behave strangely and this would be rather
> hard to debug I would be tempted to mark this for stable. It should also
> be merged separately from the rest of the series.
> 
> I have just one nit below
> Acked-by: Michal Hocko <mhocko@suse.com>

I will address your comment, and send out a new patch. Should I send it 
out separately from the series or should I keep it inside?

Also, before I send out a new patch, I will need to root cause and 
resolve problem found by kernel test robot <fengguang.wu@intel.com>, and 
bisected down to this patch.

[  156.659400] BUG: Bad page state in process swapper  pfn:03147
[  156.660051] page:ffff88001ed8a1c0 count:0 mapcount:-127 mapping: 
     (null) index:0x1
[  156.660917] flags: 0x0()
[  156.661198] raw: 0000000000000000 0000000000000000 0000000000000001 
00000000ffffff80
[  156.662006] raw: ffff88001f4a8120 ffff88001ed85ce0 0000000000000000 
0000000000000000
[  156.662811] page dumped because: nonzero mapcount
[  156.663307] CPU: 0 PID: 1 Comm: swapper Not tainted 
4.13.0-rc3-00220-g1aad694 #1
[  156.664077] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), 
BIOS 1.9.3-20161025_171302-gandalf 04/01/2014
[  156.665129] Call Trace:
[  156.665422]  dump_stack+0x1e/0x20
[  156.665802]  bad_page+0x122/0x148

I was not able to reproduce this problem, even-though I used their qemu 
script and config. But I am getting the following panic both base and fix:

[  115.763259] VFS: Cannot open root device "ram0" or 
unknown-block(0,0): error -6
[  115.764511] Please append a correct "root=" boot option; here are the 
available partitions:
[  115.765816] Kernel panic - not syncing: VFS: Unable to mount root fs 
on unknown-block(0,0)
[  115.767124] CPU: 0 PID: 1 Comm: swapper Not tainted 
4.13.0-rc4_pt_memset6-00033-g7e65200b1473 #7
[  115.768506] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), 
BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014
[  115.770368] Call Trace:
[  115.770771]  dump_stack+0x1e/0x20
[  115.771310]  panic+0xf8/0x2bc
[  115.771792]  mount_block_root+0x3bb/0x441
[  115.772437]  ? do_early_param+0xc5/0xc5
[  115.773051]  ? do_early_param+0xc5/0xc5
[  115.773683]  mount_root+0x7c/0x7f
[  115.774243]  prepare_namespace+0x194/0x1d1
[  115.774898]  kernel_init_freeable+0x1c8/0x1df
[  115.775575]  ? rest_init+0x13f/0x13f
[  115.776153]  kernel_init+0x14/0x142
[  115.776711]  ? rest_init+0x13f/0x13f
[  115.777285]  ret_from_fork+0x2a/0x40
[  115.777864] Kernel Offset: disabled

Their config has CONFIG_BLK_DEV_RAM disabled, but qemu script has:
root=/dev/ram0, so I enabled dev_ram, but still getting a panic when 
root is mounted both in base and fix.

Pasha
Michal Hocko Aug. 11, 2017, 4:04 p.m. UTC | #4
On Fri 11-08-17 11:49:15, Pasha Tatashin wrote:
> >I guess this goes all the way down to
> >Fixes: 7e18adb4f80b ("mm: meminit: initialise remaining struct pages in parallel with kswapd")
> 
> I will add this to the patch.
> 
> >>Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
> >>Reviewed-by: Steven Sistare <steven.sistare@oracle.com>
> >>Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
> >>Reviewed-by: Bob Picco <bob.picco@oracle.com>
> >
> >Considering that some HW might behave strangely and this would be rather
> >hard to debug I would be tempted to mark this for stable. It should also
> >be merged separately from the rest of the series.
> >
> >I have just one nit below
> >Acked-by: Michal Hocko <mhocko@suse.com>
> 
> I will address your comment, and send out a new patch. Should I send it out
> separately from the series or should I keep it inside?

I would post it separatelly. It doesn't depend on the rest.

> Also, before I send out a new patch, I will need to root cause and resolve
> problem found by kernel test robot <fengguang.wu@intel.com>, and bisected
> down to this patch.
> 
> [  156.659400] BUG: Bad page state in process swapper  pfn:03147
> [  156.660051] page:ffff88001ed8a1c0 count:0 mapcount:-127 mapping:
> (null) index:0x1
> [  156.660917] flags: 0x0()
> [  156.661198] raw: 0000000000000000 0000000000000000 0000000000000001
> 00000000ffffff80
> [  156.662006] raw: ffff88001f4a8120 ffff88001ed85ce0 0000000000000000
> 0000000000000000
> [  156.662811] page dumped because: nonzero mapcount
> [  156.663307] CPU: 0 PID: 1 Comm: swapper Not tainted
> 4.13.0-rc3-00220-g1aad694 #1
> [  156.664077] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
> 1.9.3-20161025_171302-gandalf 04/01/2014
> [  156.665129] Call Trace:
> [  156.665422]  dump_stack+0x1e/0x20
> [  156.665802]  bad_page+0x122/0x148

Was the report related with this patch?
Pavel Tatashin Aug. 11, 2017, 4:22 p.m. UTC | #5
>> I will address your comment, and send out a new patch. Should I send it out
>> separately from the series or should I keep it inside?
> 
> I would post it separatelly. It doesn't depend on the rest.

OK, I will post it separately. No it does not depend on the rest, but 
the reset depends on this. So, I am not sure how to enforce that this 
comes before the rest.

> 
>> Also, before I send out a new patch, I will need to root cause and resolve
>> problem found by kernel test robot <fengguang.wu@intel.com>, and bisected
>> down to this patch.
>>
>> [  156.659400] BUG: Bad page state in process swapper  pfn:03147
>> [  156.660051] page:ffff88001ed8a1c0 count:0 mapcount:-127 mapping:
>> (null) index:0x1
>> [  156.660917] flags: 0x0()
>> [  156.661198] raw: 0000000000000000 0000000000000000 0000000000000001
>> 00000000ffffff80
>> [  156.662006] raw: ffff88001f4a8120 ffff88001ed85ce0 0000000000000000
>> 0000000000000000
>> [  156.662811] page dumped because: nonzero mapcount
>> [  156.663307] CPU: 0 PID: 1 Comm: swapper Not tainted
>> 4.13.0-rc3-00220-g1aad694 #1
>> [  156.664077] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
>> 1.9.3-20161025_171302-gandalf 04/01/2014
>> [  156.665129] Call Trace:
>> [  156.665422]  dump_stack+0x1e/0x20
>> [  156.665802]  bad_page+0x122/0x148
> 
> Was the report related with this patch?

Yes, they said that the problem was bisected down to this patch. Do you 
know if there is a way to submit a patch to this test robot?

Thank you,
Pasha
Pavel Tatashin Aug. 11, 2017, 7 p.m. UTC | #6
Hi Michal,

This suggestion won't work, because there are arches without memblock 
support: tile, sh...

So, I would still need to have:

#ifdef CONFIG_MEMBLOCK in page_alloc, or define memblock_discard() stubs 
in nobootmem headfile. In either case it would become messier than what 
it is right now.

Pasha

> I have just one nit below
> Acked-by: Michal Hocko <mhocko@suse.com>
> 
> [...]
>> diff --git a/mm/memblock.c b/mm/memblock.c
>> index 2cb25fe4452c..bf14aea6ab70 100644
>> --- a/mm/memblock.c
>> +++ b/mm/memblock.c
>> @@ -285,31 +285,27 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
>>   }
>>   
>>   #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
> 
> pull this ifdef inside memblock_discard and you do not have an another
> one in page_alloc_init_late
> 
> [...]
>> +/**
>> + * Discard memory and reserved arrays if they were allocated
>> + */
>> +void __init memblock_discard(void)
>>   {
> 
> here
> 
>> -	if (memblock.memory.regions == memblock_memory_init_regions)
>> -		return 0;
>> +	phys_addr_t addr, size;
>>   
>> -	*addr = __pa(memblock.memory.regions);
>> +	if (memblock.reserved.regions != memblock_reserved_init_regions) {
>> +		addr = __pa(memblock.reserved.regions);
>> +		size = PAGE_ALIGN(sizeof(struct memblock_region) *
>> +				  memblock.reserved.max);
>> +		__memblock_free_late(addr, size);
>> +	}
>>   
>> -	return PAGE_ALIGN(sizeof(struct memblock_region) *
>> -			  memblock.memory.max);
>> +	if (memblock.memory.regions == memblock_memory_init_regions) {
>> +		addr = __pa(memblock.memory.regions);
>> +		size = PAGE_ALIGN(sizeof(struct memblock_region) *
>> +				  memblock.memory.max);
>> +		__memblock_free_late(addr, size);
>> +	}
>>   }
>> -
>>   #endif
Michal Hocko Aug. 14, 2017, 11:34 a.m. UTC | #7
On Fri 11-08-17 15:00:47, Pasha Tatashin wrote:
> Hi Michal,
> 
> This suggestion won't work, because there are arches without memblock
> support: tile, sh...
> 
> So, I would still need to have:
> 
> #ifdef CONFIG_MEMBLOCK in page_alloc, or define memblock_discard() stubs in
> nobootmem headfile. 

This is the standard way to do this. And it is usually preferred to
proliferate ifdefs in the code.
Michal Hocko Aug. 14, 2017, 11:36 a.m. UTC | #8
On Fri 11-08-17 12:22:52, Pasha Tatashin wrote:
> >>I will address your comment, and send out a new patch. Should I send it out
> >>separately from the series or should I keep it inside?
> >
> >I would post it separatelly. It doesn't depend on the rest.
> 
> OK, I will post it separately. No it does not depend on the rest, but the
> reset depends on this. So, I am not sure how to enforce that this comes
> before the rest.

Andrew will take care of that. Just make it explicit that some of the
patch depends on an earlier work when reposting.
 
> >>Also, before I send out a new patch, I will need to root cause and resolve
> >>problem found by kernel test robot <fengguang.wu@intel.com>, and bisected
> >>down to this patch.
> >>
> >>[  156.659400] BUG: Bad page state in process swapper  pfn:03147
> >>[  156.660051] page:ffff88001ed8a1c0 count:0 mapcount:-127 mapping:
> >>(null) index:0x1
> >>[  156.660917] flags: 0x0()
> >>[  156.661198] raw: 0000000000000000 0000000000000000 0000000000000001
> >>00000000ffffff80
> >>[  156.662006] raw: ffff88001f4a8120 ffff88001ed85ce0 0000000000000000
> >>0000000000000000
> >>[  156.662811] page dumped because: nonzero mapcount
> >>[  156.663307] CPU: 0 PID: 1 Comm: swapper Not tainted
> >>4.13.0-rc3-00220-g1aad694 #1
> >>[  156.664077] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
> >>1.9.3-20161025_171302-gandalf 04/01/2014
> >>[  156.665129] Call Trace:
> >>[  156.665422]  dump_stack+0x1e/0x20
> >>[  156.665802]  bad_page+0x122/0x148
> >
> >Was the report related with this patch?
> 
> Yes, they said that the problem was bisected down to this patch. Do you know
> if there is a way to submit a patch to this test robot?

You can ask them for re testing with an updated patch by replying to
their report. ANyway I fail to see how the change could lead to this
patch.
Pavel Tatashin Aug. 14, 2017, 1:35 p.m. UTC | #9
>> OK, I will post it separately. No it does not depend on the rest, but the
>> reset depends on this. So, I am not sure how to enforce that this comes
>> before the rest.
> 
> Andrew will take care of that. Just make it explicit that some of the
> patch depends on an earlier work when reposting.

Ok.

>> Yes, they said that the problem was bisected down to this patch. Do you know
>> if there is a way to submit a patch to this test robot?
> 
> You can ask them for re testing with an updated patch by replying to
> their report. ANyway I fail to see how the change could lead to this
> patch.

I have already done that. Anyway, I think it is unrelated. I have used 
their scripts to test the patch alone, with number of elements in 
memblock array reduced down to 4. Verified that my freeing code is 
called, and never hit the problem that they reported.
Pavel Tatashin Aug. 14, 2017, 1:39 p.m. UTC | #10
>> #ifdef CONFIG_MEMBLOCK in page_alloc, or define memblock_discard() stubs in
>> nobootmem headfile.
> 
> This is the standard way to do this. And it is usually preferred to
> proliferate ifdefs in the code.

Hi Michal,

As you suggested, I sent-out this patch separately. If you feel 
strongly, that this should be updated to have stubs for platforms that 
do not implement memblock, please send a reply to that e-mail, so those 
who do not follow this tread will see it. Otherwise, I can leave it as 
is, page_alloc file already has a number memblock related ifdefs all of 
which can be cleaned out once every platform implements it (is it even 
achievable?)

Thank you,
Pasha
Michal Hocko Aug. 14, 2017, 1:42 p.m. UTC | #11
On Mon 14-08-17 09:39:17, Pasha Tatashin wrote:
> >>#ifdef CONFIG_MEMBLOCK in page_alloc, or define memblock_discard() stubs in
> >>nobootmem headfile.
> >
> >This is the standard way to do this. And it is usually preferred to
> >proliferate ifdefs in the code.
> 
> Hi Michal,
> 
> As you suggested, I sent-out this patch separately. If you feel strongly,
> that this should be updated to have stubs for platforms that do not
> implement memblock, please send a reply to that e-mail, so those who do not
> follow this tread will see it. Otherwise, I can leave it as is, page_alloc
> file already has a number memblock related ifdefs all of which can be
> cleaned out once every platform implements it (is it even achievable?)

I do not think this needs a repost just for this. This was merely a
JFYI, in case you would need to repost for other reasons then just
update that as well. But nothing really earth shattering.
diff mbox

Patch

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 77d427974f57..bae11c7e7bf3 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -61,6 +61,7 @@  extern int memblock_debug;
 #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
 #define __init_memblock __meminit
 #define __initdata_memblock __meminitdata
+void memblock_discard(void);
 #else
 #define __init_memblock
 #define __initdata_memblock
@@ -74,8 +75,6 @@  phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
 					int nid, ulong flags);
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
 				   phys_addr_t size, phys_addr_t align);
-phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr);
-phys_addr_t get_allocated_memblock_memory_regions_info(phys_addr_t *addr);
 void memblock_allow_resize(void);
 int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
 int memblock_add(phys_addr_t base, phys_addr_t size);
@@ -110,6 +109,9 @@  void __next_mem_range_rev(u64 *idx, int nid, ulong flags,
 void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start,
 				phys_addr_t *out_end);
 
+void __memblock_free_early(phys_addr_t base, phys_addr_t size);
+void __memblock_free_late(phys_addr_t base, phys_addr_t size);
+
 /**
  * for_each_mem_range - iterate through memblock areas from type_a and not
  * included in type_b. Or just type_a if type_b is NULL.
diff --git a/mm/memblock.c b/mm/memblock.c
index 2cb25fe4452c..bf14aea6ab70 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -285,31 +285,27 @@  static void __init_memblock memblock_remove_region(struct memblock_type *type, u
 }
 
 #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
-
-phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info(
-					phys_addr_t *addr)
-{
-	if (memblock.reserved.regions == memblock_reserved_init_regions)
-		return 0;
-
-	*addr = __pa(memblock.reserved.regions);
-
-	return PAGE_ALIGN(sizeof(struct memblock_region) *
-			  memblock.reserved.max);
-}
-
-phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info(
-					phys_addr_t *addr)
+/**
+ * Discard memory and reserved arrays if they were allocated
+ */
+void __init memblock_discard(void)
 {
-	if (memblock.memory.regions == memblock_memory_init_regions)
-		return 0;
+	phys_addr_t addr, size;
 
-	*addr = __pa(memblock.memory.regions);
+	if (memblock.reserved.regions != memblock_reserved_init_regions) {
+		addr = __pa(memblock.reserved.regions);
+		size = PAGE_ALIGN(sizeof(struct memblock_region) *
+				  memblock.reserved.max);
+		__memblock_free_late(addr, size);
+	}
 
-	return PAGE_ALIGN(sizeof(struct memblock_region) *
-			  memblock.memory.max);
+	if (memblock.memory.regions == memblock_memory_init_regions) {
+		addr = __pa(memblock.memory.regions);
+		size = PAGE_ALIGN(sizeof(struct memblock_region) *
+				  memblock.memory.max);
+		__memblock_free_late(addr, size);
+	}
 }
-
 #endif
 
 /**
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 36454d0f96ee..3637809a18d0 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -146,22 +146,6 @@  static unsigned long __init free_low_memory_core_early(void)
 				NULL)
 		count += __free_memory_core(start, end);
 
-#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
-	{
-		phys_addr_t size;
-
-		/* Free memblock.reserved array if it was allocated */
-		size = get_allocated_memblock_reserved_regions_info(&start);
-		if (size)
-			count += __free_memory_core(start, start + size);
-
-		/* Free memblock.memory array if it was allocated */
-		size = get_allocated_memblock_memory_regions_info(&start);
-		if (size)
-			count += __free_memory_core(start, start + size);
-	}
-#endif
-
 	return count;
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fc32aa81f359..63d16c185736 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1584,6 +1584,10 @@  void __init page_alloc_init_late(void)
 	/* Reinit limits that are based on free pages after the kernel is up */
 	files_maxfiles_init();
 #endif
+#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
+	/* Discard memblock private memory */
+	memblock_discard();
+#endif
 
 	for_each_populated_zone(zone)
 		set_zone_contiguous(zone);