diff mbox series

mm, sparse: do not waste pre allocated memmap space

Message ID 20191119092642.31799-1-mhocko@kernel.org (mailing list archive)
State New, archived
Headers show
Series mm, sparse: do not waste pre allocated memmap space | expand

Commit Message

Michal Hocko Nov. 19, 2019, 9:26 a.m. UTC
From: Michal Hocko <mhocko@suse.com>

Vincent has noticed [1] that something unusual is going on with the memmap
allocations on his platform
: I noticed this because on my ARM64 platform, with 1 GiB of memory the
: first [and only] section is allocated from the zeroing path while with
: 2 GiB of memory the first 1 GiB section is allocated from the
: non-zeroing path.

The underlying problem is that although sparse_buffer_init allocates enough
memory for all sections on the node, sparse_buffer_alloc is not able to
consume it due to a mismatch in the expected allocation alignment.
While the sparse_buffer_init preallocation uses PAGE_SIZE alignment, the
real memmap has to be aligned to section_map_size(). This results in a
wasted initial chunk of the preallocated memmap and an unnecessary fallback
allocation for a section.

While we are at it, also change __populate_section_memmap to align to the
requested size, because at least VMEMMAP has constraints that require the
memmap to be properly aligned.

[1] http://lkml.kernel.org/r/20191030131122.8256-1-vincent.whitchurch@axis.com
Reported-and-debugged-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Fixes: 35fd1eb1e821 ("mm/sparse: abstract sparse buffer allocations")
Signed-off-by: Michal Hocko <mhocko@suse.com>
---
 mm/sparse.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

Comments

David Hildenbrand Nov. 19, 2019, 10:03 a.m. UTC | #1
On 19.11.19 10:26, Michal Hocko wrote:
> From: Michal Hocko <mhocko@suse.com>
> 
> Vincent has noticed [1] that there is something unusual with the memmap
> allocations going on on his platform
> : I noticed this because on my ARM64 platform, with 1 GiB of memory the
> : first [and only] section is allocated from the zeroing path while with
> : 2 GiB of memory the first 1 GiB section is allocated from the
> : non-zeroing path.
> 
> The underlying problem is that although sparse_buffer_init allocates enough
> memory for all sections on the node, sparse_buffer_alloc is not able to
> consume it due to a mismatch in the expected allocation alignment.
> While the sparse_buffer_init preallocation uses PAGE_SIZE alignment, the
> real memmap has to be aligned to section_map_size(). This results in a
> wasted initial chunk of the preallocated memmap and an unnecessary fallback
> allocation for a section.
> 
> While we are at it, also change __populate_section_memmap to align to the
> requested size, because at least VMEMMAP has constraints that require the
> memmap to be properly aligned.
> 
> [1] http://lkml.kernel.org/r/20191030131122.8256-1-vincent.whitchurch@axis.com
> Reported-and-debugged-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
> Fixes: 35fd1eb1e821 ("mm/sparse: abstract sparse buffer allocations")
> Signed-off-by: Michal Hocko <mhocko@suse.com>
> ---
>   mm/sparse.c | 10 +++++++---
>   1 file changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/mm/sparse.c b/mm/sparse.c
> index f6891c1992b1..079f3e3c4cab 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -458,8 +458,7 @@ struct page __init *__populate_section_memmap(unsigned long pfn,
>   	if (map)
>   		return map;
>   
> -	map = memblock_alloc_try_nid(size,
> -					  PAGE_SIZE, addr,
> +	map = memblock_alloc_try_nid(size, size, addr,
>   					  MEMBLOCK_ALLOC_ACCESSIBLE, nid);
>   	if (!map)
>   		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa\n",
> @@ -482,8 +481,13 @@ static void __init sparse_buffer_init(unsigned long size, int nid)
>   {
>   	phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
>   	WARN_ON(sparsemap_buf);	/* forgot to call sparse_buffer_fini()? */
> +	/*
> +	 * Pre-allocated buffer is mainly used by __populate_section_memmap
> +	 * and we want it to be properly aligned to the section size - this is
> +	 * especially the case for VMEMMAP which maps memmap to PMDs
> +	 */
>   	sparsemap_buf =
> -		memblock_alloc_try_nid_raw(size, PAGE_SIZE,
> +		memblock_alloc_try_nid_raw(size, section_map_size(),
>   						addr,
>   						MEMBLOCK_ALLOC_ACCESSIBLE, nid);

Wow, that alignment/layout gives me nightmares  ^

None of your business, though :)

>   	sparsemap_buf_end = sparsemap_buf + size;
> 

Acked-by: David Hildenbrand <david@redhat.com>
Andrew Morton Nov. 19, 2019, 10:10 p.m. UTC | #2
On Tue, 19 Nov 2019 11:03:58 +0100 David Hildenbrand <david@redhat.com> wrote:

> > @@ -482,8 +481,13 @@ static void __init sparse_buffer_init(unsigned long size, int nid)
> >   {
> >   	phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
> >   	WARN_ON(sparsemap_buf);	/* forgot to call sparse_buffer_fini()? */
> > +	/*
> > +	 * Pre-allocated buffer is mainly used by __populate_section_memmap
> > +	 * and we want it to be properly aligned to the section size - this is
> > +	 * especially the case for VMEMMAP which maps memmap to PMDs
> > +	 */
> >   	sparsemap_buf =
> > -		memblock_alloc_try_nid_raw(size, PAGE_SIZE,
> > +		memblock_alloc_try_nid_raw(size, section_map_size(),
> >   						addr,
> >   						MEMBLOCK_ALLOC_ACCESSIBLE, nid);
> 
> Wow, that alignment/layout gives me nightmares  ^
> 
> None of your business, though :)

We're allowed to change it ;)

--- a/mm/sparse.c~mm-sparse-do-not-waste-pre-allocated-memmap-space-fix
+++ a/mm/sparse.c
@@ -486,10 +486,8 @@ static void __init sparse_buffer_init(un
 	 * and we want it to be properly aligned to the section size - this is
 	 * especially the case for VMEMMAP which maps memmap to PMDs
 	 */
-	sparsemap_buf =
-		memblock_alloc_try_nid_raw(size, section_map_size(),
-						addr,
-						MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+	sparsemap_buf = memblock_alloc_try_nid_raw(size, section_map_size(),
+					addr, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 	sparsemap_buf_end = sparsemap_buf + size;
 }
Michal Hocko Nov. 20, 2019, 7:52 a.m. UTC | #3
On Tue 19-11-19 14:10:47, Andrew Morton wrote:
> On Tue, 19 Nov 2019 11:03:58 +0100 David Hildenbrand <david@redhat.com> wrote:
> 
> > > @@ -482,8 +481,13 @@ static void __init sparse_buffer_init(unsigned long size, int nid)
> > >   {
> > >   	phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
> > >   	WARN_ON(sparsemap_buf);	/* forgot to call sparse_buffer_fini()? */
> > > +	/*
> > > +	 * Pre-allocated buffer is mainly used by __populate_section_memmap
> > > +	 * and we want it to be properly aligned to the section size - this is
> > > +	 * especially the case for VMEMMAP which maps memmap to PMDs
> > > +	 */
> > >   	sparsemap_buf =
> > > -		memblock_alloc_try_nid_raw(size, PAGE_SIZE,
> > > +		memblock_alloc_try_nid_raw(size, section_map_size(),
> > >   						addr,
> > >   						MEMBLOCK_ALLOC_ACCESSIBLE, nid);
> > 
> > Wow, that alignment/layout gives me nightmares  ^
> > 
> > None of your business, though :)
> 
> We're allowed to change it ;)
> 
> --- a/mm/sparse.c~mm-sparse-do-not-waste-pre-allocated-memmap-space-fix
> +++ a/mm/sparse.c
> @@ -486,10 +486,8 @@ static void __init sparse_buffer_init(un
>  	 * and we want it to be properly aligned to the section size - this is
>  	 * especially the case for VMEMMAP which maps memmap to PMDs
>  	 */
> -	sparsemap_buf =
> -		memblock_alloc_try_nid_raw(size, section_map_size(),
> -						addr,
> -						MEMBLOCK_ALLOC_ACCESSIBLE, nid);
> +	sparsemap_buf = memblock_alloc_try_nid_raw(size, section_map_size(),
> +					addr, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
>  	sparsemap_buf_end = sparsemap_buf + size;
>  }

I didn't bother mostly because the creative code layout made the
intention of the patch more obvious.  But if it saves from nightmares
then why not.
diff mbox series

Patch

diff --git a/mm/sparse.c b/mm/sparse.c
index f6891c1992b1..079f3e3c4cab 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -458,8 +458,7 @@  struct page __init *__populate_section_memmap(unsigned long pfn,
 	if (map)
 		return map;
 
-	map = memblock_alloc_try_nid(size,
-					  PAGE_SIZE, addr,
+	map = memblock_alloc_try_nid(size, size, addr,
 					  MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 	if (!map)
 		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa\n",
@@ -482,8 +481,13 @@  static void __init sparse_buffer_init(unsigned long size, int nid)
 {
 	phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
 	WARN_ON(sparsemap_buf);	/* forgot to call sparse_buffer_fini()? */
+	/*
+	 * Pre-allocated buffer is mainly used by __populate_section_memmap
+	 * and we want it to be properly aligned to the section size - this is
+	 * especially the case for VMEMMAP which maps memmap to PMDs
+	 */
 	sparsemap_buf =
-		memblock_alloc_try_nid_raw(size, PAGE_SIZE,
+		memblock_alloc_try_nid_raw(size, section_map_size(),
 						addr,
 						MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 	sparsemap_buf_end = sparsemap_buf + size;