diff mbox series

[06/17] x86/numa: simplify numa_distance allocation

Message ID 20240716111346.3676969-7-rppt@kernel.org (mailing list archive)
State New
Headers show
Series mm: introduce numa_memblks | expand

Commit Message

Mike Rapoport July 16, 2024, 11:13 a.m. UTC
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>

Allocation of numa_distance uses memblock_phys_alloc_range() to limit
allocation to be below the last mapped page.

But NUMA initialization runs after the direct map is populated and
there is also code in setup_arch() that adjusts the memblock limit to
reflect how much memory is already mapped in the direct map.

Simplify the allocation of numa_distance and use plain memblock_alloc().
This makes the code clearer and ensures that when numa_distance is not
allocated it is always NULL.

Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
 arch/x86/mm/numa.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

Comments

Jonathan Cameron July 19, 2024, 4:28 p.m. UTC | #1
On Tue, 16 Jul 2024 14:13:35 +0300
Mike Rapoport <rppt@kernel.org> wrote:

> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
> 
> Allocation of numa_distance uses memblock_phys_alloc_range() to limit
> allocation to be below the last mapped page.
> 
> But NUMA initializaition runs after the direct map is populated and

initialization (one too many 'i's)

> there is also code in setup_arch() that adjusts memblock limit to
> reflect how much memory is already mapped in the direct map.
> 
> Simplify the allocation of numa_distance and use plain memblock_alloc().
> This makes the code clearer and ensures that when numa_distance is not
> allocated it is always NULL.
Doesn't this break the comment in numa_set_distance() kernel-doc?
"
 * If such table cannot be allocated, a warning is printed and further
 * calls are ignored until the distance table is reset with
 * numa_reset_distance().
"

Superficially that looks to be to avoid repeatedly hitting the
singleton bit at the top of numa_set_distance() as SRAT or similar
parsing occurs.

> 
> Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> ---
>  arch/x86/mm/numa.c | 12 +++---------
>  1 file changed, 3 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
> index 5e1dde26674b..ab2d4ecef786 100644
> --- a/arch/x86/mm/numa.c
> +++ b/arch/x86/mm/numa.c
> @@ -319,8 +319,7 @@ void __init numa_reset_distance(void)
>  {
>  	size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
>  
> -	/* numa_distance could be 1LU marking allocation failure, test cnt */
> -	if (numa_distance_cnt)
> +	if (numa_distance)
>  		memblock_free(numa_distance, size);
>  	numa_distance_cnt = 0;
>  	numa_distance = NULL;	/* enable table creation */
> @@ -331,7 +330,6 @@ static int __init numa_alloc_distance(void)
>  	nodemask_t nodes_parsed;
>  	size_t size;
>  	int i, j, cnt = 0;
> -	u64 phys;
>  
>  	/* size the new table and allocate it */
>  	nodes_parsed = numa_nodes_parsed;
> @@ -342,16 +340,12 @@ static int __init numa_alloc_distance(void)
>  	cnt++;
>  	size = cnt * cnt * sizeof(numa_distance[0]);
>  
> -	phys = memblock_phys_alloc_range(size, PAGE_SIZE, 0,
> -					 PFN_PHYS(max_pfn_mapped));
> -	if (!phys) {
> +	numa_distance = memblock_alloc(size, PAGE_SIZE);
> +	if (!numa_distance) {
>  		pr_warn("Warning: can't allocate distance table!\n");
> -		/* don't retry until explicitly reset */
> -		numa_distance = (void *)1LU;
>  		return -ENOMEM;
>  	}
>  
> -	numa_distance = __va(phys);
>  	numa_distance_cnt = cnt;
>  
>  	/* fill with the default distances */
Mike Rapoport July 22, 2024, 7:51 a.m. UTC | #2
On Fri, Jul 19, 2024 at 05:28:49PM +0100, Jonathan Cameron wrote:
> On Tue, 16 Jul 2024 14:13:35 +0300
> Mike Rapoport <rppt@kernel.org> wrote:
> 
> > From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
> > 
> > Allocation of numa_distance uses memblock_phys_alloc_range() to limit
> > allocation to be below the last mapped page.
> > 
> > But NUMA initializaition runs after the direct map is populated and
> 
> initialization (one too many 'i's)

Thanks.
 
> > there is also code in setup_arch() that adjusts memblock limit to
> > reflect how much memory is already mapped in the direct map.
> > 
> > Simplify the allocation of numa_distance and use plain memblock_alloc().
> > This makes the code clearer and ensures that when numa_distance is not
> > allocated it is always NULL.
> Doesn't this break the comment in numa_set_distance() kernel-doc?
> "
>  * If such table cannot be allocated, a warning is printed and further
>  * calls are ignored until the distance table is reset with
>  * numa_reset_distance().
> "
> 
> Superficially that looks to be to avoid repeatedly hitting the
> singleton bit at the top of numa_set_distance() as SRAT or similar
> parsing occurs.

I believe it's there to avoid allocation of numa_distance in the middle of
distance parsing (SLIT or DT numa-distance-map).

If the allocation fails for the first element in the table, the
numa_distance and numa_distance_cnt remain zero and node_distance() falls
back to

	return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;

It's different from arch_numa that always tries to allocate MAX_NUMNODES *
MAX_NUMNODES for numa_distance and treats the allocation failure as a
failure to initialize NUMA.

I like the general approach x86 uses more, i.e. in case distance parsing
fails in some way NUMA is still initialized with probably suboptimal
distances between nodes.

I'm going to restore that "singleton" behavior for now and will look into
making this all less cumbersome later.
 
> > Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> > ---
> >  arch/x86/mm/numa.c | 12 +++---------
> >  1 file changed, 3 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
> > index 5e1dde26674b..ab2d4ecef786 100644
> > --- a/arch/x86/mm/numa.c
> > +++ b/arch/x86/mm/numa.c
> > @@ -319,8 +319,7 @@ void __init numa_reset_distance(void)
> >  {
> >  	size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
> >  
> > -	/* numa_distance could be 1LU marking allocation failure, test cnt */
> > -	if (numa_distance_cnt)
> > +	if (numa_distance)
> >  		memblock_free(numa_distance, size);
> >  	numa_distance_cnt = 0;
> >  	numa_distance = NULL;	/* enable table creation */
> > @@ -331,7 +330,6 @@ static int __init numa_alloc_distance(void)
> >  	nodemask_t nodes_parsed;
> >  	size_t size;
> >  	int i, j, cnt = 0;
> > -	u64 phys;
> >  
> >  	/* size the new table and allocate it */
> >  	nodes_parsed = numa_nodes_parsed;
> > @@ -342,16 +340,12 @@ static int __init numa_alloc_distance(void)
> >  	cnt++;
> >  	size = cnt * cnt * sizeof(numa_distance[0]);
> >  
> > -	phys = memblock_phys_alloc_range(size, PAGE_SIZE, 0,
> > -					 PFN_PHYS(max_pfn_mapped));
> > -	if (!phys) {
> > +	numa_distance = memblock_alloc(size, PAGE_SIZE);
> > +	if (!numa_distance) {
> >  		pr_warn("Warning: can't allocate distance table!\n");
> > -		/* don't retry until explicitly reset */
> > -		numa_distance = (void *)1LU;
> >  		return -ENOMEM;
> >  	}
> >  
> > -	numa_distance = __va(phys);
> >  	numa_distance_cnt = cnt;
> >  
> >  	/* fill with the default distances */
> 
>
diff mbox series

Patch

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 5e1dde26674b..ab2d4ecef786 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -319,8 +319,7 @@  void __init numa_reset_distance(void)
 {
 	size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
 
-	/* numa_distance could be 1LU marking allocation failure, test cnt */
-	if (numa_distance_cnt)
+	if (numa_distance)
 		memblock_free(numa_distance, size);
 	numa_distance_cnt = 0;
 	numa_distance = NULL;	/* enable table creation */
@@ -331,7 +330,6 @@  static int __init numa_alloc_distance(void)
 	nodemask_t nodes_parsed;
 	size_t size;
 	int i, j, cnt = 0;
-	u64 phys;
 
 	/* size the new table and allocate it */
 	nodes_parsed = numa_nodes_parsed;
@@ -342,16 +340,12 @@  static int __init numa_alloc_distance(void)
 	cnt++;
 	size = cnt * cnt * sizeof(numa_distance[0]);
 
-	phys = memblock_phys_alloc_range(size, PAGE_SIZE, 0,
-					 PFN_PHYS(max_pfn_mapped));
-	if (!phys) {
+	numa_distance = memblock_alloc(size, PAGE_SIZE);
+	if (!numa_distance) {
 		pr_warn("Warning: can't allocate distance table!\n");
-		/* don't retry until explicitly reset */
-		numa_distance = (void *)1LU;
 		return -ENOMEM;
 	}
 
-	numa_distance = __va(phys);
 	numa_distance_cnt = cnt;
 
 	/* fill with the default distances */