diff mbox series

MIPS: loongson64: alloc pglist_data at run time

Message ID 20210227062957.269156-1-huangpei@loongson.cn (mailing list archive)
State Superseded
Headers show
Series MIPS: loongson64: alloc pglist_data at run time | expand

Commit Message

Huang Pei Feb. 27, 2021, 6:29 a.m. UTC
It can make some metadata of MM, like pglist_data and zone
NUMA-aware

Signed-off-by: Huang Pei <huangpei@loongson.cn>
---
 arch/mips/loongson64/numa.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

Comments

Sergey Shtylyov Feb. 28, 2021, 8:43 a.m. UTC | #1
Hello!

On 27.02.2021 9:29, Huang Pei wrote:

> It can make some metadata of MM, like pglist_data and zone
> NUMA-aware
> 
> Signed-off-by: Huang Pei <huangpei@loongson.cn>
> ---
>   arch/mips/loongson64/numa.c | 18 +++++++++++++++---
>   1 file changed, 15 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c
> index cf9459f79f9b..5912b2e7b10c 100644
> --- a/arch/mips/loongson64/numa.c
> +++ b/arch/mips/loongson64/numa.c
[...]
> @@ -183,6 +194,7 @@ static void __init node_mem_init(unsigned int node)
>   			memblock_reserve((node_addrspace_offset | 0xfe000000),
>   					 32 << 20);
>   	}
> +

    Unrelated whitespace change?

>   }
>   
>   static __init void prom_meminit(void)

MBR, Sergei
Jinyang He March 1, 2021, 8:42 a.m. UTC | #2
On 02/27/2021 02:29 PM, Huang Pei wrote:

> It can make some metadata of MM, like pglist_data and zone
> NUMA-aware
>
> Signed-off-by: Huang Pei <huangpei@loongson.cn>
> ---
>   arch/mips/loongson64/numa.c | 18 +++++++++++++++---
>   1 file changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c
> index cf9459f79f9b..5912b2e7b10c 100644
> --- a/arch/mips/loongson64/numa.c
> +++ b/arch/mips/loongson64/numa.c
> @@ -26,7 +26,6 @@
>   #include <asm/wbflush.h>
>   #include <boot_param.h>
>   
> -static struct pglist_data prealloc__node_data[MAX_NUMNODES];
>   unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES];
>   EXPORT_SYMBOL(__node_distances);
>   struct pglist_data *__node_data[MAX_NUMNODES];
> @@ -151,8 +150,12 @@ static void __init szmem(unsigned int node)
>   
>   static void __init node_mem_init(unsigned int node)
>   {
> +	struct pglist_data *nd;
>   	unsigned long node_addrspace_offset;
>   	unsigned long start_pfn, end_pfn;
> +	unsigned long nd_pa;
> +	int tnid;
> +	const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
>   
>   	node_addrspace_offset = nid_to_addrbase(node);
>   	pr_info("Node%d's addrspace_offset is 0x%lx\n",
> @@ -162,8 +165,16 @@ static void __init node_mem_init(unsigned int node)
>   	pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx\n",
>   		node, start_pfn, end_pfn);
>   
> -	__node_data[node] = prealloc__node_data + node;
> -
> +	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, node);
[...]

Hi, all,

Few related to this patch. About memblock, I have a question.

In my own understanding, 3 stages at previous part of arch_mem_init().

stage1: Add memory to memblock.memory at plat_mem_setup().
stage2: parse_early_param() parses parameter about memroy, such as "mem" 
"memmap".
         check_kernel_sections_mem() checks whether the current 
memblock.memory contains the kernel.
         At this stage, user can defined memblock.memory by themselves. 
Also it is the final stage of determining memblock.memory.
stage3: others. use memblock.memory and add them to memblock.reserve.

Calling to memblock_alloc*() should be after the reserve kernel(), see:
     memblock_set_current_limit(PFN_PHYS(max_low_pfn))

(
Here few about this patch: It works no problem, memblock_dump shows:
[    0.000000]  memory.cnt  = 0x2
[    0.000000]  memory[0x0] [0x0000000000200000-0x000000000effffff], 
0x000000000ee00000 bytes on node 0 flags: 0x0
[    0.000000]  memory[0x1] [0x0000000090200000-0x000000025fffffff], 
0x00000001cfe00000 bytes on node 0 flags: 0x0
[    0.000000]  reserved.cnt  = 0x3
[    0.000000]  reserved[0x0] [0x0000000000200000-0x0000000001ed7fff], 
0x0000000001cd8000 bytes flags: 0x0
[    0.000000]  reserved[0x1] [0x00000000fe000000-0x00000000ffffffff], 
0x0000000002000000 bytes flags: 0x0
--->>> [    0.000000]  reserved[0x2] 
[0x000000025fffd6c0-0x000000025fffffff], 0x0000000000002940 bytes flags: 
0x0  <<<---
memblock.bottom_up is not enabled here, and it is not destroy kernel, 
although looks strange.
)

Morever, about "mem" parameter.

When parsing the mem parameter for the first time, all memblock.memory 
is removed.
For NUMA, memblock.memory contain important node information. These 
information are
imported at plat_mem_setup(). Without these node information, the NUMA 
platform
may not be able to use memory correctly.

The mem parameter is rarely used, but it has meaning. For example, 
kdump. I have
done some fixes before, but it looks fool.

Huacai suggested me that use pa_to_nid().

memblock_add(start, size) -> memblock_add_node(start, size, 
pa_to_nid(start))

I think this is a good way. Does anyone have other suggestions?

At last, should the NUMA platform reserve the kernel area after parse 
"mem" rather than before it?

Thanks,
Jinyang
diff mbox series

Patch

diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c
index cf9459f79f9b..5912b2e7b10c 100644
--- a/arch/mips/loongson64/numa.c
+++ b/arch/mips/loongson64/numa.c
@@ -26,7 +26,6 @@ 
 #include <asm/wbflush.h>
 #include <boot_param.h>
 
-static struct pglist_data prealloc__node_data[MAX_NUMNODES];
 unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES];
 EXPORT_SYMBOL(__node_distances);
 struct pglist_data *__node_data[MAX_NUMNODES];
@@ -151,8 +150,12 @@  static void __init szmem(unsigned int node)
 
 static void __init node_mem_init(unsigned int node)
 {
+	struct pglist_data *nd;
 	unsigned long node_addrspace_offset;
 	unsigned long start_pfn, end_pfn;
+	unsigned long nd_pa;
+	int tnid;
+	const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
 
 	node_addrspace_offset = nid_to_addrbase(node);
 	pr_info("Node%d's addrspace_offset is 0x%lx\n",
@@ -162,8 +165,16 @@  static void __init node_mem_init(unsigned int node)
 	pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx\n",
 		node, start_pfn, end_pfn);
 
-	__node_data[node] = prealloc__node_data + node;
-
+	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, node);
+	if (!nd_pa)
+		panic("Cannot allocate %zu bytes for node %d data\n",
+		      nd_size, node);
+	nd = __va(nd_pa);
+	memset(nd, 0, sizeof(struct pglist_data));
+	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
+	if (tnid != node)
+		pr_info("NODE_DATA(%d) on node %d\n", node, tnid);
+	__node_data[node] = nd;
 	NODE_DATA(node)->node_start_pfn = start_pfn;
 	NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn;
 
@@ -183,6 +194,7 @@  static void __init node_mem_init(unsigned int node)
 			memblock_reserve((node_addrspace_offset | 0xfe000000),
 					 32 << 20);
 	}
+
 }
 
 static __init void prom_meminit(void)