diff mbox

[RFC,v3,13/24] ARM: NUMA: DT: Parse memory NUMA information

Message ID 1500378106-2620-14-git-send-email-vijay.kilari@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Vijay Kilari July 18, 2017, 11:41 a.m. UTC
From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>

Parse memory node and fetch numa-node-id information.
For each memory range, store in node_memblk_range[]
along with node id.

When booting in UEFI mode, UEFI passes memory information
to Dom0 using EFI memory descriptor table and deletes the
memory nodes from the host DT. However to fetch the memory
numa node id, memory DT node should not be deleted by EFI stub.
With this patch, do not delete memory node from FDT.

NUMA info of memory is extracted from process_memory_node()
instead of parsing the DT again during numa_init().

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
---
v3: - Set numa_off in numa_failed() and drop dt_numa variable
---
 xen/arch/arm/bootfdt.c      | 25 +++++++++++++++++++++----
 xen/arch/arm/efi/efi-boot.h | 25 -------------------------
 xen/arch/arm/numa/dt_numa.c | 32 ++++++++++++++++++++++++++++++++
 xen/arch/arm/numa/numa.c    |  5 +++++
 xen/include/asm-arm/numa.h  |  2 ++
 5 files changed, 60 insertions(+), 29 deletions(-)

Comments

Julien Grall July 19, 2017, 6:39 p.m. UTC | #1
Hi Vijay,

On 18/07/17 12:41, vijay.kilari@gmail.com wrote:
> From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>
> Parse memory node and fetch numa-node-id information.
> For each memory range, store in node_memblk_range[]
> along with node id.
>
> When booting in UEFI mode, UEFI passes memory information
> to Dom0 using EFI memory descriptor table and deletes the
> memory nodes from the host DT. However to fetch the memory
> numa node id, memory DT node should not be deleted by EFI stub.
> With this patch, do not delete memory node from FDT.
>
> NUMA info of memory is extracted from process_memory_node()
> instead of parsing the DT again during numa_init().

This patch does too much and needs to be split. The splitting would be 
at least:

- EFI mode change
- Numa change

>
> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
> ---
> v3: - Set numa_off in numa_failed() and drop dt_numa variable
> ---
>  xen/arch/arm/bootfdt.c      | 25 +++++++++++++++++++++----
>  xen/arch/arm/efi/efi-boot.h | 25 -------------------------
>  xen/arch/arm/numa/dt_numa.c | 32 ++++++++++++++++++++++++++++++++
>  xen/arch/arm/numa/numa.c    |  5 +++++
>  xen/include/asm-arm/numa.h  |  2 ++
>  5 files changed, 60 insertions(+), 29 deletions(-)
>
> diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c
> index 6e8251b..b3a132c 100644
> --- a/xen/arch/arm/bootfdt.c
> +++ b/xen/arch/arm/bootfdt.c
> @@ -13,6 +13,8 @@
>  #include <xen/init.h>
>  #include <xen/device_tree.h>
>  #include <xen/libfdt/libfdt.h>
> +#include <xen/numa.h>
> +#include <xen/efi.h>

Please add the headers in alphabetical order.

>  #include <xsm/xsm.h>
>  #include <asm/setup.h>
>
> @@ -146,6 +148,9 @@ static void __init process_memory_node(const void *fdt, int node,
>      const __be32 *cell;
>      paddr_t start, size;
>      u32 reg_cells = address_cells + size_cells;
> +#ifdef CONFIG_NUMA
> +    uint32_t nid;
> +#endif
>
>      if ( address_cells < 1 || size_cells < 1 )
>      {
> @@ -154,24 +159,36 @@ static void __init process_memory_node(const void *fdt, int node,
>          return;
>      }
>
> +#ifdef CONFIG_NUMA
> +    nid = device_tree_get_u32(fdt, node, "numa-node-id", NR_NODE_MEMBLKS);

Should not you use MAX_NUM_NODES rather than NR_NODE_MEMBLKS?

Also, where is the sanity check?

> +#endif
>      prop = fdt_get_property(fdt, node, "reg", NULL);
>      if ( !prop )
>      {
>          printk("fdt: node `%s': missing `reg' property\n", name);
> +#ifdef CONFIG_NUMA
> +	numa_failed();

This file is using soft-tab not hard one.

> +#endif
>          return;
>      }
>
>      cell = (const __be32 *)prop->data;
>      banks = fdt32_to_cpu(prop->len) / (reg_cells * sizeof (u32));
>
> -    for ( i = 0; i < banks && bootinfo.mem.nr_banks < NR_MEM_BANKS; i++ )
> +    for ( i = 0; i < banks; i++ )
>      {
>          device_tree_get_reg(&cell, address_cells, size_cells, &start, &size);
>          if ( !size )
>              continue;
> -        bootinfo.mem.bank[bootinfo.mem.nr_banks].start = start;
> -        bootinfo.mem.bank[bootinfo.mem.nr_banks].size = size;
> -        bootinfo.mem.nr_banks++;
> +        if ( !efi_enabled(EFI_BOOT) && bootinfo.mem.nr_banks < NR_MEM_BANKS )
> +        {
> +            bootinfo.mem.bank[bootinfo.mem.nr_banks].start = start;
> +            bootinfo.mem.bank[bootinfo.mem.nr_banks].size = size;
> +            bootinfo.mem.nr_banks++;
> +        }

This change should be split.

> +#ifdef CONFIG_NUMA
> +        dt_numa_process_memory_node(nid, start, size);
> +#endif
>      }
>  }
>
> diff --git a/xen/arch/arm/efi/efi-boot.h b/xen/arch/arm/efi/efi-boot.h
> index 56de26e..a8bde68 100644
> --- a/xen/arch/arm/efi/efi-boot.h
> +++ b/xen/arch/arm/efi/efi-boot.h
> @@ -194,33 +194,8 @@ EFI_STATUS __init fdt_add_uefi_nodes(EFI_SYSTEM_TABLE *sys_table,
>      int status;
>      u32 fdt_val32;
>      u64 fdt_val64;
> -    int prev;
>      int num_rsv;
>
> -    /*
> -     * Delete any memory nodes present.  The EFI memory map is the only
> -     * memory description provided to Xen.
> -     */
> -    prev = 0;
> -    for (;;)
> -    {
> -        const char *type;
> -        int len;
> -
> -        node = fdt_next_node(fdt, prev, NULL);
> -        if ( node < 0 )
> -            break;
> -
> -        type = fdt_getprop(fdt, node, "device_type", &len);
> -        if ( type && strncmp(type, "memory", len) == 0 )
> -        {
> -            fdt_del_node(fdt, node);
> -            continue;
> -        }
> -
> -        prev = node;
> -    }
> -

That chunk should move to the same patch as the EFI check.

>     /*
>      * Delete all memory reserve map entries. When booting via UEFI,
>      * kernel will use the UEFI memory map to find reserved regions.
> diff --git a/xen/arch/arm/numa/dt_numa.c b/xen/arch/arm/numa/dt_numa.c
> index 963bb40..84030e7 100644
> --- a/xen/arch/arm/numa/dt_numa.c
> +++ b/xen/arch/arm/numa/dt_numa.c
> @@ -58,6 +58,38 @@ static int __init dt_numa_process_cpu_node(const void *fdt)
>      return 0;
>  }
>
> +void __init dt_numa_process_memory_node(uint32_t nid, paddr_t start,
> +                                       paddr_t size)
> +{
> +    struct node *nd;
> +    int i;
> +
> +    i = conflicting_memblks(start, start + size);
> +    if ( i < 0 )
> +    {
> +         if ( numa_add_memblk(nid, start, size) )
> +         {
> +             printk(XENLOG_WARNING "DT: NUMA: node-id %u overflow \n", nid);
> +             numa_failed();
> +             return;
> +         }
> +    }
> +    else
> +    {
> +         nd = get_node_memblk_range(i);
> +         printk(XENLOG_ERR
> +                "NUMA DT: node %u (%"PRIx64"-%"PRIx64") overlaps with %d (%"PRIx64"-%"PRIx64")\n",

s/PRIx64/PRI_paddr/

> +                nid, start, start + size, i, nd->start, nd->end);
> +
> +         numa_failed();
> +         return;
> +    }
> +
> +    node_set(nid, memory_nodes_parsed);

This code looks fairly similar to some bits of 
acpi_numa_memory_affinity_init. Is there any way we could introduce a 
common helper?

> +
> +    return;
> +}
> +
>  int __init dt_numa_init(void)
>  {
>      int ret;
> diff --git a/xen/arch/arm/numa/numa.c b/xen/arch/arm/numa/numa.c
> index 45cc418..8227361 100644
> --- a/xen/arch/arm/numa/numa.c
> +++ b/xen/arch/arm/numa/numa.c
> @@ -19,6 +19,11 @@
>  #include <xen/nodemask.h>
>  #include <xen/numa.h>
>
> +void numa_failed(void)
> +{
> +    numa_off = true;
> +}
> +
>  void __init numa_init(void)
>  {
>      int ret = 0;
> diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h
> index 8f517a2..36cd782 100644
> --- a/xen/include/asm-arm/numa.h
> +++ b/xen/include/asm-arm/numa.h
> @@ -3,6 +3,8 @@
>
>  typedef uint8_t nodeid_t;
>
> +void dt_numa_process_memory_node(uint32_t nid, paddr_t start, paddr_t size);

Likely, this should go under CONFIG_NUMA.

> +
>  #ifdef CONFIG_NUMA
>  void numa_init(void);
>  int dt_numa_init(void);
>

Cheers,
Vijay Kilari July 20, 2017, 10:37 a.m. UTC | #2
On Thu, Jul 20, 2017 at 12:09 AM, Julien Grall <julien.grall@arm.com> wrote:
> Hi Vijay,
>
> On 18/07/17 12:41, vijay.kilari@gmail.com wrote:
>>
>> From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>>
>> Parse memory node and fetch numa-node-id information.
>> For each memory range, store in node_memblk_range[]
>> along with node id.
>>
>> When booting in UEFI mode, UEFI passes memory information
>> to Dom0 using EFI memory descriptor table and deletes the
>> memory nodes from the host DT. However to fetch the memory
>> numa node id, memory DT node should not be deleted by EFI stub.
>> With this patch, do not delete memory node from FDT.
>>
>> NUMA info of memory is extracted from process_memory_node()
>> instead of parsing the DT again during numa_init().
>
>
> This patch does too much and needs to be split. The splitting would be at
> least:
>
> - EFI mode change
> - Numa change

OK

>
>>
>> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>> ---
>> v3: - Set numa_off in numa_failed() and drop dt_numa variable
>> ---
>>  xen/arch/arm/bootfdt.c      | 25 +++++++++++++++++++++----
>>  xen/arch/arm/efi/efi-boot.h | 25 -------------------------
>>  xen/arch/arm/numa/dt_numa.c | 32 ++++++++++++++++++++++++++++++++
>>  xen/arch/arm/numa/numa.c    |  5 +++++
>>  xen/include/asm-arm/numa.h  |  2 ++
>>  5 files changed, 60 insertions(+), 29 deletions(-)
>>
>> diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c
>> index 6e8251b..b3a132c 100644
>> --- a/xen/arch/arm/bootfdt.c
>> +++ b/xen/arch/arm/bootfdt.c
>> @@ -13,6 +13,8 @@
>>  #include <xen/init.h>
>>  #include <xen/device_tree.h>
>>  #include <xen/libfdt/libfdt.h>
>> +#include <xen/numa.h>
>> +#include <xen/efi.h>
>
>
> Please add the headers in alphabetical order.
>
>>  #include <xsm/xsm.h>
>>  #include <asm/setup.h>
>>
>> @@ -146,6 +148,9 @@ static void __init process_memory_node(const void
>> *fdt, int node,
>>      const __be32 *cell;
>>      paddr_t start, size;
>>      u32 reg_cells = address_cells + size_cells;
>> +#ifdef CONFIG_NUMA
>> +    uint32_t nid;
>> +#endif
>>
>>      if ( address_cells < 1 || size_cells < 1 )
>>      {
>> @@ -154,24 +159,36 @@ static void __init process_memory_node(const void
>> *fdt, int node,
>>          return;
>>      }
>>
>> +#ifdef CONFIG_NUMA
>> +    nid = device_tree_get_u32(fdt, node, "numa-node-id",
>> NR_NODE_MEMBLKS);
>
>
> Should not you use MAX_NUM_NODES rather than NR_NODE_MEMBLKS?
>
> Also, where is the sanity check?

OK
>
>> +#endif
>>      prop = fdt_get_property(fdt, node, "reg", NULL);
>>      if ( !prop )
>>      {
>>          printk("fdt: node `%s': missing `reg' property\n", name);
>> +#ifdef CONFIG_NUMA
>> +       numa_failed();
>
>
> This file is using soft-tab not hard one.
>
>> +#endif
>>          return;
>>      }
>>
>>      cell = (const __be32 *)prop->data;
>>      banks = fdt32_to_cpu(prop->len) / (reg_cells * sizeof (u32));
>>
>> -    for ( i = 0; i < banks && bootinfo.mem.nr_banks < NR_MEM_BANKS; i++ )
>> +    for ( i = 0; i < banks; i++ )
>>      {
>>          device_tree_get_reg(&cell, address_cells, size_cells, &start,
>> &size);
>>          if ( !size )
>>              continue;
>> -        bootinfo.mem.bank[bootinfo.mem.nr_banks].start = start;
>> -        bootinfo.mem.bank[bootinfo.mem.nr_banks].size = size;
>> -        bootinfo.mem.nr_banks++;
>> +        if ( !efi_enabled(EFI_BOOT) && bootinfo.mem.nr_banks <
>> NR_MEM_BANKS )
>> +        {
>> +            bootinfo.mem.bank[bootinfo.mem.nr_banks].start = start;
>> +            bootinfo.mem.bank[bootinfo.mem.nr_banks].size = size;
>> +            bootinfo.mem.nr_banks++;
>> +        }
>
>
> This change should be split.
>
>
>> +#ifdef CONFIG_NUMA
>> +        dt_numa_process_memory_node(nid, start, size);
>> +#endif
>>      }
>>  }
>>
>> diff --git a/xen/arch/arm/efi/efi-boot.h b/xen/arch/arm/efi/efi-boot.h
>> index 56de26e..a8bde68 100644
>> --- a/xen/arch/arm/efi/efi-boot.h
>> +++ b/xen/arch/arm/efi/efi-boot.h
>> @@ -194,33 +194,8 @@ EFI_STATUS __init fdt_add_uefi_nodes(EFI_SYSTEM_TABLE
>> *sys_table,
>>      int status;
>>      u32 fdt_val32;
>>      u64 fdt_val64;
>> -    int prev;
>>      int num_rsv;
>>
>> -    /*
>> -     * Delete any memory nodes present.  The EFI memory map is the only
>> -     * memory description provided to Xen.
>> -     */
>> -    prev = 0;
>> -    for (;;)
>> -    {
>> -        const char *type;
>> -        int len;
>> -
>> -        node = fdt_next_node(fdt, prev, NULL);
>> -        if ( node < 0 )
>> -            break;
>> -
>> -        type = fdt_getprop(fdt, node, "device_type", &len);
>> -        if ( type && strncmp(type, "memory", len) == 0 )
>> -        {
>> -            fdt_del_node(fdt, node);
>> -            continue;
>> -        }
>> -
>> -        prev = node;
>> -    }
>> -
>
>
> That chunk should move to the same patch as the EFI check.
>
ok
>
>>     /*
>>      * Delete all memory reserve map entries. When booting via UEFI,
>>      * kernel will use the UEFI memory map to find reserved regions.
>> diff --git a/xen/arch/arm/numa/dt_numa.c b/xen/arch/arm/numa/dt_numa.c
>> index 963bb40..84030e7 100644
>> --- a/xen/arch/arm/numa/dt_numa.c
>> +++ b/xen/arch/arm/numa/dt_numa.c
>> @@ -58,6 +58,38 @@ static int __init dt_numa_process_cpu_node(const void
>> *fdt)
>>      return 0;
>>  }
>>
>> +void __init dt_numa_process_memory_node(uint32_t nid, paddr_t start,
>> +                                       paddr_t size)
>> +{
>> +    struct node *nd;
>> +    int i;
>> +
>> +    i = conflicting_memblks(start, start + size);
>> +    if ( i < 0 )
>> +    {
>> +         if ( numa_add_memblk(nid, start, size) )
>> +         {
>> +             printk(XENLOG_WARNING "DT: NUMA: node-id %u overflow \n",
>> nid);
>> +             numa_failed();
>> +             return;
>> +         }
>> +    }
>> +    else
>> +    {
>> +         nd = get_node_memblk_range(i);
>> +         printk(XENLOG_ERR
>> +                "NUMA DT: node %u (%"PRIx64"-%"PRIx64") overlaps with %d
>> (%"PRIx64"-%"PRIx64")\n",
>
>
> s/PRIx64/PRI_paddr/
ok
>
>> +                nid, start, start + size, i, nd->start, nd->end);
>> +
>> +         numa_failed();
>> +         return;
>> +    }
>> +
>> +    node_set(nid, memory_nodes_parsed);
>
>
> This code looks fairly similar to some bits of
> acpi_numa_memory_affinity_init. Is there any way we could introduce a common
> helper?

Yes some bit of code is similar, But acpi_numa_memory_affinity_init() is stuffed
with some more checks of ACPI data in between the code. So quite complex
to make it common code.

>
>
>> +
>> +    return;
>> +}
>> +
>>  int __init dt_numa_init(void)
>>  {
>>      int ret;
>> diff --git a/xen/arch/arm/numa/numa.c b/xen/arch/arm/numa/numa.c
>> index 45cc418..8227361 100644
>> --- a/xen/arch/arm/numa/numa.c
>> +++ b/xen/arch/arm/numa/numa.c
>> @@ -19,6 +19,11 @@
>>  #include <xen/nodemask.h>
>>  #include <xen/numa.h>
>>
>> +void numa_failed(void)
>> +{
>> +    numa_off = true;
>> +}
>> +
>>  void __init numa_init(void)
>>  {
>>      int ret = 0;
>> diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h
>> index 8f517a2..36cd782 100644
>> --- a/xen/include/asm-arm/numa.h
>> +++ b/xen/include/asm-arm/numa.h
>> @@ -3,6 +3,8 @@
>>
>>  typedef uint8_t nodeid_t;
>>
>> +void dt_numa_process_memory_node(uint32_t nid, paddr_t start, paddr_t
>> size);
>
>
> Likely, this should go under CONFIG_NUMA.

ok

>
>> +
>>  #ifdef CONFIG_NUMA
>>  void numa_init(void);
>>  int dt_numa_init(void);
>>
>
> Cheers,
>
> --
> Julien Grall
Julien Grall July 20, 2017, 11:24 a.m. UTC | #3
Hi Vijay,

On 20/07/17 11:37, Vijay Kilari wrote:
> On Thu, Jul 20, 2017 at 12:09 AM, Julien Grall <julien.grall@arm.com> wrote:
>> This code looks fairly similar to some bits of
>> acpi_numa_memory_affinity_init. Is there any way we could introduce a common
>> helper?
>
> Yes some bit of code is similar, But acpi_numa_memory_affinity_init() is stuffed
> with some more checks of ACPI data in between the code. So quite complex
> to make it common code.

I think it might be possible to rework acpi_numa_memory_affinity_init to 
take out common code... But let's keep like that for now.

Cheers,
Julien Grall July 20, 2017, 11:26 a.m. UTC | #4
On 19/07/17 19:39, Julien Grall wrote:
>>      cell = (const __be32 *)prop->data;
>>      banks = fdt32_to_cpu(prop->len) / (reg_cells * sizeof (u32));
>>
>> -    for ( i = 0; i < banks && bootinfo.mem.nr_banks < NR_MEM_BANKS;
>> i++ )
>> +    for ( i = 0; i < banks; i++ )
>>      {
>>          device_tree_get_reg(&cell, address_cells, size_cells, &start,
>> &size);
>>          if ( !size )
>>              continue;
>> -        bootinfo.mem.bank[bootinfo.mem.nr_banks].start = start;
>> -        bootinfo.mem.bank[bootinfo.mem.nr_banks].size = size;
>> -        bootinfo.mem.nr_banks++;
>> +        if ( !efi_enabled(EFI_BOOT) && bootinfo.mem.nr_banks <
>> NR_MEM_BANKS )
>> +        {
>> +            bootinfo.mem.bank[bootinfo.mem.nr_banks].start = start;
>> +            bootinfo.mem.bank[bootinfo.mem.nr_banks].size = size;
>> +            bootinfo.mem.nr_banks++;
>> +        }
>
> This change should be split.

I thought a bit more about this code during the week. I think it would 
be nicer to write:

#ifdef CONFIG_NUMA
dt_numa_process_memory_node(nid, start, size);
#endif

if ( !efi_enabled(EFI_BOOT) )
   continue;

if ( bootinfo.mem.nr_banks < NR_MEM_BANKS )
   break;

bootinfo.mem.bank[....];
....

Also, you may want to add a stub for dt_numa_process_memory_node rather 
than #ifdef in the code.

Cheers,
Vijay Kilari July 21, 2017, 11:10 a.m. UTC | #5
Hi Julien,

On Thu, Jul 20, 2017 at 4:56 PM, Julien Grall <julien.grall@arm.com> wrote:
>
>
> On 19/07/17 19:39, Julien Grall wrote:
>>>
>>>      cell = (const __be32 *)prop->data;
>>>      banks = fdt32_to_cpu(prop->len) / (reg_cells * sizeof (u32));
>>>
>>> -    for ( i = 0; i < banks && bootinfo.mem.nr_banks < NR_MEM_BANKS;
>>> i++ )
>>> +    for ( i = 0; i < banks; i++ )
>>>      {
>>>          device_tree_get_reg(&cell, address_cells, size_cells, &start,
>>> &size);
>>>          if ( !size )
>>>              continue;
>>> -        bootinfo.mem.bank[bootinfo.mem.nr_banks].start = start;
>>> -        bootinfo.mem.bank[bootinfo.mem.nr_banks].size = size;
>>> -        bootinfo.mem.nr_banks++;
>>> +        if ( !efi_enabled(EFI_BOOT) && bootinfo.mem.nr_banks <
>>> NR_MEM_BANKS )
>>> +        {
>>> +            bootinfo.mem.bank[bootinfo.mem.nr_banks].start = start;
>>> +            bootinfo.mem.bank[bootinfo.mem.nr_banks].size = size;
>>> +            bootinfo.mem.nr_banks++;
>>> +        }
>>
>>
>> This change should be split.
>
>
> I thought a bit more about this code during the week. I think it would be
> nicer to write:
>
> #ifdef CONFIG_NUMA
> dt_numa_process_memory_node(nid, start, size);
> #endif
>
> if ( !efi_enabled(EFI_BOOT) )
>   continue;

Should be if ( efi_enabled(EFI_BOOT) ) ?
>
> if ( bootinfo.mem.nr_banks < NR_MEM_BANKS )

Should be if ( bootinfo.mem.nr_banks >= NR_MEM_BANKS ) ?

>   break;
>
> bootinfo.mem.bank[....];
> ....
>
> Also, you may want to add a stub for dt_numa_process_memory_node rather than
> #ifdef in the code.
>
> Cheers,
>
> --
> Julien Grall
Julien Grall July 21, 2017, 12:35 p.m. UTC | #6
On 21/07/17 12:10, Vijay Kilari wrote:
> Hi Julien,
>
> On Thu, Jul 20, 2017 at 4:56 PM, Julien Grall <julien.grall@arm.com> wrote:
>>
>>
>> On 19/07/17 19:39, Julien Grall wrote:
>>>>
>>>>      cell = (const __be32 *)prop->data;
>>>>      banks = fdt32_to_cpu(prop->len) / (reg_cells * sizeof (u32));
>>>>
>>>> -    for ( i = 0; i < banks && bootinfo.mem.nr_banks < NR_MEM_BANKS;
>>>> i++ )
>>>> +    for ( i = 0; i < banks; i++ )
>>>>      {
>>>>          device_tree_get_reg(&cell, address_cells, size_cells, &start,
>>>> &size);
>>>>          if ( !size )
>>>>              continue;
>>>> -        bootinfo.mem.bank[bootinfo.mem.nr_banks].start = start;
>>>> -        bootinfo.mem.bank[bootinfo.mem.nr_banks].size = size;
>>>> -        bootinfo.mem.nr_banks++;
>>>> +        if ( !efi_enabled(EFI_BOOT) && bootinfo.mem.nr_banks <
>>>> NR_MEM_BANKS )
>>>> +        {
>>>> +            bootinfo.mem.bank[bootinfo.mem.nr_banks].start = start;
>>>> +            bootinfo.mem.bank[bootinfo.mem.nr_banks].size = size;
>>>> +            bootinfo.mem.nr_banks++;
>>>> +        }
>>>
>>>
>>> This change should be split.
>>
>>
>> I thought a bit more about this code during the week. I think it would be
>> nicer to write:
>>
>> #ifdef CONFIG_NUMA
>> dt_numa_process_memory_node(nid, start, size);
>> #endif
>>
>> if ( !efi_enabled(EFI_BOOT) )
>>   continue;
>
> Should be if ( efi_enabled(EFI_BOOT) ) ?
>>
>> if ( bootinfo.mem.nr_banks < NR_MEM_BANKS )
>
> Should be if ( bootinfo.mem.nr_banks >= NR_MEM_BANKS ) ?

Yes for both. I wrote too quickly this e-mail.

Cheers,
diff mbox

Patch

diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c
index 6e8251b..b3a132c 100644
--- a/xen/arch/arm/bootfdt.c
+++ b/xen/arch/arm/bootfdt.c
@@ -13,6 +13,8 @@ 
 #include <xen/init.h>
 #include <xen/device_tree.h>
 #include <xen/libfdt/libfdt.h>
+#include <xen/numa.h>
+#include <xen/efi.h>
 #include <xsm/xsm.h>
 #include <asm/setup.h>
 
@@ -146,6 +148,9 @@  static void __init process_memory_node(const void *fdt, int node,
     const __be32 *cell;
     paddr_t start, size;
     u32 reg_cells = address_cells + size_cells;
+#ifdef CONFIG_NUMA
+    uint32_t nid;
+#endif
 
     if ( address_cells < 1 || size_cells < 1 )
     {
@@ -154,24 +159,36 @@  static void __init process_memory_node(const void *fdt, int node,
         return;
     }
 
+#ifdef CONFIG_NUMA
+    nid = device_tree_get_u32(fdt, node, "numa-node-id", NR_NODE_MEMBLKS);
+#endif
     prop = fdt_get_property(fdt, node, "reg", NULL);
     if ( !prop )
     {
         printk("fdt: node `%s': missing `reg' property\n", name);
+#ifdef CONFIG_NUMA
+	numa_failed();
+#endif
         return;
     }
 
     cell = (const __be32 *)prop->data;
     banks = fdt32_to_cpu(prop->len) / (reg_cells * sizeof (u32));
 
-    for ( i = 0; i < banks && bootinfo.mem.nr_banks < NR_MEM_BANKS; i++ )
+    for ( i = 0; i < banks; i++ )
     {
         device_tree_get_reg(&cell, address_cells, size_cells, &start, &size);
         if ( !size )
             continue;
-        bootinfo.mem.bank[bootinfo.mem.nr_banks].start = start;
-        bootinfo.mem.bank[bootinfo.mem.nr_banks].size = size;
-        bootinfo.mem.nr_banks++;
+        if ( !efi_enabled(EFI_BOOT) && bootinfo.mem.nr_banks < NR_MEM_BANKS )
+        {
+            bootinfo.mem.bank[bootinfo.mem.nr_banks].start = start;
+            bootinfo.mem.bank[bootinfo.mem.nr_banks].size = size;
+            bootinfo.mem.nr_banks++;
+        }
+#ifdef CONFIG_NUMA
+        dt_numa_process_memory_node(nid, start, size);
+#endif
     }
 }
 
diff --git a/xen/arch/arm/efi/efi-boot.h b/xen/arch/arm/efi/efi-boot.h
index 56de26e..a8bde68 100644
--- a/xen/arch/arm/efi/efi-boot.h
+++ b/xen/arch/arm/efi/efi-boot.h
@@ -194,33 +194,8 @@  EFI_STATUS __init fdt_add_uefi_nodes(EFI_SYSTEM_TABLE *sys_table,
     int status;
     u32 fdt_val32;
     u64 fdt_val64;
-    int prev;
     int num_rsv;
 
-    /*
-     * Delete any memory nodes present.  The EFI memory map is the only
-     * memory description provided to Xen.
-     */
-    prev = 0;
-    for (;;)
-    {
-        const char *type;
-        int len;
-
-        node = fdt_next_node(fdt, prev, NULL);
-        if ( node < 0 )
-            break;
-
-        type = fdt_getprop(fdt, node, "device_type", &len);
-        if ( type && strncmp(type, "memory", len) == 0 )
-        {
-            fdt_del_node(fdt, node);
-            continue;
-        }
-
-        prev = node;
-    }
-
    /*
     * Delete all memory reserve map entries. When booting via UEFI,
     * kernel will use the UEFI memory map to find reserved regions.
diff --git a/xen/arch/arm/numa/dt_numa.c b/xen/arch/arm/numa/dt_numa.c
index 963bb40..84030e7 100644
--- a/xen/arch/arm/numa/dt_numa.c
+++ b/xen/arch/arm/numa/dt_numa.c
@@ -58,6 +58,38 @@  static int __init dt_numa_process_cpu_node(const void *fdt)
     return 0;
 }
 
+void __init dt_numa_process_memory_node(uint32_t nid, paddr_t start,
+                                       paddr_t size)
+{
+    struct node *nd;
+    int i;
+
+    i = conflicting_memblks(start, start + size);
+    if ( i < 0 )
+    {
+         if ( numa_add_memblk(nid, start, size) )
+         {
+             printk(XENLOG_WARNING "DT: NUMA: node-id %u overflow \n", nid);
+             numa_failed();
+             return;
+         }
+    }
+    else
+    {
+         nd = get_node_memblk_range(i);
+         printk(XENLOG_ERR
+                "NUMA DT: node %u (%"PRIx64"-%"PRIx64") overlaps with %d (%"PRIx64"-%"PRIx64")\n",
+                nid, start, start + size, i, nd->start, nd->end);
+
+         numa_failed();
+         return;
+    }
+
+    node_set(nid, memory_nodes_parsed);
+
+    return;
+}
+
 int __init dt_numa_init(void)
 {
     int ret;
diff --git a/xen/arch/arm/numa/numa.c b/xen/arch/arm/numa/numa.c
index 45cc418..8227361 100644
--- a/xen/arch/arm/numa/numa.c
+++ b/xen/arch/arm/numa/numa.c
@@ -19,6 +19,11 @@ 
 #include <xen/nodemask.h>
 #include <xen/numa.h>
 
+void numa_failed(void)
+{
+    numa_off = true;
+}
+
 void __init numa_init(void)
 {
     int ret = 0;
diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h
index 8f517a2..36cd782 100644
--- a/xen/include/asm-arm/numa.h
+++ b/xen/include/asm-arm/numa.h
@@ -3,6 +3,8 @@ 
 
 typedef uint8_t nodeid_t;
 
+void dt_numa_process_memory_node(uint32_t nid, paddr_t start, paddr_t size);
+
 #ifdef CONFIG_NUMA
 void numa_init(void);
 int dt_numa_init(void);