diff mbox

[RFC,v3,15/24] ARM: NUMA: DT: Add CPU NUMA support

Message ID 1500378106-2620-16-git-send-email-vijay.kilari@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Vijay Kilari July 18, 2017, 11:41 a.m. UTC
From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>

For each cpu, update cpu_to_node[] with node id from
the numa-node-id DT property. Also, initialize cpu_to_node[]
with node 0.

Add macros to access cpu_to_node[] information.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
---
v3: - Dropped numa_add_cpu declaration from asm-arm/numa.h
    - Dropped stale declarations
    - Call numa_add_cpu for cpu0
---
 xen/arch/arm/numa/numa.c   | 21 +++++++++++++++++++++
 xen/arch/arm/setup.c       |  2 ++
 xen/arch/arm/smpboot.c     | 25 ++++++++++++++++++++++++-
 xen/include/asm-arm/numa.h |  7 +++++++
 xen/include/asm-x86/numa.h |  1 -
 xen/include/xen/numa.h     |  1 +
 6 files changed, 55 insertions(+), 2 deletions(-)

Comments

Julien Grall July 24, 2017, 11:24 a.m. UTC | #1
Hi Vijay,

On 18/07/17 12:41, vijay.kilari@gmail.com wrote:
> From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>
> For each cpu, update cpu_to_node[] with node id from
> the numa-node-id DT property. Also, initialize cpu_to_node[]
> with node 0.
>
> Add macros to access cpu_to_node[] information.
>
> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
> ---
> v3: - Dropped numa_add_cpu declaration from asm-arm/numa.h
>     - Dropped stale declarations
>     - Call numa_add_cpu for cpu0
> ---
>  xen/arch/arm/numa/numa.c   | 21 +++++++++++++++++++++
>  xen/arch/arm/setup.c       |  2 ++
>  xen/arch/arm/smpboot.c     | 25 ++++++++++++++++++++++++-
>  xen/include/asm-arm/numa.h |  7 +++++++
>  xen/include/asm-x86/numa.h |  1 -
>  xen/include/xen/numa.h     |  1 +
>  6 files changed, 55 insertions(+), 2 deletions(-)
>
> diff --git a/xen/arch/arm/numa/numa.c b/xen/arch/arm/numa/numa.c
> index c00b92c..dc80aa5 100644
> --- a/xen/arch/arm/numa/numa.c
> +++ b/xen/arch/arm/numa/numa.c
> @@ -22,11 +22,31 @@
>
>  static uint8_t (*node_distance_fn)(nodeid_t a, nodeid_t b);
>
> +/*
> + * Setup early cpu_to_node.
> + */
> +void __init init_cpu_to_node(void)
> +{
> +    int i;
> +
> +    for ( i = 0; i < NR_CPUS; i++ )
> +        numa_set_node(i, 0);
> +}

 From the comment: "Setup early cpu_to_node". However this is not how 
you are using it.

But I am not sure why it is even here...

> +
>  void numa_failed(void)
>  {
>      numa_off = true;
>      init_dt_numa_distance();
>      node_distance_fn = NULL;
> +    init_cpu_to_node();
> +}
> +
> +void __init numa_set_cpu_node(int cpu, unsigned int nid)
> +{
> +    if ( !node_isset(nid, processor_nodes_parsed) || nid >= MAX_NUMNODES )
> +        nid = 0;

This looks wrong to me. If the node-id is invalid, why would you blindly 
set to 0?

> +
> +    numa_set_node(cpu, nid);
>  }
>
>  uint8_t __node_distance(nodeid_t a, nodeid_t b)
> @@ -49,6 +69,7 @@ void __init numa_init(void)
>      int ret = 0;
>
>      nodes_clear(processor_nodes_parsed);
> +    init_cpu_to_node();
>      init_dt_numa_distance();
>
>      if ( numa_off )
> diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
> index a6d1499..b9c8b0d 100644
> --- a/xen/arch/arm/setup.c
> +++ b/xen/arch/arm/setup.c
> @@ -787,6 +787,8 @@ void __init start_xen(unsigned long boot_phys_offset,
>
>      processor_id();
>
> +    numa_add_cpu(0);
> +
>      smp_init_cpus();
>      cpus = smp_get_max_cpus();
>      printk(XENLOG_INFO "SMP: Allowing %u CPUs\n", cpus);
> diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c
> index 32e8722..fcf9afc 100644
> --- a/xen/arch/arm/smpboot.c
> +++ b/xen/arch/arm/smpboot.c
> @@ -29,6 +29,7 @@
>  #include <xen/timer.h>
>  #include <xen/irq.h>
>  #include <xen/console.h>
> +#include <xen/numa.h>

Please use the alphabetical order.

>  #include <asm/cpuerrata.h>
>  #include <asm/gic.h>
>  #include <asm/psci.h>
> @@ -106,6 +107,7 @@ static void __init dt_smp_init_cpus(void)
>          [0 ... NR_CPUS - 1] = MPIDR_INVALID
>      };
>      bool_t bootcpu_valid = 0;
> +    nodeid_t *cpu_to_nodemap;
>      int rc;
>
>      mpidr = boot_cpu_data.mpidr.bits & MPIDR_HWID_MASK;
> @@ -117,11 +119,18 @@ static void __init dt_smp_init_cpus(void)
>          return;
>      }
>
> +    cpu_to_nodemap = xzalloc_array(nodeid_t, NR_CPUS);

Why do you need to allocate cpu_to_nodemap? Would not it be easier to 
put it on the stack as we do for other variable?

> +    if ( !cpu_to_nodemap )
> +    {
> +        printk(XENLOG_WARNING "Failed to allocate memory for cpu_to_nodemap\n");
> +        return;
> +    }
> +
>      dt_for_each_child_node( cpus, cpu )
>      {
>          const __be32 *prop;
>          u64 addr;
> -        u32 reg_len;
> +        uint32_t reg_len, nid;
>          register_t hwid;
>
>          if ( !dt_device_type_is_equal(cpu, "cpu") )
> @@ -146,6 +155,15 @@ static void __init dt_smp_init_cpus(void)
>              continue;
>          }
>
> +        if ( !dt_property_read_u32(cpu, "numa-node-id", &nid) )
> +        {
> +            printk(XENLOG_WARNING "cpu node `%s`: numa-node-id not found\n",
> +                   dt_node_full_name(cpu));

numa-node-id is not mandatory. So you would print a warning on all 
non-NUMA platform. This not what we want.

> +            nid = 0;
> +        }
> +
> +        cpu_to_nodemap[cpuidx] = nid;
> +
>          addr = dt_read_number(prop, dt_n_addr_cells(cpu));
>
>          hwid = addr;
> @@ -224,6 +242,7 @@ static void __init dt_smp_init_cpus(void)
>      {
>          printk(XENLOG_WARNING "DT missing boot CPU MPIDR[23:0]\n"
>                 "Using only 1 CPU\n");
> +        xfree(cpu_to_nodemap);
>          return;
>      }
>
> @@ -233,7 +252,10 @@ static void __init dt_smp_init_cpus(void)
>              continue;
>          cpumask_set_cpu(i, &cpu_possible_map);
>          cpu_logical_map(i) = tmp_map[i];
> +        numa_set_cpu_node(i, cpu_to_nodemap[i]);
>      }
> +
> +    xfree(cpu_to_nodemap);
>  }
>
>  void __init smp_init_cpus(void)
> @@ -313,6 +335,7 @@ void start_secondary(unsigned long boot_phys_offset,
>       */
>      smp_wmb();
>
> +    numa_add_cpu(cpuid);

Newline here please.

>      /* Now report this CPU is up */
>      cpumask_set_cpu(cpuid, &cpu_online_map);
>
> diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h
> index d1dc83a..0d3146c 100644
> --- a/xen/include/asm-arm/numa.h
> +++ b/xen/include/asm-arm/numa.h
> @@ -10,12 +10,19 @@ void init_dt_numa_distance(void);
>  #ifdef CONFIG_NUMA
>  void numa_init(void);
>  int dt_numa_init(void);
> +void numa_set_cpu_node(int cpu, unsigned int nid);
> +
>  #else
>  static inline void numa_init(void)
>  {
>      return;
>  }
>
> +static inline void numa_set_cpu_node(int cpu, unsigned int nid)
> +{
> +    return;
> +}
> +
>  /* Fake one node for now. See also node_online_map. */
>  #define cpu_to_node(cpu) 0
>  #define node_to_cpumask(node)   (cpu_online_map)
> diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
> index ca0a2a6..fc4747f 100644
> --- a/xen/include/asm-x86/numa.h
> +++ b/xen/include/asm-x86/numa.h
> @@ -15,7 +15,6 @@ extern nodeid_t acpi_setup_node(unsigned int pxm);
>  extern void srat_detect_node(int cpu);
>
>  extern nodeid_t apicid_to_node[];
> -extern void init_cpu_to_node(void);
>
>  void srat_parse_regions(paddr_t addr);
>  unsigned int arch_get_dma_bitsize(void);
> diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
> index 10ef4c4..8a306e7 100644
> --- a/xen/include/xen/numa.h
> +++ b/xen/include/xen/numa.h
> @@ -30,6 +30,7 @@ extern s8 acpi_numa;
>  void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
>  int srat_disabled(void);
>  int valid_numa_range(paddr_t start, paddr_t end, nodeid_t node);
> +void init_cpu_to_node(void);

You never used this function in common code. So why did you move it in 
the common headers?

>
>  #ifdef CONFIG_NUMA
>  #define cpu_to_node(cpu)         (cpu_to_node[cpu])
>

Cheers,
Vijay Kilari July 25, 2017, 6:47 a.m. UTC | #2
Hi Julien,

On Mon, Jul 24, 2017 at 4:54 PM, Julien Grall <julien.grall@arm.com> wrote:
> Hi Vijay,
>
>
> On 18/07/17 12:41, vijay.kilari@gmail.com wrote:
>>
>> From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>>
>> For each cpu, update cpu_to_node[] with node id from
>> the numa-node-id DT property. Also, initialize cpu_to_node[]
>> with node 0.
>>
>> Add macros to access cpu_to_node[] information.
>>
>> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>> ---
>> v3: - Dropped numa_add_cpu declaration from asm-arm/numa.h
>>     - Dropped stale declarations
>>     - Call numa_add_cpu for cpu0
>> ---
>>  xen/arch/arm/numa/numa.c   | 21 +++++++++++++++++++++
>>  xen/arch/arm/setup.c       |  2 ++
>>  xen/arch/arm/smpboot.c     | 25 ++++++++++++++++++++++++-
>>  xen/include/asm-arm/numa.h |  7 +++++++
>>  xen/include/asm-x86/numa.h |  1 -
>>  xen/include/xen/numa.h     |  1 +
>>  6 files changed, 55 insertions(+), 2 deletions(-)
>>
>> diff --git a/xen/arch/arm/numa/numa.c b/xen/arch/arm/numa/numa.c
>> index c00b92c..dc80aa5 100644
>> --- a/xen/arch/arm/numa/numa.c
>> +++ b/xen/arch/arm/numa/numa.c
>> @@ -22,11 +22,31 @@
>>
>>  static uint8_t (*node_distance_fn)(nodeid_t a, nodeid_t b);
>>
>> +/*
>> + * Setup early cpu_to_node.
>> + */
>> +void __init init_cpu_to_node(void)
>> +{
>> +    int i;
>> +
>> +    for ( i = 0; i < NR_CPUS; i++ )
>> +        numa_set_node(i, 0);
>> +}
>
>
> From the comment: "Setup early cpu_to_node". However this is not how you are
> using it.

Ok. I will update the comment.

>
> But I am not sure why it is even here...
>
>> +
>>  void numa_failed(void)
>>  {
>>      numa_off = true;
>>      init_dt_numa_distance();
>>      node_distance_fn = NULL;
>> +    init_cpu_to_node();
>> +}
>> +
>> +void __init numa_set_cpu_node(int cpu, unsigned int nid)
>> +{
>> +    if ( !node_isset(nid, processor_nodes_parsed) || nid >= MAX_NUMNODES
>> )
>> +        nid = 0;
>
>
> This looks wrong to me. If the node-id is invalid, why would you blindly set
> to 0?

Generally this check will not pass. I will make this function return
error code in case
of wrong nid.

>
>
>> +
>> +    numa_set_node(cpu, nid);
>>  }
>>
>>  uint8_t __node_distance(nodeid_t a, nodeid_t b)
>> @@ -49,6 +69,7 @@ void __init numa_init(void)
>>      int ret = 0;
>>
>>      nodes_clear(processor_nodes_parsed);
>> +    init_cpu_to_node();
>>      init_dt_numa_distance();
>>
>>      if ( numa_off )
>> diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
>> index a6d1499..b9c8b0d 100644
>> --- a/xen/arch/arm/setup.c
>> +++ b/xen/arch/arm/setup.c
>> @@ -787,6 +787,8 @@ void __init start_xen(unsigned long boot_phys_offset,
>>
>>      processor_id();
>>
>> +    numa_add_cpu(0);
>> +
>>      smp_init_cpus();
>>      cpus = smp_get_max_cpus();
>>      printk(XENLOG_INFO "SMP: Allowing %u CPUs\n", cpus);
>> diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c
>> index 32e8722..fcf9afc 100644
>> --- a/xen/arch/arm/smpboot.c
>> +++ b/xen/arch/arm/smpboot.c
>> @@ -29,6 +29,7 @@
>>  #include <xen/timer.h>
>>  #include <xen/irq.h>
>>  #include <xen/console.h>
>> +#include <xen/numa.h>
>
>
> Please use the alphabetical order.
>
>>  #include <asm/cpuerrata.h>
>>  #include <asm/gic.h>
>>  #include <asm/psci.h>
>> @@ -106,6 +107,7 @@ static void __init dt_smp_init_cpus(void)
>>          [0 ... NR_CPUS - 1] = MPIDR_INVALID
>>      };
>>      bool_t bootcpu_valid = 0;
>> +    nodeid_t *cpu_to_nodemap;
>>      int rc;
>>
>>      mpidr = boot_cpu_data.mpidr.bits & MPIDR_HWID_MASK;
>> @@ -117,11 +119,18 @@ static void __init dt_smp_init_cpus(void)
>>          return;
>>      }
>>
>> +    cpu_to_nodemap = xzalloc_array(nodeid_t, NR_CPUS);
>
>
> Why do you need to allocate cpu_to_nodemap? Would not it be easier to put it
> on the stack as we do for other variable?

This array holds nodemap indexed by cpuid once for all the cpus.
Later while setting the logical cpu id mapping, the node mapping is set
by calling numa_set_cpu_node().

>
>> +    if ( !cpu_to_nodemap )
>> +    {
>> +        printk(XENLOG_WARNING "Failed to allocate memory for
>> cpu_to_nodemap\n");
>> +        return;
>> +    }
>> +
>>      dt_for_each_child_node( cpus, cpu )
>>      {
>>          const __be32 *prop;
>>          u64 addr;
>> -        u32 reg_len;
>> +        uint32_t reg_len, nid;
>>          register_t hwid;
>>
>>          if ( !dt_device_type_is_equal(cpu, "cpu") )
>> @@ -146,6 +155,15 @@ static void __init dt_smp_init_cpus(void)
>>              continue;
>>          }
>>
>> +        if ( !dt_property_read_u32(cpu, "numa-node-id", &nid) )
>> +        {
>> +            printk(XENLOG_WARNING "cpu node `%s`: numa-node-id not
>> found\n",
>> +                   dt_node_full_name(cpu));
>
>
> numa-node-id is not mandatory. So you would print a warning on all non-NUMA
> platform. This not what we want.

ok. I will drop this warning.
>
>> +            nid = 0;
>> +        }
>> +
>> +        cpu_to_nodemap[cpuidx] = nid;
>> +
>>          addr = dt_read_number(prop, dt_n_addr_cells(cpu));
>>
>>          hwid = addr;
>> @@ -224,6 +242,7 @@ static void __init dt_smp_init_cpus(void)
>>      {
>>          printk(XENLOG_WARNING "DT missing boot CPU MPIDR[23:0]\n"
>>                 "Using only 1 CPU\n");
>> +        xfree(cpu_to_nodemap);
>>          return;
>>      }
>>
>> @@ -233,7 +252,10 @@ static void __init dt_smp_init_cpus(void)
>>              continue;
>>          cpumask_set_cpu(i, &cpu_possible_map);
>>          cpu_logical_map(i) = tmp_map[i];
>> +        numa_set_cpu_node(i, cpu_to_nodemap[i]);
>>      }
>> +
>> +    xfree(cpu_to_nodemap);
>>  }
>>
>>  void __init smp_init_cpus(void)
>> @@ -313,6 +335,7 @@ void start_secondary(unsigned long boot_phys_offset,
>>       */
>>      smp_wmb();
>>
>> +    numa_add_cpu(cpuid);
>
>
> Newline here please.
>
>
>>      /* Now report this CPU is up */
>>      cpumask_set_cpu(cpuid, &cpu_online_map);
>>
>> diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h
>> index d1dc83a..0d3146c 100644
>> --- a/xen/include/asm-arm/numa.h
>> +++ b/xen/include/asm-arm/numa.h
>> @@ -10,12 +10,19 @@ void init_dt_numa_distance(void);
>>  #ifdef CONFIG_NUMA
>>  void numa_init(void);
>>  int dt_numa_init(void);
>> +void numa_set_cpu_node(int cpu, unsigned int nid);
>> +
>>  #else
>>  static inline void numa_init(void)
>>  {
>>      return;
>>  }
>>
>> +static inline void numa_set_cpu_node(int cpu, unsigned int nid)
>> +{
>> +    return;
>> +}
>> +
>>  /* Fake one node for now. See also node_online_map. */
>>  #define cpu_to_node(cpu) 0
>>  #define node_to_cpumask(node)   (cpu_online_map)
>> diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
>> index ca0a2a6..fc4747f 100644
>> --- a/xen/include/asm-x86/numa.h
>> +++ b/xen/include/asm-x86/numa.h
>> @@ -15,7 +15,6 @@ extern nodeid_t acpi_setup_node(unsigned int pxm);
>>  extern void srat_detect_node(int cpu);
>>
>>  extern nodeid_t apicid_to_node[];
>> -extern void init_cpu_to_node(void);
>>
>>  void srat_parse_regions(paddr_t addr);
>>  unsigned int arch_get_dma_bitsize(void);
>> diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
>> index 10ef4c4..8a306e7 100644
>> --- a/xen/include/xen/numa.h
>> +++ b/xen/include/xen/numa.h
>> @@ -30,6 +30,7 @@ extern s8 acpi_numa;
>>  void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
>>  int srat_disabled(void);
>>  int valid_numa_range(paddr_t start, paddr_t end, nodeid_t node);
>> +void init_cpu_to_node(void);
>
>
> You never used this function in common code. So why did you move it in the
> common headers?

Same was defined for x86 as well. So I have moved to common header file.

>
>>
>>  #ifdef CONFIG_NUMA
>>  #define cpu_to_node(cpu)         (cpu_to_node[cpu])
>>
>
> Cheers,
>
> --
> Julien Grall
Julien Grall July 25, 2017, 6:38 p.m. UTC | #3
On 25/07/17 07:47, Vijay Kilari wrote:
>>>  void numa_failed(void)
>>>  {
>>>      numa_off = true;
>>>      init_dt_numa_distance();
>>>      node_distance_fn = NULL;
>>> +    init_cpu_to_node();
>>> +}
>>> +
>>> +void __init numa_set_cpu_node(int cpu, unsigned int nid)
>>> +{
>>> +    if ( !node_isset(nid, processor_nodes_parsed) || nid >= MAX_NUMNODES
>>> )
>>> +        nid = 0;
>>
>>
>> This looks wrong to me. If the node-id is invalid, why would you blindly set
>> to 0?
>
> Generally this check will not pass. I will make this function return
> error code in case
> of wrong nid.

I don't really want to see error code and error handling everywhere in 
the initialization code. I would assume that if the NUMA bindings are 
wrong we should just crash Xen rather continuing with NUMA disabled.

Stefano do you have any opinion here?

[...]

>>>  #include <asm/cpuerrata.h>
>>>  #include <asm/gic.h>
>>>  #include <asm/psci.h>
>>> @@ -106,6 +107,7 @@ static void __init dt_smp_init_cpus(void)
>>>          [0 ... NR_CPUS - 1] = MPIDR_INVALID
>>>      };
>>>      bool_t bootcpu_valid = 0;
>>> +    nodeid_t *cpu_to_nodemap;
>>>      int rc;
>>>
>>>      mpidr = boot_cpu_data.mpidr.bits & MPIDR_HWID_MASK;
>>> @@ -117,11 +119,18 @@ static void __init dt_smp_init_cpus(void)
>>>          return;
>>>      }
>>>
>>> +    cpu_to_nodemap = xzalloc_array(nodeid_t, NR_CPUS);
>>
>>
>> Why do you need to allocate cpu_to_nodemap? Would not it be easier to put it
>> on the stack as we do for other variable?
>
> This array holds nodemap indexed by cpuid once for all the cpus.
> Later while setting the logical cpu id mapping, the node mapping is set
> by calling numa_set_cpu_node().

This does not answer question... Please read it again and explain why 
you can't do:

nodeid_t cpu_to_nodemap[NR_CPUS];

>>> diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h
>>> index d1dc83a..0d3146c 100644
>>> --- a/xen/include/asm-arm/numa.h
>>> +++ b/xen/include/asm-arm/numa.h
>>> @@ -10,12 +10,19 @@ void init_dt_numa_distance(void);
>>>  #ifdef CONFIG_NUMA
>>>  void numa_init(void);
>>>  int dt_numa_init(void);
>>> +void numa_set_cpu_node(int cpu, unsigned int nid);
>>> +
>>>  #else
>>>  static inline void numa_init(void)
>>>  {
>>>      return;
>>>  }
>>>
>>> +static inline void numa_set_cpu_node(int cpu, unsigned int nid)
>>> +{
>>> +    return;
>>> +}
>>> +
>>>  /* Fake one node for now. See also node_online_map. */
>>>  #define cpu_to_node(cpu) 0
>>>  #define node_to_cpumask(node)   (cpu_online_map)
>>> diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
>>> index ca0a2a6..fc4747f 100644
>>> --- a/xen/include/asm-x86/numa.h
>>> +++ b/xen/include/asm-x86/numa.h
>>> @@ -15,7 +15,6 @@ extern nodeid_t acpi_setup_node(unsigned int pxm);
>>>  extern void srat_detect_node(int cpu);
>>>
>>>  extern nodeid_t apicid_to_node[];
>>> -extern void init_cpu_to_node(void);
>>>
>>>  void srat_parse_regions(paddr_t addr);
>>>  unsigned int arch_get_dma_bitsize(void);
>>> diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
>>> index 10ef4c4..8a306e7 100644
>>> --- a/xen/include/xen/numa.h
>>> +++ b/xen/include/xen/numa.h
>>> @@ -30,6 +30,7 @@ extern s8 acpi_numa;
>>>  void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
>>>  int srat_disabled(void);
>>>  int valid_numa_range(paddr_t start, paddr_t end, nodeid_t node);
>>> +void init_cpu_to_node(void);
>>
>>
>> You never used this function in common code. So why did you move it in the
>> common headers?
>
> Same was defined for x86 as well. So I have moved to common header file.

You should make common only functions that will be called from code 
common or are part of common code.

In this particular case, the name might be the same but the behavior is 
completely different and the way to use it too...

Cheers,
Stefano Stabellini July 25, 2017, 6:48 p.m. UTC | #4
On Tue, 25 Jul 2017, Julien Grall wrote:
> On 25/07/17 07:47, Vijay Kilari wrote:
> > > >  void numa_failed(void)
> > > >  {
> > > >      numa_off = true;
> > > >      init_dt_numa_distance();
> > > >      node_distance_fn = NULL;
> > > > +    init_cpu_to_node();
> > > > +}
> > > > +
> > > > +void __init numa_set_cpu_node(int cpu, unsigned int nid)
> > > > +{
> > > > +    if ( !node_isset(nid, processor_nodes_parsed) || nid >=
> > > > MAX_NUMNODES
> > > > )
> > > > +        nid = 0;
> > > 
> > > 
> > > This looks wrong to me. If the node-id is invalid, why would you blindly
> > > set
> > > to 0?
> > 
> > Generally this check will not pass. I will make this function return
> > error code in case
> > of wrong nid.
> 
> I don't really want to see error code and error handling everywhere in the
> initialization code. I would assume that if the NUMA bindings are wrong we
> should just crash Xen rather continuing with NUMA disabled.
> 
> Stefano do you have any opinion here?

Yes, I noticed that there is an overabundance of error checks in the
patches. I have pointed out in other cases that some of these checks are
duplicates.

I am OK with some checks but we should not do the same check over and
over.

To answer the question: do we need any checks at all?

I am fine with no checks on the device tree or ACPI bindings themselves.
I am also OK with some checks in few places to check that the
information passed by the firmware is in the right shape (for example we
check for the ACPI header length before accessing any ACPI tables). That
is good. But I am not OK with repeating the same check multiple times
uselessly or checking for conditions that cannot happen (like a NULL
pointer in the ACPI header case again).
Julien Grall July 25, 2017, 6:51 p.m. UTC | #5
On 25/07/17 19:48, Stefano Stabellini wrote:
> On Tue, 25 Jul 2017, Julien Grall wrote:
>> On 25/07/17 07:47, Vijay Kilari wrote:
>>>>>  void numa_failed(void)
>>>>>  {
>>>>>      numa_off = true;
>>>>>      init_dt_numa_distance();
>>>>>      node_distance_fn = NULL;
>>>>> +    init_cpu_to_node();
>>>>> +}
>>>>> +
>>>>> +void __init numa_set_cpu_node(int cpu, unsigned int nid)
>>>>> +{
>>>>> +    if ( !node_isset(nid, processor_nodes_parsed) || nid >=
>>>>> MAX_NUMNODES
>>>>> )
>>>>> +        nid = 0;
>>>>
>>>>
>>>> This looks wrong to me. If the node-id is invalid, why would you blindly
>>>> set
>>>> to 0?
>>>
>>> Generally this check will not pass. I will make this function return
>>> error code in case
>>> of wrong nid.
>>
>> I don't really want to see error code and error handling everywhere in the
>> initialization code. I would assume that if the NUMA bindings are wrong we
>> should just crash Xen rather continuing with NUMA disabled.
>>
>> Stefano do you have any opinion here?
>
> Yes, I noticed that there is an overabundance of error checks in the
> patches. I have pointed out in other cases that some of these checks are
> duplicates.
>
> I am OK with some checks but we should not do the same check over and
> over.
>
> To answer the question: do we need any checks at all?
>
> I am fine with no checks on the device tree or ACPI bindings themselves.
> I am also OK with some checks in few places to check that the
> information passed by the firmware is in the right shape (for example we
> check for the ACPI header length before accessing any ACPI tables). That
> is good. But I am not OK with repeating the same check multiple times
> uselessly or checking for conditions that cannot happen (like a NULL
> pointer in the ACPI header case again).

I would prefer to keep the check on the DT bindings and ACPI bindings. I 
hit some problem in the past that were quite annoying to debug without them.

But I was wondering if we should just panic/BUG_ON directly. Rather than 
returning an error.

For any redundant check, we could just turn to an ASSERT.

Cheers,
Stefano Stabellini July 25, 2017, 7:06 p.m. UTC | #6
On Tue, 25 Jul 2017, Julien Grall wrote:
> On 25/07/17 19:48, Stefano Stabellini wrote:
> > On Tue, 25 Jul 2017, Julien Grall wrote:
> > > On 25/07/17 07:47, Vijay Kilari wrote:
> > > > > >  void numa_failed(void)
> > > > > >  {
> > > > > >      numa_off = true;
> > > > > >      init_dt_numa_distance();
> > > > > >      node_distance_fn = NULL;
> > > > > > +    init_cpu_to_node();
> > > > > > +}
> > > > > > +
> > > > > > +void __init numa_set_cpu_node(int cpu, unsigned int nid)
> > > > > > +{
> > > > > > +    if ( !node_isset(nid, processor_nodes_parsed) || nid >=
> > > > > > MAX_NUMNODES
> > > > > > )
> > > > > > +        nid = 0;
> > > > > 
> > > > > 
> > > > > This looks wrong to me. If the node-id is invalid, why would you
> > > > > blindly
> > > > > set
> > > > > to 0?
> > > > 
> > > > Generally this check will not pass. I will make this function return
> > > > error code in case
> > > > of wrong nid.
> > > 
> > > I don't really want to see error code and error handling everywhere in the
> > > initialization code. I would assume that if the NUMA bindings are wrong we
> > > should just crash Xen rather continuing with NUMA disabled.
> > > 
> > > Stefano do you have any opinion here?
> > 
> > Yes, I noticed that there is an overabundance of error checks in the
> > patches. I have pointed out in other cases that some of these checks are
> > duplicates.
> > 
> > I am OK with some checks but we should not do the same check over and
> > over.
> > 
> > To answer the question: do we need any checks at all?
> > 
> > I am fine with no checks on the device tree or ACPI bindings themselves.
> > I am also OK with some checks in few places to check that the
> > information passed by the firmware is in the right shape (for example we
> > check for the ACPI header length before accessing any ACPI tables). That
> > is good. But I am not OK with repeating the same check multiple times
> > uselessly or checking for conditions that cannot happen (like a NULL
> > pointer in the ACPI header case again).
> 
> I would prefer to keep the check on the DT bindings and ACPI bindings. I hit
> some problem in the past that were quite annoying to debug without them.
> 
> But I was wondering if we should just panic/BUG_ON directly. Rather than
> returning an error.

I think BUG_ON is fine, but it would be best if we also printed a
useful message before crashing Xen. At least the user would know that
the problem is a broken device_tree/ACPI.


> For any redundant check, we could just turn to an ASSERT.
Julien Grall July 26, 2017, 5:18 p.m. UTC | #7
Hi Stefano,

On 25/07/17 20:06, Stefano Stabellini wrote:
> On Tue, 25 Jul 2017, Julien Grall wrote:
>> On 25/07/17 19:48, Stefano Stabellini wrote:
>>> On Tue, 25 Jul 2017, Julien Grall wrote:
>>>> On 25/07/17 07:47, Vijay Kilari wrote:
>>>>>>>  void numa_failed(void)
>>>>>>>  {
>>>>>>>      numa_off = true;
>>>>>>>      init_dt_numa_distance();
>>>>>>>      node_distance_fn = NULL;
>>>>>>> +    init_cpu_to_node();
>>>>>>> +}
>>>>>>> +
>>>>>>> +void __init numa_set_cpu_node(int cpu, unsigned int nid)
>>>>>>> +{
>>>>>>> +    if ( !node_isset(nid, processor_nodes_parsed) || nid >=
>>>>>>> MAX_NUMNODES
>>>>>>> )
>>>>>>> +        nid = 0;
>>>>>>
>>>>>>
>>>>>> This looks wrong to me. If the node-id is invalid, why would you
>>>>>> blindly
>>>>>> set
>>>>>> to 0?
>>>>>
>>>>> Generally this check will not pass. I will make this function return
>>>>> error code in case
>>>>> of wrong nid.
>>>>
>>>> I don't really want to see error code and error handling everywhere in the
>>>> initialization code. I would assume that if the NUMA bindings are wrong we
>>>> should just crash Xen rather continuing with NUMA disabled.
>>>>
>>>> Stefano do you have any opinion here?
>>>
>>> Yes, I noticed that there is an overabundance of error checks in the
>>> patches. I have pointed out in other cases that some of these checks are
>>> duplicates.
>>>
>>> I am OK with some checks but we should not do the same check over and
>>> over.
>>>
>>> To answer the question: do we need any checks at all?
>>>
>>> I am fine with no checks on the device tree or ACPI bindings themselves.
>>> I am also OK with some checks in few places to check that the
>>> information passed by the firmware is in the right shape (for example we
>>> check for the ACPI header length before accessing any ACPI tables). That
>>> is good. But I am not OK with repeating the same check multiple times
>>> uselessly or checking for conditions that cannot happen (like a NULL
>>> pointer in the ACPI header case again).
>>
>> I would prefer to keep the check on the DT bindings and ACPI bindings. I hit
>> some problem in the past that were quite annoying to debug without them.
>>
>> But I was wondering if we should just panic/BUG_ON directly. Rather than
>> returning an error.
>
> I think BUG_ON is fine, but it would be best if we also printed a
> useful message before crashing Xen. At least the user would know that
> the problem is a broken device_tree/ACPI.

I was suggesting to use panic because you can get a nice message :).

Cheers,
Stefano Stabellini July 26, 2017, 5:21 p.m. UTC | #8
On Wed, 26 Jul 2017, Julien Grall wrote:
> Hi Stefano,
> 
> On 25/07/17 20:06, Stefano Stabellini wrote:
> > On Tue, 25 Jul 2017, Julien Grall wrote:
> > > On 25/07/17 19:48, Stefano Stabellini wrote:
> > > > On Tue, 25 Jul 2017, Julien Grall wrote:
> > > > > On 25/07/17 07:47, Vijay Kilari wrote:
> > > > > > > >  void numa_failed(void)
> > > > > > > >  {
> > > > > > > >      numa_off = true;
> > > > > > > >      init_dt_numa_distance();
> > > > > > > >      node_distance_fn = NULL;
> > > > > > > > +    init_cpu_to_node();
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +void __init numa_set_cpu_node(int cpu, unsigned int nid)
> > > > > > > > +{
> > > > > > > > +    if ( !node_isset(nid, processor_nodes_parsed) || nid >=
> > > > > > > > MAX_NUMNODES
> > > > > > > > )
> > > > > > > > +        nid = 0;
> > > > > > > 
> > > > > > > 
> > > > > > > This looks wrong to me. If the node-id is invalid, why would you
> > > > > > > blindly
> > > > > > > set
> > > > > > > to 0?
> > > > > > 
> > > > > > Generally this check will not pass. I will make this function return
> > > > > > error code in case
> > > > > > of wrong nid.
> > > > > 
> > > > > I don't really want to see error code and error handling everywhere in
> > > > > the
> > > > > initialization code. I would assume that if the NUMA bindings are
> > > > > wrong we
> > > > > should just crash Xen rather continuing with NUMA disabled.
> > > > > 
> > > > > Stefano do you have any opinion here?
> > > > 
> > > > Yes, I noticed that there is an overabundance of error checks in the
> > > > patches. I have pointed out in other cases that some of these checks are
> > > > duplicates.
> > > > 
> > > > I am OK with some checks but we should not do the same check over and
> > > > over.
> > > > 
> > > > To answer the question: do we need any checks at all?
> > > > 
> > > > I am fine with no checks on the device tree or ACPI bindings themselves.
> > > > I am also OK with some checks in few places to check that the
> > > > information passed by the firmware is in the right shape (for example we
> > > > check for the ACPI header length before accessing any ACPI tables). That
> > > > is good. But I am not OK with repeating the same check multiple times
> > > > uselessly or checking for conditions that cannot happen (like a NULL
> > > > pointer in the ACPI header case again).
> > > 
> > > I would prefer to keep the check on the DT bindings and ACPI bindings. I
> > > hit
> > > some problem in the past that were quite annoying to debug without them.
> > > 
> > > But I was wondering if we should just panic/BUG_ON directly. Rather than
> > > returning an error.
> > 
> > I think BUG_ON is fine, but it would be best if we also printed a
> > useful message before crashing Xen. At least the user would know that
> > the problem is a broken device_tree/ACPI.
> 
> I was suggesting to use panic because you can get a nice message :).

panic is great, somehow BUG_ON grabbed all my attention when I read your
email :-)
diff mbox

Patch

diff --git a/xen/arch/arm/numa/numa.c b/xen/arch/arm/numa/numa.c
index c00b92c..dc80aa5 100644
--- a/xen/arch/arm/numa/numa.c
+++ b/xen/arch/arm/numa/numa.c
@@ -22,11 +22,31 @@ 
 
 static uint8_t (*node_distance_fn)(nodeid_t a, nodeid_t b);
 
+/*
+ * Setup early cpu_to_node.
+ */
+void __init init_cpu_to_node(void)
+{
+    int i;
+
+    for ( i = 0; i < NR_CPUS; i++ )
+        numa_set_node(i, 0);
+}
+
 void numa_failed(void)
 {
     numa_off = true;
     init_dt_numa_distance();
     node_distance_fn = NULL;
+    init_cpu_to_node();
+}
+
+void __init numa_set_cpu_node(int cpu, unsigned int nid)
+{
+    if ( !node_isset(nid, processor_nodes_parsed) || nid >= MAX_NUMNODES )
+        nid = 0;
+
+    numa_set_node(cpu, nid);
 }
 
 uint8_t __node_distance(nodeid_t a, nodeid_t b)
@@ -49,6 +69,7 @@  void __init numa_init(void)
     int ret = 0;
 
     nodes_clear(processor_nodes_parsed);
+    init_cpu_to_node();
     init_dt_numa_distance();
 
     if ( numa_off )
diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
index a6d1499..b9c8b0d 100644
--- a/xen/arch/arm/setup.c
+++ b/xen/arch/arm/setup.c
@@ -787,6 +787,8 @@  void __init start_xen(unsigned long boot_phys_offset,
 
     processor_id();
 
+    numa_add_cpu(0);
+
     smp_init_cpus();
     cpus = smp_get_max_cpus();
     printk(XENLOG_INFO "SMP: Allowing %u CPUs\n", cpus);
diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c
index 32e8722..fcf9afc 100644
--- a/xen/arch/arm/smpboot.c
+++ b/xen/arch/arm/smpboot.c
@@ -29,6 +29,7 @@ 
 #include <xen/timer.h>
 #include <xen/irq.h>
 #include <xen/console.h>
+#include <xen/numa.h>
 #include <asm/cpuerrata.h>
 #include <asm/gic.h>
 #include <asm/psci.h>
@@ -106,6 +107,7 @@  static void __init dt_smp_init_cpus(void)
         [0 ... NR_CPUS - 1] = MPIDR_INVALID
     };
     bool_t bootcpu_valid = 0;
+    nodeid_t *cpu_to_nodemap;
     int rc;
 
     mpidr = boot_cpu_data.mpidr.bits & MPIDR_HWID_MASK;
@@ -117,11 +119,18 @@  static void __init dt_smp_init_cpus(void)
         return;
     }
 
+    cpu_to_nodemap = xzalloc_array(nodeid_t, NR_CPUS);
+    if ( !cpu_to_nodemap )
+    {
+        printk(XENLOG_WARNING "Failed to allocate memory for cpu_to_nodemap\n");
+        return;
+    }
+
     dt_for_each_child_node( cpus, cpu )
     {
         const __be32 *prop;
         u64 addr;
-        u32 reg_len;
+        uint32_t reg_len, nid;
         register_t hwid;
 
         if ( !dt_device_type_is_equal(cpu, "cpu") )
@@ -146,6 +155,15 @@  static void __init dt_smp_init_cpus(void)
             continue;
         }
 
+        if ( !dt_property_read_u32(cpu, "numa-node-id", &nid) )
+        {
+            printk(XENLOG_WARNING "cpu node `%s`: numa-node-id not found\n",
+                   dt_node_full_name(cpu));
+            nid = 0;
+        }
+
+        cpu_to_nodemap[cpuidx] = nid;
+
         addr = dt_read_number(prop, dt_n_addr_cells(cpu));
 
         hwid = addr;
@@ -224,6 +242,7 @@  static void __init dt_smp_init_cpus(void)
     {
         printk(XENLOG_WARNING "DT missing boot CPU MPIDR[23:0]\n"
                "Using only 1 CPU\n");
+        xfree(cpu_to_nodemap);
         return;
     }
 
@@ -233,7 +252,10 @@  static void __init dt_smp_init_cpus(void)
             continue;
         cpumask_set_cpu(i, &cpu_possible_map);
         cpu_logical_map(i) = tmp_map[i];
+        numa_set_cpu_node(i, cpu_to_nodemap[i]);
     }
+
+    xfree(cpu_to_nodemap);
 }
 
 void __init smp_init_cpus(void)
@@ -313,6 +335,7 @@  void start_secondary(unsigned long boot_phys_offset,
      */
     smp_wmb();
 
+    numa_add_cpu(cpuid);
     /* Now report this CPU is up */
     cpumask_set_cpu(cpuid, &cpu_online_map);
 
diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h
index d1dc83a..0d3146c 100644
--- a/xen/include/asm-arm/numa.h
+++ b/xen/include/asm-arm/numa.h
@@ -10,12 +10,19 @@  void init_dt_numa_distance(void);
 #ifdef CONFIG_NUMA
 void numa_init(void);
 int dt_numa_init(void);
+void numa_set_cpu_node(int cpu, unsigned int nid);
+
 #else
 static inline void numa_init(void)
 {
     return;
 }
 
+static inline void numa_set_cpu_node(int cpu, unsigned int nid)
+{
+    return;
+}
+
 /* Fake one node for now. See also node_online_map. */
 #define cpu_to_node(cpu) 0
 #define node_to_cpumask(node)   (cpu_online_map)
diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
index ca0a2a6..fc4747f 100644
--- a/xen/include/asm-x86/numa.h
+++ b/xen/include/asm-x86/numa.h
@@ -15,7 +15,6 @@  extern nodeid_t acpi_setup_node(unsigned int pxm);
 extern void srat_detect_node(int cpu);
 
 extern nodeid_t apicid_to_node[];
-extern void init_cpu_to_node(void);
 
 void srat_parse_regions(paddr_t addr);
 unsigned int arch_get_dma_bitsize(void);
diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
index 10ef4c4..8a306e7 100644
--- a/xen/include/xen/numa.h
+++ b/xen/include/xen/numa.h
@@ -30,6 +30,7 @@  extern s8 acpi_numa;
 void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
 int srat_disabled(void);
 int valid_numa_range(paddr_t start, paddr_t end, nodeid_t node);
+void init_cpu_to_node(void);
 
 #ifdef CONFIG_NUMA
 #define cpu_to_node(cpu)         (cpu_to_node[cpu])