diff mbox series

[XEN,RFC,16/40] xen/arm: Create a fake NUMA node to use common code

Message ID 20210811102423.28908-17-wei.chen@arm.com (mailing list archive)
State New, archived
Headers show
Series Add device tree based NUMA support to Arm64 | expand

Commit Message

Wei Chen Aug. 11, 2021, 10:23 a.m. UTC
When CONFIG_NUMA is enabled for Arm, Xen will switch to use common
NUMA API instead of previous fake NUMA API. Before we parse NUMA
information from device tree or ACPI SRAT table, we need to init
the NUMA related variables, like cpu_to_node, as single node NUMA
system.

So in this patch, we introduce a numa_init function for to
initialize these data structures as all resources belongs to node#0.
This will make the new API returns the same values as the fake API
has done.

Signed-off-by: Wei Chen <wei.chen@arm.com>
---
 xen/arch/arm/numa.c        | 53 ++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/setup.c       |  8 ++++++
 xen/include/asm-arm/numa.h | 11 ++++++++
 3 files changed, 72 insertions(+)

Comments

Stefano Stabellini Aug. 26, 2021, 11:10 p.m. UTC | #1
On Wed, 11 Aug 2021, Wei Chen wrote:
> When CONFIG_NUMA is enabled for Arm, Xen will switch to use common
> NUMA API instead of previous fake NUMA API. Before we parse NUMA
> information from device tree or ACPI SRAT table, we need to init
> the NUMA related variables, like cpu_to_node, as single node NUMA
> system.
> 
> So in this patch, we introduce a numa_init function for to
> initialize these data structures as all resources belongs to node#0.
> This will make the new API returns the same values as the fake API
> has done.
> 
> Signed-off-by: Wei Chen <wei.chen@arm.com>
> ---
>  xen/arch/arm/numa.c        | 53 ++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/setup.c       |  8 ++++++
>  xen/include/asm-arm/numa.h | 11 ++++++++
>  3 files changed, 72 insertions(+)
> 
> diff --git a/xen/arch/arm/numa.c b/xen/arch/arm/numa.c
> index 1e30c5bb13..566ad1e52b 100644
> --- a/xen/arch/arm/numa.c
> +++ b/xen/arch/arm/numa.c
> @@ -20,6 +20,8 @@
>  #include <xen/init.h>
>  #include <xen/nodemask.h>
>  #include <xen/numa.h>
> +#include <xen/pfn.h>
> +#include <asm/setup.h>
>  
>  void numa_set_node(int cpu, nodeid_t nid)
>  {
> @@ -29,3 +31,54 @@ void numa_set_node(int cpu, nodeid_t nid)
>  
>      cpu_to_node[cpu] = nid;
>  }
> +
> +void __init numa_init(bool acpi_off)
> +{
> +    uint32_t idx;
> +    paddr_t ram_start = ~0;
> +    paddr_t ram_size = 0;
> +    paddr_t ram_end = 0;
> +
> +    printk(XENLOG_WARNING
> +        "NUMA has not been supported yet, NUMA off!\n");

NIT: please align


> +    /* Arm NUMA has not been implemented until this patch */

"Arm NUMA is not implemented yet"


> +    numa_off = true;
> +
> +    /*
> +     * Set all cpu_to_node mapping to 0, this will make cpu_to_node
> +     * function return 0 as previous fake cpu_to_node API.
> +     */
> +    for ( idx = 0; idx < NR_CPUS; idx++ )
> +        cpu_to_node[idx] = 0;
> +
> +    /*
> +     * Make node_to_cpumask, node_spanned_pages and node_start_pfn
> +     * return as previous fake APIs.
> +     */
> +    for ( idx = 0; idx < MAX_NUMNODES; idx++ ) {
> +        node_to_cpumask[idx] = cpu_online_map;
> +        node_spanned_pages(idx) = (max_page - mfn_x(first_valid_mfn));
> +        node_start_pfn(idx) = (mfn_x(first_valid_mfn));
> +    }

I just want to note that this works because MAX_NUMNODES is 1. If
MAX_NUMNODES was > 1 then it would be wrong to set node_to_cpumask,
node_spanned_pages and node_start_pfn for all nodes to the same values.

It might be worth writing something about it in the in-code comment.


> +    /*
> +     * Find the minimal and maximum address of RAM, NUMA will
> +     * build a memory to node mapping table for the whole range.
> +     */
> +    ram_start = bootinfo.mem.bank[0].start;
> +    ram_size  = bootinfo.mem.bank[0].size;
> +    ram_end   = ram_start + ram_size;
> +    for ( idx = 1 ; idx < bootinfo.mem.nr_banks; idx++ )
> +    {
> +        paddr_t bank_start = bootinfo.mem.bank[idx].start;
> +        paddr_t bank_size = bootinfo.mem.bank[idx].size;
> +        paddr_t bank_end = bank_start + bank_size;
> +
> +        ram_size  = ram_size + bank_size;

ram_size is updated but not utilized


> +        ram_start = min(ram_start, bank_start);
> +        ram_end   = max(ram_end, bank_end);
> +    }
> +
> +    numa_initmem_init(PFN_UP(ram_start), PFN_DOWN(ram_end));
> +    return;
> +}
> diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
> index 63a908e325..3c58d2d441 100644
> --- a/xen/arch/arm/setup.c
> +++ b/xen/arch/arm/setup.c
> @@ -30,6 +30,7 @@
>  #include <xen/init.h>
>  #include <xen/irq.h>
>  #include <xen/mm.h>
> +#include <xen/numa.h>
>  #include <xen/param.h>
>  #include <xen/softirq.h>
>  #include <xen/keyhandler.h>
> @@ -874,6 +875,13 @@ void __init start_xen(unsigned long boot_phys_offset,
>      /* Parse the ACPI tables for possible boot-time configuration */
>      acpi_boot_table_init();
>  
> +    /*
> +     * Try to initialize NUMA system, if failed, the system will
> +     * fallback to uniform system which means system has only 1
> +     * NUMA node.
> +     */
> +    numa_init(acpi_disabled);
> +
>      end_boot_allocator();
>  
>      /*
> diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h
> index b2982f9053..bb495a24e1 100644
> --- a/xen/include/asm-arm/numa.h
> +++ b/xen/include/asm-arm/numa.h
> @@ -13,6 +13,16 @@ typedef u8 nodeid_t;
>   */
>  #define NODES_SHIFT      6
>  
> +extern void numa_init(bool acpi_off);
> +
> +/*
> + * Temporary for fake NUMA node, when CPU, memory and distance
> + * matrix will be read from DTB or ACPI SRAT. The following
> + * symbols will be removed.
> + */
> +extern mfn_t first_valid_mfn;
> +#define __node_distance(a, b) (20)
> +
>  #else
>  
>  /* Fake one node for now. See also node_online_map. */
> @@ -35,6 +45,7 @@ extern mfn_t first_valid_mfn;
>  #define node_start_pfn(nid) (mfn_x(first_valid_mfn))
>  #define __node_distance(a, b) (20)
>  
> +#define numa_init(x) do { } while (0)
>  #define numa_set_node(x, y) do { } while (0)
>  
>  #endif
> -- 
> 2.25.1
>
Wei Chen Aug. 27, 2021, 1:15 a.m. UTC | #2
Hi Stefano,

> -----Original Message-----
> From: Stefano Stabellini <sstabellini@kernel.org>
> Sent: 2021年8月27日 7:10
> To: Wei Chen <Wei.Chen@arm.com>
> Cc: xen-devel@lists.xenproject.org; sstabellini@kernel.org; julien@xen.org;
> jbeulich@suse.com; Bertrand Marquis <Bertrand.Marquis@arm.com>
> Subject: Re: [XEN RFC PATCH 16/40] xen/arm: Create a fake NUMA node to use
> common code
> 
> On Wed, 11 Aug 2021, Wei Chen wrote:
> > When CONFIG_NUMA is enabled for Arm, Xen will switch to use common
> > NUMA API instead of previous fake NUMA API. Before we parse NUMA
> > information from device tree or ACPI SRAT table, we need to init
> > the NUMA related variables, like cpu_to_node, as single node NUMA
> > system.
> >
> > So in this patch, we introduce a numa_init function for to
> > initialize these data structures as all resources belongs to node#0.
> > This will make the new API returns the same values as the fake API
> > has done.
> >
> > Signed-off-by: Wei Chen <wei.chen@arm.com>
> > ---
> >  xen/arch/arm/numa.c        | 53 ++++++++++++++++++++++++++++++++++++++
> >  xen/arch/arm/setup.c       |  8 ++++++
> >  xen/include/asm-arm/numa.h | 11 ++++++++
> >  3 files changed, 72 insertions(+)
> >
> > diff --git a/xen/arch/arm/numa.c b/xen/arch/arm/numa.c
> > index 1e30c5bb13..566ad1e52b 100644
> > --- a/xen/arch/arm/numa.c
> > +++ b/xen/arch/arm/numa.c
> > @@ -20,6 +20,8 @@
> >  #include <xen/init.h>
> >  #include <xen/nodemask.h>
> >  #include <xen/numa.h>
> > +#include <xen/pfn.h>
> > +#include <asm/setup.h>
> >
> >  void numa_set_node(int cpu, nodeid_t nid)
> >  {
> > @@ -29,3 +31,54 @@ void numa_set_node(int cpu, nodeid_t nid)
> >
> >      cpu_to_node[cpu] = nid;
> >  }
> > +
> > +void __init numa_init(bool acpi_off)
> > +{
> > +    uint32_t idx;
> > +    paddr_t ram_start = ~0;
> > +    paddr_t ram_size = 0;
> > +    paddr_t ram_end = 0;
> > +
> > +    printk(XENLOG_WARNING
> > +        "NUMA has not been supported yet, NUMA off!\n");
> 
> NIT: please align
> 


OK

> 
> > +    /* Arm NUMA has not been implemented until this patch */
> 
> "Arm NUMA is not implemented yet"
> 

OK

> 
> > +    numa_off = true;
> > +
> > +    /*
> > +     * Set all cpu_to_node mapping to 0, this will make cpu_to_node
> > +     * function return 0 as previous fake cpu_to_node API.
> > +     */
> > +    for ( idx = 0; idx < NR_CPUS; idx++ )
> > +        cpu_to_node[idx] = 0;
> > +
> > +    /*
> > +     * Make node_to_cpumask, node_spanned_pages and node_start_pfn
> > +     * return as previous fake APIs.
> > +     */
> > +    for ( idx = 0; idx < MAX_NUMNODES; idx++ ) {
> > +        node_to_cpumask[idx] = cpu_online_map;
> > +        node_spanned_pages(idx) = (max_page - mfn_x(first_valid_mfn));
> > +        node_start_pfn(idx) = (mfn_x(first_valid_mfn));
> > +    }
> 
> I just want to note that this works because MAX_NUMNODES is 1. If
> MAX_NUMNODES was > 1 then it would be wrong to set node_to_cpumask,
> node_spanned_pages and node_start_pfn for all nodes to the same values.
> 
> It might be worth writing something about it in the in-code comment.
> 

OK, I will do it.

> 
> > +    /*
> > +     * Find the minimal and maximum address of RAM, NUMA will
> > +     * build a memory to node mapping table for the whole range.
> > +     */
> > +    ram_start = bootinfo.mem.bank[0].start;
> > +    ram_size  = bootinfo.mem.bank[0].size;
> > +    ram_end   = ram_start + ram_size;
> > +    for ( idx = 1 ; idx < bootinfo.mem.nr_banks; idx++ )
> > +    {
> > +        paddr_t bank_start = bootinfo.mem.bank[idx].start;
> > +        paddr_t bank_size = bootinfo.mem.bank[idx].size;
> > +        paddr_t bank_end = bank_start + bank_size;
> > +
> > +        ram_size  = ram_size + bank_size;
> 
> ram_size is updated but not utilized
> 

Ok, I will remove it.

> 
> > +        ram_start = min(ram_start, bank_start);
> > +        ram_end   = max(ram_end, bank_end);
> > +    }
> > +
> > +    numa_initmem_init(PFN_UP(ram_start), PFN_DOWN(ram_end));
> > +    return;
> > +}
> > diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
> > index 63a908e325..3c58d2d441 100644
> > --- a/xen/arch/arm/setup.c
> > +++ b/xen/arch/arm/setup.c
> > @@ -30,6 +30,7 @@
> >  #include <xen/init.h>
> >  #include <xen/irq.h>
> >  #include <xen/mm.h>
> > +#include <xen/numa.h>
> >  #include <xen/param.h>
> >  #include <xen/softirq.h>
> >  #include <xen/keyhandler.h>
> > @@ -874,6 +875,13 @@ void __init start_xen(unsigned long
> boot_phys_offset,
> >      /* Parse the ACPI tables for possible boot-time configuration */
> >      acpi_boot_table_init();
> >
> > +    /*
> > +     * Try to initialize NUMA system, if failed, the system will
> > +     * fallback to uniform system which means system has only 1
> > +     * NUMA node.
> > +     */
> > +    numa_init(acpi_disabled);
> > +
> >      end_boot_allocator();
> >
> >      /*
> > diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h
> > index b2982f9053..bb495a24e1 100644
> > --- a/xen/include/asm-arm/numa.h
> > +++ b/xen/include/asm-arm/numa.h
> > @@ -13,6 +13,16 @@ typedef u8 nodeid_t;
> >   */
> >  #define NODES_SHIFT      6
> >
> > +extern void numa_init(bool acpi_off);
> > +
> > +/*
> > + * Temporary for fake NUMA node, when CPU, memory and distance
> > + * matrix will be read from DTB or ACPI SRAT. The following
> > + * symbols will be removed.
> > + */
> > +extern mfn_t first_valid_mfn;
> > +#define __node_distance(a, b) (20)
> > +
> >  #else
> >
> >  /* Fake one node for now. See also node_online_map. */
> > @@ -35,6 +45,7 @@ extern mfn_t first_valid_mfn;
> >  #define node_start_pfn(nid) (mfn_x(first_valid_mfn))
> >  #define __node_distance(a, b) (20)
> >
> > +#define numa_init(x) do { } while (0)
> >  #define numa_set_node(x, y) do { } while (0)
> >
> >  #endif
> > --
> > 2.25.1
> >
Jan Beulich Aug. 27, 2021, 6:18 a.m. UTC | #3
On 27.08.2021 01:10, Stefano Stabellini wrote:
> On Wed, 11 Aug 2021, Wei Chen wrote:
>> @@ -29,3 +31,54 @@ void numa_set_node(int cpu, nodeid_t nid)
>>  
>>      cpu_to_node[cpu] = nid;
>>  }
>> +
>> +void __init numa_init(bool acpi_off)
>> +{
>> +    uint32_t idx;
>> +    paddr_t ram_start = ~0;
>> +    paddr_t ram_size = 0;
>> +    paddr_t ram_end = 0;
>> +
>> +    printk(XENLOG_WARNING
>> +        "NUMA has not been supported yet, NUMA off!\n");
> 
> NIT: please align
> 
> 
>> +    /* Arm NUMA has not been implemented until this patch */
> 
> "Arm NUMA is not implemented yet"
> 
> 
>> +    numa_off = true;
>> +
>> +    /*
>> +     * Set all cpu_to_node mapping to 0, this will make cpu_to_node
>> +     * function return 0 as previous fake cpu_to_node API.
>> +     */
>> +    for ( idx = 0; idx < NR_CPUS; idx++ )
>> +        cpu_to_node[idx] = 0;
>> +
>> +    /*
>> +     * Make node_to_cpumask, node_spanned_pages and node_start_pfn
>> +     * return as previous fake APIs.
>> +     */
>> +    for ( idx = 0; idx < MAX_NUMNODES; idx++ ) {
>> +        node_to_cpumask[idx] = cpu_online_map;
>> +        node_spanned_pages(idx) = (max_page - mfn_x(first_valid_mfn));
>> +        node_start_pfn(idx) = (mfn_x(first_valid_mfn));
>> +    }
> 
> I just want to note that this works because MAX_NUMNODES is 1. If
> MAX_NUMNODES was > 1 then it would be wrong to set node_to_cpumask,
> node_spanned_pages and node_start_pfn for all nodes to the same values.
> 
> It might be worth writing something about it in the in-code comment.

Plus perhaps BUILD_BUG_ON(MAX_NUMNODES != 1), so the issue is actually
noticed at build time once the constant gets changed?

Jan
Wei Chen Aug. 27, 2021, 9:32 a.m. UTC | #4
Hi Jan,

> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: 2021年8月27日 14:18
> To: Stefano Stabellini <sstabellini@kernel.org>; Wei Chen
> <Wei.Chen@arm.com>
> Cc: xen-devel@lists.xenproject.org; julien@xen.org; Bertrand Marquis
> <Bertrand.Marquis@arm.com>
> Subject: Re: [XEN RFC PATCH 16/40] xen/arm: Create a fake NUMA node to use
> common code
> 
> On 27.08.2021 01:10, Stefano Stabellini wrote:
> > On Wed, 11 Aug 2021, Wei Chen wrote:
> >> @@ -29,3 +31,54 @@ void numa_set_node(int cpu, nodeid_t nid)
> >>
> >>      cpu_to_node[cpu] = nid;
> >>  }
> >> +
> >> +void __init numa_init(bool acpi_off)
> >> +{
> >> +    uint32_t idx;
> >> +    paddr_t ram_start = ~0;
> >> +    paddr_t ram_size = 0;
> >> +    paddr_t ram_end = 0;
> >> +
> >> +    printk(XENLOG_WARNING
> >> +        "NUMA has not been supported yet, NUMA off!\n");
> >
> > NIT: please align
> >
> >
> >> +    /* Arm NUMA has not been implemented until this patch */
> >
> > "Arm NUMA is not implemented yet"
> >
> >
> >> +    numa_off = true;
> >> +
> >> +    /*
> >> +     * Set all cpu_to_node mapping to 0, this will make cpu_to_node
> >> +     * function return 0 as previous fake cpu_to_node API.
> >> +     */
> >> +    for ( idx = 0; idx < NR_CPUS; idx++ )
> >> +        cpu_to_node[idx] = 0;
> >> +
> >> +    /*
> >> +     * Make node_to_cpumask, node_spanned_pages and node_start_pfn
> >> +     * return as previous fake APIs.
> >> +     */
> >> +    for ( idx = 0; idx < MAX_NUMNODES; idx++ ) {
> >> +        node_to_cpumask[idx] = cpu_online_map;
> >> +        node_spanned_pages(idx) = (max_page - mfn_x(first_valid_mfn));
> >> +        node_start_pfn(idx) = (mfn_x(first_valid_mfn));
> >> +    }
> >
> > I just want to note that this works because MAX_NUMNODES is 1. If
> > MAX_NUMNODES was > 1 then it would be wrong to set node_to_cpumask,
> > node_spanned_pages and node_start_pfn for all nodes to the same values.
> >
> > It might be worth writing something about it in the in-code comment.
> 
> Plus perhaps BUILD_BUG_ON(MAX_NUMNODES != 1), so the issue is actually
> noticed at build time once the constant gets changed?
> 

It would be better. I will use it in next version.

> Jan
diff mbox series

Patch

diff --git a/xen/arch/arm/numa.c b/xen/arch/arm/numa.c
index 1e30c5bb13..566ad1e52b 100644
--- a/xen/arch/arm/numa.c
+++ b/xen/arch/arm/numa.c
@@ -20,6 +20,8 @@ 
 #include <xen/init.h>
 #include <xen/nodemask.h>
 #include <xen/numa.h>
+#include <xen/pfn.h>
+#include <asm/setup.h>
 
 void numa_set_node(int cpu, nodeid_t nid)
 {
@@ -29,3 +31,54 @@  void numa_set_node(int cpu, nodeid_t nid)
 
     cpu_to_node[cpu] = nid;
 }
+
+void __init numa_init(bool acpi_off)
+{
+    uint32_t idx;
+    paddr_t ram_start = ~0;
+    paddr_t ram_size = 0;
+    paddr_t ram_end = 0;
+
+    printk(XENLOG_WARNING
+        "NUMA has not been supported yet, NUMA off!\n");
+    /* Arm NUMA has not been implemented until this patch */
+    numa_off = true;
+
+    /*
+     * Set all cpu_to_node mapping to 0, this will make cpu_to_node
+     * function return 0 as previous fake cpu_to_node API.
+     */
+    for ( idx = 0; idx < NR_CPUS; idx++ )
+        cpu_to_node[idx] = 0;
+
+    /*
+     * Make node_to_cpumask, node_spanned_pages and node_start_pfn
+     * return as previous fake APIs.
+     */
+    for ( idx = 0; idx < MAX_NUMNODES; idx++ ) {
+        node_to_cpumask[idx] = cpu_online_map;
+        node_spanned_pages(idx) = (max_page - mfn_x(first_valid_mfn));
+        node_start_pfn(idx) = (mfn_x(first_valid_mfn));
+    }
+
+    /*
+     * Find the minimal and maximum address of RAM, NUMA will
+     * build a memory to node mapping table for the whole range.
+     */
+    ram_start = bootinfo.mem.bank[0].start;
+    ram_size  = bootinfo.mem.bank[0].size;
+    ram_end   = ram_start + ram_size;
+    for ( idx = 1 ; idx < bootinfo.mem.nr_banks; idx++ )
+    {
+        paddr_t bank_start = bootinfo.mem.bank[idx].start;
+        paddr_t bank_size = bootinfo.mem.bank[idx].size;
+        paddr_t bank_end = bank_start + bank_size;
+
+        ram_size  = ram_size + bank_size;
+        ram_start = min(ram_start, bank_start);
+        ram_end   = max(ram_end, bank_end);
+    }
+
+    numa_initmem_init(PFN_UP(ram_start), PFN_DOWN(ram_end));
+    return;
+}
diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
index 63a908e325..3c58d2d441 100644
--- a/xen/arch/arm/setup.c
+++ b/xen/arch/arm/setup.c
@@ -30,6 +30,7 @@ 
 #include <xen/init.h>
 #include <xen/irq.h>
 #include <xen/mm.h>
+#include <xen/numa.h>
 #include <xen/param.h>
 #include <xen/softirq.h>
 #include <xen/keyhandler.h>
@@ -874,6 +875,13 @@  void __init start_xen(unsigned long boot_phys_offset,
     /* Parse the ACPI tables for possible boot-time configuration */
     acpi_boot_table_init();
 
+    /*
+     * Try to initialize NUMA system, if failed, the system will
+     * fallback to uniform system which means system has only 1
+     * NUMA node.
+     */
+    numa_init(acpi_disabled);
+
     end_boot_allocator();
 
     /*
diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h
index b2982f9053..bb495a24e1 100644
--- a/xen/include/asm-arm/numa.h
+++ b/xen/include/asm-arm/numa.h
@@ -13,6 +13,16 @@  typedef u8 nodeid_t;
  */
 #define NODES_SHIFT      6
 
+extern void numa_init(bool acpi_off);
+
+/*
+ * Temporary for fake NUMA node, when CPU, memory and distance
+ * matrix will be read from DTB or ACPI SRAT. The following
+ * symbols will be removed.
+ */
+extern mfn_t first_valid_mfn;
+#define __node_distance(a, b) (20)
+
 #else
 
 /* Fake one node for now. See also node_online_map. */
@@ -35,6 +45,7 @@  extern mfn_t first_valid_mfn;
 #define node_start_pfn(nid) (mfn_x(first_valid_mfn))
 #define __node_distance(a, b) (20)
 
+#define numa_init(x) do { } while (0)
 #define numa_set_node(x, y) do { } while (0)
 
 #endif