diff mbox

[RFC,v3,14/24] ARM: NUMA: DT: Parse NUMA distance information

Message ID 1500378106-2620-15-git-send-email-vijay.kilari@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Vijay Kilari July 18, 2017, 11:41 a.m. UTC
From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>

Parse distance-matrix and fetch node distance information.
Store distance information in node_distance[].

Register dt_node_distance() function pointer with
the ARM numa code. This approach can be later used for
ACPI.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
---
v3: - Moved __node_distance() declaration to common
      header file
    - Use device_tree_node_compatible() instead of
      device_tree_node_matches()
    - Dropped xen/errno.h inclusion
---
 xen/arch/arm/numa/dt_numa.c | 131 ++++++++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/numa/numa.c    |  22 ++++++++
 xen/include/asm-arm/numa.h  |   2 +
 xen/include/asm-x86/numa.h  |   1 -
 xen/include/xen/numa.h      |   3 +
 5 files changed, 158 insertions(+), 1 deletion(-)

Comments

Julien Grall July 20, 2017, 1:02 p.m. UTC | #1
Hi Vijay,

On 18/07/17 12:41, vijay.kilari@gmail.com wrote:
> From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>
> Parse distance-matrix and fetch node distance information.
> Store distance information in node_distance[].
>
> Register dt_node_distance() function pointer with
> the ARM numa code. This approach can be later used for
> ACPI.

After my comment on v1, I was expecting to see a link to the binding in 
the commit message...

>
> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
> ---
> v3: - Moved __node_distance() declaration to common
>       header file
>     - Use device_tree_node_compatible() instead of
>       device_tree_node_matches()
>     - Dropped xen/errno.h inclusion
> ---
>  xen/arch/arm/numa/dt_numa.c | 131 ++++++++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/numa/numa.c    |  22 ++++++++
>  xen/include/asm-arm/numa.h  |   2 +
>  xen/include/asm-x86/numa.h  |   1 -
>  xen/include/xen/numa.h      |   3 +
>  5 files changed, 158 insertions(+), 1 deletion(-)
>
> diff --git a/xen/arch/arm/numa/dt_numa.c b/xen/arch/arm/numa/dt_numa.c
> index 84030e7..46c0346 100644
> --- a/xen/arch/arm/numa/dt_numa.c
> +++ b/xen/arch/arm/numa/dt_numa.c
> @@ -23,6 +23,48 @@
>  #include <xen/numa.h>
>  #include <asm/setup.h>
>
> +static uint8_t node_distance[MAX_NUMNODES][MAX_NUMNODES];

On v1, you said that you will look at allocating node_distance on the 
fly. So why it is not done?

You can give a look at alloc_boot_pages(...).

> +
> +static uint8_t dt_node_distance(nodeid_t nodea, nodeid_t nodeb)
> +{
> +    if ( nodea >= MAX_NUMNODES || nodeb >= MAX_NUMNODES )
> +        return nodea == nodeb ? LOCAL_DISTANCE : REMOTE_DISTANCE;

Do we really expect dt_node_distance to be called with wrong node?

Looking at the ACPI code, they don't check that... So likely this should 
be an ASSERT(...).

> +
> +    return node_distance[nodea][nodeb];
> +}
> +
> +static int dt_numa_set_distance(uint32_t nodea, uint32_t nodeb,

I think this should be __init.

> +                                uint32_t distance)
> +{
> +   /* node_distance is uint8_t. Ensure distance is less than 255 */
> +   if ( nodea >= MAX_NUMNODES || nodeb >= MAX_NUMNODES || distance > 255 )
> +       return -EINVAL;
> +
> +   node_distance[nodea][nodeb] = distance;
> +
> +   return 0;
> +}
> +
> +void init_dt_numa_distance(void)

Ditto.

> +{
> +    int i, j;
> +
> +    for ( i = 0; i < MAX_NUMNODES; i++ )
> +    {
> +        for ( j = 0; j < MAX_NUMNODES; j++ )
> +        {
> +            /*
> +             * Initialize distance 10 for local distance and
> +             * 20 for remote distance.
> +             */
> +            if ( i  == j )
> +                node_distance[i][j] = LOCAL_DISTANCE;
> +            else
> +                node_distance[i][j] = REMOTE_DISTANCE;
> +        }
> +    }
> +}
> +
>  /*
>   * Even though we connect cpus to numa domains later in SMP
>   * init, we need to know the node ids now for all cpus.
> @@ -58,6 +100,76 @@ static int __init dt_numa_process_cpu_node(const void *fdt)
>      return 0;
>  }
>
> +static int __init dt_numa_parse_distance_map(const void *fdt, int node,
> +                                             const char *name,
> +                                             uint32_t address_cells,
> +                                             uint32_t size_cells)
> +{
> +    const struct fdt_property *prop;
> +    const __be32 *matrix;
> +    int entry_count, len, i;
> +
> +    printk(XENLOG_INFO "NUMA: parsing numa-distance-map\n");
> +
> +    prop = fdt_get_property(fdt, node, "distance-matrix", &len);
> +    if ( !prop )
> +    {
> +        printk(XENLOG_WARNING

s/XENLOG_WARNING/XENLOG_INFO/ because numa-distance-map is not mandatory.

> +               "NUMA: No distance-matrix property in distance-map\n");
> +
> +        return -EINVAL;

If I am reading correctly the binding, the distance-matrix is not 
mandatory. If it is not present, you should use a default matrix. But 
here you will disable NUMA completely.

> +    }
> +
> +    if ( len % sizeof(uint32_t) != 0 )
> +    {
> +         printk(XENLOG_WARNING
> +                "distance-matrix in node is not a multiple of u32\n");
> +
> +        return -EINVAL;
> +    }
> +
> +    entry_count = len / sizeof(uint32_t);
> +    if ( entry_count <= 0 )
> +    {
> +        printk(XENLOG_WARNING "NUMA: Invalid distance-matrix\n");
> +
> +        return -EINVAL;
> +    }
> +
> +    matrix = (const __be32 *)prop->data;
> +    for ( i = 0; i + 2 < entry_count; i += 3 )

It would be easier to read if entry_count is the number of triplet. E.g

entry_count = (len / sizeof(uint32_t)) / 3;

for ( i = 0; i < entry_count; i++ )

> +    {
> +        uint32_t nodea, nodeb, distance;
> +
> +        nodea = dt_read_number(matrix, 1);
> +        matrix++;

nodea = dt_next_cell(1, &matrix) will do the increment for you.

> +        nodeb = dt_read_number(matrix, 1);
> +        matrix++;

Ditto.

> +        distance = dt_read_number(matrix, 1);
> +        matrix++;

Ditto.

> +
> +        if ( dt_numa_set_distance(nodea, nodeb, distance) )
> +        {
> +            printk(XENLOG_WARNING
> +                   "NUMA: node-id out of range in distance matrix for [node%d -> node%d]\n",

s/%d/%u/

> +                   nodea, nodeb);
> +            return -EINVAL;
> +
> +        }
> +        printk(XENLOG_INFO "NUMA: distance[node%d -> node%d] = %d\n",
> +               nodea, nodeb, distance);
> +
> +        /*
> +         * Set default distance of node B->A same as A->B.
> +         * No need to check for return value of numa_set_distance.
> +         */
> +        if ( nodeb > nodea )

Mind explaining this if in the comment?

> +            dt_numa_set_distance(nodeb, nodea, distance);
> +    }
> +
> +    return 0;
> +}
> +
>  void __init dt_numa_process_memory_node(uint32_t nid, paddr_t start,
>                                         paddr_t size)
>  {
> @@ -90,11 +202,30 @@ void __init dt_numa_process_memory_node(uint32_t nid, paddr_t start,
>      return;
>  }
>
> +static int __init dt_numa_scan_distance_node(const void *fdt, int node,
> +                                             const char *name, int depth,
> +                                             uint32_t address_cells,
> +                                             uint32_t size_cells, void *data)
> +{
> +    if ( device_tree_node_compatible(fdt, node, "numa-distance-map-v1") )
> +        return dt_numa_parse_distance_map(fdt, node, name, address_cells,
> +                                          size_cells);
> +
> +    return 0;
> +}
> +
>  int __init dt_numa_init(void)
>  {
>      int ret;
>
>      ret = dt_numa_process_cpu_node((void *)device_tree_flattened);
> +    if ( ret )
> +        return ret;
> +
> +    ret = device_tree_for_each_node((void *)device_tree_flattened,

Why do you need the cast?

> +                                    dt_numa_scan_distance_node, NULL);
> +    if ( !ret )
> +        register_node_distance(&dt_node_distance);
>
>      return ret;
>  }
> diff --git a/xen/arch/arm/numa/numa.c b/xen/arch/arm/numa/numa.c
> index 8227361..c00b92c 100644
> --- a/xen/arch/arm/numa/numa.c
> +++ b/xen/arch/arm/numa/numa.c
> @@ -18,10 +18,30 @@
>  #include <xen/ctype.h>
>  #include <xen/nodemask.h>
>  #include <xen/numa.h>
> +#include <asm/acpi.h>

I don't understand why you include asm/acpi.h with no code using ACPI at 
the moment...

> +
> +static uint8_t (*node_distance_fn)(nodeid_t a, nodeid_t b);
>
>  void numa_failed(void)
>  {
>      numa_off = true;
> +    init_dt_numa_distance();

Why do you need to initialize init_dt_numa_distance when it has failed? 
The array will never be used in that case.

> +    node_distance_fn = NULL;
> +}
> +
> +uint8_t __node_distance(nodeid_t a, nodeid_t b)
> +{
> +    if ( node_distance_fn != NULL);
> +        return node_distance_fn(a, b);
> +
> +    return a == b ? LOCAL_DISTANCE : REMOTE_DISTANCE;
> +}
> +
> +EXPORT_SYMBOL(__node_distance);

Please drop EXPORT_SYMBOL, this is not used by Xen and only here when 
the code is imported from Linux.

> +
> +void register_node_distance(uint8_t (fn)(nodeid_t a, nodeid_t b))
> +{
> +    node_distance_fn = fn;
>  }
>
>  void __init numa_init(void)
> @@ -29,6 +49,8 @@ void __init numa_init(void)
>      int ret = 0;
>
>      nodes_clear(processor_nodes_parsed);
> +    init_dt_numa_distance();

This should go in dt_numa_init and would avoid to export it.

> +
>      if ( numa_off )
>          goto no_numa;
>
> diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h
> index 36cd782..d1dc83a 100644
> --- a/xen/include/asm-arm/numa.h
> +++ b/xen/include/asm-arm/numa.h
> @@ -4,6 +4,8 @@
>  typedef uint8_t nodeid_t;
>
>  void dt_numa_process_memory_node(uint32_t nid, paddr_t start, paddr_t size);
> +void register_node_distance(uint8_t (fn)(nodeid_t a, nodeid_t b));
> +void init_dt_numa_distance(void);
>
>  #ifdef CONFIG_NUMA
>  void numa_init(void);
> diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
> index d8a0a44..ca0a2a6 100644
> --- a/xen/include/asm-x86/numa.h
> +++ b/xen/include/asm-x86/numa.h
> @@ -18,7 +18,6 @@ extern nodeid_t apicid_to_node[];
>  extern void init_cpu_to_node(void);
>
>  void srat_parse_regions(paddr_t addr);
> -extern uint8_t __node_distance(nodeid_t a, nodeid_t b);
>  unsigned int arch_get_dma_bitsize(void);
>
>  #endif
> diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
> index 110d5dc..10ef4c4 100644
> --- a/xen/include/xen/numa.h
> +++ b/xen/include/xen/numa.h
> @@ -6,6 +6,8 @@
>  #include <asm/numa.h>
>
>  #define NUMA_NO_NODE     0xFF
> +#define LOCAL_DISTANCE   10
> +#define REMOTE_DISTANCE  20

I would add DEFAULT in each name. Probably LOCAL_DEFAULT_DISTANCE and 
REMOVE_LOCAL_DISTANCE.

>  #define NUMA_NO_DISTANCE 0xFF
>
>  #define MAX_NUMNODES    NR_NODES
> @@ -70,6 +72,7 @@ int numa_add_memblk(nodeid_t nodeid, paddr_t start, uint64_t size);
>  int get_num_node_memblks(void);
>  bool arch_sanitize_nodes_memory(void);
>  void numa_failed(void);
> +uint8_t __node_distance(nodeid_t a, nodeid_t b);
>  #else
>  static inline void numa_add_cpu(int cpu) { }
>  static inline void numa_set_node(int cpu, nodeid_t node) { }
>

Cheers,
diff mbox

Patch

diff --git a/xen/arch/arm/numa/dt_numa.c b/xen/arch/arm/numa/dt_numa.c
index 84030e7..46c0346 100644
--- a/xen/arch/arm/numa/dt_numa.c
+++ b/xen/arch/arm/numa/dt_numa.c
@@ -23,6 +23,48 @@ 
 #include <xen/numa.h>
 #include <asm/setup.h>
 
+static uint8_t node_distance[MAX_NUMNODES][MAX_NUMNODES];
+
+static uint8_t dt_node_distance(nodeid_t nodea, nodeid_t nodeb)
+{
+    if ( nodea >= MAX_NUMNODES || nodeb >= MAX_NUMNODES )
+        return nodea == nodeb ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+
+    return node_distance[nodea][nodeb];
+}
+
+static int dt_numa_set_distance(uint32_t nodea, uint32_t nodeb,
+                                uint32_t distance)
+{
+   /* node_distance is uint8_t. Ensure distance is less than 255 */
+   if ( nodea >= MAX_NUMNODES || nodeb >= MAX_NUMNODES || distance > 255 )
+       return -EINVAL;
+
+   node_distance[nodea][nodeb] = distance;
+
+   return 0;
+}
+
+void init_dt_numa_distance(void)
+{
+    int i, j;
+
+    for ( i = 0; i < MAX_NUMNODES; i++ )
+    {
+        for ( j = 0; j < MAX_NUMNODES; j++ )
+        {
+            /*
+             * Initialize distance 10 for local distance and
+             * 20 for remote distance.
+             */
+            if ( i  == j )
+                node_distance[i][j] = LOCAL_DISTANCE;
+            else
+                node_distance[i][j] = REMOTE_DISTANCE;
+        }
+    }
+}
+
 /*
  * Even though we connect cpus to numa domains later in SMP
  * init, we need to know the node ids now for all cpus.
@@ -58,6 +100,76 @@  static int __init dt_numa_process_cpu_node(const void *fdt)
     return 0;
 }
 
+static int __init dt_numa_parse_distance_map(const void *fdt, int node,
+                                             const char *name,
+                                             uint32_t address_cells,
+                                             uint32_t size_cells)
+{
+    const struct fdt_property *prop;
+    const __be32 *matrix;
+    int entry_count, len, i;
+
+    printk(XENLOG_INFO "NUMA: parsing numa-distance-map\n");
+
+    prop = fdt_get_property(fdt, node, "distance-matrix", &len);
+    if ( !prop )
+    {
+        printk(XENLOG_WARNING
+               "NUMA: No distance-matrix property in distance-map\n");
+
+        return -EINVAL;
+    }
+
+    if ( len % sizeof(uint32_t) != 0 )
+    {
+         printk(XENLOG_WARNING
+                "distance-matrix in node is not a multiple of u32\n");
+
+        return -EINVAL;
+    }
+
+    entry_count = len / sizeof(uint32_t);
+    if ( entry_count <= 0 )
+    {
+        printk(XENLOG_WARNING "NUMA: Invalid distance-matrix\n");
+
+        return -EINVAL;
+    }
+
+    matrix = (const __be32 *)prop->data;
+    for ( i = 0; i + 2 < entry_count; i += 3 )
+    {
+        uint32_t nodea, nodeb, distance;
+
+        nodea = dt_read_number(matrix, 1);
+        matrix++;
+        nodeb = dt_read_number(matrix, 1);
+        matrix++;
+        distance = dt_read_number(matrix, 1);
+        matrix++;
+
+        if ( dt_numa_set_distance(nodea, nodeb, distance) )
+        {
+            printk(XENLOG_WARNING
+                   "NUMA: node-id out of range in distance matrix for [node%d -> node%d]\n",
+                   nodea, nodeb);
+            return -EINVAL;
+
+        }
+        printk(XENLOG_INFO "NUMA: distance[node%d -> node%d] = %d\n",
+               nodea, nodeb, distance);
+
+        /*
+         * Set default distance of node B->A same as A->B.
+         * No need to check for return value of numa_set_distance.
+         */
+        if ( nodeb > nodea )
+            dt_numa_set_distance(nodeb, nodea, distance);
+    }
+
+    return 0;
+}
+
 void __init dt_numa_process_memory_node(uint32_t nid, paddr_t start,
                                        paddr_t size)
 {
@@ -90,11 +202,30 @@  void __init dt_numa_process_memory_node(uint32_t nid, paddr_t start,
     return;
 }
 
+static int __init dt_numa_scan_distance_node(const void *fdt, int node,
+                                             const char *name, int depth,
+                                             uint32_t address_cells,
+                                             uint32_t size_cells, void *data)
+{
+    if ( device_tree_node_compatible(fdt, node, "numa-distance-map-v1") )
+        return dt_numa_parse_distance_map(fdt, node, name, address_cells,
+                                          size_cells);
+
+    return 0;
+}
+
 int __init dt_numa_init(void)
 {
     int ret;
 
     ret = dt_numa_process_cpu_node((void *)device_tree_flattened);
+    if ( ret )
+        return ret;
+
+    ret = device_tree_for_each_node((void *)device_tree_flattened,
+                                    dt_numa_scan_distance_node, NULL);
+    if ( !ret )
+        register_node_distance(&dt_node_distance);
 
     return ret;
 }
diff --git a/xen/arch/arm/numa/numa.c b/xen/arch/arm/numa/numa.c
index 8227361..c00b92c 100644
--- a/xen/arch/arm/numa/numa.c
+++ b/xen/arch/arm/numa/numa.c
@@ -18,10 +18,30 @@ 
 #include <xen/ctype.h>
 #include <xen/nodemask.h>
 #include <xen/numa.h>
+#include <asm/acpi.h>
+
+static uint8_t (*node_distance_fn)(nodeid_t a, nodeid_t b);
 
 void numa_failed(void)
 {
     numa_off = true;
+    init_dt_numa_distance();
+    node_distance_fn = NULL;
+}
+
+uint8_t __node_distance(nodeid_t a, nodeid_t b)
+{
+    if ( node_distance_fn != NULL);
+        return node_distance_fn(a, b);
+
+    return a == b ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+}
+
+EXPORT_SYMBOL(__node_distance);
+
+void register_node_distance(uint8_t (fn)(nodeid_t a, nodeid_t b))
+{
+    node_distance_fn = fn;
 }
 
 void __init numa_init(void)
@@ -29,6 +49,8 @@  void __init numa_init(void)
     int ret = 0;
 
     nodes_clear(processor_nodes_parsed);
+    init_dt_numa_distance();
+
     if ( numa_off )
         goto no_numa;
 
diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h
index 36cd782..d1dc83a 100644
--- a/xen/include/asm-arm/numa.h
+++ b/xen/include/asm-arm/numa.h
@@ -4,6 +4,8 @@ 
 typedef uint8_t nodeid_t;
 
 void dt_numa_process_memory_node(uint32_t nid, paddr_t start, paddr_t size);
+void register_node_distance(uint8_t (fn)(nodeid_t a, nodeid_t b));
+void init_dt_numa_distance(void);
 
 #ifdef CONFIG_NUMA
 void numa_init(void);
diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
index d8a0a44..ca0a2a6 100644
--- a/xen/include/asm-x86/numa.h
+++ b/xen/include/asm-x86/numa.h
@@ -18,7 +18,6 @@  extern nodeid_t apicid_to_node[];
 extern void init_cpu_to_node(void);
 
 void srat_parse_regions(paddr_t addr);
-extern uint8_t __node_distance(nodeid_t a, nodeid_t b);
 unsigned int arch_get_dma_bitsize(void);
 
 #endif
diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
index 110d5dc..10ef4c4 100644
--- a/xen/include/xen/numa.h
+++ b/xen/include/xen/numa.h
@@ -6,6 +6,8 @@ 
 #include <asm/numa.h>
 
 #define NUMA_NO_NODE     0xFF
+#define LOCAL_DISTANCE   10
+#define REMOTE_DISTANCE  20
 #define NUMA_NO_DISTANCE 0xFF
 
 #define MAX_NUMNODES    NR_NODES
@@ -70,6 +72,7 @@  int numa_add_memblk(nodeid_t nodeid, paddr_t start, uint64_t size);
 int get_num_node_memblks(void);
 bool arch_sanitize_nodes_memory(void);
 void numa_failed(void);
+uint8_t __node_distance(nodeid_t a, nodeid_t b);
 #else
 static inline void numa_add_cpu(int cpu) { }
 static inline void numa_set_node(int cpu, nodeid_t node) { }