diff mbox series

[XEN,RFC,24/40] xen/arm: introduce a helper to parse device tree NUMA distance map

Message ID 20210811102423.28908-25-wei.chen@arm.com (mailing list archive)
State New, archived
Headers show
Series Add device tree based NUMA support to Arm64 | expand

Commit Message

Wei Chen Aug. 11, 2021, 10:24 a.m. UTC
A NUMA aware device tree will provide a "distance-map" node to
describe distance between any two nodes. This patch introduce a
new helper to parse this distance map.

Signed-off-by: Wei Chen <wei.chen@arm.com>
---
 xen/arch/arm/numa_device_tree.c | 67 +++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

Comments

Julien Grall Aug. 25, 2021, 1:56 p.m. UTC | #1
Hi Wei,

On 11/08/2021 11:24, Wei Chen wrote:
> A NUMA aware device tree will provide a "distance-map" node to
> describe distance between any two nodes. This patch introduce a

s/introduce/introduces/

> new helper to parse this distance map.
> 
> Signed-off-by: Wei Chen <wei.chen@arm.com>
> ---
>   xen/arch/arm/numa_device_tree.c | 67 +++++++++++++++++++++++++++++++++
>   1 file changed, 67 insertions(+)
> 
> diff --git a/xen/arch/arm/numa_device_tree.c b/xen/arch/arm/numa_device_tree.c
> index bbe081dcd1..6e0d1d3d9f 100644
> --- a/xen/arch/arm/numa_device_tree.c
> +++ b/xen/arch/arm/numa_device_tree.c
> @@ -200,3 +200,70 @@ device_tree_parse_numa_memory_node(const void *fdt, int node,
>   
>       return 0;
>   }
> +
> +/* Parse NUMA distance map v1 */
> +int __init
> +device_tree_parse_numa_distance_map_v1(const void *fdt, int node)
> +{
> +    const struct fdt_property *prop;
> +    const __be32 *matrix;
> +    int entry_count, len, i;

entry_count and i should be unsigned. len unfortunately can't because 
fdt_get_property expects a signed int.

> +
> +    printk(XENLOG_INFO "NUMA: parsing numa-distance-map\n");
> +
> +    prop = fdt_get_property(fdt, node, "distance-matrix", &len);
> +    if ( !prop )
> +    {
> +        printk(XENLOG_WARNING
> +               "NUMA: No distance-matrix property in distance-map\n");
> +
> +        return -EINVAL;
> +    }
> +
> +    if ( len % sizeof(uint32_t) != 0 )
> +    {
> +        printk(XENLOG_WARNING
> +               "distance-matrix in node is not a multiple of u32\n");
> +        return -EINVAL;
> +    }
> +
> +    entry_count = len / sizeof(uint32_t);
> +    if ( entry_count <= 0 )

I understand that entry_count may be 0. But I can't see how it can be 
negative as the property len cannot be (even if it is a signed type). So 
I think this wants to be "== 0".

> +    {
> +        printk(XENLOG_WARNING "NUMA: Invalid distance-matrix\n");
> +
> +        return -EINVAL;
> +    }
> +
> +    matrix = (const __be32 *)prop->data;
> +    for ( i = 0; i + 2 < entry_count; i += 3 )
> +    {
> +        uint32_t from, to, distance;
> +
> +        from = dt_read_number(matrix, 1);
> +        matrix++;

You can use dt_next_cell() which will update the pointer for you.

> +        to = dt_read_number(matrix, 1);
> +        matrix++;
> +        distance = dt_read_number(matrix, 1);
> +        matrix++;
> +
> +        if ( (from == to && distance != NUMA_LOCAL_DISTANCE) ||
> +            (from != to && distance <= NUMA_LOCAL_DISTANCE) )
> +        {
> +            printk(XENLOG_WARNING
> +                   "Invalid nodes' distance from node#%d to node#%d = %d\n",
> +                   from, to, distance);
> +            return -EINVAL;
> +        }
> +
> +        printk(XENLOG_INFO "NUMA: distance from node#%d to node#%d = %d\n",
> +               from, to, distance);
> +        numa_set_distance(from, to, distance);
> +
> +        /* Set default distance of node B->A same as A->B */
> +        if (to > from)
> +             numa_set_distance(to, from, distance);
> +    }
> +
> +    return 0;
> +}
> 

Cheers,
Wei Chen Aug. 26, 2021, 7:01 a.m. UTC | #2
Hi Julien,

> -----Original Message-----
> From: Julien Grall <julien@xen.org>
> Sent: 2021年8月25日 21:56
> To: Wei Chen <Wei.Chen@arm.com>; xen-devel@lists.xenproject.org;
> sstabellini@kernel.org
> Cc: Bertrand Marquis <Bertrand.Marquis@arm.com>; Jan Beulich
> <jbeulich@suse.com>
> Subject: Re: [XEN RFC PATCH 24/40] xen/arm: introduce a helper to parse
> device tree NUMA distance map
> 
> Hi Wei,
> 
> On 11/08/2021 11:24, Wei Chen wrote:
> > A NUMA aware device tree will provide a "distance-map" node to
> > describe distance between any two nodes. This patch introduce a
> 
> s/introduce/introduces/

OK

> 
> > new helper to parse this distance map.
> >
> > Signed-off-by: Wei Chen <wei.chen@arm.com>
> > ---
> >   xen/arch/arm/numa_device_tree.c | 67 +++++++++++++++++++++++++++++++++
> >   1 file changed, 67 insertions(+)
> >
> > diff --git a/xen/arch/arm/numa_device_tree.c
> b/xen/arch/arm/numa_device_tree.c
> > index bbe081dcd1..6e0d1d3d9f 100644
> > --- a/xen/arch/arm/numa_device_tree.c
> > +++ b/xen/arch/arm/numa_device_tree.c
> > @@ -200,3 +200,70 @@ device_tree_parse_numa_memory_node(const void *fdt,
> int node,
> >
> >       return 0;
> >   }
> > +
> > +/* Parse NUMA distance map v1 */
> > +int __init
> > +device_tree_parse_numa_distance_map_v1(const void *fdt, int node)
> > +{
> > +    const struct fdt_property *prop;
> > +    const __be32 *matrix;
> > +    int entry_count, len, i;
> 
> entry_count and i should be unsigned. len unfortunately can't because
> fdt_get_property expects a signed int.
> 

OK

> > +
> > +    printk(XENLOG_INFO "NUMA: parsing numa-distance-map\n");
> > +
> > +    prop = fdt_get_property(fdt, node, "distance-matrix", &len);
> > +    if ( !prop )
> > +    {
> > +        printk(XENLOG_WARNING
> > +               "NUMA: No distance-matrix property in distance-map\n");
> > +
> > +        return -EINVAL;
> > +    }
> > +
> > +    if ( len % sizeof(uint32_t) != 0 )
> > +    {
> > +        printk(XENLOG_WARNING
> > +               "distance-matrix in node is not a multiple of u32\n");
> > +        return -EINVAL;
> > +    }
> > +
> > +    entry_count = len / sizeof(uint32_t);
> > +    if ( entry_count <= 0 )
> 
> I understand that entry_count may be 0. But I can't see how it can be
> negative as the property len cannot be (even if it is a signed type). So
> I think this wants to be "== 0".
> 

From the fdt_get_property's comment, when prop is NULL, the len can
be negative. But, yes, prop==NULL check will handled before this code.
negative len will not reach here. I am ok to change it to "== 0"

> > +    {
> > +        printk(XENLOG_WARNING "NUMA: Invalid distance-matrix\n");
> > +
> > +        return -EINVAL;
> > +    }
> > +
> > +    matrix = (const __be32 *)prop->data;
> > +    for ( i = 0; i + 2 < entry_count; i += 3 )
> > +    {
> > +        uint32_t from, to, distance;
> > +
> > +        from = dt_read_number(matrix, 1);
> > +        matrix++;
> 
> You can use dt_next_cell() which will update the pointer for you.
> 

Thanks

> > +        to = dt_read_number(matrix, 1);
> > +        matrix++;
> > +        distance = dt_read_number(matrix, 1);
> > +        matrix++;
> > +
> > +        if ( (from == to && distance != NUMA_LOCAL_DISTANCE) ||
> > +            (from != to && distance <= NUMA_LOCAL_DISTANCE) )
> > +        {
> > +            printk(XENLOG_WARNING
> > +                   "Invalid nodes' distance from node#%d to node#%d
> = %d\n",
> > +                   from, to, distance);
> > +            return -EINVAL;
> > +        }
> > +
> > +        printk(XENLOG_INFO "NUMA: distance from node#%d to node#%d
> = %d\n",
> > +               from, to, distance);
> > +        numa_set_distance(from, to, distance);
> > +
> > +        /* Set default distance of node B->A same as A->B */
> > +        if (to > from)
> > +             numa_set_distance(to, from, distance);
> > +    }
> > +
> > +    return 0;
> > +}
> >
> 
> Cheers,
> 
> --
> Julien Grall
Stefano Stabellini Aug. 31, 2021, 12:48 a.m. UTC | #3
On Wed, 11 Aug 2021, Wei Chen wrote:
> A NUMA aware device tree will provide a "distance-map" node to
> describe distance between any two nodes. This patch introduce a
> new helper to parse this distance map.
> 
> Signed-off-by: Wei Chen <wei.chen@arm.com>
> ---
>  xen/arch/arm/numa_device_tree.c | 67 +++++++++++++++++++++++++++++++++
>  1 file changed, 67 insertions(+)
> 
> diff --git a/xen/arch/arm/numa_device_tree.c b/xen/arch/arm/numa_device_tree.c
> index bbe081dcd1..6e0d1d3d9f 100644
> --- a/xen/arch/arm/numa_device_tree.c
> +++ b/xen/arch/arm/numa_device_tree.c
> @@ -200,3 +200,70 @@ device_tree_parse_numa_memory_node(const void *fdt, int node,
>  
>      return 0;
>  }
> +
> +/* Parse NUMA distance map v1 */
> +int __init
> +device_tree_parse_numa_distance_map_v1(const void *fdt, int node)
> +{
> +    const struct fdt_property *prop;
> +    const __be32 *matrix;
> +    int entry_count, len, i;
> +
> +    printk(XENLOG_INFO "NUMA: parsing numa-distance-map\n");
> +
> +    prop = fdt_get_property(fdt, node, "distance-matrix", &len);
> +    if ( !prop )
> +    {
> +        printk(XENLOG_WARNING
> +               "NUMA: No distance-matrix property in distance-map\n");
> +
> +        return -EINVAL;
> +    }
> +
> +    if ( len % sizeof(uint32_t) != 0 )
> +    {
> +        printk(XENLOG_WARNING
> +               "distance-matrix in node is not a multiple of u32\n");
> +        return -EINVAL;
> +    }
> +
> +    entry_count = len / sizeof(uint32_t);
> +    if ( entry_count <= 0 )
> +    {
> +        printk(XENLOG_WARNING "NUMA: Invalid distance-matrix\n");
> +
> +        return -EINVAL;
> +    }
> +
> +    matrix = (const __be32 *)prop->data;
> +    for ( i = 0; i + 2 < entry_count; i += 3 )
> +    {
> +        uint32_t from, to, distance;
> +
> +        from = dt_read_number(matrix, 1);
> +        matrix++;
> +        to = dt_read_number(matrix, 1);
> +        matrix++;
> +        distance = dt_read_number(matrix, 1);
> +        matrix++;
> +
> +        if ( (from == to && distance != NUMA_LOCAL_DISTANCE) ||
> +            (from != to && distance <= NUMA_LOCAL_DISTANCE) )
> +        {
> +            printk(XENLOG_WARNING
> +                   "Invalid nodes' distance from node#%d to node#%d = %d\n",
> +                   from, to, distance);
> +            return -EINVAL;
> +        }
> +
> +        printk(XENLOG_INFO "NUMA: distance from node#%d to node#%d = %d\n",
> +               from, to, distance);
> +        numa_set_distance(from, to, distance);
> +
> +        /* Set default distance of node B->A same as A->B */
> +        if (to > from)
> +             numa_set_distance(to, from, distance);

I am a bit unsure about this last 2 lines: why calling numa_set_distance
in the opposite direction only when to > from? Wouldn't it be OK to
always do both:

numa_set_distance(from, to, distance);
numa_set_distance(to, from, distance);

?


But in any case, I have a different suggestion. The binding states that
"distances are equal in either direction". Also it has an example where
only one direction is expressed unfortunately (at the end of the
document).

So my suggestion is to parse it as follows:

- call numa_set_distance just once from
  device_tree_parse_numa_distance_map_v1

- in numa_set_distance:
    - set node_distance_map[from][to] = distance;
    - check node_distance_map[to][from]
          - if unset, node_distance_map[to][from] = distance;
          - if already set to the same value, return success;
          - if already set to a different value, return error;
Wei Chen Aug. 31, 2021, 10:17 a.m. UTC | #4
Hi Stefano,

> -----Original Message-----
> From: Stefano Stabellini <sstabellini@kernel.org>
> Sent: 2021年8月31日 8:48
> To: Wei Chen <Wei.Chen@arm.com>
> Cc: xen-devel@lists.xenproject.org; sstabellini@kernel.org; julien@xen.org;
> jbeulich@suse.com; Bertrand Marquis <Bertrand.Marquis@arm.com>
> Subject: Re: [XEN RFC PATCH 24/40] xen/arm: introduce a helper to parse
> device tree NUMA distance map
> 
> On Wed, 11 Aug 2021, Wei Chen wrote:
> > A NUMA aware device tree will provide a "distance-map" node to
> > describe distance between any two nodes. This patch introduce a
> > new helper to parse this distance map.
> >
> > Signed-off-by: Wei Chen <wei.chen@arm.com>
> > ---
> >  xen/arch/arm/numa_device_tree.c | 67 +++++++++++++++++++++++++++++++++
> >  1 file changed, 67 insertions(+)
> >
> > diff --git a/xen/arch/arm/numa_device_tree.c
> b/xen/arch/arm/numa_device_tree.c
> > index bbe081dcd1..6e0d1d3d9f 100644
> > --- a/xen/arch/arm/numa_device_tree.c
> > +++ b/xen/arch/arm/numa_device_tree.c
> > @@ -200,3 +200,70 @@ device_tree_parse_numa_memory_node(const void *fdt,
> int node,
> >
> >      return 0;
> >  }
> > +
> > +/* Parse NUMA distance map v1 */
> > +int __init
> > +device_tree_parse_numa_distance_map_v1(const void *fdt, int node)
> > +{
> > +    const struct fdt_property *prop;
> > +    const __be32 *matrix;
> > +    int entry_count, len, i;
> > +
> > +    printk(XENLOG_INFO "NUMA: parsing numa-distance-map\n");
> > +
> > +    prop = fdt_get_property(fdt, node, "distance-matrix", &len);
> > +    if ( !prop )
> > +    {
> > +        printk(XENLOG_WARNING
> > +               "NUMA: No distance-matrix property in distance-map\n");
> > +
> > +        return -EINVAL;
> > +    }
> > +
> > +    if ( len % sizeof(uint32_t) != 0 )
> > +    {
> > +        printk(XENLOG_WARNING
> > +               "distance-matrix in node is not a multiple of u32\n");
> > +        return -EINVAL;
> > +    }
> > +
> > +    entry_count = len / sizeof(uint32_t);
> > +    if ( entry_count <= 0 )
> > +    {
> > +        printk(XENLOG_WARNING "NUMA: Invalid distance-matrix\n");
> > +
> > +        return -EINVAL;
> > +    }
> > +
> > +    matrix = (const __be32 *)prop->data;
> > +    for ( i = 0; i + 2 < entry_count; i += 3 )
> > +    {
> > +        uint32_t from, to, distance;
> > +
> > +        from = dt_read_number(matrix, 1);
> > +        matrix++;
> > +        to = dt_read_number(matrix, 1);
> > +        matrix++;
> > +        distance = dt_read_number(matrix, 1);
> > +        matrix++;
> > +
> > +        if ( (from == to && distance != NUMA_LOCAL_DISTANCE) ||
> > +            (from != to && distance <= NUMA_LOCAL_DISTANCE) )
> > +        {
> > +            printk(XENLOG_WARNING
> > +                   "Invalid nodes' distance from node#%d to node#%d
> = %d\n",
> > +                   from, to, distance);
> > +            return -EINVAL;
> > +        }
> > +
> > +        printk(XENLOG_INFO "NUMA: distance from node#%d to node#%d
> = %d\n",
> > +               from, to, distance);
> > +        numa_set_distance(from, to, distance);
> > +
> > +        /* Set default distance of node B->A same as A->B */
> > +        if (to > from)
> > +             numa_set_distance(to, from, distance);
> 
> I am a bit unsure about this last 2 lines: why calling numa_set_distance
> in the opposite direction only when to > from? Wouldn't it be OK to
> always do both:
> 
> numa_set_distance(from, to, distance);
> numa_set_distance(to, from, distance);
> 
> ?
> 
I borrowed this code from Linux, but here is my understanding:

First, I read some notes in Documentation/devicetree/bindings/numa.txt
1. Each entry represents distance from first node to second node.
The distances are equal in either direction.
2. distance-matrix should have entries in lexicographical ascending
order of nodes.

Here is an example of distance-map node in DTB:
Sample#1, full list:
		distance-map {
			 compatible = "numa-distance-map-v1";
			 distance-matrix = <0 0  10>,
					   <0 1  20>,
					   <0 2  40>,
					   <0 3  20>,
					   <1 0  20>,
					   <1 1  10>,
					   <1 2  20>,
					   <1 3  40>,
					   <2 0  40>,
					   <2 1  20>,
					   <2 2  10>,
					   <2 3  20>,
					   <3 0  20>,
					   <3 1  40>,
					   <3 2  20>,
					   <3 3  10>;
		};

Call numa_set_distance when "to > from" will prevent Xen to call
numa_set_distance(0, 1, 20) again when it's setting distance for <1 0 20>.
But, numa_set_distance(1, 0, 20) will be call twice.

Normally, distance-map node will be optimized in following sample#2,
all redundant entries are removed:
Sample#2, partial list:
		distance-map {
			 compatible = "numa-distance-map-v1";
			 distance-matrix = <0 0  10>,
					   <0 1  20>,
					   <0 2  40>,
					   <0 3  20>,
					   <1 1  10>,
					   <1 2  20>,
					   <1 3  40>,
					   <2 2  10>,
					   <2 3  20>,
					   <3 3  10>;
		};

There is not any "from > to" entry in the map. But using this partial map
still can set all distances for all pairs. And numa_set_distance(1, 0, 20)
will be only once.


> But in any case, I have a different suggestion. The binding states that
> "distances are equal in either direction". Also it has an example where
> only one direction is expressed unfortunately (at the end of the
> document).
> 

Oh, I should see this comment first, then I will not post above
comment : )

> So my suggestion is to parse it as follows:
> 
> - call numa_set_distance just once from
>   device_tree_parse_numa_distance_map_v1
> 
> - in numa_set_distance:
>     - set node_distance_map[from][to] = distance;
>     - check node_distance_map[to][from]
>           - if unset, node_distance_map[to][from] = distance;
>           - if already set to the same value, return success;
>           - if already set to a different value, return error;

I don't really like this implementation. I want the behavior of
numa_set_distance just like the function name, do not include
implicit operations. Otherwise, except the user read this function
implementation before he use it, he probably doesn't know this
function has done so many things.

Cheers,
Wei Chen
Stefano Stabellini Aug. 31, 2021, 9:36 p.m. UTC | #5
On Tue, 31 Aug 2021, Wei Chen wrote:
> Hi Stefano,
> 
> > -----Original Message-----
> > From: Stefano Stabellini <sstabellini@kernel.org>
> > Sent: 2021年8月31日 8:48
> > To: Wei Chen <Wei.Chen@arm.com>
> > Cc: xen-devel@lists.xenproject.org; sstabellini@kernel.org; julien@xen.org;
> > jbeulich@suse.com; Bertrand Marquis <Bertrand.Marquis@arm.com>
> > Subject: Re: [XEN RFC PATCH 24/40] xen/arm: introduce a helper to parse
> > device tree NUMA distance map
> > 
> > On Wed, 11 Aug 2021, Wei Chen wrote:
> > > A NUMA aware device tree will provide a "distance-map" node to
> > > describe distance between any two nodes. This patch introduce a
> > > new helper to parse this distance map.
> > >
> > > Signed-off-by: Wei Chen <wei.chen@arm.com>
> > > ---
> > >  xen/arch/arm/numa_device_tree.c | 67 +++++++++++++++++++++++++++++++++
> > >  1 file changed, 67 insertions(+)
> > >
> > > diff --git a/xen/arch/arm/numa_device_tree.c
> > b/xen/arch/arm/numa_device_tree.c
> > > index bbe081dcd1..6e0d1d3d9f 100644
> > > --- a/xen/arch/arm/numa_device_tree.c
> > > +++ b/xen/arch/arm/numa_device_tree.c
> > > @@ -200,3 +200,70 @@ device_tree_parse_numa_memory_node(const void *fdt,
> > int node,
> > >
> > >      return 0;
> > >  }
> > > +
> > > +/* Parse NUMA distance map v1 */
> > > +int __init
> > > +device_tree_parse_numa_distance_map_v1(const void *fdt, int node)
> > > +{
> > > +    const struct fdt_property *prop;
> > > +    const __be32 *matrix;
> > > +    int entry_count, len, i;
> > > +
> > > +    printk(XENLOG_INFO "NUMA: parsing numa-distance-map\n");
> > > +
> > > +    prop = fdt_get_property(fdt, node, "distance-matrix", &len);
> > > +    if ( !prop )
> > > +    {
> > > +        printk(XENLOG_WARNING
> > > +               "NUMA: No distance-matrix property in distance-map\n");
> > > +
> > > +        return -EINVAL;
> > > +    }
> > > +
> > > +    if ( len % sizeof(uint32_t) != 0 )
> > > +    {
> > > +        printk(XENLOG_WARNING
> > > +               "distance-matrix in node is not a multiple of u32\n");
> > > +        return -EINVAL;
> > > +    }
> > > +
> > > +    entry_count = len / sizeof(uint32_t);
> > > +    if ( entry_count <= 0 )
> > > +    {
> > > +        printk(XENLOG_WARNING "NUMA: Invalid distance-matrix\n");
> > > +
> > > +        return -EINVAL;
> > > +    }
> > > +
> > > +    matrix = (const __be32 *)prop->data;
> > > +    for ( i = 0; i + 2 < entry_count; i += 3 )
> > > +    {
> > > +        uint32_t from, to, distance;
> > > +
> > > +        from = dt_read_number(matrix, 1);
> > > +        matrix++;
> > > +        to = dt_read_number(matrix, 1);
> > > +        matrix++;
> > > +        distance = dt_read_number(matrix, 1);
> > > +        matrix++;
> > > +
> > > +        if ( (from == to && distance != NUMA_LOCAL_DISTANCE) ||
> > > +            (from != to && distance <= NUMA_LOCAL_DISTANCE) )
> > > +        {
> > > +            printk(XENLOG_WARNING
> > > +                   "Invalid nodes' distance from node#%d to node#%d
> > = %d\n",
> > > +                   from, to, distance);
> > > +            return -EINVAL;
> > > +        }
> > > +
> > > +        printk(XENLOG_INFO "NUMA: distance from node#%d to node#%d
> > = %d\n",
> > > +               from, to, distance);
> > > +        numa_set_distance(from, to, distance);
> > > +
> > > +        /* Set default distance of node B->A same as A->B */
> > > +        if (to > from)
> > > +             numa_set_distance(to, from, distance);
> > 
> > I am a bit unsure about this last 2 lines: why calling numa_set_distance
> > in the opposite direction only when to > from? Wouldn't it be OK to
> > always do both:
> > 
> > numa_set_distance(from, to, distance);
> > numa_set_distance(to, from, distance);
> > 
> > ?
> > 
> I borrowed this code from Linux, but here is my understanding:
> 
> First, I read some notes in Documentation/devicetree/bindings/numa.txt
> 1. Each entry represents distance from first node to second node.
> The distances are equal in either direction.
> 2. distance-matrix should have entries in lexicographical ascending
> order of nodes.
> 
> Here is an example of distance-map node in DTB:
> Sample#1, full list:
> 		distance-map {
> 			 compatible = "numa-distance-map-v1";
> 			 distance-matrix = <0 0  10>,
> 					   <0 1  20>,
> 					   <0 2  40>,
> 					   <0 3  20>,
> 					   <1 0  20>,
> 					   <1 1  10>,
> 					   <1 2  20>,
> 					   <1 3  40>,
> 					   <2 0  40>,
> 					   <2 1  20>,
> 					   <2 2  10>,
> 					   <2 3  20>,
> 					   <3 0  20>,
> 					   <3 1  40>,
> 					   <3 2  20>,
> 					   <3 3  10>;
> 		};
>
> Call numa_set_distance when "to > from" will prevent Xen to call
> numa_set_distance(0, 1, 20) again when it's setting distance for <1 0 20>.
> But, numa_set_distance(1, 0, 20) will be call twice.
> 
> Normally, distance-map node will be optimized in following sample#2,
> all redundant entries are removed:
> Sample#2, partial list:
> 		distance-map {
> 			 compatible = "numa-distance-map-v1";
> 			 distance-matrix = <0 0  10>,
> 					   <0 1  20>,
> 					   <0 2  40>,
> 					   <0 3  20>,
> 					   <1 1  10>,
> 					   <1 2  20>,
> 					   <1 3  40>,
> 					   <2 2  10>,
> 					   <2 3  20>,
> 					   <3 3  10>;
> 		};
> 
> There is not any "from > to" entry in the map. But using this partial map
> still can set all distances for all pairs. And numa_set_distance(1, 0, 20)
> will be only once.
 
I see. I can't find in Documentation/devicetree/bindings/numa.txt where
it says that "from > to" nodes can be omitted. If it is not written
down, then somebody could easily optimize it the opposite way:

 			 distance-matrix = <0 0  10>,
 					   <1 0  20>,
 					   <2 0  40>,
 					   <3 0  20>,
 					   <1 1  10>,
 					   <2 1  20>,
 					   <3 1  40>,
 					   <2 2  10>,
 					   <3 2  20>,
 					   <3 3  10>;

I think the code in Xen should be resilient and able to cope with a
device tree like the one you wrote or the one I wrote. From a code
perspective, it should be very easy to do. If nothing else it would make
Xen more resilient against buggy firmware.

 
> > But in any case, I have a different suggestion. The binding states that
> > "distances are equal in either direction". Also it has an example where
> > only one direction is expressed unfortunately (at the end of the
> > document).
> > 
> 
> Oh, I should see this comment first, then I will not post above
> comment : )
> 
> > So my suggestion is to parse it as follows:
> > 
> > - call numa_set_distance just once from
> >   device_tree_parse_numa_distance_map_v1
> > 
> > - in numa_set_distance:
> >     - set node_distance_map[from][to] = distance;
> >     - check node_distance_map[to][from]
> >           - if unset, node_distance_map[to][from] = distance;
> >           - if already set to the same value, return success;
> >           - if already set to a different value, return error;
> 
> I don't really like this implementation. I want the behavior of
> numa_set_distance just like the function name, do not include
> implicit operations. Otherwise, except the user read this function
> implementation before he use it, he probably doesn't know this
> function has done so many things.

You can leave numa_set_distance as-is without any implicit operations.

In that case, just call numa_set_distance twice from numa_set_distance
for both from/to and to/from. numa_set_distance could return error is
the entry was already set to a different value or success otherwise
(also in the case it was already set to the same value). This would
allow Xen to cope with both scenarios above.
Wei Chen Sept. 1, 2021, 11:04 a.m. UTC | #6
Hi Stefano,

> -----Original Message-----
> From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of
> Stefano Stabellini
> Sent: 2021年9月1日 5:36
> To: Wei Chen <Wei.Chen@arm.com>
> Cc: Stefano Stabellini <sstabellini@kernel.org>; xen-
> devel@lists.xenproject.org; julien@xen.org; Bertrand Marquis
> <Bertrand.Marquis@arm.com>
> Subject: RE: [XEN RFC PATCH 24/40] xen/arm: introduce a helper to parse
> device tree NUMA distance map
> 
> On Tue, 31 Aug 2021, Wei Chen wrote:
> > Hi Stefano,
> >
> > > -----Original Message-----
> > > From: Stefano Stabellini <sstabellini@kernel.org>
> > > Sent: 2021年8月31日 8:48
> > > To: Wei Chen <Wei.Chen@arm.com>
> > > Cc: xen-devel@lists.xenproject.org; sstabellini@kernel.org;
> julien@xen.org;
> > > jbeulich@suse.com; Bertrand Marquis <Bertrand.Marquis@arm.com>
> > > Subject: Re: [XEN RFC PATCH 24/40] xen/arm: introduce a helper to
> parse
> > > device tree NUMA distance map
> > >
> > > On Wed, 11 Aug 2021, Wei Chen wrote:
> > > > A NUMA aware device tree will provide a "distance-map" node to
> > > > describe distance between any two nodes. This patch introduce a
> > > > new helper to parse this distance map.
> > > >
> > > > Signed-off-by: Wei Chen <wei.chen@arm.com>
> > > > ---
> > > >  xen/arch/arm/numa_device_tree.c | 67
> +++++++++++++++++++++++++++++++++
> > > >  1 file changed, 67 insertions(+)
> > > >
> > > > diff --git a/xen/arch/arm/numa_device_tree.c
> > > b/xen/arch/arm/numa_device_tree.c
> > > > index bbe081dcd1..6e0d1d3d9f 100644
> > > > --- a/xen/arch/arm/numa_device_tree.c
> > > > +++ b/xen/arch/arm/numa_device_tree.c
> > > > @@ -200,3 +200,70 @@ device_tree_parse_numa_memory_node(const void
> *fdt,
> > > int node,
> > > >
> > > >      return 0;
> > > >  }
> > > > +
> > > > +/* Parse NUMA distance map v1 */
> > > > +int __init
> > > > +device_tree_parse_numa_distance_map_v1(const void *fdt, int node)
> > > > +{
> > > > +    const struct fdt_property *prop;
> > > > +    const __be32 *matrix;
> > > > +    int entry_count, len, i;
> > > > +
> > > > +    printk(XENLOG_INFO "NUMA: parsing numa-distance-map\n");
> > > > +
> > > > +    prop = fdt_get_property(fdt, node, "distance-matrix", &len);
> > > > +    if ( !prop )
> > > > +    {
> > > > +        printk(XENLOG_WARNING
> > > > +               "NUMA: No distance-matrix property in distance-
> map\n");
> > > > +
> > > > +        return -EINVAL;
> > > > +    }
> > > > +
> > > > +    if ( len % sizeof(uint32_t) != 0 )
> > > > +    {
> > > > +        printk(XENLOG_WARNING
> > > > +               "distance-matrix in node is not a multiple of
> u32\n");
> > > > +        return -EINVAL;
> > > > +    }
> > > > +
> > > > +    entry_count = len / sizeof(uint32_t);
> > > > +    if ( entry_count <= 0 )
> > > > +    {
> > > > +        printk(XENLOG_WARNING "NUMA: Invalid distance-matrix\n");
> > > > +
> > > > +        return -EINVAL;
> > > > +    }
> > > > +
> > > > +    matrix = (const __be32 *)prop->data;
> > > > +    for ( i = 0; i + 2 < entry_count; i += 3 )
> > > > +    {
> > > > +        uint32_t from, to, distance;
> > > > +
> > > > +        from = dt_read_number(matrix, 1);
> > > > +        matrix++;
> > > > +        to = dt_read_number(matrix, 1);
> > > > +        matrix++;
> > > > +        distance = dt_read_number(matrix, 1);
> > > > +        matrix++;
> > > > +
> > > > +        if ( (from == to && distance != NUMA_LOCAL_DISTANCE) ||
> > > > +            (from != to && distance <= NUMA_LOCAL_DISTANCE) )
> > > > +        {
> > > > +            printk(XENLOG_WARNING
> > > > +                   "Invalid nodes' distance from node#%d to node#%d
> > > = %d\n",
> > > > +                   from, to, distance);
> > > > +            return -EINVAL;
> > > > +        }
> > > > +
> > > > +        printk(XENLOG_INFO "NUMA: distance from node#%d to node#%d
> > > = %d\n",
> > > > +               from, to, distance);
> > > > +        numa_set_distance(from, to, distance);
> > > > +
> > > > +        /* Set default distance of node B->A same as A->B */
> > > > +        if (to > from)
> > > > +             numa_set_distance(to, from, distance);
> > >
> > > I am a bit unsure about this last 2 lines: why calling
> numa_set_distance
> > > in the opposite direction only when to > from? Wouldn't it be OK to
> > > always do both:
> > >
> > > numa_set_distance(from, to, distance);
> > > numa_set_distance(to, from, distance);
> > >
> > > ?
> > >
> > I borrowed this code from Linux, but here is my understanding:
> >
> > First, I read some notes in Documentation/devicetree/bindings/numa.txt
> > 1. Each entry represents distance from first node to second node.
> > The distances are equal in either direction.
> > 2. distance-matrix should have entries in lexicographical ascending
> > order of nodes.
> >
> > Here is an example of distance-map node in DTB:
> > Sample#1, full list:
> > 		distance-map {
> > 			 compatible = "numa-distance-map-v1";
> > 			 distance-matrix = <0 0  10>,
> > 					   <0 1  20>,
> > 					   <0 2  40>,
> > 					   <0 3  20>,
> > 					   <1 0  20>,
> > 					   <1 1  10>,
> > 					   <1 2  20>,
> > 					   <1 3  40>,
> > 					   <2 0  40>,
> > 					   <2 1  20>,
> > 					   <2 2  10>,
> > 					   <2 3  20>,
> > 					   <3 0  20>,
> > 					   <3 1  40>,
> > 					   <3 2  20>,
> > 					   <3 3  10>;
> > 		};
> >
> > Call numa_set_distance when "to > from" will prevent Xen to call
> > numa_set_distance(0, 1, 20) again when it's setting distance for <1 0
> 20>.
> > But, numa_set_distance(1, 0, 20) will be call twice.
> >
> > Normally, distance-map node will be optimized in following sample#2,
> > all redundant entries are removed:
> > Sample#2, partial list:
> > 		distance-map {
> > 			 compatible = "numa-distance-map-v1";
> > 			 distance-matrix = <0 0  10>,
> > 					   <0 1  20>,
> > 					   <0 2  40>,
> > 					   <0 3  20>,
> > 					   <1 1  10>,
> > 					   <1 2  20>,
> > 					   <1 3  40>,
> > 					   <2 2  10>,
> > 					   <2 3  20>,
> > 					   <3 3  10>;
> > 		};
> >
> > There is not any "from > to" entry in the map. But using this partial
> map
> > still can set all distances for all pairs. And numa_set_distance(1, 0,
> 20)
> > will be only once.
> 
> I see. I can't find in Documentation/devicetree/bindings/numa.txt where
> it says that "from > to" nodes can be omitted. If it is not written
> down, then somebody could easily optimize it the opposite way:
> 
>  			 distance-matrix = <0 0  10>,
>  					   <1 0  20>,
>  					   <2 0  40>,
>  					   <3 0  20>,
>  					   <1 1  10>,
>  					   <2 1  20>,
>  					   <3 1  40>,
>  					   <2 2  10>,
>  					   <3 2  20>,
>  					   <3 3  10>;
> 

Yes, you're right. Spec doesn't say opposite way is unallowed.

> I think the code in Xen should be resilient and able to cope with a
> device tree like the one you wrote or the one I wrote. From a code
> perspective, it should be very easy to do. If nothing else it would make
> Xen more resilient against buggy firmware.
> 
> 

I don't disagree with that.

> > > But in any case, I have a different suggestion. The binding states
> that
> > > "distances are equal in either direction". Also it has an example
> where
> > > only one direction is expressed unfortunately (at the end of the
> > > document).
> > >
> >
> > Oh, I should see this comment first, then I will not post above
> > comment : )
> >
> > > So my suggestion is to parse it as follows:
> > >
> > > - call numa_set_distance just once from
> > >   device_tree_parse_numa_distance_map_v1
> > >
> > > - in numa_set_distance:
> > >     - set node_distance_map[from][to] = distance;
> > >     - check node_distance_map[to][from]
> > >           - if unset, node_distance_map[to][from] = distance;
> > >           - if already set to the same value, return success;
> > >           - if already set to a different value, return error;
> >
> > I don't really like this implementation. I want the behavior of
> > numa_set_distance just like the function name, do not include
> > implicit operations. Otherwise, except the user read this function
> > implementation before he use it, he probably doesn't know this
> > function has done so many things.
> 
> You can leave numa_set_distance as-is without any implicit operations.
> 
> In that case, just call numa_set_distance twice from numa_set_distance
> for both from/to and to/from. numa_set_distance could return error is

I am OK for the first sentence. But...

> the entry was already set to a different value or success otherwise
> (also in the case it was already set to the same value). This would

... I prefer not to check the previous value. Subsequent numa_set_distance
call will override previous calls. Keep numa_set_distance as simple as
it can. And when you pass new data to numa_set_distance, it doesn't
know whether the previous data was correct or the new data is correct.
Only caller may have known.  

> allow Xen to cope with both scenarios above.
Stefano Stabellini Sept. 1, 2021, 4:21 p.m. UTC | #7
On Wed, 1 Sep 2021, Wei Chen wrote:
> Hi Stefano,
> 
> > -----Original Message-----
> > From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of
> > Stefano Stabellini
> > Sent: 2021年9月1日 5:36
> > To: Wei Chen <Wei.Chen@arm.com>
> > Cc: Stefano Stabellini <sstabellini@kernel.org>; xen-
> > devel@lists.xenproject.org; julien@xen.org; Bertrand Marquis
> > <Bertrand.Marquis@arm.com>
> > Subject: RE: [XEN RFC PATCH 24/40] xen/arm: introduce a helper to parse
> > device tree NUMA distance map
> > 
> > On Tue, 31 Aug 2021, Wei Chen wrote:
> > > Hi Stefano,
> > >
> > > > -----Original Message-----
> > > > From: Stefano Stabellini <sstabellini@kernel.org>
> > > > Sent: 2021年8月31日 8:48
> > > > To: Wei Chen <Wei.Chen@arm.com>
> > > > Cc: xen-devel@lists.xenproject.org; sstabellini@kernel.org;
> > julien@xen.org;
> > > > jbeulich@suse.com; Bertrand Marquis <Bertrand.Marquis@arm.com>
> > > > Subject: Re: [XEN RFC PATCH 24/40] xen/arm: introduce a helper to
> > parse
> > > > device tree NUMA distance map
> > > >
> > > > On Wed, 11 Aug 2021, Wei Chen wrote:
> > > > > A NUMA aware device tree will provide a "distance-map" node to
> > > > > describe distance between any two nodes. This patch introduce a
> > > > > new helper to parse this distance map.
> > > > >
> > > > > Signed-off-by: Wei Chen <wei.chen@arm.com>
> > > > > ---
> > > > >  xen/arch/arm/numa_device_tree.c | 67
> > +++++++++++++++++++++++++++++++++
> > > > >  1 file changed, 67 insertions(+)
> > > > >
> > > > > diff --git a/xen/arch/arm/numa_device_tree.c
> > > > b/xen/arch/arm/numa_device_tree.c
> > > > > index bbe081dcd1..6e0d1d3d9f 100644
> > > > > --- a/xen/arch/arm/numa_device_tree.c
> > > > > +++ b/xen/arch/arm/numa_device_tree.c
> > > > > @@ -200,3 +200,70 @@ device_tree_parse_numa_memory_node(const void
> > *fdt,
> > > > int node,
> > > > >
> > > > >      return 0;
> > > > >  }
> > > > > +
> > > > > +/* Parse NUMA distance map v1 */
> > > > > +int __init
> > > > > +device_tree_parse_numa_distance_map_v1(const void *fdt, int node)
> > > > > +{
> > > > > +    const struct fdt_property *prop;
> > > > > +    const __be32 *matrix;
> > > > > +    int entry_count, len, i;
> > > > > +
> > > > > +    printk(XENLOG_INFO "NUMA: parsing numa-distance-map\n");
> > > > > +
> > > > > +    prop = fdt_get_property(fdt, node, "distance-matrix", &len);
> > > > > +    if ( !prop )
> > > > > +    {
> > > > > +        printk(XENLOG_WARNING
> > > > > +               "NUMA: No distance-matrix property in distance-
> > map\n");
> > > > > +
> > > > > +        return -EINVAL;
> > > > > +    }
> > > > > +
> > > > > +    if ( len % sizeof(uint32_t) != 0 )
> > > > > +    {
> > > > > +        printk(XENLOG_WARNING
> > > > > +               "distance-matrix in node is not a multiple of
> > u32\n");
> > > > > +        return -EINVAL;
> > > > > +    }
> > > > > +
> > > > > +    entry_count = len / sizeof(uint32_t);
> > > > > +    if ( entry_count <= 0 )
> > > > > +    {
> > > > > +        printk(XENLOG_WARNING "NUMA: Invalid distance-matrix\n");
> > > > > +
> > > > > +        return -EINVAL;
> > > > > +    }
> > > > > +
> > > > > +    matrix = (const __be32 *)prop->data;
> > > > > +    for ( i = 0; i + 2 < entry_count; i += 3 )
> > > > > +    {
> > > > > +        uint32_t from, to, distance;
> > > > > +
> > > > > +        from = dt_read_number(matrix, 1);
> > > > > +        matrix++;
> > > > > +        to = dt_read_number(matrix, 1);
> > > > > +        matrix++;
> > > > > +        distance = dt_read_number(matrix, 1);
> > > > > +        matrix++;
> > > > > +
> > > > > +        if ( (from == to && distance != NUMA_LOCAL_DISTANCE) ||
> > > > > +            (from != to && distance <= NUMA_LOCAL_DISTANCE) )
> > > > > +        {
> > > > > +            printk(XENLOG_WARNING
> > > > > +                   "Invalid nodes' distance from node#%d to node#%d
> > > > = %d\n",
> > > > > +                   from, to, distance);
> > > > > +            return -EINVAL;
> > > > > +        }
> > > > > +
> > > > > +        printk(XENLOG_INFO "NUMA: distance from node#%d to node#%d
> > > > = %d\n",
> > > > > +               from, to, distance);
> > > > > +        numa_set_distance(from, to, distance);
> > > > > +
> > > > > +        /* Set default distance of node B->A same as A->B */
> > > > > +        if (to > from)
> > > > > +             numa_set_distance(to, from, distance);
> > > >
> > > > I am a bit unsure about this last 2 lines: why calling
> > numa_set_distance
> > > > in the opposite direction only when to > from? Wouldn't it be OK to
> > > > always do both:
> > > >
> > > > numa_set_distance(from, to, distance);
> > > > numa_set_distance(to, from, distance);
> > > >
> > > > ?
> > > >
> > > I borrowed this code from Linux, but here is my understanding:
> > >
> > > First, I read some notes in Documentation/devicetree/bindings/numa.txt
> > > 1. Each entry represents distance from first node to second node.
> > > The distances are equal in either direction.
> > > 2. distance-matrix should have entries in lexicographical ascending
> > > order of nodes.
> > >
> > > Here is an example of distance-map node in DTB:
> > > Sample#1, full list:
> > > 		distance-map {
> > > 			 compatible = "numa-distance-map-v1";
> > > 			 distance-matrix = <0 0  10>,
> > > 					   <0 1  20>,
> > > 					   <0 2  40>,
> > > 					   <0 3  20>,
> > > 					   <1 0  20>,
> > > 					   <1 1  10>,
> > > 					   <1 2  20>,
> > > 					   <1 3  40>,
> > > 					   <2 0  40>,
> > > 					   <2 1  20>,
> > > 					   <2 2  10>,
> > > 					   <2 3  20>,
> > > 					   <3 0  20>,
> > > 					   <3 1  40>,
> > > 					   <3 2  20>,
> > > 					   <3 3  10>;
> > > 		};
> > >
> > > Call numa_set_distance when "to > from" will prevent Xen to call
> > > numa_set_distance(0, 1, 20) again when it's setting distance for <1 0
> > 20>.
> > > But, numa_set_distance(1, 0, 20) will be call twice.
> > >
> > > Normally, distance-map node will be optimized in following sample#2,
> > > all redundant entries are removed:
> > > Sample#2, partial list:
> > > 		distance-map {
> > > 			 compatible = "numa-distance-map-v1";
> > > 			 distance-matrix = <0 0  10>,
> > > 					   <0 1  20>,
> > > 					   <0 2  40>,
> > > 					   <0 3  20>,
> > > 					   <1 1  10>,
> > > 					   <1 2  20>,
> > > 					   <1 3  40>,
> > > 					   <2 2  10>,
> > > 					   <2 3  20>,
> > > 					   <3 3  10>;
> > > 		};
> > >
> > > There is not any "from > to" entry in the map. But using this partial
> > map
> > > still can set all distances for all pairs. And numa_set_distance(1, 0,
> > 20)
> > > will be only once.
> > 
> > I see. I can't find in Documentation/devicetree/bindings/numa.txt where
> > it says that "from > to" nodes can be omitted. If it is not written
> > down, then somebody could easily optimize it the opposite way:
> > 
> >  			 distance-matrix = <0 0  10>,
> >  					   <1 0  20>,
> >  					   <2 0  40>,
> >  					   <3 0  20>,
> >  					   <1 1  10>,
> >  					   <2 1  20>,
> >  					   <3 1  40>,
> >  					   <2 2  10>,
> >  					   <3 2  20>,
> >  					   <3 3  10>;
> > 
> 
> Yes, you're right. Spec doesn't say opposite way is unallowed.
> 
> > I think the code in Xen should be resilient and able to cope with a
> > device tree like the one you wrote or the one I wrote. From a code
> > perspective, it should be very easy to do. If nothing else it would make
> > Xen more resilient against buggy firmware.
> > 
> > 
> 
> I don't disagree with that.
> 
> > > > But in any case, I have a different suggestion. The binding states
> > that
> > > > "distances are equal in either direction". Also it has an example
> > where
> > > > only one direction is expressed unfortunately (at the end of the
> > > > document).
> > > >
> > >
> > > Oh, I should see this comment first, then I will not post above
> > > comment : )
> > >
> > > > So my suggestion is to parse it as follows:
> > > >
> > > > - call numa_set_distance just once from
> > > >   device_tree_parse_numa_distance_map_v1
> > > >
> > > > - in numa_set_distance:
> > > >     - set node_distance_map[from][to] = distance;
> > > >     - check node_distance_map[to][from]
> > > >           - if unset, node_distance_map[to][from] = distance;
> > > >           - if already set to the same value, return success;
> > > >           - if already set to a different value, return error;
> > >
> > > I don't really like this implementation. I want the behavior of
> > > numa_set_distance just like the function name, do not include
> > > implicit operations. Otherwise, except the user read this function
> > > implementation before he use it, he probably doesn't know this
> > > function has done so many things.
> > 
> > You can leave numa_set_distance as-is without any implicit operations.
> > 
> > In that case, just call numa_set_distance twice from numa_set_distance
> > for both from/to and to/from. numa_set_distance could return error is
> 
> I am OK for the first sentence. But...
> 
> > the entry was already set to a different value or success otherwise
> > (also in the case it was already set to the same value). This would
> 
> ... I prefer not to check the previous value. Subsequent numa_set_distance
> call will override previous calls. Keep numa_set_distance as simple as
> it can. And when you pass new data to numa_set_distance, it doesn't
> know whether the previous data was correct or the new data is correct.
> Only caller may have known.  

That might be OK but if not numa_set_distance then somebody else needs
to check against overwriting previous values. That is to be able to spot
bad device tree cases like:

  0 1 20
  1 0 40
Wei Chen Sept. 2, 2021, 2:30 a.m. UTC | #8
Hi Stefano,

> -----Original Message-----
> From: Stefano Stabellini <sstabellini@kernel.org>
> Sent: 2021年9月2日 0:22
> To: Wei Chen <Wei.Chen@arm.com>
> Cc: Stefano Stabellini <sstabellini@kernel.org>; xen-
> devel@lists.xenproject.org; julien@xen.org; Bertrand Marquis
> <Bertrand.Marquis@arm.com>
> Subject: RE: [XEN RFC PATCH 24/40] xen/arm: introduce a helper to parse
> device tree NUMA distance map
> 
> On Wed, 1 Sep 2021, Wei Chen wrote:
> > Hi Stefano,
> >
> > > -----Original Message-----
> > > From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of
> > > Stefano Stabellini
> > > Sent: 2021年9月1日 5:36
> > > To: Wei Chen <Wei.Chen@arm.com>
> > > Cc: Stefano Stabellini <sstabellini@kernel.org>; xen-
> > > devel@lists.xenproject.org; julien@xen.org; Bertrand Marquis
> > > <Bertrand.Marquis@arm.com>
> > > Subject: RE: [XEN RFC PATCH 24/40] xen/arm: introduce a helper to
> parse
> > > device tree NUMA distance map
> > >
> > > On Tue, 31 Aug 2021, Wei Chen wrote:
> > > > Hi Stefano,
> > > >
> > > > > -----Original Message-----
> > > > > From: Stefano Stabellini <sstabellini@kernel.org>
> > > > > Sent: 2021年8月31日 8:48
> > > > > To: Wei Chen <Wei.Chen@arm.com>
> > > > > Cc: xen-devel@lists.xenproject.org; sstabellini@kernel.org;
> > > julien@xen.org;
> > > > > jbeulich@suse.com; Bertrand Marquis <Bertrand.Marquis@arm.com>
> > > > > Subject: Re: [XEN RFC PATCH 24/40] xen/arm: introduce a helper to
> > > parse
> > > > > device tree NUMA distance map
> > > > >
> > > > > On Wed, 11 Aug 2021, Wei Chen wrote:
> > > > > > A NUMA aware device tree will provide a "distance-map" node to
> > > > > > describe distance between any two nodes. This patch introduce a
> > > > > > new helper to parse this distance map.
> > > > > >
> > > > > > Signed-off-by: Wei Chen <wei.chen@arm.com>
> > > > > > ---
> > > > > >  xen/arch/arm/numa_device_tree.c | 67
> > > +++++++++++++++++++++++++++++++++
> > > > > >  1 file changed, 67 insertions(+)
> > > > > >
> > > > > > diff --git a/xen/arch/arm/numa_device_tree.c
> > > > > b/xen/arch/arm/numa_device_tree.c
> > > > > > index bbe081dcd1..6e0d1d3d9f 100644
> > > > > > --- a/xen/arch/arm/numa_device_tree.c
> > > > > > +++ b/xen/arch/arm/numa_device_tree.c
> > > > > > @@ -200,3 +200,70 @@ device_tree_parse_numa_memory_node(const
> void
> > > *fdt,
> > > > > int node,
> > > > > >
> > > > > >      return 0;
> > > > > >  }
> > > > > > +
> > > > > > +/* Parse NUMA distance map v1 */
> > > > > > +int __init
> > > > > > +device_tree_parse_numa_distance_map_v1(const void *fdt, int
> node)
> > > > > > +{
> > > > > > +    const struct fdt_property *prop;
> > > > > > +    const __be32 *matrix;
> > > > > > +    int entry_count, len, i;
> > > > > > +
> > > > > > +    printk(XENLOG_INFO "NUMA: parsing numa-distance-map\n");
> > > > > > +
> > > > > > +    prop = fdt_get_property(fdt, node, "distance-matrix", &len);
> > > > > > +    if ( !prop )
> > > > > > +    {
> > > > > > +        printk(XENLOG_WARNING
> > > > > > +               "NUMA: No distance-matrix property in distance-
> > > map\n");
> > > > > > +
> > > > > > +        return -EINVAL;
> > > > > > +    }
> > > > > > +
> > > > > > +    if ( len % sizeof(uint32_t) != 0 )
> > > > > > +    {
> > > > > > +        printk(XENLOG_WARNING
> > > > > > +               "distance-matrix in node is not a multiple of
> > > u32\n");
> > > > > > +        return -EINVAL;
> > > > > > +    }
> > > > > > +
> > > > > > +    entry_count = len / sizeof(uint32_t);
> > > > > > +    if ( entry_count <= 0 )
> > > > > > +    {
> > > > > > +        printk(XENLOG_WARNING "NUMA: Invalid distance-
> matrix\n");
> > > > > > +
> > > > > > +        return -EINVAL;
> > > > > > +    }
> > > > > > +
> > > > > > +    matrix = (const __be32 *)prop->data;
> > > > > > +    for ( i = 0; i + 2 < entry_count; i += 3 )
> > > > > > +    {
> > > > > > +        uint32_t from, to, distance;
> > > > > > +
> > > > > > +        from = dt_read_number(matrix, 1);
> > > > > > +        matrix++;
> > > > > > +        to = dt_read_number(matrix, 1);
> > > > > > +        matrix++;
> > > > > > +        distance = dt_read_number(matrix, 1);
> > > > > > +        matrix++;
> > > > > > +
> > > > > > +        if ( (from == to && distance != NUMA_LOCAL_DISTANCE) ||
> > > > > > +            (from != to && distance <= NUMA_LOCAL_DISTANCE) )
> > > > > > +        {
> > > > > > +            printk(XENLOG_WARNING
> > > > > > +                   "Invalid nodes' distance from node#%d to
> node#%d
> > > > > = %d\n",
> > > > > > +                   from, to, distance);
> > > > > > +            return -EINVAL;
> > > > > > +        }
> > > > > > +
> > > > > > +        printk(XENLOG_INFO "NUMA: distance from node#%d to
> node#%d
> > > > > = %d\n",
> > > > > > +               from, to, distance);
> > > > > > +        numa_set_distance(from, to, distance);
> > > > > > +
> > > > > > +        /* Set default distance of node B->A same as A->B */
> > > > > > +        if (to > from)
> > > > > > +             numa_set_distance(to, from, distance);
> > > > >
> > > > > I am a bit unsure about this last 2 lines: why calling
> > > numa_set_distance
> > > > > in the opposite direction only when to > from? Wouldn't it be OK
> to
> > > > > always do both:
> > > > >
> > > > > numa_set_distance(from, to, distance);
> > > > > numa_set_distance(to, from, distance);
> > > > >
> > > > > ?
> > > > >
> > > > I borrowed this code from Linux, but here is my understanding:
> > > >
> > > > First, I read some notes in
> Documentation/devicetree/bindings/numa.txt
> > > > 1. Each entry represents distance from first node to second node.
> > > > The distances are equal in either direction.
> > > > 2. distance-matrix should have entries in lexicographical ascending
> > > > order of nodes.
> > > >
> > > > Here is an example of distance-map node in DTB:
> > > > Sample#1, full list:
> > > > 		distance-map {
> > > > 			 compatible = "numa-distance-map-v1";
> > > > 			 distance-matrix = <0 0  10>,
> > > > 					   <0 1  20>,
> > > > 					   <0 2  40>,
> > > > 					   <0 3  20>,
> > > > 					   <1 0  20>,
> > > > 					   <1 1  10>,
> > > > 					   <1 2  20>,
> > > > 					   <1 3  40>,
> > > > 					   <2 0  40>,
> > > > 					   <2 1  20>,
> > > > 					   <2 2  10>,
> > > > 					   <2 3  20>,
> > > > 					   <3 0  20>,
> > > > 					   <3 1  40>,
> > > > 					   <3 2  20>,
> > > > 					   <3 3  10>;
> > > > 		};
> > > >
> > > > Call numa_set_distance when "to > from" will prevent Xen to call
> > > > numa_set_distance(0, 1, 20) again when it's setting distance for <1
> 0
> > > 20>.
> > > > But, numa_set_distance(1, 0, 20) will be call twice.
> > > >
> > > > Normally, distance-map node will be optimized in following sample#2,
> > > > all redundant entries are removed:
> > > > Sample#2, partial list:
> > > > 		distance-map {
> > > > 			 compatible = "numa-distance-map-v1";
> > > > 			 distance-matrix = <0 0  10>,
> > > > 					   <0 1  20>,
> > > > 					   <0 2  40>,
> > > > 					   <0 3  20>,
> > > > 					   <1 1  10>,
> > > > 					   <1 2  20>,
> > > > 					   <1 3  40>,
> > > > 					   <2 2  10>,
> > > > 					   <2 3  20>,
> > > > 					   <3 3  10>;
> > > > 		};
> > > >
> > > > There is not any "from > to" entry in the map. But using this
> partial
> > > map
> > > > still can set all distances for all pairs. And numa_set_distance(1,
> 0,
> > > 20)
> > > > will be only once.
> > >
> > > I see. I can't find in Documentation/devicetree/bindings/numa.txt
> where
> > > it says that "from > to" nodes can be omitted. If it is not written
> > > down, then somebody could easily optimize it the opposite way:
> > >
> > >  			 distance-matrix = <0 0  10>,
> > >  					   <1 0  20>,
> > >  					   <2 0  40>,
> > >  					   <3 0  20>,
> > >  					   <1 1  10>,
> > >  					   <2 1  20>,
> > >  					   <3 1  40>,
> > >  					   <2 2  10>,
> > >  					   <3 2  20>,
> > >  					   <3 3  10>;
> > >
> >
> > Yes, you're right. Spec doesn't say opposite way is unallowed.
> >
> > > I think the code in Xen should be resilient and able to cope with a
> > > device tree like the one you wrote or the one I wrote. From a code
> > > perspective, it should be very easy to do. If nothing else it would
> make
> > > Xen more resilient against buggy firmware.
> > >
> > >
> >
> > I don't disagree with that.
> >
> > > > > But in any case, I have a different suggestion. The binding states
> > > that
> > > > > "distances are equal in either direction". Also it has an example
> > > where
> > > > > only one direction is expressed unfortunately (at the end of the
> > > > > document).
> > > > >
> > > >
> > > > Oh, I should see this comment first, then I will not post above
> > > > comment : )
> > > >
> > > > > So my suggestion is to parse it as follows:
> > > > >
> > > > > - call numa_set_distance just once from
> > > > >   device_tree_parse_numa_distance_map_v1
> > > > >
> > > > > - in numa_set_distance:
> > > > >     - set node_distance_map[from][to] = distance;
> > > > >     - check node_distance_map[to][from]
> > > > >           - if unset, node_distance_map[to][from] = distance;
> > > > >           - if already set to the same value, return success;
> > > > >           - if already set to a different value, return error;
> > > >
> > > > I don't really like this implementation. I want the behavior of
> > > > numa_set_distance just like the function name, do not include
> > > > implicit operations. Otherwise, except the user read this function
> > > > implementation before he use it, he probably doesn't know this
> > > > function has done so many things.
> > >
> > > You can leave numa_set_distance as-is without any implicit operations.
> > >
> > > In that case, just call numa_set_distance twice from numa_set_distance
> > > for both from/to and to/from. numa_set_distance could return error is
> >
> > I am OK for the first sentence. But...
> >
> > > the entry was already set to a different value or success otherwise
> > > (also in the case it was already set to the same value). This would
> >
> > ... I prefer not to check the previous value. Subsequent
> numa_set_distance
> > call will override previous calls. Keep numa_set_distance as simple as
> > it can. And when you pass new data to numa_set_distance, it doesn't
> > know whether the previous data was correct or the new data is correct.
> > Only caller may have known.
> 
> That might be OK but if not numa_set_distance then somebody else needs
> to check against overwriting previous values. That is to be able to spot
> bad device tree cases like:
> 
>   0 1 20
>   1 0 40


How about we check it still in NUMA distance parse function?
Before setting the numa_set_distance for one pair nodes (e.g. a -> b),
we can get its opposite way distance first.

distance_b_a = __node_distance(b, a); ==> get opposite way distance.
if (distance_b_a == 0) ==> opposite way distance has not been set
{
    numa_set_distance(a, b, 20); ==> set both
    numa_set_distance(b, a, 20)
} else {
    if (distance_b_a == 20) ==> opposite way distance has been set
       numa_set_distance(a, b, 20); ==> set this way only
    else ===> opposite way distance has been set, but is unmatched
       // What can we do here?
       Panic the system? or Just warning users? Or choose the bigger
       distance for both ways?
       
       And distance_b_a == NUMA_NO_DISTANCE would be a special case
       here.
}
Jan Beulich Sept. 2, 2021, 6 a.m. UTC | #9
On 01.09.2021 18:21, Stefano Stabellini wrote:
> On Wed, 1 Sep 2021, Wei Chen wrote:
>>> Stefano Stabellini
>>> Sent: 2021年9月1日 5:36
>>>
>>> On Tue, 31 Aug 2021, Wei Chen wrote:
>>>> I don't really like this implementation. I want the behavior of
>>>> numa_set_distance just like the function name, do not include
>>>> implicit operations. Otherwise, except the user read this function
>>>> implementation before he use it, he probably doesn't know this
>>>> function has done so many things.
>>>
>>> You can leave numa_set_distance as-is without any implicit operations.
>>>
>>> In that case, just call numa_set_distance twice from numa_set_distance
>>> for both from/to and to/from. numa_set_distance could return error is
>>
>> I am OK for the first sentence. But...
>>
>>> the entry was already set to a different value or success otherwise
>>> (also in the case it was already set to the same value). This would
>>
>> ... I prefer not to check the previous value. Subsequent numa_set_distance
>> call will override previous calls. Keep numa_set_distance as simple as
>> it can. And when you pass new data to numa_set_distance, it doesn't
>> know whether the previous data was correct or the new data is correct.
>> Only caller may have known.  
> 
> That might be OK but if not numa_set_distance then somebody else needs
> to check against overwriting previous values. That is to be able to spot
> bad device tree cases like:
> 
>   0 1 20
>   1 0 40

What's wrong with this? At least the ACPI spec cares to specifically
permit this case:

"Except for the relative distance from a System Locality to itself,
 each relative distance is stored twice in the matrix. This provides
 the capability to describe the scenario where the relative distances
 for the two directions between System Localities is different."

Jan
Wei Chen Sept. 2, 2021, 2:14 p.m. UTC | #10
Hi Jan,

> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: 2021年9月2日 14:00
> To: Stefano Stabellini <sstabellini@kernel.org>; Wei Chen
> <Wei.Chen@arm.com>
> Cc: xen-devel@lists.xenproject.org; julien@xen.org; Bertrand Marquis
> <Bertrand.Marquis@arm.com>
> Subject: Re: [XEN RFC PATCH 24/40] xen/arm: introduce a helper to parse
> device tree NUMA distance map
> 
> On 01.09.2021 18:21, Stefano Stabellini wrote:
> > On Wed, 1 Sep 2021, Wei Chen wrote:
> >>> Stefano Stabellini
> >>> Sent: 2021年9月1日 5:36
> >>>
> >>> On Tue, 31 Aug 2021, Wei Chen wrote:
> >>>> I don't really like this implementation. I want the behavior of
> >>>> numa_set_distance just like the function name, do not include
> >>>> implicit operations. Otherwise, except the user read this function
> >>>> implementation before he use it, he probably doesn't know this
> >>>> function has done so many things.
> >>>
> >>> You can leave numa_set_distance as-is without any implicit operations.
> >>>
> >>> In that case, just call numa_set_distance twice from numa_set_distance
> >>> for both from/to and to/from. numa_set_distance could return error is
> >>
> >> I am OK for the first sentence. But...
> >>
> >>> the entry was already set to a different value or success otherwise
> >>> (also in the case it was already set to the same value). This would
> >>
> >> ... I prefer not to check the previous value. Subsequent
> numa_set_distance
> >> call will override previous calls. Keep numa_set_distance as simple as
> >> it can. And when you pass new data to numa_set_distance, it doesn't
> >> know whether the previous data was correct or the new data is correct.
> >> Only caller may have known.
> >
> > That might be OK but if not numa_set_distance then somebody else needs
> > to check against overwriting previous values. That is to be able to spot
> > bad device tree cases like:
> >
> >   0 1 20
> >   1 0 40
> 
> What's wrong with this? At least the ACPI spec cares to specifically
> permit this case:
> 

This is because some notes description in device tree NUMA binding:
- distance-matrix
  This property defines a matrix to describe the relative distances
  between all numa nodes.
  It is represented as a list of node pairs and their relative distance.

  Note:
	1. Each entry represents distance from first node to second node.
	The distances are equal in either direction.
https://www.kernel.org/doc/Documentation/devicetree/bindings/numa.txt

So we treat this case is wrong in DT. But as you said ACPI allow such
case, that means real machines allow different distances for one pair
nodes in different directions.

> "Except for the relative distance from a System Locality to itself,
>  each relative distance is stored twice in the matrix. This provides
>  the capability to describe the scenario where the relative distances
>  for the two directions between System Localities is different."
> 
> Jan
Stefano Stabellini Sept. 2, 2021, 3:19 p.m. UTC | #11
On Thu, 2 Sep 2021, Wei Chen wrote:
> Hi Stefano,
> 
> > -----Original Message-----
> > From: Stefano Stabellini <sstabellini@kernel.org>
> > Sent: 2021年9月2日 0:22
> > To: Wei Chen <Wei.Chen@arm.com>
> > Cc: Stefano Stabellini <sstabellini@kernel.org>; xen-
> > devel@lists.xenproject.org; julien@xen.org; Bertrand Marquis
> > <Bertrand.Marquis@arm.com>
> > Subject: RE: [XEN RFC PATCH 24/40] xen/arm: introduce a helper to parse
> > device tree NUMA distance map
> > 
> > On Wed, 1 Sep 2021, Wei Chen wrote:
> > > Hi Stefano,
> > >
> > > > -----Original Message-----
> > > > From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of
> > > > Stefano Stabellini
> > > > Sent: 2021年9月1日 5:36
> > > > To: Wei Chen <Wei.Chen@arm.com>
> > > > Cc: Stefano Stabellini <sstabellini@kernel.org>; xen-
> > > > devel@lists.xenproject.org; julien@xen.org; Bertrand Marquis
> > > > <Bertrand.Marquis@arm.com>
> > > > Subject: RE: [XEN RFC PATCH 24/40] xen/arm: introduce a helper to
> > parse
> > > > device tree NUMA distance map
> > > >
> > > > On Tue, 31 Aug 2021, Wei Chen wrote:
> > > > > Hi Stefano,
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Stefano Stabellini <sstabellini@kernel.org>
> > > > > > Sent: 2021年8月31日 8:48
> > > > > > To: Wei Chen <Wei.Chen@arm.com>
> > > > > > Cc: xen-devel@lists.xenproject.org; sstabellini@kernel.org;
> > > > julien@xen.org;
> > > > > > jbeulich@suse.com; Bertrand Marquis <Bertrand.Marquis@arm.com>
> > > > > > Subject: Re: [XEN RFC PATCH 24/40] xen/arm: introduce a helper to
> > > > parse
> > > > > > device tree NUMA distance map
> > > > > >
> > > > > > On Wed, 11 Aug 2021, Wei Chen wrote:
> > > > > > > A NUMA aware device tree will provide a "distance-map" node to
> > > > > > > describe distance between any two nodes. This patch introduce a
> > > > > > > new helper to parse this distance map.
> > > > > > >
> > > > > > > Signed-off-by: Wei Chen <wei.chen@arm.com>
> > > > > > > ---
> > > > > > >  xen/arch/arm/numa_device_tree.c | 67
> > > > +++++++++++++++++++++++++++++++++
> > > > > > >  1 file changed, 67 insertions(+)
> > > > > > >
> > > > > > > diff --git a/xen/arch/arm/numa_device_tree.c
> > > > > > b/xen/arch/arm/numa_device_tree.c
> > > > > > > index bbe081dcd1..6e0d1d3d9f 100644
> > > > > > > --- a/xen/arch/arm/numa_device_tree.c
> > > > > > > +++ b/xen/arch/arm/numa_device_tree.c
> > > > > > > @@ -200,3 +200,70 @@ device_tree_parse_numa_memory_node(const
> > void
> > > > *fdt,
> > > > > > int node,
> > > > > > >
> > > > > > >      return 0;
> > > > > > >  }
> > > > > > > +
> > > > > > > +/* Parse NUMA distance map v1 */
> > > > > > > +int __init
> > > > > > > +device_tree_parse_numa_distance_map_v1(const void *fdt, int
> > node)
> > > > > > > +{
> > > > > > > +    const struct fdt_property *prop;
> > > > > > > +    const __be32 *matrix;
> > > > > > > +    int entry_count, len, i;
> > > > > > > +
> > > > > > > +    printk(XENLOG_INFO "NUMA: parsing numa-distance-map\n");
> > > > > > > +
> > > > > > > +    prop = fdt_get_property(fdt, node, "distance-matrix", &len);
> > > > > > > +    if ( !prop )
> > > > > > > +    {
> > > > > > > +        printk(XENLOG_WARNING
> > > > > > > +               "NUMA: No distance-matrix property in distance-
> > > > map\n");
> > > > > > > +
> > > > > > > +        return -EINVAL;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    if ( len % sizeof(uint32_t) != 0 )
> > > > > > > +    {
> > > > > > > +        printk(XENLOG_WARNING
> > > > > > > +               "distance-matrix in node is not a multiple of
> > > > u32\n");
> > > > > > > +        return -EINVAL;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    entry_count = len / sizeof(uint32_t);
> > > > > > > +    if ( entry_count <= 0 )
> > > > > > > +    {
> > > > > > > +        printk(XENLOG_WARNING "NUMA: Invalid distance-
> > matrix\n");
> > > > > > > +
> > > > > > > +        return -EINVAL;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    matrix = (const __be32 *)prop->data;
> > > > > > > +    for ( i = 0; i + 2 < entry_count; i += 3 )
> > > > > > > +    {
> > > > > > > +        uint32_t from, to, distance;
> > > > > > > +
> > > > > > > +        from = dt_read_number(matrix, 1);
> > > > > > > +        matrix++;
> > > > > > > +        to = dt_read_number(matrix, 1);
> > > > > > > +        matrix++;
> > > > > > > +        distance = dt_read_number(matrix, 1);
> > > > > > > +        matrix++;
> > > > > > > +
> > > > > > > +        if ( (from == to && distance != NUMA_LOCAL_DISTANCE) ||
> > > > > > > +            (from != to && distance <= NUMA_LOCAL_DISTANCE) )
> > > > > > > +        {
> > > > > > > +            printk(XENLOG_WARNING
> > > > > > > +                   "Invalid nodes' distance from node#%d to
> > node#%d
> > > > > > = %d\n",
> > > > > > > +                   from, to, distance);
> > > > > > > +            return -EINVAL;
> > > > > > > +        }
> > > > > > > +
> > > > > > > +        printk(XENLOG_INFO "NUMA: distance from node#%d to
> > node#%d
> > > > > > = %d\n",
> > > > > > > +               from, to, distance);
> > > > > > > +        numa_set_distance(from, to, distance);
> > > > > > > +
> > > > > > > +        /* Set default distance of node B->A same as A->B */
> > > > > > > +        if (to > from)
> > > > > > > +             numa_set_distance(to, from, distance);
> > > > > >
> > > > > > I am a bit unsure about this last 2 lines: why calling
> > > > numa_set_distance
> > > > > > in the opposite direction only when to > from? Wouldn't it be OK
> > to
> > > > > > always do both:
> > > > > >
> > > > > > numa_set_distance(from, to, distance);
> > > > > > numa_set_distance(to, from, distance);
> > > > > >
> > > > > > ?
> > > > > >
> > > > > I borrowed this code from Linux, but here is my understanding:
> > > > >
> > > > > First, I read some notes in
> > Documentation/devicetree/bindings/numa.txt
> > > > > 1. Each entry represents distance from first node to second node.
> > > > > The distances are equal in either direction.
> > > > > 2. distance-matrix should have entries in lexicographical ascending
> > > > > order of nodes.
> > > > >
> > > > > Here is an example of distance-map node in DTB:
> > > > > Sample#1, full list:
> > > > > 		distance-map {
> > > > > 			 compatible = "numa-distance-map-v1";
> > > > > 			 distance-matrix = <0 0  10>,
> > > > > 					   <0 1  20>,
> > > > > 					   <0 2  40>,
> > > > > 					   <0 3  20>,
> > > > > 					   <1 0  20>,
> > > > > 					   <1 1  10>,
> > > > > 					   <1 2  20>,
> > > > > 					   <1 3  40>,
> > > > > 					   <2 0  40>,
> > > > > 					   <2 1  20>,
> > > > > 					   <2 2  10>,
> > > > > 					   <2 3  20>,
> > > > > 					   <3 0  20>,
> > > > > 					   <3 1  40>,
> > > > > 					   <3 2  20>,
> > > > > 					   <3 3  10>;
> > > > > 		};
> > > > >
> > > > > Call numa_set_distance when "to > from" will prevent Xen to call
> > > > > numa_set_distance(0, 1, 20) again when it's setting distance for <1
> > 0
> > > > 20>.
> > > > > But, numa_set_distance(1, 0, 20) will be call twice.
> > > > >
> > > > > Normally, distance-map node will be optimized in following sample#2,
> > > > > all redundant entries are removed:
> > > > > Sample#2, partial list:
> > > > > 		distance-map {
> > > > > 			 compatible = "numa-distance-map-v1";
> > > > > 			 distance-matrix = <0 0  10>,
> > > > > 					   <0 1  20>,
> > > > > 					   <0 2  40>,
> > > > > 					   <0 3  20>,
> > > > > 					   <1 1  10>,
> > > > > 					   <1 2  20>,
> > > > > 					   <1 3  40>,
> > > > > 					   <2 2  10>,
> > > > > 					   <2 3  20>,
> > > > > 					   <3 3  10>;
> > > > > 		};
> > > > >
> > > > > There is not any "from > to" entry in the map. But using this
> > partial
> > > > map
> > > > > still can set all distances for all pairs. And numa_set_distance(1,
> > 0,
> > > > 20)
> > > > > will be only once.
> > > >
> > > > I see. I can't find in Documentation/devicetree/bindings/numa.txt
> > where
> > > > it says that "from > to" nodes can be omitted. If it is not written
> > > > down, then somebody could easily optimize it the opposite way:
> > > >
> > > >  			 distance-matrix = <0 0  10>,
> > > >  					   <1 0  20>,
> > > >  					   <2 0  40>,
> > > >  					   <3 0  20>,
> > > >  					   <1 1  10>,
> > > >  					   <2 1  20>,
> > > >  					   <3 1  40>,
> > > >  					   <2 2  10>,
> > > >  					   <3 2  20>,
> > > >  					   <3 3  10>;
> > > >
> > >
> > > Yes, you're right. Spec doesn't say opposite way is unallowed.
> > >
> > > > I think the code in Xen should be resilient and able to cope with a
> > > > device tree like the one you wrote or the one I wrote. From a code
> > > > perspective, it should be very easy to do. If nothing else it would
> > make
> > > > Xen more resilient against buggy firmware.
> > > >
> > > >
> > >
> > > I don't disagree with that.
> > >
> > > > > > But in any case, I have a different suggestion. The binding states
> > > > that
> > > > > > "distances are equal in either direction". Also it has an example
> > > > where
> > > > > > only one direction is expressed unfortunately (at the end of the
> > > > > > document).
> > > > > >
> > > > >
> > > > > Oh, I should see this comment first, then I will not post above
> > > > > comment : )
> > > > >
> > > > > > So my suggestion is to parse it as follows:
> > > > > >
> > > > > > - call numa_set_distance just once from
> > > > > >   device_tree_parse_numa_distance_map_v1
> > > > > >
> > > > > > - in numa_set_distance:
> > > > > >     - set node_distance_map[from][to] = distance;
> > > > > >     - check node_distance_map[to][from]
> > > > > >           - if unset, node_distance_map[to][from] = distance;
> > > > > >           - if already set to the same value, return success;
> > > > > >           - if already set to a different value, return error;
> > > > >
> > > > > I don't really like this implementation. I want the behavior of
> > > > > numa_set_distance just like the function name, do not include
> > > > > implicit operations. Otherwise, except the user read this function
> > > > > implementation before he use it, he probably doesn't know this
> > > > > function has done so many things.
> > > >
> > > > You can leave numa_set_distance as-is without any implicit operations.
> > > >
> > > > In that case, just call numa_set_distance twice from numa_set_distance
> > > > for both from/to and to/from. numa_set_distance could return error is
> > >
> > > I am OK for the first sentence. But...
> > >
> > > > the entry was already set to a different value or success otherwise
> > > > (also in the case it was already set to the same value). This would
> > >
> > > ... I prefer not to check the previous value. Subsequent
> > numa_set_distance
> > > call will override previous calls. Keep numa_set_distance as simple as
> > > it can. And when you pass new data to numa_set_distance, it doesn't
> > > know whether the previous data was correct or the new data is correct.
> > > Only caller may have known.
> > 
> > That might be OK but if not numa_set_distance then somebody else needs
> > to check against overwriting previous values. That is to be able to spot
> > bad device tree cases like:
> > 
> >   0 1 20
> >   1 0 40
> 
> 
> How about we check it still in NUMA distance parse function?
> Before setting the numa_set_distance for one pair nodes (e.g. a -> b),
> we can get its opposite way distance first.
> 
> distance_b_a = __node_distance(b, a); ==> get opposite way distance.
> if (distance_b_a == 0) ==> opposite way distance has not been set
> {
>     numa_set_distance(a, b, 20); ==> set both
>     numa_set_distance(b, a, 20)
> } else {
>     if (distance_b_a == 20) ==> opposite way distance has been set
>        numa_set_distance(a, b, 20); ==> set this way only
>     else ===> opposite way distance has been set, but is unmatched
>        // What can we do here?
>        Panic the system? or Just warning users? Or choose the bigger
>        distance for both ways?
>        
>        And distance_b_a == NUMA_NO_DISTANCE would be a special case
>        here.
> }

Yes, I think something like this would work. If we detect an error in
device tree I would probably print a warning and disable NUMA. It would
also be acceptable to panic.
diff mbox series

Patch

diff --git a/xen/arch/arm/numa_device_tree.c b/xen/arch/arm/numa_device_tree.c
index bbe081dcd1..6e0d1d3d9f 100644
--- a/xen/arch/arm/numa_device_tree.c
+++ b/xen/arch/arm/numa_device_tree.c
@@ -200,3 +200,70 @@  device_tree_parse_numa_memory_node(const void *fdt, int node,
 
     return 0;
 }
+
+/* Parse NUMA distance map v1 */
+int __init
+device_tree_parse_numa_distance_map_v1(const void *fdt, int node)
+{
+    const struct fdt_property *prop;
+    const __be32 *matrix;
+    int entry_count, len, i;
+
+    printk(XENLOG_INFO "NUMA: parsing numa-distance-map\n");
+
+    prop = fdt_get_property(fdt, node, "distance-matrix", &len);
+    if ( !prop )
+    {
+        printk(XENLOG_WARNING
+               "NUMA: No distance-matrix property in distance-map\n");
+
+        return -EINVAL;
+    }
+
+    if ( len % sizeof(uint32_t) != 0 )
+    {
+        printk(XENLOG_WARNING
+               "distance-matrix in node is not a multiple of u32\n");
+        return -EINVAL;
+    }
+
+    entry_count = len / sizeof(uint32_t);
+    if ( entry_count <= 0 )
+    {
+        printk(XENLOG_WARNING "NUMA: Invalid distance-matrix\n");
+
+        return -EINVAL;
+    }
+
+    matrix = (const __be32 *)prop->data;
+    for ( i = 0; i + 2 < entry_count; i += 3 )
+    {
+        uint32_t from, to, distance;
+
+        from = dt_read_number(matrix, 1);
+        matrix++;
+        to = dt_read_number(matrix, 1);
+        matrix++;
+        distance = dt_read_number(matrix, 1);
+        matrix++;
+
+        if ( (from == to && distance != NUMA_LOCAL_DISTANCE) ||
+            (from != to && distance <= NUMA_LOCAL_DISTANCE) )
+        {
+            printk(XENLOG_WARNING
+                   "Invalid nodes' distance from node#%d to node#%d = %d\n",
+                   from, to, distance);
+            return -EINVAL;
+        }
+
+        printk(XENLOG_INFO "NUMA: distance from node#%d to node#%d = %d\n",
+               from, to, distance);
+        numa_set_distance(from, to, distance);
+
+        /* Set default distance of node B->A same as A->B */
+        if (to > from)
+             numa_set_distance(to, from, distance);
+    }
+
+    return 0;
+}