Message ID | 20230110084930.1095203-4-wei.chen@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Device tree based NUMA support for Arm - Part#3 | expand |
On 10.01.2023 09:49, Wei Chen wrote: > --- a/xen/arch/arm/include/asm/numa.h > +++ b/xen/arch/arm/include/asm/numa.h > @@ -28,6 +28,20 @@ enum dt_numa_status { > DT_NUMA_OFF, > }; > > +/* > + * In ACPI spec, 0-9 are the reserved values for node distance, > + * 10 indicates local node distance, 20 indicates remote node > + * distance. Set node distance map in device tree will follow > + * the ACPI's definition. > + */ > +#define NUMA_DISTANCE_UDF_MIN 0 > +#define NUMA_DISTANCE_UDF_MAX 9 > +#define NUMA_LOCAL_DISTANCE 10 > +#define NUMA_REMOTE_DISTANCE 20 In the absence of a caller of numa_set_distance() it is entirely unclear whether this tying to ACPI used values is actually appropriate. > --- a/xen/arch/arm/numa.c > +++ b/xen/arch/arm/numa.c > @@ -2,7 +2,7 @@ > /* > * Arm Architecture support layer for NUMA. > * > - * Copyright (C) 2021 Arm Ltd > + * Copyright (C) 2022 Arm Ltd I don't think it makes sense to change such after the fact. And certainly not in an unrelated patch. > @@ -22,6 +22,11 @@ > > static enum dt_numa_status __read_mostly device_tree_numa; > > +static unsigned char __read_mostly > +node_distance_map[MAX_NUMNODES][MAX_NUMNODES] = { > + { 0 } > +}; __ro_after_init? > @@ -42,3 +47,48 @@ int __init arch_numa_setup(const char *opt) > { > return -EINVAL; > } > + > +void __init numa_set_distance(nodeid_t from, nodeid_t to, > + unsigned int distance) > +{ > + if ( from >= MAX_NUMNODES || to >= MAX_NUMNODES ) > + { > + printk(KERN_WARNING > + "NUMA: invalid nodes: from=%"PRIu8" to=%"PRIu8" MAX=%"PRIu8"\n", > + from, to, MAX_NUMNODES); > + return; > + } > + > + /* NUMA defines 0xff as an unreachable node and 0-9 are undefined */ > + if ( distance >= NUMA_NO_DISTANCE || > + (distance >= NUMA_DISTANCE_UDF_MIN && Nit: Indentation. > + distance <= NUMA_DISTANCE_UDF_MAX) || > + (from == to && distance != NUMA_LOCAL_DISTANCE) ) > + { > + printk(KERN_WARNING > + "NUMA: invalid distance: from=%"PRIu8" to=%"PRIu8" distance=%"PRIu32"\n", > + from, to, distance); > + return; > + } > + > + node_distance_map[from][to] = distance; > +} > + > +unsigned char __node_distance(nodeid_t from, nodeid_t to) > +{ > + /* When NUMA is off, any distance will be treated as remote. */ > + if ( numa_disabled() ) > + return NUMA_REMOTE_DISTANCE; > + > + /* > + * Check whether the nodes are in the matrix range. > + * When any node is out of range, except from and to nodes are the > + * same, we treat them as unreachable (return 0xFF) > + */ > + if ( from >= MAX_NUMNODES || to >= MAX_NUMNODES ) I guess using ARRAY_SIZE() here would be more future-proof. Jan
Hi Jan, > -----Original Message----- > From: Jan Beulich <jbeulich@suse.com> > Sent: 2023年1月11日 0:47 > To: Wei Chen <Wei.Chen@arm.com> > Cc: nd <nd@arm.com>; Stefano Stabellini <sstabellini@kernel.org>; Julien > Grall <julien@xen.org>; Bertrand Marquis <Bertrand.Marquis@arm.com>; > Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>; Andrew Cooper > <andrew.cooper3@citrix.com>; George Dunlap <george.dunlap@citrix.com>; Wei > Liu <wl@xen.org>; Roger Pau Monné <roger.pau@citrix.com>; xen- > devel@lists.xenproject.org > Subject: Re: [PATCH v2 03/17] xen/arm: implement node distance helpers for > Arm > > On 10.01.2023 09:49, Wei Chen wrote: > > --- a/xen/arch/arm/include/asm/numa.h > > +++ b/xen/arch/arm/include/asm/numa.h > > @@ -28,6 +28,20 @@ enum dt_numa_status { > > DT_NUMA_OFF, > > }; > > > > +/* > > + * In ACPI spec, 0-9 are the reserved values for node distance, > > + * 10 indicates local node distance, 20 indicates remote node > > + * distance. Set node distance map in device tree will follow > > + * the ACPI's definition. > > + */ > > +#define NUMA_DISTANCE_UDF_MIN 0 > > +#define NUMA_DISTANCE_UDF_MAX 9 > > +#define NUMA_LOCAL_DISTANCE 10 > > +#define NUMA_REMOTE_DISTANCE 20 > > In the absence of a caller of numa_set_distance() it is entirely unclear > whether this tying to ACPI used values is actually appropriate. > From Kernel's NUMA device tree binding, it seems DT NUMA are reusing ACPI used values for distances [1]. > > --- a/xen/arch/arm/numa.c > > +++ b/xen/arch/arm/numa.c > > @@ -2,7 +2,7 @@ > > /* > > * Arm Architecture support layer for NUMA. > > * > > - * Copyright (C) 2021 Arm Ltd > > + * Copyright (C) 2022 Arm Ltd > > I don't think it makes sense to change such after the fact. And certainly > not in an unrelated patch. > I will retore it, and add a SPDX header. > > @@ -22,6 +22,11 @@ > > > > static enum dt_numa_status __read_mostly device_tree_numa; > > > > +static unsigned char __read_mostly > > +node_distance_map[MAX_NUMNODES][MAX_NUMNODES] = { > > + { 0 } > > +}; > > __ro_after_init? > Yes. > > @@ -42,3 +47,48 @@ int __init arch_numa_setup(const char *opt) > > { > > return -EINVAL; > > } > > + > > +void __init numa_set_distance(nodeid_t from, nodeid_t to, > > + unsigned int distance) > > +{ > > + if ( from >= MAX_NUMNODES || to >= MAX_NUMNODES ) > > + { > > + printk(KERN_WARNING > > + "NUMA: invalid nodes: from=%"PRIu8" to=%"PRIu8" > MAX=%"PRIu8"\n", > > + from, to, MAX_NUMNODES); > > + return; > > + } > > + > > + /* NUMA defines 0xff as an unreachable node and 0-9 are undefined > */ > > + if ( distance >= NUMA_NO_DISTANCE || > > + (distance >= NUMA_DISTANCE_UDF_MIN && > > Nit: Indentation. > Ok. > > + distance <= NUMA_DISTANCE_UDF_MAX) || > > + (from == to && distance != NUMA_LOCAL_DISTANCE) ) > > + { > > + printk(KERN_WARNING > > + "NUMA: invalid distance: from=%"PRIu8" to=%"PRIu8" > distance=%"PRIu32"\n", > > + from, to, distance); > > + return; > > + } > > + > > + node_distance_map[from][to] = distance; > > +} > > + > > +unsigned char __node_distance(nodeid_t from, nodeid_t to) > > +{ > > + /* When NUMA is off, any distance will be treated as remote. */ > > + if ( numa_disabled() ) > > + return NUMA_REMOTE_DISTANCE; > > + > > + /* > > + * Check whether the nodes are in the matrix range. > > + * When any node is out of range, except from and to nodes are the > > + * same, we treat them as unreachable (return 0xFF) > > + */ > > + if ( from >= MAX_NUMNODES || to >= MAX_NUMNODES ) > > I guess using ARRAY_SIZE() here would be more future-proof. > I will use it in next version. [1]https://www.kernel.org/doc/Documentation/devicetree/bindings/numa.txt Thanks, Wei Chen > Jan
On 12.01.2023 07:31, Wei Chen wrote: >> -----Original Message----- >> From: Jan Beulich <jbeulich@suse.com> >> Sent: 2023年1月11日 0:47 >> >> On 10.01.2023 09:49, Wei Chen wrote: >>> --- a/xen/arch/arm/include/asm/numa.h >>> +++ b/xen/arch/arm/include/asm/numa.h >>> @@ -28,6 +28,20 @@ enum dt_numa_status { >>> DT_NUMA_OFF, >>> }; >>> >>> +/* >>> + * In ACPI spec, 0-9 are the reserved values for node distance, >>> + * 10 indicates local node distance, 20 indicates remote node >>> + * distance. Set node distance map in device tree will follow >>> + * the ACPI's definition. >>> + */ >>> +#define NUMA_DISTANCE_UDF_MIN 0 >>> +#define NUMA_DISTANCE_UDF_MAX 9 >>> +#define NUMA_LOCAL_DISTANCE 10 >>> +#define NUMA_REMOTE_DISTANCE 20 >> >> In the absence of a caller of numa_set_distance() it is entirely unclear >> whether this tying to ACPI used values is actually appropriate. >> > > From Kernel's NUMA device tree binding, it seems DT NUMA are reusing > ACPI used values for distances [1]. I can't find any mention of ACPI in that doc, so the example values used there matching ACPI's may also be coincidental. In no event can a Linux kernel doc serve as DT specification. If values are to match ACPI's, I expect a DT spec to actually say so. Jan
Hi Jan, > -----Original Message----- > From: Jan Beulich <jbeulich@suse.com> > Sent: 2023年1月12日 16:11 > To: Wei Chen <Wei.Chen@arm.com> > Cc: nd <nd@arm.com>; Stefano Stabellini <sstabellini@kernel.org>; Julien > Grall <julien@xen.org>; Bertrand Marquis <Bertrand.Marquis@arm.com>; > Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>; Andrew Cooper > <andrew.cooper3@citrix.com>; George Dunlap <george.dunlap@citrix.com>; Wei > Liu <wl@xen.org>; Roger Pau Monné <roger.pau@citrix.com>; xen- > devel@lists.xenproject.org > Subject: Re: [PATCH v2 03/17] xen/arm: implement node distance helpers for > Arm > > On 12.01.2023 07:31, Wei Chen wrote: > >> -----Original Message----- > >> From: Jan Beulich <jbeulich@suse.com> > >> Sent: 2023年1月11日 0:47 > >> > >> On 10.01.2023 09:49, Wei Chen wrote: > >>> --- a/xen/arch/arm/include/asm/numa.h > >>> +++ b/xen/arch/arm/include/asm/numa.h > >>> @@ -28,6 +28,20 @@ enum dt_numa_status { > >>> DT_NUMA_OFF, > >>> }; > >>> > >>> +/* > >>> + * In ACPI spec, 0-9 are the reserved values for node distance, > >>> + * 10 indicates local node distance, 20 indicates remote node > >>> + * distance. Set node distance map in device tree will follow > >>> + * the ACPI's definition. > >>> + */ > >>> +#define NUMA_DISTANCE_UDF_MIN 0 > >>> +#define NUMA_DISTANCE_UDF_MAX 9 > >>> +#define NUMA_LOCAL_DISTANCE 10 > >>> +#define NUMA_REMOTE_DISTANCE 20 > >> > >> In the absence of a caller of numa_set_distance() it is entirely > unclear > >> whether this tying to ACPI used values is actually appropriate. > >> > > > > From Kernel's NUMA device tree binding, it seems DT NUMA are reusing > > ACPI used values for distances [1]. > > I can't find any mention of ACPI in that doc, so the example values used > there matching ACPI's may also be coincidental. In no event can a Linux > kernel doc serve as DT specification. If values are to match ACPI's, I > expect a DT spec to actually say so. > Unfortunately, the latest device tree spec doesn't have any NUMA description [1]. But if we define different values for DT NUMA in Xen, we may have an incompatible with Linux DT. [1] https://github.com/devicetree-org/devicetree-specification/releases/download/v0.4-rc1/devicetree-specification-v0.4-rc1.pdf Cheers, Wei Chen > Jan
Hi, On 12/01/2023 08:11, Jan Beulich wrote: > On 12.01.2023 07:31, Wei Chen wrote: >>> -----Original Message----- >>> From: Jan Beulich <jbeulich@suse.com> >>> Sent: 2023年1月11日 0:47 >>> >>> On 10.01.2023 09:49, Wei Chen wrote: >>>> --- a/xen/arch/arm/include/asm/numa.h >>>> +++ b/xen/arch/arm/include/asm/numa.h >>>> @@ -28,6 +28,20 @@ enum dt_numa_status { >>>> DT_NUMA_OFF, >>>> }; >>>> >>>> +/* >>>> + * In ACPI spec, 0-9 are the reserved values for node distance, >>>> + * 10 indicates local node distance, 20 indicates remote node >>>> + * distance. Set node distance map in device tree will follow >>>> + * the ACPI's definition. Looking at the ACPI spec, I agree that the local node distance is defined to 10. But I couldn't find any mention of the value 20. How is NUMA_REMOTE_DISTANCE is meant to be used? Is this a default value? If so, maybe we should add "DEFAULT" in the name. >>>> + */ >>>> +#define NUMA_DISTANCE_UDF_MIN 0 >>>> +#define NUMA_DISTANCE_UDF_MAX 9 >>>> +#define NUMA_LOCAL_DISTANCE 10 >>>> +#define NUMA_REMOTE_DISTANCE 20 >>> >>> In the absence of a caller of numa_set_distance() it is entirely unclear >>> whether this tying to ACPI used values is actually appropriate. >>> >> >> From Kernel's NUMA device tree binding, it seems DT NUMA are reusing >> ACPI used values for distances [1]. > > I can't find any mention of ACPI in that doc, so the example values used > there matching ACPI's may also be coincidental. In no event can a Linux > kernel doc serve as DT specification. The directory Documentation/devicetree is the de-facto place where all the bindings are described. This is used by most (to not say all) users. I vaguely remember there were plan in the past to move the bindings out of the kernel. But it looks like this has not been done. Yet, they tend to be reviewed independently from the kernel. So, as Wei pointed, if we don't follow them then we will not be able to re-use Device-Tree shipped. > If values are to match ACPI's, I > expect a DT spec to actually say so. I don't think it is necessary to say that. Bindings are not meant to change and a developer can rely on the local distance value to not change with the current bindings. So IMHO, it is OK to assume that the local distance value is the same between ACPI and DT. That would definitely simplify the parsing and code. Cheers,
Hi Julien, On 2023/1/12 17:47, Julien Grall wrote: > Hi, > > On 12/01/2023 08:11, Jan Beulich wrote: >> On 12.01.2023 07:31, Wei Chen wrote: >>>> -----Original Message----- >>>> From: Jan Beulich <jbeulich@suse.com> >>>> Sent: 2023年1月11日 0:47 >>>> >>>> On 10.01.2023 09:49, Wei Chen wrote: >>>>> --- a/xen/arch/arm/include/asm/numa.h >>>>> +++ b/xen/arch/arm/include/asm/numa.h >>>>> @@ -28,6 +28,20 @@ enum dt_numa_status { >>>>> DT_NUMA_OFF, >>>>> }; >>>>> >>>>> +/* >>>>> + * In ACPI spec, 0-9 are the reserved values for node distance, >>>>> + * 10 indicates local node distance, 20 indicates remote node >>>>> + * distance. Set node distance map in device tree will follow >>>>> + * the ACPI's definition. > > Looking at the ACPI spec, I agree that the local node distance is > defined to 10. But I couldn't find any mention of the value 20. > > How is NUMA_REMOTE_DISTANCE is meant to be used? Is this a default > value? If so, maybe we should add "DEFAULT" in the name. > I think you're right, maybe we can rename NUMA_REMOTE_DISTANCE to NUMA_DEFAULT_DISTANCE.Different pairs of nodes may have different remote distance values, I can not define one value for them. And why use 20 as default value, I have followed the ACPI's logic. In ACPI NUMA, when ACPI doesn't provide SLIT table, 20 will be used for all nodes default distance. >>>>> + */ >>>>> +#define NUMA_DISTANCE_UDF_MIN 0 >>>>> +#define NUMA_DISTANCE_UDF_MAX 9 >>>>> +#define NUMA_LOCAL_DISTANCE 10 >>>>> +#define NUMA_REMOTE_DISTANCE 20 >>>> >>>> In the absence of a caller of numa_set_distance() it is entirely >>>> unclear >>>> whether this tying to ACPI used values is actually appropriate. >>>> >>> >>> From Kernel's NUMA device tree binding, it seems DT NUMA are reusing >>> ACPI used values for distances [1]. >> >> I can't find any mention of ACPI in that doc, so the example values used >> there matching ACPI's may also be coincidental. In no event can a Linux >> kernel doc serve as DT specification. > > The directory Documentation/devicetree is the de-facto place where all > the bindings are described. This is used by most (to not say all) users. > > I vaguely remember there were plan in the past to move the bindings out > of the kernel. But it looks like this has not been done. Yet, they tend > to be reviewed independently from the kernel. > > So, as Wei pointed, if we don't follow them then we will not be able to > re-use Device-Tree shipped. > >> If values are to match ACPI's, I >> expect a DT spec to actually say so. > I don't think it is necessary to say that. Bindings are not meant to > change and a developer can rely on the local distance value to not > change with the current bindings. > > So IMHO, it is OK to assume that the local distance value is the same > between ACPI and DT. That would definitely simplify the parsing and code. > Thanks for clarifying this. Cheers, Wei Chen > Cheers, >
diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile index 4d076b278b..9073398d6e 100644 --- a/xen/arch/arm/Makefile +++ b/xen/arch/arm/Makefile @@ -38,6 +38,7 @@ obj-$(CONFIG_LIVEPATCH) += livepatch.o obj-y += mem_access.o obj-y += mm.o obj-y += monitor.o +obj-$(CONFIG_NUMA) += numa.o obj-y += p2m.o obj-y += percpu.o obj-y += platform.o diff --git a/xen/arch/arm/include/asm/numa.h b/xen/arch/arm/include/asm/numa.h index 52ca414e47..dbdb632711 100644 --- a/xen/arch/arm/include/asm/numa.h +++ b/xen/arch/arm/include/asm/numa.h @@ -28,6 +28,20 @@ enum dt_numa_status { DT_NUMA_OFF, }; +/* + * In ACPI spec, 0-9 are the reserved values for node distance, + * 10 indicates local node distance, 20 indicates remote node + * distance. Set node distance map in device tree will follow + * the ACPI's definition. + */ +#define NUMA_DISTANCE_UDF_MIN 0 +#define NUMA_DISTANCE_UDF_MAX 9 +#define NUMA_LOCAL_DISTANCE 10 +#define NUMA_REMOTE_DISTANCE 20 + +extern void numa_set_distance(nodeid_t from, nodeid_t to, + unsigned int distance); + #else /* Fake one node for now. See also node_online_map. */ diff --git a/xen/arch/arm/numa.c b/xen/arch/arm/numa.c index 1c02b6a25d..34851ceacf 100644 --- a/xen/arch/arm/numa.c +++ b/xen/arch/arm/numa.c @@ -2,7 +2,7 @@ /* * Arm Architecture support layer for NUMA. * - * Copyright (C) 2021 Arm Ltd + * Copyright (C) 2022 Arm Ltd * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -22,6 +22,11 @@ static enum dt_numa_status __read_mostly device_tree_numa; +static unsigned char __read_mostly +node_distance_map[MAX_NUMNODES][MAX_NUMNODES] = { + { 0 } +}; + void __init numa_fw_bad(void) { printk(KERN_ERR "NUMA: device tree numa info table not used.\n"); @@ -42,3 +47,48 @@ int __init arch_numa_setup(const char *opt) { return -EINVAL; } + +void __init numa_set_distance(nodeid_t from, nodeid_t to, + unsigned int distance) +{ + if ( from >= MAX_NUMNODES || to >= MAX_NUMNODES ) + { + printk(KERN_WARNING + "NUMA: invalid nodes: from=%"PRIu8" to=%"PRIu8" MAX=%"PRIu8"\n", + from, to, MAX_NUMNODES); + return; + } + + /* NUMA defines 0xff as an unreachable node and 0-9 are undefined */ + if ( distance >= NUMA_NO_DISTANCE || + (distance >= NUMA_DISTANCE_UDF_MIN && + distance <= NUMA_DISTANCE_UDF_MAX) || + (from == to && distance != NUMA_LOCAL_DISTANCE) ) + { + printk(KERN_WARNING + "NUMA: invalid distance: from=%"PRIu8" to=%"PRIu8" distance=%"PRIu32"\n", + from, to, distance); + return; + } + + node_distance_map[from][to] = distance; +} + +unsigned char __node_distance(nodeid_t from, nodeid_t to) +{ + /* When NUMA is off, any distance will be treated as remote. */ + if ( numa_disabled() ) + return NUMA_REMOTE_DISTANCE; + + /* + * Check whether the nodes are in the matrix range. + * When any node is out of range, except from and to nodes are the + * same, we treat them as unreachable (return 0xFF) + */ + if ( from >= MAX_NUMNODES || to >= MAX_NUMNODES ) + return from == to ? NUMA_LOCAL_DISTANCE : NUMA_NO_DISTANCE; + + return node_distance_map[from][to]; +} + +EXPORT_SYMBOL(__node_distance); diff --git a/xen/arch/x86/include/asm/numa.h b/xen/arch/x86/include/asm/numa.h index 61efe60a95..18b71ddfef 100644 --- a/xen/arch/x86/include/asm/numa.h +++ b/xen/arch/x86/include/asm/numa.h @@ -21,7 +21,6 @@ extern void init_cpu_to_node(void); #define arch_want_default_dmazone() (num_online_nodes() > 1) void srat_parse_regions(paddr_t addr); -extern u8 __node_distance(nodeid_t a, nodeid_t b); unsigned int arch_get_dma_bitsize(void); #endif diff --git a/xen/arch/x86/srat.c b/xen/arch/x86/srat.c index 56749ddca5..50faf5d352 100644 --- a/xen/arch/x86/srat.c +++ b/xen/arch/x86/srat.c @@ -328,7 +328,7 @@ unsigned int numa_node_to_arch_nid(nodeid_t n) return 0; } -u8 __node_distance(nodeid_t a, nodeid_t b) +unsigned char __node_distance(nodeid_t a, nodeid_t b) { unsigned index; u8 slit_val; diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h index 7d7aeb3a3c..cff4fb8ccc 100644 --- a/xen/include/xen/numa.h +++ b/xen/include/xen/numa.h @@ -115,6 +115,7 @@ extern bool numa_memblks_available(void); extern bool numa_update_node_memblks(nodeid_t node, unsigned int arch_nid, paddr_t start, paddr_t size, bool hotplug); extern void numa_set_processor_nodes_parsed(nodeid_t node); +extern unsigned char __node_distance(nodeid_t a, nodeid_t b); #else
We will parse NUMA nodes distances from device tree. So we need a matrix to record the distances between any two nodes we parsed. Accordingly, we provide this node_set_distance API for device tree NUMA to set the distance for any two nodes in this patch. When NUMA initialization failed, __node_distance will return NUMA_REMOTE_DISTANCE, this will help us avoid doing rollback for distance maxtrix when NUMA initialization failed. As both x86 and Arm have implemented __node_distance, so we move its definition from asm/numa.h to xen/numa.h. At same time, the outdated u8 return value of x86 has been changed to unsigned char. Signed-off-by: Wei Chen <wei.chen@arm.com> --- v1 -> v2: 1. Use unsigned int/char instead of uint32_t/u8. 2. Re-org the commit message. --- xen/arch/arm/Makefile | 1 + xen/arch/arm/include/asm/numa.h | 14 +++++++++ xen/arch/arm/numa.c | 52 ++++++++++++++++++++++++++++++++- xen/arch/x86/include/asm/numa.h | 1 - xen/arch/x86/srat.c | 2 +- xen/include/xen/numa.h | 1 + 6 files changed, 68 insertions(+), 3 deletions(-)