Message ID | 1500378106-2620-15-git-send-email-vijay.kilari@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Vijay, On 18/07/17 12:41, vijay.kilari@gmail.com wrote: > From: Vijaya Kumar K <Vijaya.Kumar@cavium.com> > > Parse distance-matrix and fetch node distance information. > Store distance information in node_distance[]. > > Register dt_node_distance() function pointer with > the ARM numa code. This approach can be later used for > ACPI. After my comment on v1, I was expecting to see a link to the binding in the commit message... > > Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com> > --- > v3: - Moved __node_distance() declaration to common > header file > - Use device_tree_node_compatible() instead of > device_tree_node_matches() > - Dropped xen/errno.h inclusion > --- > xen/arch/arm/numa/dt_numa.c | 131 ++++++++++++++++++++++++++++++++++++++++++++ > xen/arch/arm/numa/numa.c | 22 ++++++++ > xen/include/asm-arm/numa.h | 2 + > xen/include/asm-x86/numa.h | 1 - > xen/include/xen/numa.h | 3 + > 5 files changed, 158 insertions(+), 1 deletion(-) > > diff --git a/xen/arch/arm/numa/dt_numa.c b/xen/arch/arm/numa/dt_numa.c > index 84030e7..46c0346 100644 > --- a/xen/arch/arm/numa/dt_numa.c > +++ b/xen/arch/arm/numa/dt_numa.c > @@ -23,6 +23,48 @@ > #include <xen/numa.h> > #include <asm/setup.h> > > +static uint8_t node_distance[MAX_NUMNODES][MAX_NUMNODES]; On v1, you said that you will look at allocating node_distance on the fly. So why it is not done? You can give a look at alloc_boot_pages(...). > + > +static uint8_t dt_node_distance(nodeid_t nodea, nodeid_t nodeb) > +{ > + if ( nodea >= MAX_NUMNODES || nodeb >= MAX_NUMNODES ) > + return nodea == nodeb ? LOCAL_DISTANCE : REMOTE_DISTANCE; Do we really expect dt_node_distance to be called with wrong node? Looking at the ACPI code, they don't check that... So likely this should be an ASSERT(...). > + > + return node_distance[nodea][nodeb]; > +} > + > +static int dt_numa_set_distance(uint32_t nodea, uint32_t nodeb, I think this should be __init. > + uint32_t distance) > +{ > + /* node_distance is uint8_t. Ensure distance is less than 255 */ > + if ( nodea >= MAX_NUMNODES || nodeb >= MAX_NUMNODES || distance > 255 ) > + return -EINVAL; > + > + node_distance[nodea][nodeb] = distance; > + > + return 0; > +} > + > +void init_dt_numa_distance(void) Ditto. > +{ > + int i, j; > + > + for ( i = 0; i < MAX_NUMNODES; i++ ) > + { > + for ( j = 0; j < MAX_NUMNODES; j++ ) > + { > + /* > + * Initialize distance 10 for local distance and > + * 20 for remote distance. > + */ > + if ( i == j ) > + node_distance[i][j] = LOCAL_DISTANCE; > + else > + node_distance[i][j] = REMOTE_DISTANCE; > + } > + } > +} > + > /* > * Even though we connect cpus to numa domains later in SMP > * init, we need to know the node ids now for all cpus. > @@ -58,6 +100,76 @@ static int __init dt_numa_process_cpu_node(const void *fdt) > return 0; > } > > +static int __init dt_numa_parse_distance_map(const void *fdt, int node, > + const char *name, > + uint32_t address_cells, > + uint32_t size_cells) > +{ > + const struct fdt_property *prop; > + const __be32 *matrix; > + int entry_count, len, i; > + > + printk(XENLOG_INFO "NUMA: parsing numa-distance-map\n"); > + > + prop = fdt_get_property(fdt, node, "distance-matrix", &len); > + if ( !prop ) > + { > + printk(XENLOG_WARNING s/XENLOG_WARNING/XENLOG_INFO/ because numa-distance-map is not mandatory. > + "NUMA: No distance-matrix property in distance-map\n"); > + > + return -EINVAL; If I am reading correctly the binding, the distance-matrix is not mandatory. If it is not present, you should use a default matrix. But here you will disable NUMA completely. > + } > + > + if ( len % sizeof(uint32_t) != 0 ) > + { > + printk(XENLOG_WARNING > + "distance-matrix in node is not a multiple of u32\n"); > + > + return -EINVAL; > + } > + > + entry_count = len / sizeof(uint32_t); > + if ( entry_count <= 0 ) > + { > + printk(XENLOG_WARNING "NUMA: Invalid distance-matrix\n"); > + > + return -EINVAL; > + } > + > + matrix = (const __be32 *)prop->data; > + for ( i = 0; i + 2 < entry_count; i += 3 ) It would be easier to read if entry_count is the number of triplet. E.g entry_count = (len / sizeof(uint32_t)) / 3; for ( i = 0; i < entry_count; i++ ) > + { > + uint32_t nodea, nodeb, distance; > + > + nodea = dt_read_number(matrix, 1); > + matrix++; nodea = dt_next_cell(1, &matrix) will do the increment for you. > + nodeb = dt_read_number(matrix, 1); > + matrix++; Ditto. > + distance = dt_read_number(matrix, 1); > + matrix++; Ditto. > + > + if ( dt_numa_set_distance(nodea, nodeb, distance) ) > + { > + printk(XENLOG_WARNING > + "NUMA: node-id out of range in distance matrix for [node%d -> node%d]\n", s/%d/%u/ > + nodea, nodeb); > + return -EINVAL; > + > + } > + printk(XENLOG_INFO "NUMA: distance[node%d -> node%d] = %d\n", > + nodea, nodeb, distance); > + > + /* > + * Set default distance of node B->A same as A->B. > + * No need to check for return value of numa_set_distance. > + */ > + if ( nodeb > nodea ) Mind explaining this if in the comment? > + dt_numa_set_distance(nodeb, nodea, distance); > + } > + > + return 0; > +} > + > void __init dt_numa_process_memory_node(uint32_t nid, paddr_t start, > paddr_t size) > { > @@ -90,11 +202,30 @@ void __init dt_numa_process_memory_node(uint32_t nid, paddr_t start, > return; > } > > +static int __init dt_numa_scan_distance_node(const void *fdt, int node, > + const char *name, int depth, > + uint32_t address_cells, > + uint32_t size_cells, void *data) > +{ > + if ( device_tree_node_compatible(fdt, node, "numa-distance-map-v1") ) > + return dt_numa_parse_distance_map(fdt, node, name, address_cells, > + size_cells); > + > + return 0; > +} > + > int __init dt_numa_init(void) > { > int ret; > > ret = dt_numa_process_cpu_node((void *)device_tree_flattened); > + if ( ret ) > + return ret; > + > + ret = device_tree_for_each_node((void *)device_tree_flattened, Why do you need the cast? > + dt_numa_scan_distance_node, NULL); > + if ( !ret ) > + register_node_distance(&dt_node_distance); > > return ret; > } > diff --git a/xen/arch/arm/numa/numa.c b/xen/arch/arm/numa/numa.c > index 8227361..c00b92c 100644 > --- a/xen/arch/arm/numa/numa.c > +++ b/xen/arch/arm/numa/numa.c > @@ -18,10 +18,30 @@ > #include <xen/ctype.h> > #include <xen/nodemask.h> > #include <xen/numa.h> > +#include <asm/acpi.h> I don't understand why you include asm/acpi.h with no code using ACPI at the moment... > + > +static uint8_t (*node_distance_fn)(nodeid_t a, nodeid_t b); > > void numa_failed(void) > { > numa_off = true; > + init_dt_numa_distance(); Why do you need to initialize init_dt_numa_distance when it has failed? The array will never be used in that case. > + node_distance_fn = NULL; > +} > + > +uint8_t __node_distance(nodeid_t a, nodeid_t b) > +{ > + if ( node_distance_fn != NULL); > + return node_distance_fn(a, b); > + > + return a == b ? LOCAL_DISTANCE : REMOTE_DISTANCE; > +} > + > +EXPORT_SYMBOL(__node_distance); Please drop EXPORT_SYMBOL, this is not used by Xen and only here when the code is imported from Linux. > + > +void register_node_distance(uint8_t (fn)(nodeid_t a, nodeid_t b)) > +{ > + node_distance_fn = fn; > } > > void __init numa_init(void) > @@ -29,6 +49,8 @@ void __init numa_init(void) > int ret = 0; > > nodes_clear(processor_nodes_parsed); > + init_dt_numa_distance(); This should go in dt_numa_init and would avoid to export it. > + > if ( numa_off ) > goto no_numa; > > diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h > index 36cd782..d1dc83a 100644 > --- a/xen/include/asm-arm/numa.h > +++ b/xen/include/asm-arm/numa.h > @@ -4,6 +4,8 @@ > typedef uint8_t nodeid_t; > > void dt_numa_process_memory_node(uint32_t nid, paddr_t start, paddr_t size); > +void register_node_distance(uint8_t (fn)(nodeid_t a, nodeid_t b)); > +void init_dt_numa_distance(void); > > #ifdef CONFIG_NUMA > void numa_init(void); > diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h > index d8a0a44..ca0a2a6 100644 > --- a/xen/include/asm-x86/numa.h > +++ b/xen/include/asm-x86/numa.h > @@ -18,7 +18,6 @@ extern nodeid_t apicid_to_node[]; > extern void init_cpu_to_node(void); > > void srat_parse_regions(paddr_t addr); > -extern uint8_t __node_distance(nodeid_t a, nodeid_t b); > unsigned int arch_get_dma_bitsize(void); > > #endif > diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h > index 110d5dc..10ef4c4 100644 > --- a/xen/include/xen/numa.h > +++ b/xen/include/xen/numa.h > @@ -6,6 +6,8 @@ > #include <asm/numa.h> > > #define NUMA_NO_NODE 0xFF > +#define LOCAL_DISTANCE 10 > +#define REMOTE_DISTANCE 20 I would add DEFAULT in each name. Probably LOCAL_DEFAULT_DISTANCE and REMOVE_LOCAL_DISTANCE. > #define NUMA_NO_DISTANCE 0xFF > > #define MAX_NUMNODES NR_NODES > @@ -70,6 +72,7 @@ int numa_add_memblk(nodeid_t nodeid, paddr_t start, uint64_t size); > int get_num_node_memblks(void); > bool arch_sanitize_nodes_memory(void); > void numa_failed(void); > +uint8_t __node_distance(nodeid_t a, nodeid_t b); > #else > static inline void numa_add_cpu(int cpu) { } > static inline void numa_set_node(int cpu, nodeid_t node) { } > Cheers,
diff --git a/xen/arch/arm/numa/dt_numa.c b/xen/arch/arm/numa/dt_numa.c index 84030e7..46c0346 100644 --- a/xen/arch/arm/numa/dt_numa.c +++ b/xen/arch/arm/numa/dt_numa.c @@ -23,6 +23,48 @@ #include <xen/numa.h> #include <asm/setup.h> +static uint8_t node_distance[MAX_NUMNODES][MAX_NUMNODES]; + +static uint8_t dt_node_distance(nodeid_t nodea, nodeid_t nodeb) +{ + if ( nodea >= MAX_NUMNODES || nodeb >= MAX_NUMNODES ) + return nodea == nodeb ? LOCAL_DISTANCE : REMOTE_DISTANCE; + + return node_distance[nodea][nodeb]; +} + +static int dt_numa_set_distance(uint32_t nodea, uint32_t nodeb, + uint32_t distance) +{ + /* node_distance is uint8_t. Ensure distance is less than 255 */ + if ( nodea >= MAX_NUMNODES || nodeb >= MAX_NUMNODES || distance > 255 ) + return -EINVAL; + + node_distance[nodea][nodeb] = distance; + + return 0; +} + +void init_dt_numa_distance(void) +{ + int i, j; + + for ( i = 0; i < MAX_NUMNODES; i++ ) + { + for ( j = 0; j < MAX_NUMNODES; j++ ) + { + /* + * Initialize distance 10 for local distance and + * 20 for remote distance. + */ + if ( i == j ) + node_distance[i][j] = LOCAL_DISTANCE; + else + node_distance[i][j] = REMOTE_DISTANCE; + } + } +} + /* * Even though we connect cpus to numa domains later in SMP * init, we need to know the node ids now for all cpus. @@ -58,6 +100,76 @@ static int __init dt_numa_process_cpu_node(const void *fdt) return 0; } +static int __init dt_numa_parse_distance_map(const void *fdt, int node, + const char *name, + uint32_t address_cells, + uint32_t size_cells) +{ + const struct fdt_property *prop; + const __be32 *matrix; + int entry_count, len, i; + + printk(XENLOG_INFO "NUMA: parsing numa-distance-map\n"); + + prop = fdt_get_property(fdt, node, "distance-matrix", &len); + if ( !prop ) + { + printk(XENLOG_WARNING + "NUMA: No distance-matrix property in distance-map\n"); + + return -EINVAL; + } + + if ( len % sizeof(uint32_t) != 0 ) + { + printk(XENLOG_WARNING + "distance-matrix in node is not a multiple of u32\n"); + + return -EINVAL; + } + + entry_count = len / sizeof(uint32_t); + if ( entry_count <= 0 ) + { + printk(XENLOG_WARNING "NUMA: Invalid distance-matrix\n"); + + return -EINVAL; + } + + matrix = (const __be32 *)prop->data; + for ( i = 0; i + 2 < entry_count; i += 3 ) + { + uint32_t nodea, nodeb, distance; + + nodea = dt_read_number(matrix, 1); + matrix++; + nodeb = dt_read_number(matrix, 1); + matrix++; + distance = dt_read_number(matrix, 1); + matrix++; + + if ( dt_numa_set_distance(nodea, nodeb, distance) ) + { + printk(XENLOG_WARNING + "NUMA: node-id out of range in distance matrix for [node%d -> node%d]\n", + nodea, nodeb); + return -EINVAL; + + } + printk(XENLOG_INFO "NUMA: distance[node%d -> node%d] = %d\n", + nodea, nodeb, distance); + + /* + * Set default distance of node B->A same as A->B. + * No need to check for return value of numa_set_distance. + */ + if ( nodeb > nodea ) + dt_numa_set_distance(nodeb, nodea, distance); + } + + return 0; +} + void __init dt_numa_process_memory_node(uint32_t nid, paddr_t start, paddr_t size) { @@ -90,11 +202,30 @@ void __init dt_numa_process_memory_node(uint32_t nid, paddr_t start, return; } +static int __init dt_numa_scan_distance_node(const void *fdt, int node, + const char *name, int depth, + uint32_t address_cells, + uint32_t size_cells, void *data) +{ + if ( device_tree_node_compatible(fdt, node, "numa-distance-map-v1") ) + return dt_numa_parse_distance_map(fdt, node, name, address_cells, + size_cells); + + return 0; +} + int __init dt_numa_init(void) { int ret; ret = dt_numa_process_cpu_node((void *)device_tree_flattened); + if ( ret ) + return ret; + + ret = device_tree_for_each_node((void *)device_tree_flattened, + dt_numa_scan_distance_node, NULL); + if ( !ret ) + register_node_distance(&dt_node_distance); return ret; } diff --git a/xen/arch/arm/numa/numa.c b/xen/arch/arm/numa/numa.c index 8227361..c00b92c 100644 --- a/xen/arch/arm/numa/numa.c +++ b/xen/arch/arm/numa/numa.c @@ -18,10 +18,30 @@ #include <xen/ctype.h> #include <xen/nodemask.h> #include <xen/numa.h> +#include <asm/acpi.h> + +static uint8_t (*node_distance_fn)(nodeid_t a, nodeid_t b); void numa_failed(void) { numa_off = true; + init_dt_numa_distance(); + node_distance_fn = NULL; +} + +uint8_t __node_distance(nodeid_t a, nodeid_t b) +{ + if ( node_distance_fn != NULL); + return node_distance_fn(a, b); + + return a == b ? LOCAL_DISTANCE : REMOTE_DISTANCE; +} + +EXPORT_SYMBOL(__node_distance); + +void register_node_distance(uint8_t (fn)(nodeid_t a, nodeid_t b)) +{ + node_distance_fn = fn; } void __init numa_init(void) @@ -29,6 +49,8 @@ void __init numa_init(void) int ret = 0; nodes_clear(processor_nodes_parsed); + init_dt_numa_distance(); + if ( numa_off ) goto no_numa; diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h index 36cd782..d1dc83a 100644 --- a/xen/include/asm-arm/numa.h +++ b/xen/include/asm-arm/numa.h @@ -4,6 +4,8 @@ typedef uint8_t nodeid_t; void dt_numa_process_memory_node(uint32_t nid, paddr_t start, paddr_t size); +void register_node_distance(uint8_t (fn)(nodeid_t a, nodeid_t b)); +void init_dt_numa_distance(void); #ifdef CONFIG_NUMA void numa_init(void); diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h index d8a0a44..ca0a2a6 100644 --- a/xen/include/asm-x86/numa.h +++ b/xen/include/asm-x86/numa.h @@ -18,7 +18,6 @@ extern nodeid_t apicid_to_node[]; extern void init_cpu_to_node(void); void srat_parse_regions(paddr_t addr); -extern uint8_t __node_distance(nodeid_t a, nodeid_t b); unsigned int arch_get_dma_bitsize(void); #endif diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h index 110d5dc..10ef4c4 100644 --- a/xen/include/xen/numa.h +++ b/xen/include/xen/numa.h @@ -6,6 +6,8 @@ #include <asm/numa.h> #define NUMA_NO_NODE 0xFF +#define LOCAL_DISTANCE 10 +#define REMOTE_DISTANCE 20 #define NUMA_NO_DISTANCE 0xFF #define MAX_NUMNODES NR_NODES @@ -70,6 +72,7 @@ int numa_add_memblk(nodeid_t nodeid, paddr_t start, uint64_t size); int get_num_node_memblks(void); bool arch_sanitize_nodes_memory(void); void numa_failed(void); +uint8_t __node_distance(nodeid_t a, nodeid_t b); #else static inline void numa_add_cpu(int cpu) { } static inline void numa_set_node(int cpu, nodeid_t node) { }