diff mbox

[RFC,v1,02/21] x86: NUMA: Refactor NUMA code

Message ID 1486655834-9708-3-git-send-email-vijay.kilari@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Vijay Kilari Feb. 9, 2017, 3:56 p.m. UTC
From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>

Move common generic NUMA code to xen/common/numa.c from
xen/arch/x86/numa.c. Also move generic code in header file
xen/include/asm-x86/numa.h to xen/include/xen/numa.h

This common code can be re-used later for ARM.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
---
 xen/arch/x86/numa.c        | 299 ---------------------------------------
 xen/common/Makefile        |   1 +
 xen/common/numa.c          | 342 +++++++++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/numa.h |  47 -------
 xen/include/xen/numa.h     |  54 +++++++
 5 files changed, 397 insertions(+), 346 deletions(-)

Comments

Jan Beulich Feb. 9, 2017, 4:11 p.m. UTC | #1
>>> On 09.02.17 at 16:56, <vijay.kilari@gmail.com> wrote:
> From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
> 
> Move common generic NUMA code to xen/common/numa.c from
> xen/arch/x86/numa.c. Also move generic code in header file
> xen/include/asm-x86/numa.h to xen/include/xen/numa.h
> 
> This common code can be re-used later for ARM.
> 
> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>

I would have been nice if you Cc-ed the maintainers of the code
you're moving.

> --- /dev/null
> +++ b/xen/common/numa.c
> @@ -0,0 +1,342 @@
> +/*
> + * Common NUMA handling functions for x86 and arm.
> + * Original code extracted from arch/x86/numa.c
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +
> +#include <xen/mm.h>
> +#include <xen/string.h>
> +#include <xen/init.h>
> +#include <xen/ctype.h>
> +#include <xen/nodemask.h>
> +#include <xen/numa.h>
> +#include <xen/keyhandler.h>
> +#include <xen/time.h>
> +#include <xen/smp.h>
> +#include <xen/pfn.h>
> +#include <xen/sched.h>
> +#include <xen/errno.h>
> +#include <xen/softirq.h>
> +#include <asm/setup.h>

This last one would better not be included here.

> +struct node_data node_data[MAX_NUMNODES];
> +
> +/* Mapping from pdx to node id */
> +int memnode_shift;
> +unsigned long memnodemapsize;
> +u8 *memnodemap;
> +typeof(*memnodemap) _memnodemap[64];

Careful with removing any "static" please. For the last one in
particular you would need to change the name if it's really necessary
to make non-static. Even better would be though to keep it static
and provide suitable accessors.

Also please sanitize types as you're moving stuff: memnode_shift
looks like it really wants to be unsigned int, and u8 should really
be uint8_t (as we're trying to phase out u8 & Co). This also applies
to code further down.

Jan
Julien Grall Feb. 20, 2017, 11:41 a.m. UTC | #2
Hi,

On 09/02/17 16:11, Jan Beulich wrote:
>>>> On 09.02.17 at 16:56, <vijay.kilari@gmail.com> wrote:
>> From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>>
>> Move common generic NUMA code to xen/common/numa.c from
>> xen/arch/x86/numa.c. Also move generic code in header file
>> xen/include/asm-x86/numa.h to xen/include/xen/numa.h
>>
>> This common code can be re-used later for ARM.
>>
>> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>
> I would have been nice if you Cc-ed the maintainers of the code
> you're moving.
>
>> --- /dev/null
>> +++ b/xen/common/numa.c
>> @@ -0,0 +1,342 @@
>> +/*
>> + * Common NUMA handling functions for x86 and arm.
>> + * Original code extracted from arch/x86/numa.c
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +
>> +#include <xen/mm.h>
>> +#include <xen/string.h>
>> +#include <xen/init.h>
>> +#include <xen/ctype.h>
>> +#include <xen/nodemask.h>
>> +#include <xen/numa.h>
>> +#include <xen/keyhandler.h>
>> +#include <xen/time.h>
>> +#include <xen/smp.h>
>> +#include <xen/pfn.h>
>> +#include <xen/sched.h>
>> +#include <xen/errno.h>
>> +#include <xen/softirq.h>
>> +#include <asm/setup.h>
>
> This last one would better not be included here.
>
>> +struct node_data node_data[MAX_NUMNODES];
>> +
>> +/* Mapping from pdx to node id */
>> +int memnode_shift;
>> +unsigned long memnodemapsize;
>> +u8 *memnodemap;
>> +typeof(*memnodemap) _memnodemap[64];
>
> Careful with removing any "static" please. For the last one in
> particular you would need to change the name if it's really necessary
> to make non-static. Even better would be though to keep it static
> and provide suitable accessors.
>
> Also please sanitize types as you're moving stuff: memnode_shift
> looks like it really wants to be unsigned int, and u8 should really
> be uint8_t (as we're trying to phase out u8 & Co). This also applies
> to code further down.

I agree with the changes suggested. If possible I would prefer all those 
changes made in separate patches before the move. This would ease the 
review.

Cheers,
Julien Grall Feb. 20, 2017, 12:37 p.m. UTC | #3
Hello Vijay,

On 09/02/17 15:56, vijay.kilari@gmail.com wrote:
> From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>
> Move common generic NUMA code to xen/common/numa.c from
> xen/arch/x86/numa.c. Also move generic code in header file
> xen/include/asm-x86/numa.h to xen/include/xen/numa.h
>
> This common code can be re-used later for ARM.
>
> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
> ---
>  xen/arch/x86/numa.c        | 299 ---------------------------------------
>  xen/common/Makefile        |   1 +
>  xen/common/numa.c          | 342 +++++++++++++++++++++++++++++++++++++++++++++
>  xen/include/asm-x86/numa.h |  47 -------
>  xen/include/xen/numa.h     |  54 +++++++
>  5 files changed, 397 insertions(+), 346 deletions(-)
>
> diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
> index 6f4d438..bc787e0 100644
> --- a/xen/arch/x86/numa.c
> +++ b/xen/arch/x86/numa.c
> @@ -25,27 +25,12 @@ custom_param("numa", numa_setup);
>  #define Dprintk(x...)
>  #endif
>
> -/* from proto.h */
> -#define round_up(x,y) ((((x)+(y))-1) & (~((y)-1)))
> -
> -struct node_data node_data[MAX_NUMNODES];
> -
> -/* Mapping from pdx to node id */
> -int memnode_shift;
> -static typeof(*memnodemap) _memnodemap[64];
> -unsigned long memnodemapsize;
> -u8 *memnodemap;
> -
> -nodeid_t cpu_to_node[NR_CPUS] __read_mostly = {
> -    [0 ... NR_CPUS-1] = NUMA_NO_NODE
> -};
>  /*
>   * Keep BIOS's CPU2node information, should not be used for memory allocaion
>   */
>  nodeid_t apicid_to_node[MAX_LOCAL_APIC] = {
>      [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
>  };
> -cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
>
>  nodemask_t __read_mostly node_online_map = { { [0] = 1UL } };

Why this variable is not moved?

[...]

>  void __init numa_init_array(void)

Same question for this function.

>  {
>      int rr, i;
> @@ -288,16 +145,6 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
>                      (u64)end_pfn << PAGE_SHIFT);
>  }
>
> -void numa_add_cpu(int cpu)
> -{
> -    cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
> -}
> -
> -void numa_set_node(int cpu, nodeid_t node)
> -{
> -    cpu_to_node[cpu] = node;
> -}
> -
>  /* [numa=off] */
>  static __init int numa_setup(char *opt)

Same question here. Everything in numa_setup and the fake NUMA looks
valid for ARM too.

[....]

> diff --git a/xen/common/Makefile b/xen/common/Makefile
> index 0fed30b..c1bd2ff 100644
> --- a/xen/common/Makefile
> +++ b/xen/common/Makefile
> @@ -63,6 +63,7 @@ obj-y += wait.o
>  obj-bin-y += warning.init.o
>  obj-$(CONFIG_XENOPROF) += xenoprof.o
>  obj-y += xmalloc_tlsf.o
> +obj-y += numa.o

This needs to be gated with CONFIG_NUMA.

>
>  obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma unlzo unlz4 earlycpio,$(n).init.o)
>
> diff --git a/xen/common/numa.c b/xen/common/numa.c
> new file mode 100644
> index 0000000..59dcb63
> --- /dev/null
> +++ b/xen/common/numa.c
> @@ -0,0 +1,342 @@
> +/*
> + * Common NUMA handling functions for x86 and arm.
> + * Original code extracted from arch/x86/numa.c
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.

Xen is GPLv2 only. Please update to:

"This program is free software; you can redistribute it and/or
modify it under the terms and conditions of the GNU General Public
License, version 2, as published by the Free Software Foundation."

> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +
> +#include <xen/mm.h>
> +#include <xen/string.h>
> +#include <xen/init.h>
> +#include <xen/ctype.h>
> +#include <xen/nodemask.h>
> +#include <xen/numa.h>
> +#include <xen/keyhandler.h>
> +#include <xen/time.h>
> +#include <xen/smp.h>
> +#include <xen/pfn.h>
> +#include <xen/sched.h>
> +#include <xen/errno.h>
> +#include <xen/softirq.h>
> +#include <asm/setup.h>
> +
> +struct node_data node_data[MAX_NUMNODES];
> +
> +/* Mapping from pdx to node id */

Looking at this comment, it looks like the NUMA support should depend on 
HAS_PDX as this is not something that may be able on all the architecture.

> +int memnode_shift;
> +unsigned long memnodemapsize;
> +u8 *memnodemap;
> +typeof(*memnodemap) _memnodemap[64];
> +
> +nodeid_t cpu_to_node[NR_CPUS] __read_mostly = {
> +    [0 ... NR_CPUS-1] = NUMA_NO_NODE
> +};
> +
> +cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;

[...]

> +void numa_add_cpu(int cpu)
> +{
> +    cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
> +}
> +
> +void numa_set_node(int cpu, nodeid_t node)
> +{
> +    cpu_to_node[cpu] = node;
> +}
> +
> +EXPORT_SYMBOL(node_to_cpumask);
> +EXPORT_SYMBOL(memnode_shift);
> +EXPORT_SYMBOL(memnodemap);
> +EXPORT_SYMBOL(node_data);

Those 4 lines are not part of the original code. Why did you add them?

To ease review I would like to see this patch split multiple one:
	- multiple small to prepare the code (export function, change the type...)
         - a patch to move the code and only move it. No changes at all 
in it.

[...]

> diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
> index 2479238..61bcd8e 100644
> --- a/xen/include/asm-x86/numa.h
> +++ b/xen/include/asm-x86/numa.h
> @@ -17,64 +17,17 @@ extern cpumask_t     node_to_cpumask[];
>  #define node_to_first_cpu(node)  (__ffs(node_to_cpumask[node]))
>  #define node_to_cpumask(node)    (node_to_cpumask[node])
>
> -struct node {
> -	u64 start,end;
> -};
> -
> -extern int compute_hash_shift(struct node *nodes, int numnodes,
> -			      nodeid_t *nodeids);
>  extern nodeid_t pxm_to_node(unsigned int pxm);
>
>  #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
> -#define VIRTUAL_BUG_ON(x)
>
> -extern void numa_add_cpu(int cpu);
>  extern void numa_init_array(void);
>  extern bool_t numa_off;
>
> -

Spurious change.

>  extern int srat_disabled(void);
> -extern void numa_set_node(int cpu, nodeid_t node);
> -extern nodeid_t setup_node(unsigned int pxm);
>  extern void srat_detect_node(int cpu);
>
> -extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
>  extern nodeid_t apicid_to_node[];
> -extern void init_cpu_to_node(void);
> -
> -static inline void clear_node_cpumask(int cpu)
> -{
> -	cpumask_clear_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
> -}
> -
> -/* Simple perfect hash to map pdx to node numbers */
> -extern int memnode_shift;
> -extern unsigned long memnodemapsize;
> -extern u8 *memnodemap;
> -
> -struct node_data {
> -    unsigned long node_start_pfn;
> -    unsigned long node_spanned_pages;
> -};
> -
> -extern struct node_data node_data[];
> -
> -static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
> -{
> -	nodeid_t nid;
> -	VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >= memnodemapsize);
> -	nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift];
> -	VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
> -	return nid;
> -}
> -
> -#define NODE_DATA(nid)		(&(node_data[nid]))
> -
> -#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
> -#define node_spanned_pages(nid)	(NODE_DATA(nid)->node_spanned_pages)
> -#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
> -				 NODE_DATA(nid)->node_spanned_pages)
> -
>  extern int valid_numa_range(u64 start, u64 end, nodeid_t node);
>
>  void srat_parse_regions(u64 addr);
> diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
> index 7aef1a8..dd33c92 100644
> --- a/xen/include/xen/numa.h
> +++ b/xen/include/xen/numa.h
> @@ -18,4 +18,58 @@
>    (((d)->vcpu != NULL && (d)->vcpu[0] != NULL) \
>     ? vcpu_to_node((d)->vcpu[0]) : NUMA_NO_NODE)
>
> +struct node {
> +	u64 start,end;

This contains hard tab. It looks like that asm-x86/numa.h add a mix hard 
tab/soft tab. Can you have a clean-up patch to drop them first?

> +};
> +
> +struct node_data {
> +    unsigned long node_start_pfn;
> +    unsigned long node_spanned_pages;
> +    nodeid_t      node_id;
> +};
> +
> +#define NODE_DATA(nid)		(&(node_data[nid]))

Hard tab.

> +#define VIRTUAL_BUG_ON(x)

What is the point to have a BUG_ON that is a nop?

> +
> +#ifdef CONFIG_NUMA
> +extern void init_cpu_to_node(void);
> +
> +static inline void clear_node_cpumask(int cpu)
> +{
> +	cpumask_clear_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);

Hard tab.

> +}

You move this function in xen/numa.h but this is never used in xen code. 
It would be better to drop it.

> +
> +/* Simple perfect hash to map pdx to node numbers */
> +extern int memnode_shift;
> +extern unsigned long memnodemapsize;
> +extern u8 *memnodemap;
> +extern typeof(*memnodemap) _memnodemap[];
> +
> +extern struct node_data node_data[];
> +
> +static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
> +{
> +	nodeid_t nid;
> +	VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >= memnodemapsize);
> +	nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift];
> +	VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
> +	return nid;

Hard tab in all this function.

> +}
> +
> +#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
> +#define node_spanned_pages(nid)	(NODE_DATA(nid)->node_spanned_pages)
> +#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
> +				 NODE_DATA(nid)->node_spanned_pages)

Same for those 3 macros.

> +
> +#else
> +#define init_cpu_to_node() do {} while (0)

Please use static inline init_cpu_to_node(void) {}

> +#define clear_node_cpumask(cpu) do {} while (0)

Not point of having this one.

> +#endif /* CONFIG_NUMA */
> +
> +extern void numa_add_cpu(int cpu);
> +extern nodeid_t setup_node(unsigned int pxm);
> +extern void numa_set_node(int cpu, nodeid_t node);
> +extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);

I am not sure to understand why those function are not protected by 
#ifdef CONFIFG_NUMA.

> +extern int compute_hash_shift(struct node *nodes, int numnodes,

The function name is a bit too generic.

> +			      nodeid_t *nodeids);
>  #endif /* _XEN_NUMA_H */
>

Cheers,
Vijay Kilari Feb. 22, 2017, 10:04 a.m. UTC | #4
On Mon, Feb 20, 2017 at 6:07 PM, Julien Grall <julien.grall@arm.com> wrote:
> Hello Vijay,
>
>
> On 09/02/17 15:56, vijay.kilari@gmail.com wrote:
>>
>> From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>>
>> Move common generic NUMA code to xen/common/numa.c from
>> xen/arch/x86/numa.c. Also move generic code in header file
>> xen/include/asm-x86/numa.h to xen/include/xen/numa.h
>>
>> This common code can be re-used later for ARM.
>>
>> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>> ---
>>  xen/arch/x86/numa.c        | 299 ---------------------------------------
>>  xen/common/Makefile        |   1 +
>>  xen/common/numa.c          | 342
>> +++++++++++++++++++++++++++++++++++++++++++++
>>  xen/include/asm-x86/numa.h |  47 -------
>>  xen/include/xen/numa.h     |  54 +++++++
>>  5 files changed, 397 insertions(+), 346 deletions(-)
>>
>> diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
>> index 6f4d438..bc787e0 100644
>> --- a/xen/arch/x86/numa.c
>> +++ b/xen/arch/x86/numa.c
>> @@ -25,27 +25,12 @@ custom_param("numa", numa_setup);
>>  #define Dprintk(x...)
>>  #endif
>>
>> -/* from proto.h */
>> -#define round_up(x,y) ((((x)+(y))-1) & (~((y)-1)))
>> -
>> -struct node_data node_data[MAX_NUMNODES];
>> -
>> -/* Mapping from pdx to node id */
>> -int memnode_shift;
>> -static typeof(*memnodemap) _memnodemap[64];
>> -unsigned long memnodemapsize;
>> -u8 *memnodemap;
>> -
>> -nodeid_t cpu_to_node[NR_CPUS] __read_mostly = {
>> -    [0 ... NR_CPUS-1] = NUMA_NO_NODE
>> -};
>>  /*
>>   * Keep BIOS's CPU2node information, should not be used for memory
>> allocaion
>>   */
>>  nodeid_t apicid_to_node[MAX_LOCAL_APIC] = {
>>      [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
>>  };
>> -cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
>>
>>  nodemask_t __read_mostly node_online_map = { { [0] = 1UL } };
>
>
> Why this variable is not moved?
Ok. Can be moved.
>
> [...]
>
>>  void __init numa_init_array(void)
>
>
> Same question for this function.

Initially I was suspicious about the comment in this function and thought
it might be valid of x86 alone. But looks like it is generic.
I will have a look.

>
>>  {
>>      int rr, i;
>> @@ -288,16 +145,6 @@ void __init numa_initmem_init(unsigned long
>> start_pfn, unsigned long end_pfn)
>>                      (u64)end_pfn << PAGE_SHIFT);
>>  }
>>
>> -void numa_add_cpu(int cpu)
>> -{
>> -    cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
>> -}
>> -
>> -void numa_set_node(int cpu, nodeid_t node)
>> -{
>> -    cpu_to_node[cpu] = node;
>> -}
>> -
>>  /* [numa=off] */
>>  static __init int numa_setup(char *opt)
>
>
> Same question here. Everything in numa_setup and the fake NUMA looks
> valid for ARM too.

numa_setup() is taken in separate patch. fake numa case is not considered.
for x86 it is under separate config CONFIG_NUMA_EMU.

>
> [....]
>
>> diff --git a/xen/common/Makefile b/xen/common/Makefile
>> index 0fed30b..c1bd2ff 100644
>> --- a/xen/common/Makefile
>> +++ b/xen/common/Makefile
>> @@ -63,6 +63,7 @@ obj-y += wait.o
>>  obj-bin-y += warning.init.o
>>  obj-$(CONFIG_XENOPROF) += xenoprof.o
>>  obj-y += xmalloc_tlsf.o
>> +obj-y += numa.o
>
>
> This needs to be gated with CONFIG_NUMA.

As it is shared with x86 and prior this changes it is not gated under
any config for x86.

>
>>
>>  obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma unlzo
>> unlz4 earlycpio,$(n).init.o)
>>
>> diff --git a/xen/common/numa.c b/xen/common/numa.c
>> new file mode 100644
>> index 0000000..59dcb63
>> --- /dev/null
>> +++ b/xen/common/numa.c
>> @@ -0,0 +1,342 @@
>> +/*
>> + * Common NUMA handling functions for x86 and arm.
>> + * Original code extracted from arch/x86/numa.c
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>
>
> Xen is GPLv2 only. Please update to:
>
> "This program is free software; you can redistribute it and/or
> modify it under the terms and conditions of the GNU General Public
> License, version 2, as published by the Free Software Foundation."
>
>
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +
>> +#include <xen/mm.h>
>> +#include <xen/string.h>
>> +#include <xen/init.h>
>> +#include <xen/ctype.h>
>> +#include <xen/nodemask.h>
>> +#include <xen/numa.h>
>> +#include <xen/keyhandler.h>
>> +#include <xen/time.h>
>> +#include <xen/smp.h>
>> +#include <xen/pfn.h>
>> +#include <xen/sched.h>
>> +#include <xen/errno.h>
>> +#include <xen/softirq.h>
>> +#include <asm/setup.h>
>> +
>> +struct node_data node_data[MAX_NUMNODES];
>> +
>> +/* Mapping from pdx to node id */
>
>
> Looking at this comment, it looks like the NUMA support should depend on
> HAS_PDX as this is not something that may be able on all the architecture.

yes it uses pfn_to_pdx() while updating memnodemap.
May be comment should be suffice.

>
>> +int memnode_shift;
>> +unsigned long memnodemapsize;
>> +u8 *memnodemap;
>> +typeof(*memnodemap) _memnodemap[64];
>> +
>> +nodeid_t cpu_to_node[NR_CPUS] __read_mostly = {
>> +    [0 ... NR_CPUS-1] = NUMA_NO_NODE
>> +};
>> +
>> +cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
>
>
> [...]
>
>> +void numa_add_cpu(int cpu)
>> +{
>> +    cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
>> +}
>> +
>> +void numa_set_node(int cpu, nodeid_t node)
>> +{
>> +    cpu_to_node[cpu] = node;
>> +}
>> +
>> +EXPORT_SYMBOL(node_to_cpumask);
>> +EXPORT_SYMBOL(memnode_shift);
>> +EXPORT_SYMBOL(memnodemap);
>> +EXPORT_SYMBOL(node_data);
>
>
> Those 4 lines are not part of the original code. Why did you add them?
>
> To ease review I would like to see this patch split multiple one:
>         - multiple small to prepare the code (export function, change the
> type...)
>         - a patch to move the code and only move it. No changes at all in
> it.
OK
>
> [...]
>
>> diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
>> index 2479238..61bcd8e 100644
>> --- a/xen/include/asm-x86/numa.h
>> +++ b/xen/include/asm-x86/numa.h
>> @@ -17,64 +17,17 @@ extern cpumask_t     node_to_cpumask[];
>>  #define node_to_first_cpu(node)  (__ffs(node_to_cpumask[node]))
>>  #define node_to_cpumask(node)    (node_to_cpumask[node])
>>
>> -struct node {
>> -       u64 start,end;
>> -};
>> -
>> -extern int compute_hash_shift(struct node *nodes, int numnodes,
>> -                             nodeid_t *nodeids);
>>  extern nodeid_t pxm_to_node(unsigned int pxm);
>>
>>  #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
>> -#define VIRTUAL_BUG_ON(x)
>>
>> -extern void numa_add_cpu(int cpu);
>>  extern void numa_init_array(void);
>>  extern bool_t numa_off;
>>
>> -
>
>
> Spurious change.
>
>
>>  extern int srat_disabled(void);
>> -extern void numa_set_node(int cpu, nodeid_t node);
>> -extern nodeid_t setup_node(unsigned int pxm);
>>  extern void srat_detect_node(int cpu);
>>
>> -extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
>>  extern nodeid_t apicid_to_node[];
>> -extern void init_cpu_to_node(void);
>> -
>> -static inline void clear_node_cpumask(int cpu)
>> -{
>> -       cpumask_clear_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
>> -}
>> -
>> -/* Simple perfect hash to map pdx to node numbers */
>> -extern int memnode_shift;
>> -extern unsigned long memnodemapsize;
>> -extern u8 *memnodemap;
>> -
>> -struct node_data {
>> -    unsigned long node_start_pfn;
>> -    unsigned long node_spanned_pages;
>> -};
>> -
>> -extern struct node_data node_data[];
>> -
>> -static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
>> -{
>> -       nodeid_t nid;
>> -       VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >=
>> memnodemapsize);
>> -       nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift];
>> -       VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
>> -       return nid;
>> -}
>> -
>> -#define NODE_DATA(nid)         (&(node_data[nid]))
>> -
>> -#define node_start_pfn(nid)    (NODE_DATA(nid)->node_start_pfn)
>> -#define node_spanned_pages(nid)
>> (NODE_DATA(nid)->node_spanned_pages)
>> -#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
>> -                                NODE_DATA(nid)->node_spanned_pages)
>> -
>>  extern int valid_numa_range(u64 start, u64 end, nodeid_t node);
>>
>>  void srat_parse_regions(u64 addr);
>> diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
>> index 7aef1a8..dd33c92 100644
>> --- a/xen/include/xen/numa.h
>> +++ b/xen/include/xen/numa.h
>> @@ -18,4 +18,58 @@
>>    (((d)->vcpu != NULL && (d)->vcpu[0] != NULL) \
>>     ? vcpu_to_node((d)->vcpu[0]) : NUMA_NO_NODE)
>>
>> +struct node {
>> +       u64 start,end;
>
>
> This contains hard tab. It looks like that asm-x86/numa.h add a mix hard
> tab/soft tab. Can you have a clean-up patch to drop them first?

OK. I will try.
>
>> +};
>> +
>> +struct node_data {
>> +    unsigned long node_start_pfn;
>> +    unsigned long node_spanned_pages;
>> +    nodeid_t      node_id;
>> +};
>> +
>> +#define NODE_DATA(nid)         (&(node_data[nid]))
>
>
> Hard tab.
>
>> +#define VIRTUAL_BUG_ON(x)
>
>
> What is the point to have a BUG_ON that is a nop?

yes.. that is part of original code. As part of clean up patch.
I will drop it.

>
>> +
>> +#ifdef CONFIG_NUMA
>> +extern void init_cpu_to_node(void);
>> +
>> +static inline void clear_node_cpumask(int cpu)
>> +{
>> +       cpumask_clear_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
>
>
> Hard tab.
>
>> +}
>
>
> You move this function in xen/numa.h but this is never used in xen code. It
> would be better to drop it.
>
>> +
>> +/* Simple perfect hash to map pdx to node numbers */
>> +extern int memnode_shift;
>> +extern unsigned long memnodemapsize;
>> +extern u8 *memnodemap;
>> +extern typeof(*memnodemap) _memnodemap[];
>> +
>> +extern struct node_data node_data[];
>> +
>> +static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
>> +{
>> +       nodeid_t nid;
>> +       VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >=
>> memnodemapsize);
>> +       nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift];
>> +       VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
>> +       return nid;
>
>
> Hard tab in all this function.
>
>> +}
>> +
>> +#define node_start_pfn(nid)    (NODE_DATA(nid)->node_start_pfn)
>> +#define node_spanned_pages(nid)
>> (NODE_DATA(nid)->node_spanned_pages)
>> +#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
>> +                                NODE_DATA(nid)->node_spanned_pages)
>
>
> Same for those 3 macros.
>
>> +
>> +#else
>> +#define init_cpu_to_node() do {} while (0)
>
>
> Please use static inline init_cpu_to_node(void) {}
>
>> +#define clear_node_cpumask(cpu) do {} while (0)
>
>
> Not point of having this one.
>
>> +#endif /* CONFIG_NUMA */
>> +
>> +extern void numa_add_cpu(int cpu);
>> +extern nodeid_t setup_node(unsigned int pxm);
>> +extern void numa_set_node(int cpu, nodeid_t node);
>> +extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
>
>
> I am not sure to understand why those function are not protected by #ifdef
> CONFIFG_NUMA.
As these defined in xen/common/numa.c which is not under CONFIG_NUMA,
I have kept them outside CONFIG_NUMA

>
>> +extern int compute_hash_shift(struct node *nodes, int numnodes,
>
>
> The function name is a bit too generic.
>
>> +                             nodeid_t *nodeids);
>>  #endif /* _XEN_NUMA_H */
>>
>
> Cheers,
>
> --
> Julien Grall
Julien Grall Feb. 22, 2017, 10:55 a.m. UTC | #5
Hello Vijay,

On 22/02/17 10:04, Vijay Kilari wrote:
> On Mon, Feb 20, 2017 at 6:07 PM, Julien Grall <julien.grall@arm.com> wrote:
>>
>>>  {
>>>      int rr, i;
>>> @@ -288,16 +145,6 @@ void __init numa_initmem_init(unsigned long
>>> start_pfn, unsigned long end_pfn)
>>>                      (u64)end_pfn << PAGE_SHIFT);
>>>  }
>>>
>>> -void numa_add_cpu(int cpu)
>>> -{
>>> -    cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
>>> -}
>>> -
>>> -void numa_set_node(int cpu, nodeid_t node)
>>> -{
>>> -    cpu_to_node[cpu] = node;
>>> -}
>>> -
>>>  /* [numa=off] */
>>>  static __init int numa_setup(char *opt)
>>
>>
>> Same question here. Everything in numa_setup and the fake NUMA looks
>> valid for ARM too.
>
> numa_setup() is taken in separate patch. fake numa case is not considered.
> for x86 it is under separate config CONFIG_NUMA_EMU.

Why fake numa is not considered? This would help to test the series on 
non-NUMA platform.

>>
>> [....]
>>
>>> diff --git a/xen/common/Makefile b/xen/common/Makefile
>>> index 0fed30b..c1bd2ff 100644
>>> --- a/xen/common/Makefile
>>> +++ b/xen/common/Makefile
>>> @@ -63,6 +63,7 @@ obj-y += wait.o
>>>  obj-bin-y += warning.init.o
>>>  obj-$(CONFIG_XENOPROF) += xenoprof.o
>>>  obj-y += xmalloc_tlsf.o
>>> +obj-y += numa.o
>>
>>
>> This needs to be gated with CONFIG_NUMA.
>
> As it is shared with x86 and prior this changes it is not gated under
> any config for x86.

Because x86 code assume that CONFIG_NUMA is always enabled. If you look 
at the .config CONFIG_NUMA will be set so you can gate it here.

>> Looking at this comment, it looks like the NUMA support should depend on
>> HAS_PDX as this is not something that may be able on all the architecture.
>
> yes it uses pfn_to_pdx() while updating memnodemap.
> May be comment should be suffice.

A comment may be missed, gated in the Kconfig will prevent a new 
architecture to use NUMA without knowing PDX is necessary.

[...]

>>> +#endif /* CONFIG_NUMA */
>>> +
>>> +extern void numa_add_cpu(int cpu);
>>> +extern nodeid_t setup_node(unsigned int pxm);
>>> +extern void numa_set_node(int cpu, nodeid_t node);
>>> +extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
>>
>>
>> I am not sure to understand why those function are not protected by #ifdef
>> CONFIFG_NUMA.
> As these defined in xen/common/numa.c which is not under CONFIG_NUMA,
> I have kept them outside CONFIG_NUMA

xen/common/numa.c should be under CONFIG_NUMA. There is no reason to 
expose the code on non-NUMA platform.

Regards,
Vijay Kilari Feb. 27, 2017, 11:43 a.m. UTC | #6
Hi Jan,

On Thu, Feb 9, 2017 at 9:41 PM, Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 09.02.17 at 16:56, <vijay.kilari@gmail.com> wrote:
>> From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>>
>> Move common generic NUMA code to xen/common/numa.c from
>> xen/arch/x86/numa.c. Also move generic code in header file
>> xen/include/asm-x86/numa.h to xen/include/xen/numa.h
>>
>> This common code can be re-used later for ARM.
>>
>> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>
> I would have been nice if you Cc-ed the maintainers of the code
> you're moving.
>
>> --- /dev/null
>> +++ b/xen/common/numa.c
>> @@ -0,0 +1,342 @@
>> +/*
>> + * Common NUMA handling functions for x86 and arm.
>> + * Original code extracted from arch/x86/numa.c
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +
>> +#include <xen/mm.h>
>> +#include <xen/string.h>
>> +#include <xen/init.h>
>> +#include <xen/ctype.h>
>> +#include <xen/nodemask.h>
>> +#include <xen/numa.h>
>> +#include <xen/keyhandler.h>
>> +#include <xen/time.h>
>> +#include <xen/smp.h>
>> +#include <xen/pfn.h>
>> +#include <xen/sched.h>
>> +#include <xen/errno.h>
>> +#include <xen/softirq.h>
>> +#include <asm/setup.h>
>
> This last one would better not be included here.
>
>> +struct node_data node_data[MAX_NUMNODES];
>> +
>> +/* Mapping from pdx to node id */
>> +int memnode_shift;
>> +unsigned long memnodemapsize;
>> +u8 *memnodemap;
>> +typeof(*memnodemap) _memnodemap[64];
>
> Careful with removing any "static" please. For the last one in
> particular you would need to change the name if it's really necessary
> to make non-static. Even better would be though to keep it static
> and provide suitable accessors.
>
> Also please sanitize types as you're moving stuff: memnode_shift
> looks like it really wants to be unsigned int, and u8 should really
> be uint8_t (as we're trying to phase out u8 & Co). This also applies
> to code further down.

You mean to change all occurrences of
s/u8/uint8_t
s/u32/uint32_t
s/u64/uint64_t

Also, I see that xen/arch/x86/srat.c coding style is not adhere to xen
coding style.
Shall I clean up before I move the code?
Jan Beulich Feb. 27, 2017, 2:58 p.m. UTC | #7
>>> Vijay Kilari <vijay.kilari@gmail.com> 02/27/17 12:43 PM >>>
>On Thu, Feb 9, 2017 at 9:41 PM, Jan Beulich <JBeulich@suse.com> wrote:
>>>>> On 09.02.17 at 16:56, <vijay.kilari@gmail.com> wrote:
>>> +struct node_data node_data[MAX_NUMNODES];
>>> +
>>> +/* Mapping from pdx to node id */
>>> +int memnode_shift;
>>> +unsigned long memnodemapsize;
>>> +u8 *memnodemap;
>>> +typeof(*memnodemap) _memnodemap[64];
>>
>> Careful with removing any "static" please. For the last one in
>> particular you would need to change the name if it's really necessary
>> to make non-static. Even better would be though to keep it static
>> and provide suitable accessors.
>>
>> Also please sanitize types as you're moving stuff: memnode_shift
>> looks like it really wants to be unsigned int, and u8 should really
>> be uint8_t (as we're trying to phase out u8 & Co). This also applies
>> to code further down.
>
>You mean to change all occurrences of
>s/u8/uint8_t
>s/u32/uint32_t
>s/u64/uint64_t

Yes.

>Also, I see that xen/arch/x86/srat.c coding style is not adhere to xen
>coding style.
>Shall I clean up before I move the code?

If you want to take the time - sure. All I'd like to ask for is that at least the
file(s) you move the code _to_ end up with consistent style.

Jan
diff mbox

Patch

diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
index 6f4d438..bc787e0 100644
--- a/xen/arch/x86/numa.c
+++ b/xen/arch/x86/numa.c
@@ -25,27 +25,12 @@  custom_param("numa", numa_setup);
 #define Dprintk(x...)
 #endif
 
-/* from proto.h */
-#define round_up(x,y) ((((x)+(y))-1) & (~((y)-1)))
-
-struct node_data node_data[MAX_NUMNODES];
-
-/* Mapping from pdx to node id */
-int memnode_shift;
-static typeof(*memnodemap) _memnodemap[64];
-unsigned long memnodemapsize;
-u8 *memnodemap;
-
-nodeid_t cpu_to_node[NR_CPUS] __read_mostly = {
-    [0 ... NR_CPUS-1] = NUMA_NO_NODE
-};
 /*
  * Keep BIOS's CPU2node information, should not be used for memory allocaion
  */
 nodeid_t apicid_to_node[MAX_LOCAL_APIC] = {
     [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
-cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
 
 nodemask_t __read_mostly node_online_map = { { [0] = 1UL } };
 
@@ -57,134 +42,6 @@  int srat_disabled(void)
     return numa_off || acpi_numa < 0;
 }
 
-/*
- * Given a shift value, try to populate memnodemap[]
- * Returns :
- * 1 if OK
- * 0 if memnodmap[] too small (of shift too small)
- * -1 if node overlap or lost ram (shift too big)
- */
-static int __init populate_memnodemap(const struct node *nodes,
-                                      int numnodes, int shift, nodeid_t *nodeids)
-{
-    unsigned long spdx, epdx;
-    int i, res = -1;
-
-    memset(memnodemap, NUMA_NO_NODE, memnodemapsize * sizeof(*memnodemap));
-    for ( i = 0; i < numnodes; i++ )
-    {
-        spdx = paddr_to_pdx(nodes[i].start);
-        epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
-        if ( spdx >= epdx )
-            continue;
-        if ( (epdx >> shift) >= memnodemapsize )
-            return 0;
-        do {
-            if ( memnodemap[spdx >> shift] != NUMA_NO_NODE )
-                return -1;
-
-            if ( !nodeids )
-                memnodemap[spdx >> shift] = i;
-            else
-                memnodemap[spdx >> shift] = nodeids[i];
-
-            spdx += (1UL << shift);
-        } while ( spdx < epdx );
-        res = 1;
-    }
-
-    return res;
-}
-
-static int __init allocate_cachealigned_memnodemap(void)
-{
-    unsigned long size = PFN_UP(memnodemapsize * sizeof(*memnodemap));
-    unsigned long mfn = alloc_boot_pages(size, 1);
-
-    if ( !mfn )
-    {
-        printk(KERN_ERR
-               "NUMA: Unable to allocate Memory to Node hash map\n");
-        memnodemapsize = 0;
-        return -1;
-    }
-
-    memnodemap = mfn_to_virt(mfn);
-    mfn <<= PAGE_SHIFT;
-    size <<= PAGE_SHIFT;
-    printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
-           mfn, mfn + size);
-    memnodemapsize = size / sizeof(*memnodemap);
-
-    return 0;
-}
-
-/*
- * The LSB of all start and end addresses in the node map is the value of the
- * maximum possible shift.
- */
-static int __init extract_lsb_from_nodes(const struct node *nodes,
-                                         int numnodes)
-{
-    int i, nodes_used = 0;
-    unsigned long spdx, epdx;
-    unsigned long bitfield = 0, memtop = 0;
-
-    for ( i = 0; i < numnodes; i++ )
-    {
-        spdx = paddr_to_pdx(nodes[i].start);
-        epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
-        if ( spdx >= epdx )
-            continue;
-        bitfield |= spdx;
-        nodes_used++;
-        if ( epdx > memtop )
-            memtop = epdx;
-    }
-    if ( nodes_used <= 1 )
-        i = BITS_PER_LONG - 1;
-    else
-        i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
-    memnodemapsize = (memtop >> i) + 1;
-    return i;
-}
-
-int __init compute_hash_shift(struct node *nodes, int numnodes,
-                              nodeid_t *nodeids)
-{
-    int shift;
-
-    shift = extract_lsb_from_nodes(nodes, numnodes);
-    if ( memnodemapsize <= ARRAY_SIZE(_memnodemap) )
-        memnodemap = _memnodemap;
-    else if ( allocate_cachealigned_memnodemap() )
-        return -1;
-    printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift);
-
-    if ( populate_memnodemap(nodes, numnodes, shift, nodeids) != 1 )
-    {
-        printk(KERN_INFO "Your memory is not aligned you need to "
-               "rebuild your hypervisor with a bigger NODEMAPSIZE "
-               "shift=%d\n", shift);
-        return -1;
-    }
-
-    return shift;
-}
-/* initialize NODE_DATA given nodeid and start/end */
-void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)
-{ 
-    unsigned long start_pfn, end_pfn;
-
-    start_pfn = start >> PAGE_SHIFT;
-    end_pfn = end >> PAGE_SHIFT;
-
-    NODE_DATA(nodeid)->node_start_pfn = start_pfn;
-    NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
-
-    node_set_online(nodeid);
-} 
-
 void __init numa_init_array(void)
 {
     int rr, i;
@@ -288,16 +145,6 @@  void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
                     (u64)end_pfn << PAGE_SHIFT);
 }
 
-void numa_add_cpu(int cpu)
-{
-    cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
-} 
-
-void numa_set_node(int cpu, nodeid_t node)
-{
-    cpu_to_node[cpu] = node;
-}
-
 /* [numa=off] */
 static __init int numa_setup(char *opt) 
 { 
@@ -373,149 +220,3 @@  unsigned int __init arch_get_dma_bitsize(void)
                  flsl(node_start_pfn(node) + node_spanned_pages(node) / 4 - 1)
                  + PAGE_SHIFT, 32);
 }
-
-static void dump_numa(unsigned char key)
-{
-    s_time_t now = NOW();
-    unsigned int i, j, n;
-    int err;
-    struct domain *d;
-    struct page_info *page;
-    unsigned int page_num_node[MAX_NUMNODES];
-    const struct vnuma_info *vnuma;
-
-    printk("'%c' pressed -> dumping numa info (now-0x%X:%08X)\n", key,
-           (u32)(now>>32), (u32)now);
-
-    for_each_online_node ( i )
-    {
-        paddr_t pa = pfn_to_paddr(node_start_pfn(i) + 1);
-
-        printk("NODE%u start->%lu size->%lu free->%lu\n",
-               i, node_start_pfn(i), node_spanned_pages(i),
-               avail_node_heap_pages(i));
-        /* sanity check phys_to_nid() */
-        if ( phys_to_nid(pa) != i )
-            printk("phys_to_nid(%"PRIpaddr") -> %d should be %u\n",
-                   pa, phys_to_nid(pa), i);
-    }
-
-    j = cpumask_first(&cpu_online_map);
-    n = 0;
-    for_each_online_cpu ( i )
-    {
-        if ( i != j + n || cpu_to_node[j] != cpu_to_node[i] )
-        {
-            if ( n > 1 )
-                printk("CPU%u...%u -> NODE%d\n", j, j + n - 1, cpu_to_node[j]);
-            else
-                printk("CPU%u -> NODE%d\n", j, cpu_to_node[j]);
-            j = i;
-            n = 1;
-        }
-        else
-            ++n;
-    }
-    if ( n > 1 )
-        printk("CPU%u...%u -> NODE%d\n", j, j + n - 1, cpu_to_node[j]);
-    else
-        printk("CPU%u -> NODE%d\n", j, cpu_to_node[j]);
-
-    rcu_read_lock(&domlist_read_lock);
-
-    printk("Memory location of each domain:\n");
-    for_each_domain ( d )
-    {
-        process_pending_softirqs();
-
-        printk("Domain %u (total: %u):\n", d->domain_id, d->tot_pages);
-
-        for_each_online_node ( i )
-            page_num_node[i] = 0;
-
-        spin_lock(&d->page_alloc_lock);
-        page_list_for_each(page, &d->page_list)
-        {
-            i = phys_to_nid((paddr_t)page_to_mfn(page) << PAGE_SHIFT);
-            page_num_node[i]++;
-        }
-        spin_unlock(&d->page_alloc_lock);
-
-        for_each_online_node ( i )
-            printk("    Node %u: %u\n", i, page_num_node[i]);
-
-        if ( !read_trylock(&d->vnuma_rwlock) )
-            continue;
-
-        if ( !d->vnuma )
-        {
-            read_unlock(&d->vnuma_rwlock);
-            continue;
-        }
-
-        vnuma = d->vnuma;
-        printk("     %u vnodes, %u vcpus, guest physical layout:\n",
-               vnuma->nr_vnodes, d->max_vcpus);
-        for ( i = 0; i < vnuma->nr_vnodes; i++ )
-        {
-            unsigned int start_cpu = ~0U;
-
-            err = snprintf(keyhandler_scratch, 12, "%3u",
-                    vnuma->vnode_to_pnode[i]);
-            if ( err < 0 || vnuma->vnode_to_pnode[i] == NUMA_NO_NODE )
-                strlcpy(keyhandler_scratch, "???", sizeof(keyhandler_scratch));
-
-            printk("       %3u: pnode %s,", i, keyhandler_scratch);
-
-            printk(" vcpus ");
-
-            for ( j = 0; j < d->max_vcpus; j++ )
-            {
-                if ( !(j & 0x3f) )
-                    process_pending_softirqs();
-
-                if ( vnuma->vcpu_to_vnode[j] == i )
-                {
-                    if ( start_cpu == ~0U )
-                    {
-                        printk("%d", j);
-                        start_cpu = j;
-                    }
-                }
-                else if ( start_cpu != ~0U )
-                {
-                    if ( j - 1 != start_cpu )
-                        printk("-%d ", j - 1);
-                    else
-                        printk(" ");
-                    start_cpu = ~0U;
-                }
-            }
-
-            if ( start_cpu != ~0U  && start_cpu != j - 1 )
-                printk("-%d", j - 1);
-
-            printk("\n");
-
-            for ( j = 0; j < vnuma->nr_vmemranges; j++ )
-            {
-                if ( vnuma->vmemrange[j].nid == i )
-                    printk("           %016"PRIx64" - %016"PRIx64"\n",
-                           vnuma->vmemrange[j].start,
-                           vnuma->vmemrange[j].end);
-            }
-        }
-
-        read_unlock(&d->vnuma_rwlock);
-    }
-
-    rcu_read_unlock(&domlist_read_lock);
-}
-
-static __init int register_numa_trigger(void)
-{
-    register_keyhandler('u', dump_numa, "dump NUMA info", 1);
-    return 0;
-}
-__initcall(register_numa_trigger);
-
diff --git a/xen/common/Makefile b/xen/common/Makefile
index 0fed30b..c1bd2ff 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -63,6 +63,7 @@  obj-y += wait.o
 obj-bin-y += warning.init.o
 obj-$(CONFIG_XENOPROF) += xenoprof.o
 obj-y += xmalloc_tlsf.o
+obj-y += numa.o
 
 obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma unlzo unlz4 earlycpio,$(n).init.o)
 
diff --git a/xen/common/numa.c b/xen/common/numa.c
new file mode 100644
index 0000000..59dcb63
--- /dev/null
+++ b/xen/common/numa.c
@@ -0,0 +1,342 @@ 
+/*
+ * Common NUMA handling functions for x86 and arm.
+ * Original code extracted from arch/x86/numa.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include <xen/mm.h>
+#include <xen/string.h>
+#include <xen/init.h>
+#include <xen/ctype.h>
+#include <xen/nodemask.h>
+#include <xen/numa.h>
+#include <xen/keyhandler.h>
+#include <xen/time.h>
+#include <xen/smp.h>
+#include <xen/pfn.h>
+#include <xen/sched.h>
+#include <xen/errno.h>
+#include <xen/softirq.h>
+#include <asm/setup.h>
+
+struct node_data node_data[MAX_NUMNODES];
+
+/* Mapping from pdx to node id */
+int memnode_shift;
+unsigned long memnodemapsize;
+u8 *memnodemap;
+typeof(*memnodemap) _memnodemap[64];
+
+nodeid_t cpu_to_node[NR_CPUS] __read_mostly = {
+    [0 ... NR_CPUS-1] = NUMA_NO_NODE
+};
+
+cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
+
+/*
+ * Given a shift value, try to populate memnodemap[]
+ * Returns :
+ * 1 if OK
+ * 0 if memnodmap[] too small (of shift too small)
+ * -1 if node overlap or lost ram (shift too big)
+ */
+static int __init populate_memnodemap(const struct node *nodes,
+                                      int numnodes, int shift,
+                                      nodeid_t *nodeids)
+{
+    unsigned long spdx, epdx;
+    int i, res = -1;
+
+    memset(memnodemap, NUMA_NO_NODE, memnodemapsize * sizeof(*memnodemap));
+    for ( i = 0; i < numnodes; i++ )
+    {
+        spdx = paddr_to_pdx(nodes[i].start);
+        epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
+        if ( spdx >= epdx )
+            continue;
+        if ( (epdx >> shift) >= memnodemapsize )
+            return 0;
+        do {
+            if ( memnodemap[spdx >> shift] != NUMA_NO_NODE )
+                return -1;
+
+            if ( !nodeids )
+                memnodemap[spdx >> shift] = i;
+            else
+                memnodemap[spdx >> shift] = nodeids[i];
+
+            spdx += (1UL << shift);
+        } while ( spdx < epdx );
+        res = 1;
+    }
+
+    return res;
+}
+
+static int __init allocate_cachealigned_memnodemap(void)
+{
+    unsigned long size = PFN_UP(memnodemapsize * sizeof(*memnodemap));
+    unsigned long mfn;
+
+    mfn = alloc_boot_pages(size, 1);
+    if ( !mfn )
+    {
+        printk(KERN_ERR
+               "NUMA: Unable to allocate Memory to Node hash map\n");
+        memnodemapsize = 0;
+        return -1;
+    }
+
+    memnodemap = mfn_to_virt(mfn);
+    mfn <<= PAGE_SHIFT;
+    size <<= PAGE_SHIFT;
+    printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
+           mfn, mfn + size);
+    memnodemapsize = size / sizeof(*memnodemap);
+
+    return 0;
+}
+
+/*
+ * The LSB of all start and end addresses in the node map is the value of the
+ * maximum possible shift.
+ */
+static int __init extract_lsb_from_nodes(const struct node *nodes,
+                                         int numnodes)
+{
+    int i, nodes_used = 0;
+    unsigned long spdx, epdx;
+    unsigned long bitfield = 0, memtop = 0;
+
+    for ( i = 0; i < numnodes; i++ )
+    {
+        spdx = paddr_to_pdx(nodes[i].start);
+        epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
+        if ( spdx >= epdx )
+            continue;
+        bitfield |= spdx;
+        nodes_used++;
+        if ( epdx > memtop )
+            memtop = epdx;
+    }
+    if ( nodes_used <= 1 )
+        i = BITS_PER_LONG - 1;
+    else
+        i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
+
+    memnodemapsize = (memtop >> i) + 1;
+
+    return i;
+}
+
+int __init compute_hash_shift(struct node *nodes, int numnodes,
+                              nodeid_t *nodeids)
+{
+    int shift;
+
+    shift = extract_lsb_from_nodes(nodes, numnodes);
+    if ( memnodemapsize <= ARRAY_SIZE(_memnodemap) )
+        memnodemap = _memnodemap;
+    else if ( allocate_cachealigned_memnodemap() )
+        return -1;
+    printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift);
+
+    if ( populate_memnodemap(nodes, numnodes, shift, nodeids) != 1 )
+    {
+        printk(KERN_INFO "Your memory is not aligned you need to "
+               "rebuild your hypervisor with a bigger NODEMAPSIZE "
+               "shift=%d\n", shift);
+        return -1;
+    }
+
+    return shift;
+}
+
+/* initialize NODE_DATA given nodeid and start/end */
+void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)
+{
+    unsigned long start_pfn, end_pfn;
+
+    start_pfn = start >> PAGE_SHIFT;
+    end_pfn = end >> PAGE_SHIFT;
+
+    NODE_DATA(nodeid)->node_id = nodeid;
+    NODE_DATA(nodeid)->node_start_pfn = start_pfn;
+    NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
+
+    node_set_online(nodeid);
+}
+
+void numa_add_cpu(int cpu)
+{
+    cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
+}
+
+void numa_set_node(int cpu, nodeid_t node)
+{
+    cpu_to_node[cpu] = node;
+}
+
+EXPORT_SYMBOL(node_to_cpumask);
+EXPORT_SYMBOL(memnode_shift);
+EXPORT_SYMBOL(memnodemap);
+EXPORT_SYMBOL(node_data);
+
+static void dump_numa(unsigned char key)
+{
+    s_time_t now = NOW();
+    unsigned int i, j, n;
+    int err;
+    struct domain *d;
+    struct page_info *page;
+    unsigned int page_num_node[MAX_NUMNODES] = {0};
+    const struct vnuma_info *vnuma;
+
+    printk("'%c' pressed -> dumping numa info (now-0x%X:%08X)\n", key,
+           (u32)(now>>32), (u32)now);
+
+    for_each_online_node ( i )
+    {
+        paddr_t pa = (paddr_t)(NODE_DATA(i)->node_start_pfn + 1)<< PAGE_SHIFT;
+        printk("idx%d -> NODE%d start->%lu size->%lu free->%lu\n",
+               i, NODE_DATA(i)->node_id,
+               NODE_DATA(i)->node_start_pfn,
+               NODE_DATA(i)->node_spanned_pages,
+               avail_node_heap_pages(i));
+        /* sanity check phys_to_nid() */
+        printk("phys_to_nid(%"PRIpaddr") -> %d should be %d\n", pa,
+               phys_to_nid(pa),
+               NODE_DATA(i)->node_id);
+    }
+
+    j = cpumask_first(&cpu_online_map);
+    n = 0;
+    for_each_online_cpu ( i )
+    {
+        if ( i != j + n || cpu_to_node[j] != cpu_to_node[i] )
+        {
+            if ( n > 1 )
+                printk("CPU%u...%u -> NODE%d\n", j, j + n - 1, cpu_to_node[j]);
+            else
+                printk("CPU%u -> NODE%d\n", j, cpu_to_node[j]);
+            j = i;
+            n = 1;
+        }
+        else
+            ++n;
+    }
+    if ( n > 1 )
+        printk("CPU%u...%u -> NODE%d\n", j, j + n - 1, cpu_to_node[j]);
+    else
+        printk("CPU%u -> NODE%d\n", j, cpu_to_node[j]);
+
+    rcu_read_lock(&domlist_read_lock);
+
+    printk("Memory location of each domain:\n");
+    for_each_domain ( d )
+    {
+        process_pending_softirqs();
+        printk("Domain %u (total: %u):\n", d->domain_id, d->tot_pages);
+
+        for_each_online_node ( i )
+            page_num_node[i] = 0;
+
+        spin_lock(&d->page_alloc_lock);
+        page_list_for_each(page, &d->page_list)
+        {
+            i = phys_to_nid((paddr_t)page_to_mfn(page) << PAGE_SHIFT);
+            page_num_node[i]++;
+        }
+        spin_unlock(&d->page_alloc_lock);
+
+        for_each_online_node ( i )
+            printk("    Node %u: %u\n", i, page_num_node[i]);
+
+        if ( !read_trylock(&d->vnuma_rwlock) )
+            continue;
+
+        if ( !d->vnuma )
+        {
+            read_unlock(&d->vnuma_rwlock);
+            continue;
+        }
+
+        vnuma = d->vnuma;
+        printk("     %u vnodes, %u vcpus, guest physical layout:\n",
+               vnuma->nr_vnodes, d->max_vcpus);
+        for ( i = 0; i < vnuma->nr_vnodes; i++ )
+        {
+            unsigned int start_cpu = ~0U;
+
+            err = snprintf(keyhandler_scratch, 12, "%3u",
+                    vnuma->vnode_to_pnode[i]);
+            if ( err < 0 || vnuma->vnode_to_pnode[i] == NUMA_NO_NODE )
+                strlcpy(keyhandler_scratch, "???", sizeof(keyhandler_scratch));
+
+            printk("       %3u: pnode %s,", i, keyhandler_scratch);
+
+            printk(" vcpus ");
+
+            for ( j = 0; j < d->max_vcpus; j++ )
+            {
+                if ( !(j & 0x3f) )
+                    process_pending_softirqs();
+
+                if ( vnuma->vcpu_to_vnode[j] == i )
+                {
+                    if ( start_cpu == ~0U )
+                    {
+                        printk("%d", j);
+                        start_cpu = j;
+                    }
+                }
+                else if ( start_cpu != ~0U )
+                {
+                    if ( j - 1 != start_cpu )
+                        printk("-%d ", j - 1);
+                    else
+                        printk(" ");
+                    start_cpu = ~0U;
+                }
+            }
+
+            if ( start_cpu != ~0U  && start_cpu != j - 1 )
+                printk("-%d", j - 1);
+
+            printk("\n");
+
+            for ( j = 0; j < vnuma->nr_vmemranges; j++ )
+            {
+                if ( vnuma->vmemrange[j].nid == i )
+                    printk("           %016"PRIx64" - %016"PRIx64"\n",
+                           vnuma->vmemrange[j].start,
+                           vnuma->vmemrange[j].end);
+            }
+        }
+
+        read_unlock(&d->vnuma_rwlock);
+    }
+
+    rcu_read_unlock(&domlist_read_lock);
+}
+
+static __init int register_numa_trigger(void)
+{
+    register_keyhandler('u', dump_numa, "dump NUMA info", 1);
+    return 0;
+}
+__initcall(register_numa_trigger);
+
diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
index 2479238..61bcd8e 100644
--- a/xen/include/asm-x86/numa.h
+++ b/xen/include/asm-x86/numa.h
@@ -17,64 +17,17 @@  extern cpumask_t     node_to_cpumask[];
 #define node_to_first_cpu(node)  (__ffs(node_to_cpumask[node]))
 #define node_to_cpumask(node)    (node_to_cpumask[node])
 
-struct node { 
-	u64 start,end; 
-};
-
-extern int compute_hash_shift(struct node *nodes, int numnodes,
-			      nodeid_t *nodeids);
 extern nodeid_t pxm_to_node(unsigned int pxm);
 
 #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
-#define VIRTUAL_BUG_ON(x) 
 
-extern void numa_add_cpu(int cpu);
 extern void numa_init_array(void);
 extern bool_t numa_off;
 
-
 extern int srat_disabled(void);
-extern void numa_set_node(int cpu, nodeid_t node);
-extern nodeid_t setup_node(unsigned int pxm);
 extern void srat_detect_node(int cpu);
 
-extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
 extern nodeid_t apicid_to_node[];
-extern void init_cpu_to_node(void);
-
-static inline void clear_node_cpumask(int cpu)
-{
-	cpumask_clear_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
-}
-
-/* Simple perfect hash to map pdx to node numbers */
-extern int memnode_shift; 
-extern unsigned long memnodemapsize;
-extern u8 *memnodemap;
-
-struct node_data {
-    unsigned long node_start_pfn;
-    unsigned long node_spanned_pages;
-};
-
-extern struct node_data node_data[];
-
-static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
-{ 
-	nodeid_t nid;
-	VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >= memnodemapsize);
-	nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift]; 
-	VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); 
-	return nid; 
-} 
-
-#define NODE_DATA(nid)		(&(node_data[nid]))
-
-#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
-#define node_spanned_pages(nid)	(NODE_DATA(nid)->node_spanned_pages)
-#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
-				 NODE_DATA(nid)->node_spanned_pages)
-
 extern int valid_numa_range(u64 start, u64 end, nodeid_t node);
 
 void srat_parse_regions(u64 addr);
diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
index 7aef1a8..dd33c92 100644
--- a/xen/include/xen/numa.h
+++ b/xen/include/xen/numa.h
@@ -18,4 +18,58 @@ 
   (((d)->vcpu != NULL && (d)->vcpu[0] != NULL) \
    ? vcpu_to_node((d)->vcpu[0]) : NUMA_NO_NODE)
 
+struct node {
+	u64 start,end;
+};
+
+struct node_data {
+    unsigned long node_start_pfn;
+    unsigned long node_spanned_pages;
+    nodeid_t      node_id;
+};
+
+#define NODE_DATA(nid)		(&(node_data[nid]))
+#define VIRTUAL_BUG_ON(x)
+
+#ifdef CONFIG_NUMA
+extern void init_cpu_to_node(void);
+
+static inline void clear_node_cpumask(int cpu)
+{
+	cpumask_clear_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
+}
+
+/* Simple perfect hash to map pdx to node numbers */
+extern int memnode_shift;
+extern unsigned long memnodemapsize;
+extern u8 *memnodemap;
+extern typeof(*memnodemap) _memnodemap[];
+
+extern struct node_data node_data[];
+
+static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr)
+{
+	nodeid_t nid;
+	VIRTUAL_BUG_ON((paddr_to_pdx(addr) >> memnode_shift) >= memnodemapsize);
+	nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift];
+	VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
+	return nid;
+}
+
+#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
+#define node_spanned_pages(nid)	(NODE_DATA(nid)->node_spanned_pages)
+#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
+				 NODE_DATA(nid)->node_spanned_pages)
+
+#else
+#define init_cpu_to_node() do {} while (0)
+#define clear_node_cpumask(cpu) do {} while (0)
+#endif /* CONFIG_NUMA */
+
+extern void numa_add_cpu(int cpu);
+extern nodeid_t setup_node(unsigned int pxm);
+extern void numa_set_node(int cpu, nodeid_t node);
+extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
+extern int compute_hash_shift(struct node *nodes, int numnodes,
+			      nodeid_t *nodeids);
 #endif /* _XEN_NUMA_H */