diff mbox

[RFC,v3,08/24] NUMA: x86: Move numa code and make it generic

Message ID 1500378106-2620-9-git-send-email-vijay.kilari@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Vijay Kilari July 18, 2017, 11:41 a.m. UTC
From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>

Move code from xen/arch/x86/numa.c to xen/common/numa.c
so that it can be used by other archs.

The following changes are done:
- A few generic static functions in x86/numa.c are made
  non-static in common/numa.c
- The generic contents of header file asm-x86/numa.h
  are moved to xen/numa.h.
- The header file includes are reordered and externs are
  dropped.
- Moved acpi_numa from asm-x86/acpi.h to xen/acpi.h
- Coding style of code moved to common/numa.c is changed
  to Xen style.
- numa_add_cpu() and numa_set_node() are moved to the header
  file, and inline stubs are added for the case where CONFIG_NUMA
  is not enabled, because these functions are called from
  generic code without any config check.

Also, node_online_map is defined in x86/numa.c for x86
and in arm/smpboot.c for ARM. For x86 it is moved to x86/smpboot.c.
If it were moved to common code, compilation would fail because
common/numa.c is compiled only when NUMA is enabled.

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
---
v3: - Moved acpi_numa variable
    - acpi_setup_node declaration move is reverted.
    - Dropped extern in header file
    - Added inline declaration for numa_add_cpu() and
      numa_set_node() function based on CONFIG_NUMA
    - Moved numa_initmem_init() to common code
    - Moved common code from asm-x86/numa.h to xen/numa.h
    - Moved node_online_map from numa.c to smpboot.c
---
 xen/arch/x86/numa.c         | 459 +----------------------------------------
 xen/arch/x86/smpboot.c      |   1 +
 xen/common/Makefile         |   1 +
 xen/common/numa.c           | 487 ++++++++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/acpi.h  |   1 -
 xen/include/asm-x86/numa.h  |  56 -----
 xen/include/asm-x86/setup.h |   1 -
 xen/include/xen/numa.h      |  64 ++++++
 8 files changed, 561 insertions(+), 509 deletions(-)

Comments

Wei Liu July 18, 2017, 3:29 p.m. UTC | #1
On Tue, Jul 18, 2017 at 05:11:30PM +0530, vijay.kilari@gmail.com wrote:
[...]
> diff --git a/xen/common/numa.c b/xen/common/numa.c
> new file mode 100644
> index 0000000..0381f1b
> --- /dev/null
> +++ b/xen/common/numa.c
> @@ -0,0 +1,487 @@
> +/*
> + * Common NUMA handling functions for x86 and arm.
> + * Original code extracted from arch/x86/numa.c
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms and conditions of the GNU General Public
> + * License, version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <xen/init.h>
> +#include <xen/ctype.h>
> +#include <xen/sched.h>
> +#include <xen/nodemask.h>
> +#include <xen/numa.h>
> +#include <xen/keyhandler.h>
> +#include <xen/time.h>
> +#include <xen/smp.h>
> +#include <xen/pfn.h>
> +#include <xen/mm.h>
> +#include <xen/softirq.h>
> +#include <xen/string.h>
> +#include <asm/acpi.h>
> +

Since you're moving code anyway, please sort the headers alphabetically.

> +static int numa_setup(char *s);
> +custom_param("numa", numa_setup);
> +
> +struct node_data node_data[MAX_NUMNODES];
> +
> +/* Mapping from pdx to node id */

Is this comment applicable to ARM? Does ARM have PDX?

> +unsigned int memnode_shift;
> +
> +/*
> + * In case of numa init failure or numa off,
> + * memnode_shift is initialized to BITS_PER_LONG - 1. Hence allocate
> + * memnodemap[] of BITS_PER_LONG.
> + */
> +static typeof(*memnodemap) _memnodemap[BITS_PER_LONG];
> +unsigned long memnodemapsize;
> +uint8_t *memnodemap;
> +
> +nodeid_t __read_mostly cpu_to_node[NR_CPUS] = {
> +    [0 ... NR_CPUS-1] = NUMA_NO_NODE
> +};
> +
> +cpumask_t __read_mostly node_to_cpumask[MAX_NUMNODES];
> +
> +bool numa_off;
> +s8 acpi_numa = 0;
> +
> +int srat_disabled(void)

bool here.

Should probably be done in a previous patch.

> +
> +void __init numa_init_array(void)
> +{
> +    int rr, i;
> +
> +    /* There are unfortunately some poorly designed mainboards around
> +       that only connect memory to a single CPU. This breaks the 1:1 cpu->node
> +       mapping. To avoid this fill in the mapping for all possible
> +       CPUs, as the number of CPUs is not known yet.
> +       We round robin the existing nodes. */

Please fix the coding style issue here.
Julien Grall July 18, 2017, 6:16 p.m. UTC | #2
Hi,

On 18/07/17 16:29, Wei Liu wrote:
> On Tue, Jul 18, 2017 at 05:11:30PM +0530, vijay.kilari@gmail.com wrote:
> [...]
>> diff --git a/xen/common/numa.c b/xen/common/numa.c
>> new file mode 100644
>> index 0000000..0381f1b
>> --- /dev/null
>> +++ b/xen/common/numa.c
>> @@ -0,0 +1,487 @@
>> +/*
>> + * Common NUMA handling functions for x86 and arm.
>> + * Original code extracted from arch/x86/numa.c
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms and conditions of the GNU General Public
>> + * License, version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#include <xen/init.h>
>> +#include <xen/ctype.h>
>> +#include <xen/sched.h>
>> +#include <xen/nodemask.h>
>> +#include <xen/numa.h>
>> +#include <xen/keyhandler.h>
>> +#include <xen/time.h>
>> +#include <xen/smp.h>
>> +#include <xen/pfn.h>
>> +#include <xen/mm.h>
>> +#include <xen/softirq.h>
>> +#include <xen/string.h>
>> +#include <asm/acpi.h>
>> +
>
> Since you're moving code anyway, please sort the headers alphabetically.
>
>> +static int numa_setup(char *s);
>> +custom_param("numa", numa_setup);
>> +
>> +struct node_data node_data[MAX_NUMNODES];
>> +
>> +/* Mapping from pdx to node id */
>
> Is this comment applicable to ARM? Does arm has PDX?

Yes, ARM has PDX. For a new architecture, we expect the code to provide
dummy helpers if it wants to support NUMA.

>
>> +unsigned int memnode_shift;
>> +
>> +/*
>> + * In case of numa init failure or numa off,
>> + * memnode_shift is initialized to BITS_PER_LONG - 1. Hence allocate
>> + * memnodemap[] of BITS_PER_LONG.
>> + */
>> +static typeof(*memnodemap) _memnodemap[BITS_PER_LONG];
>> +unsigned long memnodemapsize;
>> +uint8_t *memnodemap;
>> +
>> +nodeid_t __read_mostly cpu_to_node[NR_CPUS] = {
>> +    [0 ... NR_CPUS-1] = NUMA_NO_NODE
>> +};
>> +
>> +cpumask_t __read_mostly node_to_cpumask[MAX_NUMNODES];
>> +
>> +bool numa_off;
>> +s8 acpi_numa = 0;
>> +
>> +int srat_disabled(void)
>
> bool here.
>
> Should probably be done in a previous patch.

Actually, the previous version had srat_disabled return bool. I am aware
that Jan and I requested to keep acpi_numa as int, but I didn't find any
request to move srat_disabled back to int. So can you explain why?

>
>> +
>> +void __init numa_init_array(void)
>> +{
>> +    int rr, i;
>> +
>> +    /* There are unfortunately some poorly designed mainboards around
>> +       that only connect memory to a single CPU. This breaks the 1:1 cpu->node
>> +       mapping. To avoid this fill in the mapping for all possible
>> +       CPUs, as the number of CPUs is not known yet.
>> +       We round robin the existing nodes. */
>
> Please fix the coding style issue here.
>

Cheers,
Vijay Kilari July 19, 2017, 6:47 a.m. UTC | #3
On Tue, Jul 18, 2017 at 11:46 PM, Julien Grall <julien.grall@arm.com> wrote:
> Hi,
>
>
> On 18/07/17 16:29, Wei Liu wrote:
>>
>> On Tue, Jul 18, 2017 at 05:11:30PM +0530, vijay.kilari@gmail.com wrote:
>> [...]
>>>
>>> diff --git a/xen/common/numa.c b/xen/common/numa.c
>>> new file mode 100644
>>> index 0000000..0381f1b
>>> --- /dev/null
>>> +++ b/xen/common/numa.c
>>> @@ -0,0 +1,487 @@
>>> +/*
>>> + * Common NUMA handling functions for x86 and arm.
>>> + * Original code extracted from arch/x86/numa.c
>>> + *
>>> + * This program is free software; you can redistribute it and/or
>>> + * modify it under the terms and conditions of the GNU General Public
>>> + * License, version 2, as published by the Free Software Foundation.
>>> + *
>>> + * This program is distributed in the hope that it will be useful,
>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>> + * GNU General Public License for more details.
>>> + *
>>> + * You should have received a copy of the GNU General Public License
>>> + * along with this program; If not, see <http://www.gnu.org/licenses/>.
>>> + */
>>> +
>>> +#include <xen/init.h>
>>> +#include <xen/ctype.h>
>>> +#include <xen/sched.h>
>>> +#include <xen/nodemask.h>
>>> +#include <xen/numa.h>
>>> +#include <xen/keyhandler.h>
>>> +#include <xen/time.h>
>>> +#include <xen/smp.h>
>>> +#include <xen/pfn.h>
>>> +#include <xen/mm.h>
>>> +#include <xen/softirq.h>
>>> +#include <xen/string.h>
>>> +#include <asm/acpi.h>
>>> +
>>
>>
>> Since you're moving code anyway, please sort the headers alphabetically.
>>
>>> +static int numa_setup(char *s);
>>> +custom_param("numa", numa_setup);
>>> +
>>> +struct node_data node_data[MAX_NUMNODES];
>>> +
>>> +/* Mapping from pdx to node id */
>>
>>
>> Is this comment applicable to ARM? Does arm has PDX?
>
>
> Yes ARM has PDX. For new architecture we expect the code to provide dummy
> helpers if they want to support NUMA.
>
>>
>>> +unsigned int memnode_shift;
>>> +
>>> +/*
>>> + * In case of numa init failure or numa off,
>>> + * memnode_shift is initialized to BITS_PER_LONG - 1. Hence allocate
>>> + * memnodemap[] of BITS_PER_LONG.
>>> + */
>>> +static typeof(*memnodemap) _memnodemap[BITS_PER_LONG];
>>> +unsigned long memnodemapsize;
>>> +uint8_t *memnodemap;
>>> +
>>> +nodeid_t __read_mostly cpu_to_node[NR_CPUS] = {
>>> +    [0 ... NR_CPUS-1] = NUMA_NO_NODE
>>> +};
>>> +
>>> +cpumask_t __read_mostly node_to_cpumask[MAX_NUMNODES];
>>> +
>>> +bool numa_off;
>>> +s8 acpi_numa = 0;
>>> +
>>> +int srat_disabled(void)
>>
>>
>> bool here.
>>
>> Should probably be done in a previous patch.
>
>
> Actually, the previous version had srat_disabled return bool. I am aware
> that Jan and I requested to keep acpi_numa as int, I didn't find any request
> of keep moving srat_disabled to int. So can you explain why??

My bad. I dropped patch #4 from v2, but this change was part of patch
#4 and I missed carrying it over.

>
>>
>>> +
>>> +void __init numa_init_array(void)
>>> +{
>>> +    int rr, i;
>>> +
>>> +    /* There are unfortunately some poorly designed mainboards around
>>> +       that only connect memory to a single CPU. This breaks the 1:1
>>> cpu->node
>>> +       mapping. To avoid this fill in the mapping for all possible
>>> +       CPUs, as the number of CPUs is not known yet.
>>> +       We round robin the existing nodes. */
>>
>>
>> Please fix the coding style issue here.
>>
>
> Cheers,
>
> --
> Julien Grall
Julien Grall July 19, 2017, 5:41 p.m. UTC | #4
Hi Vijay,

On 18/07/17 12:41, vijay.kilari@gmail.com wrote:
> From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>
> Move code from xen/arch/x86/numa.c to xen/common/numa.c
> so that it can be used by other archs.
>
> The following changes are done:
> - Few generic static functions in x86/numa.c is made
>   non-static common/numa.c
> - The generic contents of header file asm-x86/numa.h
>   are moved to xen/numa.h.
> - The header file includes are reordered and externs are
>   dropped.
> - Moved acpi_numa from asm-x86/acpi.h to xen/acpi.h
> - Coding style of code moved to commom/numa.c is changed
>   to Xen style.
> - numa_add_cpu() and numa_set_node() and moved to header
>   file and added inline function in case of CONFIG_NUMA
>   is not enabled because these functions are called from
>   generic code with out any config check.
>
> Also the node_online_map is defined in x86/numa.c for x86
> and arm/smpboot.c for ARM. For x86 it is moved to x86/smpboot.c
> If moved to common code the compilation fails because
> common/numa.c is compiled only when NUMA is enabled.

I would much prefer if this patch did one thing: moving code. The rest
should be split out to help review and allow us to easily verify you
only moved code...

> +#define NODE_DATA(nid)          (&(node_data[nid]))
> +
> +#define node_start_pfn(nid)     NODE_DATA(nid)->node_start_pfn
> +#define node_spanned_pages(nid) NODE_DATA(nid)->node_spanned_pages
> +#define node_end_pfn(nid)       NODE_DATA(nid)->node_start_pfn + \
> +                                 NODE_DATA(nid)->node_spanned_pages
> +
> +void numa_add_cpu(int cpu);
> +void numa_set_node(int cpu, nodeid_t node);
> +#else
> +static inline void numa_add_cpu(int cpu) { }
> +static inline void numa_set_node(int cpu, nodeid_t node) { }

I am not sure why you need to define a stub, at least for numa_set_node...
I can't see any use of it in non-NUMA code. I will comment about
numa_add_cpu later.

Cheers,
Vijay Kilari July 20, 2017, 8:55 a.m. UTC | #5
On Wed, Jul 19, 2017 at 11:11 PM, Julien Grall <julien.grall@arm.com> wrote:
> Hi Vijay,
>
> On 18/07/17 12:41, vijay.kilari@gmail.com wrote:
>>
>> From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>>
>> Move code from xen/arch/x86/numa.c to xen/common/numa.c
>> so that it can be used by other archs.
>>
>> The following changes are done:
>> - Few generic static functions in x86/numa.c is made
>>   non-static common/numa.c
>> - The generic contents of header file asm-x86/numa.h
>>   are moved to xen/numa.h.
>> - The header file includes are reordered and externs are
>>   dropped.
>> - Moved acpi_numa from asm-x86/acpi.h to xen/acpi.h
>> - Coding style of code moved to commom/numa.c is changed
>>   to Xen style.
>> - numa_add_cpu() and numa_set_node() and moved to header
>>   file and added inline function in case of CONFIG_NUMA
>>   is not enabled because these functions are called from
>>   generic code with out any config check.
>>
>> Also the node_online_map is defined in x86/numa.c for x86
>> and arm/smpboot.c for ARM. For x86 it is moved to x86/smpboot.c
>> If moved to common code the compilation fails because
>> common/numa.c is compiled only when NUMA is enabled.
>
>
> I would much prefer if this patch does one thing: Moving code. The rest
> should be split out to help review and allowing us to easily verify you only
> moved code...

Yes, this patch is doing only code movement, apart from adding inline
functions for numa_add_cpu() and numa_set_node().

>
>> +#define NODE_DATA(nid)          (&(node_data[nid]))
>> +
>> +#define node_start_pfn(nid)     NODE_DATA(nid)->node_start_pfn
>> +#define node_spanned_pages(nid) NODE_DATA(nid)->node_spanned_pages
>> +#define node_end_pfn(nid)       NODE_DATA(nid)->node_start_pfn + \
>> +                                 NODE_DATA(nid)->node_spanned_pages
>> +
>> +void numa_add_cpu(int cpu);
>> +void numa_set_node(int cpu, nodeid_t node);
>> +#else
>> +static inline void numa_add_cpu(int cpu) { }
>> +static inline void numa_set_node(int cpu, nodeid_t node) { }
>
>
> I am not sure why you need to define stub at least for numa_set_node... I
> can't see use in non-NUMA code. I will comment about the numa_add_cpu later.

x86 uses it from setup.c. Yes, if we assume that NUMA is always enabled for x86,
I can drop the numa_set_node() inline function.

>
> Cheers,
>
> --
> Julien Grall
Julien Grall July 20, 2017, 11:14 a.m. UTC | #6
Hi Vijay,

On 20/07/17 09:55, Vijay Kilari wrote:
> On Wed, Jul 19, 2017 at 11:11 PM, Julien Grall <julien.grall@arm.com> wrote:
>> Hi Vijay,
>>
>> On 18/07/17 12:41, vijay.kilari@gmail.com wrote:
>>>
>>> From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>>>
>>> Move code from xen/arch/x86/numa.c to xen/common/numa.c
>>> so that it can be used by other archs.
>>>
>>> The following changes are done:
>>> - Few generic static functions in x86/numa.c is made
>>>   non-static common/numa.c
>>> - The generic contents of header file asm-x86/numa.h
>>>   are moved to xen/numa.h.
>>> - The header file includes are reordered and externs are
>>>   dropped.
>>> - Moved acpi_numa from asm-x86/acpi.h to xen/acpi.h
>>> - Coding style of code moved to commom/numa.c is changed
>>>   to Xen style.
>>> - numa_add_cpu() and numa_set_node() and moved to header
>>>   file and added inline function in case of CONFIG_NUMA
>>>   is not enabled because these functions are called from
>>>   generic code with out any config check.
>>>
>>> Also the node_online_map is defined in x86/numa.c for x86
>>> and arm/smpboot.c for ARM. For x86 it is moved to x86/smpboot.c
>>> If moved to common code the compilation fails because
>>> common/numa.c is compiled only when NUMA is enabled.
>>
>>
>> I would much prefer if this patch does one thing: Moving code. The rest
>> should be split out to help review and allowing us to easily verify you only
>> moved code...
>
> Yes, this patch is doing only code movement. Apart from adding inline function
> for numa_add_cpu() and numa_set_node().

The "apart" should then be in a separate patch. I don't want to spend
hours trying to decipher a patch mixing code movement and adding code at
the same time.

>
>>
>>> +#define NODE_DATA(nid)          (&(node_data[nid]))
>>> +
>>> +#define node_start_pfn(nid)     NODE_DATA(nid)->node_start_pfn
>>> +#define node_spanned_pages(nid) NODE_DATA(nid)->node_spanned_pages
>>> +#define node_end_pfn(nid)       NODE_DATA(nid)->node_start_pfn + \
>>> +                                 NODE_DATA(nid)->node_spanned_pages
>>> +
>>> +void numa_add_cpu(int cpu);
>>> +void numa_set_node(int cpu, nodeid_t node);
>>> +#else
>>> +static inline void numa_add_cpu(int cpu) { }
>>> +static inline void numa_set_node(int cpu, nodeid_t node) { }
>>
>>
>> I am not sure why you need to define stub at least for numa_set_node... I
>> can't see use in non-NUMA code. I will comment about the numa_add_cpu later.
>
> x86 is using from setup.c. yes if we assume that numa is always enabled for x86,
> I can drop numa_set_node() inline function.

Looking at the code, I don't think there is any way to disable NUMA on
x86 at the moment... So there is no point in keeping it.

Cheers,
Stefano Stabellini July 24, 2017, 8:28 p.m. UTC | #7
On Wed, 19 Jul 2017, Julien Grall wrote:
> Hi Vijay,
> 
> On 18/07/17 12:41, vijay.kilari@gmail.com wrote:
> > From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
> > 
> > Move code from xen/arch/x86/numa.c to xen/common/numa.c
> > so that it can be used by other archs.
> > 
> > The following changes are done:
> > - Few generic static functions in x86/numa.c is made
> >   non-static common/numa.c
> > - The generic contents of header file asm-x86/numa.h
> >   are moved to xen/numa.h.
> > - The header file includes are reordered and externs are
> >   dropped.
> > - Moved acpi_numa from asm-x86/acpi.h to xen/acpi.h
> > - Coding style of code moved to commom/numa.c is changed
> >   to Xen style.
> > - numa_add_cpu() and numa_set_node() and moved to header
> >   file and added inline function in case of CONFIG_NUMA
> >   is not enabled because these functions are called from
> >   generic code with out any config check.
> > 
> > Also the node_online_map is defined in x86/numa.c for x86
> > and arm/smpboot.c for ARM. For x86 it is moved to x86/smpboot.c
> > If moved to common code the compilation fails because
> > common/numa.c is compiled only when NUMA is enabled.
> 
> I would much prefer if this patch does one thing: Moving code. The rest should
> be split out to help review and allowing us to easily verify you only moved
> code...

Indeed. However for the sake of making things easier, I did go through
the patch line by line (manually and automatically) to check the code
movement and it is correct.


> > +#define NODE_DATA(nid)          (&(node_data[nid]))
> > +
> > +#define node_start_pfn(nid)     NODE_DATA(nid)->node_start_pfn
> > +#define node_spanned_pages(nid) NODE_DATA(nid)->node_spanned_pages
> > +#define node_end_pfn(nid)       NODE_DATA(nid)->node_start_pfn + \
> > +                                 NODE_DATA(nid)->node_spanned_pages
> > +
> > +void numa_add_cpu(int cpu);
> > +void numa_set_node(int cpu, nodeid_t node);
> > +#else
> > +static inline void numa_add_cpu(int cpu) { }
> > +static inline void numa_set_node(int cpu, nodeid_t node) { }
> 
> I am not sure why you need to define stub at least for numa_set_node... I
> can't see use in non-NUMA code. I will comment about the numa_add_cpu later.
diff mbox

Patch

diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
index 44c2e08..654530b 100644
--- a/xen/arch/x86/numa.c
+++ b/xen/arch/x86/numa.c
@@ -10,323 +10,17 @@ 
 #include <xen/ctype.h>
 #include <xen/nodemask.h>
 #include <xen/numa.h>
-#include <xen/keyhandler.h>
 #include <xen/time.h>
 #include <xen/smp.h>
 #include <xen/pfn.h>
 #include <asm/acpi.h>
-#include <xen/sched.h>
-#include <xen/softirq.h>
-
-static int numa_setup(char *s);
-custom_param("numa", numa_setup);
-
-struct node_data node_data[MAX_NUMNODES];
-
-/* Mapping from pdx to node id */
-unsigned int memnode_shift;
 
 /*
- * In case of numa init failure or numa off,
- * memnode_shift is initialized to BITS_PER_LONG - 1. Hence allocate
- * memnodemap[] of BITS_PER_LONG.
- */
-static typeof(*memnodemap) _memnodemap[BITS_PER_LONG];
-unsigned long memnodemapsize;
-uint8_t *memnodemap;
-
-nodeid_t __read_mostly cpu_to_node[NR_CPUS] = {
-    [0 ... NR_CPUS-1] = NUMA_NO_NODE
-};
-/*
  * Keep BIOS's CPU2node information, should not be used for memory allocaion
  */
 nodeid_t apicid_to_node[MAX_LOCAL_APIC] = {
     [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
-cpumask_t __read_mostly node_to_cpumask[MAX_NUMNODES];
-
-nodemask_t __read_mostly node_online_map = { { [0] = 1UL } };
-
-bool numa_off;
-s8 acpi_numa = 0;
-
-int srat_disabled(void)
-{
-    return numa_off || acpi_numa < 0;
-}
-
-/*
- * Given a shift value, try to populate memnodemap[]
- * Returns :
- * 0 if OK
- * -ENOSPC if memnodmap[] too small (or shift too small)
- * -EINVAL if node overlap or lost ram (shift too big)
- */
-static int __init populate_memnodemap(const struct node *nodes,
-                                      unsigned int numnodes, unsigned int shift,
-                                      nodeid_t *nodeids)
-{
-    unsigned long spdx, epdx;
-    int i, res = -EINVAL;
-
-    memset(memnodemap, NUMA_NO_NODE, memnodemapsize * sizeof(*memnodemap));
-    for ( i = 0; i < numnodes; i++ )
-    {
-        spdx = paddr_to_pdx(nodes[i].start);
-        epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
-        if ( spdx >= epdx )
-            continue;
-        if ( (epdx >> shift) >= memnodemapsize )
-            return -ENOSPC;
-        do {
-            if ( memnodemap[spdx >> shift] != NUMA_NO_NODE )
-                return -EINVAL;
-
-            if ( !nodeids )
-                memnodemap[spdx >> shift] = i;
-            else
-                memnodemap[spdx >> shift] = nodeids[i];
-
-            spdx += (1UL << shift);
-        } while ( spdx < epdx );
-        res = 0;
-    }
-
-    return res;
-}
-
-static int __init allocate_cachealigned_memnodemap(void)
-{
-    unsigned long size = PFN_UP(memnodemapsize * sizeof(*memnodemap));
-    unsigned long mfn = alloc_boot_pages(size, 1);
-
-    if ( !mfn )
-    {
-        printk(KERN_ERR
-               "NUMA: Unable to allocate Memory to Node hash map\n");
-        memnodemapsize = 0;
-        return -ENOMEM;
-    }
-
-    memnodemap = mfn_to_virt(mfn);
-    mfn <<= PAGE_SHIFT;
-    size <<= PAGE_SHIFT;
-    printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
-           mfn, mfn + size);
-    memnodemapsize = size / sizeof(*memnodemap);
-
-    return 0;
-}
-
-/*
- * The LSB of all start and end addresses in the node map is the value of the
- * maximum possible shift.
- */
-static unsigned int __init extract_lsb_from_nodes(const struct node *nodes,
-                                                  unsigned int numnodes)
-{
-    unsigned int i, nodes_used = 0;
-    unsigned long spdx, epdx;
-    unsigned long bitfield = 0, memtop = 0;
-
-    for ( i = 0; i < numnodes; i++ )
-    {
-        spdx = paddr_to_pdx(nodes[i].start);
-        epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
-        if ( spdx >= epdx )
-            continue;
-        bitfield |= spdx;
-        nodes_used++;
-        if ( epdx > memtop )
-            memtop = epdx;
-    }
-    if ( nodes_used <= 1 )
-        i = BITS_PER_LONG - 1;
-    else
-        i = find_first_bit(&bitfield, sizeof(unsigned long) * 8);
-    memnodemapsize = (memtop >> i) + 1;
-
-    return i;
-}
-
-int __init compute_memnode_shift(struct node *nodes, unsigned int numnodes,
-                                 nodeid_t *nodeids)
-{
-    int ret;
-
-    memnode_shift = extract_lsb_from_nodes(nodes, numnodes);
-
-    if ( memnodemapsize <= ARRAY_SIZE(_memnodemap) )
-        memnodemap = _memnodemap;
-    else if ( allocate_cachealigned_memnodemap() )
-        return -ENOMEM;
-
-    printk(KERN_DEBUG "NUMA: Using %u for the hash shift.\n", memnode_shift);
-
-    ret = populate_memnodemap(nodes, numnodes, memnode_shift, nodeids);
-    if ( ret )
-    {
-        printk(KERN_INFO "Your memory is not aligned you need to "
-               "rebuild your hypervisor with a bigger NODEMAPSIZE "
-               "shift=%u\n", memnode_shift);
-        return ret;
-    }
-
-    return 0;
-}
-/* initialize NODE_DATA given nodeid and start/end */
-void __init setup_node_bootmem(nodeid_t nodeid, paddr_t start, paddr_t end)
-{
-    unsigned long start_pfn, end_pfn;
-
-    start_pfn = paddr_to_pfn(start);
-    end_pfn = paddr_to_pfn(end);
-
-    NODE_DATA(nodeid)->node_start_pfn = start_pfn;
-    NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
-
-    node_set_online(nodeid);
-}
-
-void __init numa_init_array(void)
-{
-    int rr, i;
-
-    /* There are unfortunately some poorly designed mainboards around
-       that only connect memory to a single CPU. This breaks the 1:1 cpu->node
-       mapping. To avoid this fill in the mapping for all possible
-       CPUs, as the number of CPUs is not known yet.
-       We round robin the existing nodes. */
-    rr = first_node(node_online_map);
-    for ( i = 0; i < nr_cpu_ids; i++ )
-    {
-        if ( cpu_to_node[i] != NUMA_NO_NODE )
-            continue;
-        numa_set_node(i, rr);
-        rr = next_node(rr, node_online_map);
-        if ( rr == MAX_NUMNODES )
-            rr = first_node(node_online_map);
-    }
-}
-
-#ifdef CONFIG_NUMA_EMU
-static unsigned int __initdata numa_fake;
-
-/* Numa emulation */
-static int __init numa_emulation(uint64_t start_pfn, uint64_t end_pfn)
-{
-    unsigned int i;
-    struct node nodes[MAX_NUMNODES];
-    uint64_t sz = ((end_pfn - start_pfn) << PAGE_SHIFT) / numa_fake;
-
-    /* Kludge needed for the hash function */
-    if ( hweight64(sz) > 1 )
-    {
-        uint64_t x = 1;
-
-        while ( (x << 1) < sz )
-            x <<= 1;
-        if ( x < sz / 2 )
-            printk(KERN_ERR
-                   "Numa emulation unbalanced. Complain to maintainer\n");
-        sz = x;
-    }
-
-    memset(&nodes,0,sizeof(nodes));
-    for ( i = 0; i < numa_fake; i++ )
-    {
-        nodes[i].start = pfn_to_paddr(start_pfn) + i * sz;
-        if ( i == numa_fake - 1 )
-            sz = pfn_to_paddr(end_pfn) - nodes[i].start;
-        nodes[i].end = nodes[i].start + sz;
-        printk(KERN_INFO
-               "Faking node %d at %"PRIx64"-%"PRIx64" (%"PRIu64"MB)\n",
-               i, nodes[i].start, nodes[i].end,
-               (nodes[i].end - nodes[i].start) >> 20);
-        node_set_online(i);
-    }
-    if ( compute_memnode_shift(nodes, numa_fake, NULL) )
-    {
-        memnode_shift = 0;
-        printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
-        return -1;
-    }
-    for_each_online_node ( i )
-        setup_node_bootmem(i, nodes[i].start, nodes[i].end);
-    numa_init_array();
-
-    return 0;
-}
-#endif
-
-void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
-{
-    int i;
-
-#ifdef CONFIG_NUMA_EMU
-    if ( numa_fake && !numa_emulation(start_pfn, end_pfn) )
-        return;
-#endif
-
-#ifdef CONFIG_ACPI_NUMA
-    if ( !numa_off &&
-         !numa_scan_nodes(pfn_to_paddr(start_pfn), pfn_to_paddr(end_pfn)) )
-        return;
-#endif
-
-    printk(KERN_INFO "%s\n",
-           numa_off ? "NUMA turned off" : "No NUMA configuration found");
-
-    printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n",
-           pfn_to_paddr(start_pfn), pfn_to_paddr(end_pfn));
-    /* setup dummy node covering all memory */
-    memnode_shift = BITS_PER_LONG - 1;
-    memnodemap = _memnodemap;
-    nodes_clear(node_online_map);
-    node_set_online(0);
-    for ( i = 0; i < nr_cpu_ids; i++ )
-        numa_set_node(i, 0);
-    cpumask_copy(&node_to_cpumask[0], cpumask_of(0));
-    setup_node_bootmem(0, pfn_to_paddr(start_pfn), pfn_to_paddr(end_pfn));
-}
-
-void numa_add_cpu(int cpu)
-{
-    cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
-}
-
-void numa_set_node(int cpu, nodeid_t node)
-{
-    cpu_to_node[cpu] = node;
-}
-
-/* [numa=off] */
-static int __init numa_setup(char *opt)
-{
-    if ( !strncmp(opt, "off", 3) )
-        numa_off = true;
-    if ( !strncmp(opt, "on", 2) )
-        numa_off = false;
-#ifdef CONFIG_NUMA_EMU
-    if ( !strncmp(opt, "fake=", 5) )
-    {
-        numa_off = false;
-        numa_fake = simple_strtoul(opt + 5, NULL, 0);
-        if ( numa_fake >= MAX_NUMNODES )
-            numa_fake = MAX_NUMNODES;
-    }
-#endif
-#ifdef CONFIG_ACPI_NUMA
-    if ( !strncmp(opt,"noacpi", 6) )
-    {
-        numa_off = false;
-        acpi_numa = -1;
-    }
-#endif
-
-    return 1;
-}
 
 /*
  * Setup early cpu_to_node.
@@ -378,148 +72,11 @@  unsigned int __init arch_get_dma_bitsize(void)
                  + PAGE_SHIFT, 32);
 }
 
-static void dump_numa(unsigned char key)
-{
-    s_time_t now = NOW();
-    unsigned int i, j, n;
-    int err;
-    struct domain *d;
-    struct page_info *page;
-    unsigned int page_num_node[MAX_NUMNODES];
-    const struct vnuma_info *vnuma;
-
-    printk("'%c' pressed -> dumping numa info (now-0x%X:%08X)\n", key,
-           (uint32_t)(now >> 32), (uint32_t)now);
-
-    for_each_online_node ( i )
-    {
-        paddr_t pa = pfn_to_paddr(node_start_pfn(i) + 1);
-
-        printk("NODE%u start->%lu size->%lu free->%lu\n",
-               i, node_start_pfn(i), node_spanned_pages(i),
-               avail_node_heap_pages(i));
-        /* sanity check phys_to_nid() */
-        if ( phys_to_nid(pa) != i )
-            printk("phys_to_nid(%"PRIpaddr") -> %d should be %u\n",
-                   pa, phys_to_nid(pa), i);
-    }
-
-    j = cpumask_first(&cpu_online_map);
-    n = 0;
-    for_each_online_cpu ( i )
-    {
-        if ( i != j + n || cpu_to_node[j] != cpu_to_node[i] )
-        {
-            if ( n > 1 )
-                printk("CPU%u...%u -> NODE%d\n", j, j + n - 1, cpu_to_node[j]);
-            else
-                printk("CPU%u -> NODE%d\n", j, cpu_to_node[j]);
-            j = i;
-            n = 1;
-        }
-        else
-            ++n;
-    }
-    if ( n > 1 )
-        printk("CPU%u...%u -> NODE%d\n", j, j + n - 1, cpu_to_node[j]);
-    else
-        printk("CPU%u -> NODE%d\n", j, cpu_to_node[j]);
-
-    rcu_read_lock(&domlist_read_lock);
-
-    printk("Memory location of each domain:\n");
-    for_each_domain ( d )
-    {
-        process_pending_softirqs();
-
-        printk("Domain %u (total: %u):\n", d->domain_id, d->tot_pages);
-
-        for_each_online_node ( i )
-            page_num_node[i] = 0;
-
-        spin_lock(&d->page_alloc_lock);
-        page_list_for_each(page, &d->page_list)
-        {
-            i = phys_to_nid((paddr_t)page_to_mfn(page) << PAGE_SHIFT);
-            page_num_node[i]++;
-        }
-        spin_unlock(&d->page_alloc_lock);
-
-        for_each_online_node ( i )
-            printk("    Node %u: %u\n", i, page_num_node[i]);
-
-        if ( !read_trylock(&d->vnuma_rwlock) )
-            continue;
-
-        if ( !d->vnuma )
-        {
-            read_unlock(&d->vnuma_rwlock);
-            continue;
-        }
-
-        vnuma = d->vnuma;
-        printk("     %u vnodes, %u vcpus, guest physical layout:\n",
-               vnuma->nr_vnodes, d->max_vcpus);
-        for ( i = 0; i < vnuma->nr_vnodes; i++ )
-        {
-            unsigned int start_cpu = ~0U;
-
-            err = snprintf(keyhandler_scratch, 12, "%3u",
-                    vnuma->vnode_to_pnode[i]);
-            if ( err < 0 || vnuma->vnode_to_pnode[i] == NUMA_NO_NODE )
-                strlcpy(keyhandler_scratch, "???", sizeof(keyhandler_scratch));
-
-            printk("       %3u: pnode %s,", i, keyhandler_scratch);
-
-            printk(" vcpus ");
-
-            for ( j = 0; j < d->max_vcpus; j++ )
-            {
-                if ( !(j & 0x3f) )
-                    process_pending_softirqs();
-
-                if ( vnuma->vcpu_to_vnode[j] == i )
-                {
-                    if ( start_cpu == ~0U )
-                    {
-                        printk("%d", j);
-                        start_cpu = j;
-                    }
-                }
-                else if ( start_cpu != ~0U )
-                {
-                    if ( j - 1 != start_cpu )
-                        printk("-%d ", j - 1);
-                    else
-                        printk(" ");
-                    start_cpu = ~0U;
-                }
-            }
-
-            if ( start_cpu != ~0U  && start_cpu != j - 1 )
-                printk("-%d", j - 1);
-
-            printk("\n");
-
-            for ( j = 0; j < vnuma->nr_vmemranges; j++ )
-            {
-                if ( vnuma->vmemrange[j].nid == i )
-                    printk("           %016"PRIx64" - %016"PRIx64"\n",
-                           vnuma->vmemrange[j].start,
-                           vnuma->vmemrange[j].end);
-            }
-        }
-
-        read_unlock(&d->vnuma_rwlock);
-    }
-
-    rcu_read_unlock(&domlist_read_lock);
-}
-
-static int __init register_numa_trigger(void)
-{
-    register_keyhandler('u', dump_numa, "dump NUMA info", 1);
-    return 0;
-}
-__initcall(register_numa_trigger);
-
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 78af0d2..168c9d4 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -58,6 +58,7 @@  DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_mask);
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, scratch_cpumask);
 static cpumask_t scratch_cpu0mask;
 
+/* Online NUMA nodes; bit 0 is preset, so node 0 starts out online. */
+nodemask_t __read_mostly node_online_map = { { [0] = 1UL } };
 cpumask_t cpu_online_map __read_mostly;
 EXPORT_SYMBOL(cpu_online_map);
 
diff --git a/xen/common/Makefile b/xen/common/Makefile
index 26c5a64..c8fdaf7 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -64,6 +64,7 @@  obj-y += wait.o
 obj-bin-y += warning.init.o
 obj-$(CONFIG_XENOPROF) += xenoprof.o
 obj-y += xmalloc_tlsf.o
+obj-$(CONFIG_NUMA) += numa.o
 
 obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma unlzo unlz4 earlycpio,$(n).init.o)
 
diff --git a/xen/common/numa.c b/xen/common/numa.c
new file mode 100644
index 0000000..0381f1b
--- /dev/null
+++ b/xen/common/numa.c
@@ -0,0 +1,487 @@ 
+/*
+ * Common NUMA handling functions for x86 and arm.
+ * Original code extracted from arch/x86/numa.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms and conditions of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <xen/init.h>
+#include <xen/ctype.h>
+#include <xen/sched.h>
+#include <xen/nodemask.h>
+#include <xen/numa.h>
+#include <xen/keyhandler.h>
+#include <xen/time.h>
+#include <xen/smp.h>
+#include <xen/pfn.h>
+#include <xen/mm.h>
+#include <xen/softirq.h>
+#include <xen/string.h>
+#include <asm/acpi.h>
+
+/* Handler for the "numa" command line option; defined further down. */
+static int numa_setup(char *s);
+custom_param("numa", numa_setup);
+
+/* Per-node start pfn and span, filled in by setup_node_bootmem(). */
+struct node_data node_data[MAX_NUMNODES];
+
+/* Shift applied to a pdx to index memnodemap[] (pdx -> node id hash). */
+unsigned int memnode_shift;
+
+/*
+ * In case of numa init failure or numa off,
+ * memnode_shift is initialized to BITS_PER_LONG - 1. Hence allocate
+ * memnodemap[] of BITS_PER_LONG.
+ */
+static typeof(*memnodemap) _memnodemap[BITS_PER_LONG];
+/* Number of entries currently usable in memnodemap[]. */
+unsigned long memnodemapsize;
+/* Active pdx -> node table: _memnodemap or a boot-allocated array. */
+uint8_t *memnodemap;
+
+/* Node of each CPU; NUMA_NO_NODE until numa_set_node() assigns one. */
+nodeid_t __read_mostly cpu_to_node[NR_CPUS] = {
+    [0 ... NR_CPUS-1] = NUMA_NO_NODE
+};
+
+/* CPUs belonging to each node; bits are set by numa_add_cpu(). */
+cpumask_t __read_mostly node_to_cpumask[MAX_NUMNODES];
+
+/* Set by "numa=off"; cleared again by "numa=on", "fake=" and "noacpi". */
+bool numa_off;
+/* Set to -1 by "numa=noacpi"; negative values make srat_disabled() true. */
+s8 acpi_numa = 0;
+
+/*
+ * Tell whether SRAT-based setup is disabled: non-zero when NUMA is turned
+ * off altogether or when acpi_numa has gone negative, zero otherwise.
+ */
+int srat_disabled(void)
+{
+    if ( numa_off )
+        return 1;
+
+    return acpi_numa < 0;
+}
+
+/*
+ * Given a shift value, try to populate memnodemap[].
+ *
+ * Each node's [start, end) range is converted to pdx space and every
+ * (1UL << shift)-sized slot it touches is tagged with the node's id:
+ * nodeids[i] when nodeids is non-NULL, otherwise the array index i.
+ * Nodes with an empty range are skipped.
+ *
+ * Returns :
+ * 0 if OK
+ * -ENOSPC if memnodemap[] too small (or shift too small)
+ * -EINVAL if node overlap or lost ram (shift too big), or if no node had
+ *         a non-empty range
+ */
+static int __init populate_memnodemap(const struct node *nodes,
+                                      unsigned int numnodes, unsigned int shift,
+                                      nodeid_t *nodeids)
+{
+    unsigned long spdx, epdx;
+    unsigned int i;
+    int res = -EINVAL;
+
+    /* Start with every slot unowned so that overlaps can be detected. */
+    memset(memnodemap, NUMA_NO_NODE, memnodemapsize * sizeof(*memnodemap));
+    for ( i = 0; i < numnodes; i++ )
+    {
+        spdx = paddr_to_pdx(nodes[i].start);
+        /* end is exclusive: take the pdx of the last byte, then step past. */
+        epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
+        if ( spdx >= epdx )
+            continue;
+        if ( (epdx >> shift) >= memnodemapsize )
+            return -ENOSPC;
+        do {
+            /* A slot that already has an owner means two nodes collide. */
+            if ( memnodemap[spdx >> shift] != NUMA_NO_NODE )
+                return -EINVAL;
+
+            if ( !nodeids )
+                memnodemap[spdx >> shift] = i;
+            else
+                memnodemap[spdx >> shift] = nodeids[i];
+
+            spdx += (1UL << shift);
+        } while ( spdx < epdx );
+        res = 0;
+    }
+
+    return res;
+}
+
+/*
+ * Back memnodemap[] with pages from the boot allocator, sized for the
+ * current memnodemapsize and rounded up to whole pages.
+ *
+ * On success memnodemap points at the new pages, memnodemapsize is grown
+ * to the number of entries that fit in them, and 0 is returned. On failure
+ * memnodemapsize is reset to 0 and -ENOMEM is returned.
+ */
+static int __init allocate_cachealigned_memnodemap(void)
+{
+    unsigned long nr_pages = PFN_UP(memnodemapsize * sizeof(*memnodemap));
+    unsigned long mfn = alloc_boot_pages(nr_pages, 1);
+    unsigned long base, bytes;
+
+    if ( !mfn )
+    {
+        printk(KERN_ERR
+               "NUMA: Unable to allocate Memory to Node hash map\n");
+        memnodemapsize = 0;
+        return -ENOMEM;
+    }
+
+    memnodemap = mfn_to_virt(mfn);
+
+    /* Switch from page units to byte units for reporting and sizing. */
+    base = mfn << PAGE_SHIFT;
+    bytes = nr_pages << PAGE_SHIFT;
+    printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
+           base, base + bytes);
+    memnodemapsize = bytes / sizeof(*memnodemap);
+
+    return 0;
+}
+
+/*
+ * The LSB of all start and end addresses in the node map is the value of the
+ * maximum possible shift.
+ *
+ * Only the start pdx of each non-empty node is accumulated. As a side
+ * effect memnodemapsize is set to the number of slots needed to cover the
+ * highest node end at the chosen shift.
+ *
+ * Returns the shift, which is always below BITS_PER_LONG.
+ */
+static unsigned int __init extract_lsb_from_nodes(const struct node *nodes,
+                                                  unsigned int numnodes)
+{
+    unsigned int i, nodes_used = 0;
+    unsigned long spdx, epdx;
+    unsigned long bitfield = 0, memtop = 0;
+
+    for ( i = 0; i < numnodes; i++ )
+    {
+        spdx = paddr_to_pdx(nodes[i].start);
+        epdx = paddr_to_pdx(nodes[i].end - 1) + 1;
+        if ( spdx >= epdx )
+            continue;
+        bitfield |= spdx;
+        nodes_used++;
+        if ( epdx > memtop )
+            memtop = epdx;
+    }
+
+    /*
+     * Use the largest valid shift when at most one node is populated, and
+     * also when no start pdx has any bit set: find_first_bit() would then
+     * report "not found" as the bitmap size, and shifting memtop by a full
+     * BITS_PER_LONG below would be undefined.
+     */
+    if ( nodes_used <= 1 || !bitfield )
+        i = BITS_PER_LONG - 1;
+    else
+        i = find_first_bit(&bitfield, BITS_PER_LONG);
+    memnodemapsize = (memtop >> i) + 1;
+
+    return i;
+}
+
+/*
+ * Choose memnode_shift for the given nodes, select storage for
+ * memnodemap[] (the static array when it is large enough, boot-allocated
+ * pages otherwise) and populate it.
+ *
+ * Returns 0 on success, -ENOMEM if the map could not be allocated, or the
+ * error reported by populate_memnodemap().
+ */
+int __init compute_memnode_shift(struct node *nodes, unsigned int numnodes,
+                                 nodeid_t *nodeids)
+{
+    int rc;
+
+    memnode_shift = extract_lsb_from_nodes(nodes, numnodes);
+
+    if ( memnodemapsize > ARRAY_SIZE(_memnodemap) )
+    {
+        if ( allocate_cachealigned_memnodemap() )
+            return -ENOMEM;
+    }
+    else
+        memnodemap = _memnodemap;
+
+    printk(KERN_DEBUG "NUMA: Using %u for the hash shift.\n", memnode_shift);
+
+    rc = populate_memnodemap(nodes, numnodes, memnode_shift, nodeids);
+    if ( !rc )
+        return 0;
+
+    printk(KERN_INFO "Your memory is not aligned you need to "
+           "rebuild your hypervisor with a bigger NODEMAPSIZE "
+           "shift=%u\n", memnode_shift);
+
+    return rc;
+}
+
+/*
+ * Initialize NODE_DATA for nodeid from the physical range [start, end):
+ * record its first pfn and the number of pfns spanned, then mark the node
+ * online.
+ */
+void __init setup_node_bootmem(nodeid_t nodeid, paddr_t start, paddr_t end)
+{
+    unsigned long first_pfn = paddr_to_pfn(start);
+    unsigned long limit_pfn = paddr_to_pfn(end);
+
+    NODE_DATA(nodeid)->node_start_pfn = first_pfn;
+    NODE_DATA(nodeid)->node_spanned_pages = limit_pfn - first_pfn;
+
+    node_set_online(nodeid);
+}
+
+/*
+ * Give every CPU that has no node yet (cpu_to_node[] == NUMA_NO_NODE) a
+ * node, handing out the online nodes in round-robin order.
+ */
+void __init numa_init_array(void)
+{
+    int cpu, node = first_node(node_online_map);
+
+    /*
+     * There are unfortunately some poorly designed mainboards around that
+     * only connect memory to a single CPU. This breaks the 1:1 cpu->node
+     * mapping. To avoid this fill in the mapping for all possible CPUs,
+     * as the number of CPUs is not known yet.
+     */
+    for ( cpu = 0; cpu < nr_cpu_ids; cpu++ )
+    {
+        if ( cpu_to_node[cpu] == NUMA_NO_NODE )
+        {
+            numa_set_node(cpu, node);
+
+            /* Advance to the next online node, wrapping at the end. */
+            node = next_node(node, node_online_map);
+            if ( node == MAX_NUMNODES )
+                node = first_node(node_online_map);
+        }
+    }
+}
+
+#ifdef CONFIG_NUMA_EMU
+/* Number of nodes to fake, set by "numa=fake=<n>"; 0 means no emulation. */
+static unsigned int __initdata numa_fake;
+
+/*
+ * Numa emulation: split [start_pfn, end_pfn) into numa_fake fake nodes.
+ *
+ * The per-node size is rounded down to a power of two so that the memnode
+ * hash can be built; the last node absorbs whatever memory is left over.
+ * numa_fake is used as a divisor, so the caller must only invoke this
+ * function when it is non-zero.
+ *
+ * Returns 0 on success, -1 if no hash shift could be computed.
+ */
+static int __init numa_emulation(uint64_t start_pfn, uint64_t end_pfn)
+{
+    unsigned int i;
+    struct node nodes[MAX_NUMNODES];
+    uint64_t sz = ((end_pfn - start_pfn) << PAGE_SHIFT) / numa_fake;
+
+    /* Kludge needed for the hash function */
+    if ( hweight64(sz) > 1 )
+    {
+        uint64_t x = 1;
+
+        /* Find the largest power of two strictly below sz. */
+        while ( (x << 1) < sz )
+            x <<= 1;
+        if ( x < sz / 2 )
+            printk(KERN_ERR
+                   "Numa emulation unbalanced. Complain to maintainer\n");
+        sz = x;
+    }
+
+    memset(&nodes, 0, sizeof(nodes));
+    for ( i = 0; i < numa_fake; i++ )
+    {
+        nodes[i].start = pfn_to_paddr(start_pfn) + i * sz;
+        /* The final node is stretched to reach the end of memory. */
+        if ( i == numa_fake - 1 )
+            sz = pfn_to_paddr(end_pfn) - nodes[i].start;
+        nodes[i].end = nodes[i].start + sz;
+        printk(KERN_INFO
+               "Faking node %u at %"PRIx64"-%"PRIx64" (%"PRIu64"MB)\n",
+               i, nodes[i].start, nodes[i].end,
+               (nodes[i].end - nodes[i].start) >> 20);
+        node_set_online(i);
+    }
+    if ( compute_memnode_shift(nodes, numa_fake, NULL) )
+    {
+        memnode_shift = 0;
+        printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
+        return -1;
+    }
+    for_each_online_node ( i )
+        setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+    numa_init_array();
+
+    return 0;
+}
+#endif
+
+/*
+ * Establish the NUMA layout for the memory in [start_pfn, end_pfn).
+ *
+ * Tries, in order, NUMA emulation (CONFIG_NUMA_EMU, when numa_fake is set)
+ * and numa_scan_nodes() (CONFIG_ACPI_NUMA, unless numa_off). If neither
+ * succeeds, all memory and all CPUs are placed on a single dummy node 0.
+ */
+void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
+{
+    int i;
+
+#ifdef CONFIG_NUMA_EMU
+    if ( numa_fake && !numa_emulation(start_pfn, end_pfn) )
+        return;
+#endif
+
+#ifdef CONFIG_ACPI_NUMA
+    if ( !numa_off &&
+         !numa_scan_nodes(pfn_to_paddr(start_pfn), pfn_to_paddr(end_pfn)) )
+        return;
+#endif
+
+    printk(KERN_INFO "%s\n",
+           numa_off ? "NUMA turned off" : "No NUMA configuration found");
+
+    printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n",
+           pfn_to_paddr(start_pfn), pfn_to_paddr(end_pfn));
+    /* setup dummy node covering all memory */
+    memnode_shift = BITS_PER_LONG - 1;
+    memnodemap = _memnodemap;
+    /*
+     * A failed attempt above may have left NUMA_NO_NODE or stale node ids
+     * in _memnodemap[] (populate_memnodemap() fills it before validating)
+     * and memnodemapsize at an unrelated value, possibly 0. The dummy node
+     * only uses slot 0, so make that slot and the size consistent.
+     */
+    memnodemap[0] = 0;
+    memnodemapsize = 1;
+    nodes_clear(node_online_map);
+    node_set_online(0);
+    for ( i = 0; i < nr_cpu_ids; i++ )
+        numa_set_node(i, 0);
+    cpumask_copy(&node_to_cpumask[0], cpumask_of(0));
+    setup_node_bootmem(0, pfn_to_paddr(start_pfn), pfn_to_paddr(end_pfn));
+}
+
+/* Set cpu's bit in the cpumask of the node it is currently assigned to. */
+void numa_add_cpu(int cpu)
+{
+    nodeid_t node = cpu_to_node(cpu);
+
+    cpumask_set_cpu(cpu, &node_to_cpumask[node]);
+}
+
+/* Record node as the node of cpu in cpu_to_node[]; cpu is not range-checked. */
+void numa_set_node(int cpu, nodeid_t node)
+{
+    cpu_to_node[cpu] = node;
+}
+
+/*
+ * Parse the value of the "numa" command line option.
+ *
+ * Recognised values, matched by prefix: "off", "on", "fake=<n>" (only with
+ * CONFIG_NUMA_EMU) and "noacpi" (only with CONFIG_ACPI_NUMA). Anything
+ * else is silently ignored. Always returns 1.
+ */
+static int __init numa_setup(char *opt)
+{
+    if ( strncmp(opt, "off", 3) == 0 )
+        numa_off = true;
+
+    if ( strncmp(opt, "on", 2) == 0 )
+        numa_off = false;
+
+#ifdef CONFIG_NUMA_EMU
+    if ( strncmp(opt, "fake=", 5) == 0 )
+    {
+        numa_off = false;
+        numa_fake = simple_strtoul(opt + 5, NULL, 0);
+        /* Never fake more nodes than MAX_NUMNODES. */
+        if ( numa_fake > MAX_NUMNODES )
+            numa_fake = MAX_NUMNODES;
+    }
+#endif
+
+#ifdef CONFIG_ACPI_NUMA
+    if ( strncmp(opt, "noacpi", 6) == 0 )
+    {
+        numa_off = false;
+        acpi_numa = -1;
+    }
+#endif
+
+    return 1;
+}
+
+/*
+ * Key handler: print the NUMA state to the console.
+ *
+ * Output, in order: the pfn range and free heap pages of every online
+ * node, the CPU -> node mapping (consecutive CPUs on one node are printed
+ * as a range), and for every domain the number of its pages on each node
+ * plus, when present, its vNUMA layout.
+ */
+static void dump_numa(unsigned char key)
+{
+    s_time_t now = NOW();
+    unsigned int i, j, n;
+    int err;
+    struct domain *d;
+    struct page_info *page;
+    unsigned int page_num_node[MAX_NUMNODES];
+    const struct vnuma_info *vnuma;
+
+    printk("'%c' pressed -> dumping numa info (now-0x%X:%08X)\n", key,
+           (uint32_t)(now >> 32), (uint32_t)now);
+
+    for_each_online_node ( i )
+    {
+        /* Probe an address one page into the node for the sanity check. */
+        paddr_t pa = pfn_to_paddr(node_start_pfn(i) + 1);
+
+        printk("NODE%u start->%lu size->%lu free->%lu\n",
+               i, node_start_pfn(i), node_spanned_pages(i),
+               avail_node_heap_pages(i));
+        /* sanity check phys_to_nid() */
+        if ( phys_to_nid(pa) != i )
+            printk("phys_to_nid(%"PRIpaddr") -> %d should be %u\n",
+                   pa, phys_to_nid(pa), i);
+    }
+
+    /*
+     * j is the first CPU of the current run and n its length; a run ends
+     * when the CPU numbers stop being consecutive or the node changes.
+     */
+    j = cpumask_first(&cpu_online_map);
+    n = 0;
+    for_each_online_cpu ( i )
+    {
+        if ( i != j + n || cpu_to_node[j] != cpu_to_node[i] )
+        {
+            if ( n > 1 )
+                printk("CPU%u...%u -> NODE%d\n", j, j + n - 1, cpu_to_node[j]);
+            else
+                printk("CPU%u -> NODE%d\n", j, cpu_to_node[j]);
+            j = i;
+            n = 1;
+        }
+        else
+            ++n;
+    }
+    /* Flush the run that was still open when the loop ended. */
+    if ( n > 1 )
+        printk("CPU%u...%u -> NODE%d\n", j, j + n - 1, cpu_to_node[j]);
+    else
+        printk("CPU%u -> NODE%d\n", j, cpu_to_node[j]);
+
+    rcu_read_lock(&domlist_read_lock);
+
+    printk("Memory location of each domain:\n");
+    for_each_domain ( d )
+    {
+        process_pending_softirqs();
+
+        printk("Domain %u (total: %u):\n", d->domain_id, d->tot_pages);
+
+        for_each_online_node ( i )
+            page_num_node[i] = 0;
+
+        /*
+         * Count the domain's pages per node.
+         * NOTE(review): the index returned by phys_to_nid() is not checked
+         * against MAX_NUMNODES before use - confirm it cannot be
+         * NUMA_NO_NODE here.
+         */
+        spin_lock(&d->page_alloc_lock);
+        page_list_for_each(page, &d->page_list)
+        {
+            i = phys_to_nid((paddr_t)page_to_mfn(page) << PAGE_SHIFT);
+            page_num_node[i]++;
+        }
+        spin_unlock(&d->page_alloc_lock);
+
+        for_each_online_node ( i )
+            printk("    Node %u: %u\n", i, page_num_node[i]);
+
+        /* Skip the vNUMA part rather than wait if the lock is contended. */
+        if ( !read_trylock(&d->vnuma_rwlock) )
+            continue;
+
+        if ( !d->vnuma )
+        {
+            read_unlock(&d->vnuma_rwlock);
+            continue;
+        }
+
+        vnuma = d->vnuma;
+        printk("     %u vnodes, %u vcpus, guest physical layout:\n",
+               vnuma->nr_vnodes, d->max_vcpus);
+        for ( i = 0; i < vnuma->nr_vnodes; i++ )
+        {
+            /* ~0U means "no vcpu range currently open". */
+            unsigned int start_cpu = ~0U;
+
+            /* Format the physical node, or "???" if unknown/unformattable. */
+            err = snprintf(keyhandler_scratch, 12, "%3u",
+                    vnuma->vnode_to_pnode[i]);
+            if ( err < 0 || vnuma->vnode_to_pnode[i] == NUMA_NO_NODE )
+                strlcpy(keyhandler_scratch, "???", sizeof(keyhandler_scratch));
+
+            printk("       %3u: pnode %s,", i, keyhandler_scratch);
+
+            printk(" vcpus ");
+
+            /* Print the vcpus of this vnode as "a-b" ranges. */
+            for ( j = 0; j < d->max_vcpus; j++ )
+            {
+                /* Process pending softirqs once every 64 vcpus. */
+                if ( !(j & 0x3f) )
+                    process_pending_softirqs();
+
+                if ( vnuma->vcpu_to_vnode[j] == i )
+                {
+                    if ( start_cpu == ~0U )
+                    {
+                        printk("%d", j);
+                        start_cpu = j;
+                    }
+                }
+                else if ( start_cpu != ~0U )
+                {
+                    if ( j - 1 != start_cpu )
+                        printk("-%d ", j - 1);
+                    else
+                        printk(" ");
+                    start_cpu = ~0U;
+                }
+            }
+
+            /* Close a range that runs up to the last vcpu. */
+            if ( start_cpu != ~0U  && start_cpu != j - 1 )
+                printk("-%d", j - 1);
+
+            printk("\n");
+
+            /* List the guest memory ranges assigned to this vnode. */
+            for ( j = 0; j < vnuma->nr_vmemranges; j++ )
+            {
+                if ( vnuma->vmemrange[j].nid == i )
+                    printk("           %016"PRIx64" - %016"PRIx64"\n",
+                           vnuma->vmemrange[j].start,
+                           vnuma->vmemrange[j].end);
+            }
+        }
+
+        read_unlock(&d->vnuma_rwlock);
+    }
+
+    rcu_read_unlock(&domlist_read_lock);
+}
+
+/* Initcall: bind the 'u' debug key to dump_numa(). Always returns 0. */
+static int __init register_numa_trigger(void)
+{
+    register_keyhandler('u', dump_numa, "dump NUMA info", 1);
+    return 0;
+}
+__initcall(register_numa_trigger);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/acpi.h b/xen/include/asm-x86/acpi.h
index 220c2d7..a65c85f 100644
--- a/xen/include/asm-x86/acpi.h
+++ b/xen/include/asm-x86/acpi.h
@@ -103,7 +103,6 @@  extern void acpi_reserve_bootmem(void);
 
 #define ARCH_HAS_POWER_INIT	1
 
-extern s8 acpi_numa;
 extern int numa_scan_nodes(paddr_t start, paddr_t end);
 
 #ifdef CONFIG_ACPI_SLEEP
diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
index acf509c..41bb3ef 100644
--- a/xen/include/asm-x86/numa.h
+++ b/xen/include/asm-x86/numa.h
@@ -3,76 +3,20 @@ 
 
 #include <xen/cpumask.h>
 
-#define MAX_NUMNODES    NR_NODES
-#define NR_NODE_MEMBLKS (MAX_NUMNODES * 2)
-
 typedef uint8_t nodeid_t;
 
 extern int srat_rev;
 
-extern nodeid_t      cpu_to_node[NR_CPUS];
-extern cpumask_t     node_to_cpumask[];
-
-#define cpu_to_node(cpu)         (cpu_to_node[cpu])
-#define parent_node(node)        (node)
-#define node_to_first_cpu(node)  (__ffs(node_to_cpumask[node]))
-#define node_to_cpumask(node)    (node_to_cpumask[node])
-
-struct node {
-    paddr_t start;
-    paddr_t end;
-};
-
-extern int compute_memnode_shift(struct node *nodes, unsigned int numnodes,
-                                 nodeid_t *nodeids);
 extern nodeid_t pxm_to_node(unsigned int pxm);
 
 #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
 
-extern void numa_add_cpu(int cpu);
-extern void numa_init_array(void);
-extern bool numa_off;
-
-
-extern int srat_disabled(void);
-extern void numa_set_node(int cpu, nodeid_t node);
 extern nodeid_t acpi_setup_node(unsigned int pxm);
 extern void srat_detect_node(int cpu);
 
-extern void setup_node_bootmem(nodeid_t nodeid, paddr_t start, paddr_t end);
 extern nodeid_t apicid_to_node[];
 extern void init_cpu_to_node(void);
 
-/* Simple perfect hash to map pdx to node numbers */
-extern unsigned int memnode_shift;
-extern unsigned long memnodemapsize;
-extern uint8_t *memnodemap;
-
-struct node_data {
-    unsigned long node_start_pfn;
-    unsigned long node_spanned_pages;
-};
-
-extern struct node_data node_data[];
-
-static inline __attribute_pure__ nodeid_t phys_to_nid(paddr_t addr)
-{
-   nodeid_t nid;
-
-   ASSERT((paddr_to_pdx(addr) >> memnode_shift) < memnodemapsize);
-   nid = memnodemap[paddr_to_pdx(addr) >> memnode_shift];
-   ASSERT(nid <= MAX_NUMNODES || !node_data[nid].node_start_pfn);
-
-   return nid;
-}
-
-#define NODE_DATA(nid)          (&(node_data[nid]))
-
-#define node_start_pfn(nid)     NODE_DATA(nid)->node_start_pfn
-#define node_spanned_pages(nid) NODE_DATA(nid)->node_spanned_pages
-#define node_end_pfn(nid)       NODE_DATA(nid)->node_start_pfn + \
-                                 NODE_DATA(nid)->node_spanned_pages
-
 extern int valid_numa_range(paddr_t start, paddr_t end, nodeid_t node);
 
 void srat_parse_regions(paddr_t addr);
diff --git a/xen/include/asm-x86/setup.h b/xen/include/asm-x86/setup.h
index c5b3d4e..cfd83d6 100644
--- a/xen/include/asm-x86/setup.h
+++ b/xen/include/asm-x86/setup.h
@@ -26,7 +26,6 @@  int transmeta_init_cpu(void);
 
 void set_nr_cpu_ids(unsigned int max_cpus);
 
-void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
 void arch_init_memory(void);
 void subarch_init_memory(void);
 
diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
index 3bb4afc..c6bbbdf 100644
--- a/xen/include/xen/numa.h
+++ b/xen/include/xen/numa.h
@@ -1,11 +1,75 @@ 
 #ifndef _XEN_NUMA_H
 #define _XEN_NUMA_H
 
+#include <xen/cpumask.h>
+#include <xen/mm.h>
 #include <asm/numa.h>
 
 #define NUMA_NO_NODE     0xFF
 #define NUMA_NO_DISTANCE 0xFF
 
+/* Number of node ids supported; per-node arrays have this many entries. */
+#define MAX_NUMNODES    NR_NODES
+#define NR_NODE_MEMBLKS (MAX_NUMNODES * 2)
+
+/* Physical address range [start, end) of one node; end is exclusive. */
+struct node {
+    paddr_t start;
+    paddr_t end;
+};
+
+/* Node of each CPU, and the set of CPUs of each node. */
+extern nodeid_t      cpu_to_node[NR_CPUS];
+extern cpumask_t     node_to_cpumask[];
+/* Simple perfect hash to map pdx to node numbers */
+extern unsigned int memnode_shift;
+extern unsigned long memnodemapsize;
+extern uint8_t *memnodemap;
+extern bool numa_off;
+extern s8 acpi_numa;
+
+/*
+ * NOTE(review): the objects and functions declared here are defined in
+ * common/numa.c, which the Makefile builds only for CONFIG_NUMA, yet the
+ * declarations are unconditional - confirm that !CONFIG_NUMA builds never
+ * reference them.
+ */
+void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
+int compute_memnode_shift(struct node *nodes, unsigned int numnodes,
+                          nodeid_t *nodeids);
+int srat_disabled(void);
+void numa_init_array(void);
+void setup_node_bootmem(nodeid_t nodeid, paddr_t start, paddr_t end);
+
+#ifdef CONFIG_NUMA
+/*
+ * NOTE(review): vcpu_to_node(), defined after this conditional block,
+ * expands to cpu_to_node(), which only exists when CONFIG_NUMA is set -
+ * confirm how !CONFIG_NUMA builds are meant to resolve it.
+ */
+#define cpu_to_node(cpu)         (cpu_to_node[cpu])
+#define parent_node(node)        (node)
+/* NOTE(review): the argument is a cpumask_t - confirm __ffs() accepts it. */
+#define node_to_first_cpu(node)  (__ffs(node_to_cpumask[node]))
+#define node_to_cpumask(node)    (node_to_cpumask[node])
+
+/* Page frame span of one node, as recorded by setup_node_bootmem(). */
+struct node_data {
+    unsigned long node_start_pfn;
+    unsigned long node_spanned_pages;
+};
+
+extern struct node_data node_data[];
+
+/*
+ * Map a physical address to its node id through the memnodemap[] hash:
+ * the address's pdx, shifted right by memnode_shift, selects the slot.
+ *
+ * NOTE(review): the second ASSERT only reads node_data[nid] when nid is
+ * already above MAX_NUMNODES, which looks like an out-of-bounds index -
+ * confirm the intended condition.
+ */
+static inline __attribute_pure__ nodeid_t phys_to_nid(paddr_t addr)
+{
+    unsigned long slot = paddr_to_pdx(addr) >> memnode_shift;
+    nodeid_t nid;
+
+    ASSERT(slot < memnodemapsize);
+    nid = memnodemap[slot];
+    ASSERT(nid <= MAX_NUMNODES || !node_data[nid].node_start_pfn);
+
+    return nid;
+}
+
+/* Pointer to the node_data[] entry of node nid. */
+#define NODE_DATA(nid)          (&(node_data[nid]))
+
+/*
+ * First pfn of a node, number of pfns it spans, and the pfn one past its
+ * end. Each expansion is fully parenthesised so the macros can be used
+ * inside larger expressions; node_end_pfn() evaluates nid twice.
+ */
+#define node_start_pfn(nid)     (NODE_DATA(nid)->node_start_pfn)
+#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
+#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
+                                 NODE_DATA(nid)->node_spanned_pages)
+
+/* Add cpu to the cpumask of the node it is currently assigned to. */
+void numa_add_cpu(int cpu);
+/* Record node as the node of cpu. */
+void numa_set_node(int cpu, nodeid_t node);
+#else
+/* Without CONFIG_NUMA both calls are no-ops. */
+static inline void numa_add_cpu(int cpu) { }
+static inline void numa_set_node(int cpu, nodeid_t node) { }
+#endif
+
 #define vcpu_to_node(v) (cpu_to_node((v)->processor))
 
 #define domain_to_node(d) \