@@ -42,6 +42,15 @@
#include "device-assignment.h"
#include "kvm.h"
+#ifdef CONFIG_NUMA
+#include <numa.h>
+#include <numaif.h>
+#ifndef MPOL_F_RELATIVE_NODES
+ #define MPOL_F_RELATIVE_NODES (1 << 14)
+ #define MPOL_F_STATIC_NODES (1 << 15)
+#endif
+#endif
+
/* output Bochs bios info messages */
//#define DEBUG_BIOS
@@ -882,6 +891,53 @@ void pc_cpus_init(const char *cpu_model)
}
}
+/*
+ * Apply the configured host NUMA memory policy to guest RAM.
+ *
+ * ram_addr is passed to qemu_get_ram_ptr() to obtain the host mapping of
+ * guest RAM. Starting at that mapping, numa_info[] is walked in order and
+ * entry i is taken to cover the next numa_info[i].guest_mem bytes. Entries
+ * whose flags are zero get no explicit policy, but their length is still
+ * skipped over. For every other entry the policy selected by the flags is
+ * applied with mbind() using the entry's host_mem node mask.
+ *
+ * A failing mbind() is reported via perror() and otherwise ignored, so the
+ * remaining entries are still processed.
+ *
+ * Without CONFIG_NUMA this function does nothing.
+ */
+static void bind_numa(ram_addr_t ram_addr)
+{
+#ifdef CONFIG_NUMA
+    char *ram_ptr = qemu_get_ram_ptr(ram_addr);
+    ram_addr_t ram_offset = 0;
+    /* Loop-invariant, so query libnuma only once. */
+    int nb_host_nodes = numa_num_configured_nodes();
+    int i;
+
+    for (i = 0; i < nb_numa_nodes; i++) {
+        ram_addr_t len = numa_info[i].guest_mem;
+
+        if (numa_info[i].flags != 0) {
+            int bind_mode;
+
+            switch (numa_info[i].flags & NODE_HOST_POLICY_MASK) {
+            case NODE_HOST_BIND:
+                bind_mode = MPOL_BIND;
+                break;
+            case NODE_HOST_INTERLEAVE:
+                bind_mode = MPOL_INTERLEAVE;
+                break;
+            case NODE_HOST_PREFERRED:
+                bind_mode = MPOL_PREFERRED;
+                break;
+            default:
+                bind_mode = MPOL_DEFAULT;
+                break;
+            }
+            /* Exactly one of the two node-mask interpretation flags is set. */
+            bind_mode |= (numa_info[i].flags & NODE_HOST_RELATIVE) ?
+                MPOL_F_RELATIVE_NODES : MPOL_F_STATIC_NODES;
+
+            /* This is a workaround for a long-standing bug in Linux's
+             * mbind implementation, which cuts off the last specified
+             * node: with maxnode = N + 1 only bits 0..N-1 are examined.
+             * To stay compatible should this bug be fixed (bits 0..N
+             * would then be examined), we pass one more node than is
+             * configured and make sure that extra bit, index N, is zero.
+             *
+             * NOTE(review): this assumes host node ids are dense
+             * (0..N-1); confirm whether sparse ids must be supported.
+             */
+            clear_bit(nb_host_nodes, numa_info[i].host_mem);
+            if (mbind(ram_ptr + ram_offset, len, bind_mode,
+                      numa_info[i].host_mem, nb_host_nodes + 1, 0)) {
+                perror("mbind");
+            }
+        }
+        ram_offset += len;
+    }
+#endif
+}
+
void pc_memory_init(ram_addr_t ram_size,
const char *kernel_filename,
const char *kernel_cmdline,
@@ -919,6 +975,8 @@ void pc_memory_init(ram_addr_t ram_size,
cpu_register_physical_memory(0x100000,
below_4g_mem_size - 0x100000,
ram_addr + 0x100000);
+ bind_numa(ram_addr);
+
#if TARGET_PHYS_ADDR_BITS > 32
cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size,
ram_addr + below_4g_mem_size);