@@ -1769,6 +1769,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
that the amount of memory usable for all allocations
is not too small.
+ movablenode [KNL,X86] This parameter enables/disables the
+ kernel to arrange hotpluggable memory ranges recorded
+ in ACPI SRAT(System Resource Affinity Table) as
+ ZONE_MOVABLE. And these memory can be hot-removed when
+ the system is up.
+ By specifying this option, all the hotpluggable memory
+ will be in ZONE_MOVABLE, which the kernel cannot use.
+ This will cause NUMA performance down. For users who
+ care about NUMA performance, just don't use it.
+ If all the memory ranges in the system are hotpluggable,
+ then the ones used by the kernel at early time, such as
+ kernel code and data segments, initrd file and so on,
+ won't be set as ZONE_MOVABLE, and won't be hotpluggable.
+ Otherwise the kernel won't have enough memory to boot.
+
MTD_Partition= [MTD]
Format: <name>,<region-number>,<size>,<offset>
@@ -1104,6 +1104,31 @@ void __init setup_arch(char **cmdline_p)
trim_platform_memory_ranges();
trim_low_memory_range();
+#ifdef CONFIG_MOVABLE_NODE
+ if (movablenode_enable_srat) {
+ /*
+ * Memory used by the kernel cannot be hot-removed because Linux
+ * cannot migrate the kernel pages. When memory hotplug is
+ * enabled, we should prevent memblock from allocating memory
+ * for the kernel.
+ *
+ * ACPI SRAT records all hotpluggable memory ranges. But before
+ * SRAT is parsed, we don't know about it.
+ *
+ * The kernel image is loaded into memory at very early time. We
+ * cannot prevent this anyway. So on NUMA system, we set any
+ * node the kernel resides in as un-hotpluggable.
+ *
+ * Since on modern servers, one node could have double-digit
+ * gigabytes memory, we can assume the memory around the kernel
+ * image is also un-hotpluggable. So before SRAT is parsed, just
+ * allocate memory near the kernel image to try the best to keep
+ * the kernel away from hotpluggable memory.
+ */
+ memblock_set_current_direction(MEMBLOCK_DIRECTION_LOW_TO_HIGH);
+ }
+#endif /* CONFIG_MOVABLE_NODE */
+
init_mem_mapping();
early_trap_pf_init();
@@ -1142,6 +1167,17 @@ void __init setup_arch(char **cmdline_p)
early_acpi_boot_init();
initmem_init();
+
+#ifdef CONFIG_MOVABLE_NODE
+ if (movablenode_enable_srat) {
+ /*
+ * When ACPI SRAT is parsed, which is done in initmem_init(),
+ * set memblock back to the default behavior.
+ */
+ memblock_set_current_direction(MEMBLOCK_DIRECTION_DEFAULT);
+ }
+#endif /* CONFIG_MOVABLE_NODE */
+
memblock_find_dma_reserve();
/*
@@ -33,6 +33,11 @@ enum {
ONLINE_MOVABLE,
};
+#ifdef CONFIG_MOVABLE_NODE
+/* Enable/disable SRAT in movablenode boot option */
+extern bool movablenode_enable_srat;
+#endif /* CONFIG_MOVABLE_NODE */
+
/*
* pgdat resizing functions
*/
@@ -1345,6 +1345,15 @@ static bool can_offline_normal(struct zone *zone, unsigned long nr_pages)
{
return true;
}
+
+bool __initdata movablenode_enable_srat;
+
+static int __init cmdline_parse_movablenode(char *p)
+{
+ movablenode_enable_srat = true;
+ return 0;
+}
+early_param("movablenode", cmdline_parse_movablenode);
#else /* CONFIG_MOVABLE_NODE */
/* ensure the node has NORMAL memory if it is still online */
static bool can_offline_normal(struct zone *zone, unsigned long nr_pages)