@@ -608,11 +608,11 @@ void __init paging_init(void)
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
-pg_data_t *arch_alloc_nodedata(int nid)
+pg_data_t * __init arch_alloc_nodedata(int nid)
{
unsigned long size = compute_pernodesize(nid);
- return kzalloc(size, GFP_KERNEL);
+ return memblock_alloc(size, SMP_CACHE_BYTES);
}
void arch_free_nodedata(pg_data_t *pgdat)
@@ -44,7 +44,7 @@ extern void arch_refresh_nodedata(int ni
*/
#define generic_alloc_nodedata(nid) \
({ \
- kzalloc(sizeof(pg_data_t), GFP_KERNEL); \
+ memblock_alloc(sizeof(pg_data_t), SMP_CACHE_BYTES); /* type, not caller's pgdat */ \
})
/*
* This definition is just for error path in node hotadd.
@@ -707,4 +707,6 @@ void vunmap_range_noflush(unsigned long
int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
unsigned long addr, int page_nid, int *flags);
+DECLARE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
+
#endif /* __MM_INTERNAL_H */
@@ -1162,19 +1162,21 @@ static void reset_node_present_pages(pg_
}
/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
-static pg_data_t __ref *hotadd_new_pgdat(int nid)
+static pg_data_t __ref *hotadd_init_pgdat(int nid)
{
struct pglist_data *pgdat;
pgdat = NODE_DATA(nid);
- if (!pgdat) {
- pgdat = arch_alloc_nodedata(nid);
- if (!pgdat)
- return NULL;
+ /*
+ * NODE_DATA is preallocated (free_area_init) but its internal
+ * state is not allocated completely. Add missing pieces.
+ * Completely offline nodes stay around and they just need
+ * reinitialization.
+ */
+ if (pgdat->per_cpu_nodestats == &boot_nodestats) {
pgdat->per_cpu_nodestats =
alloc_percpu(struct per_cpu_nodestat);
- arch_refresh_nodedata(nid, pgdat);
} else {
int cpu;
/*
@@ -1193,8 +1195,6 @@ static pg_data_t __ref *hotadd_new_pgdat
}
}
- /* we can use NODE_DATA(nid) from here */
- pgdat->node_id = nid;
pgdat->node_start_pfn = 0;
/* init node's zones as empty zones, we don't have any present pages.*/
@@ -1246,7 +1246,7 @@ static int __try_online_node(int nid, bo
if (node_online(nid))
return 0;
- pgdat = hotadd_new_pgdat(nid);
+ pgdat = hotadd_init_pgdat(nid);
if (!pgdat) {
pr_err("Cannot online node %d due to NULL pgdat\n", nid);
ret = -ENOMEM;
@@ -1445,9 +1445,6 @@ int __ref add_memory_resource(int nid, s
return ret;
error:
- /* rollback pgdat allocation and others */
- if (new_node)
- rollback_node_hotadd(nid);
if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
memblock_remove(start, size);
error_mem_hotplug_end:
@@ -6341,7 +6341,7 @@ static void per_cpu_pages_init(struct pe
#define BOOT_PAGESET_BATCH 1
static DEFINE_PER_CPU(struct per_cpu_pages, boot_pageset);
static DEFINE_PER_CPU(struct per_cpu_zonestat, boot_zonestats);
-static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
+DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
static void __build_all_zonelists(void *data)
{
@@ -6363,7 +6363,11 @@ static void __build_all_zonelists(void *
if (self && !node_online(self->node_id)) {
build_zonelists(self);
} else {
- for_each_online_node(nid) {
+ /*
+ * All possible nodes have pgdat preallocated
+ * in free_area_init
+ */
+ for_each_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid);
build_zonelists(pgdat);
@@ -8063,8 +8067,36 @@ void __init free_area_init(unsigned long
/* Initialise every node */
mminit_verify_pageflags_layout();
setup_nr_node_ids();
- for_each_online_node(nid) {
- pg_data_t *pgdat = NODE_DATA(nid);
+ for_each_node(nid) {
+ pg_data_t *pgdat;
+
+ if (!node_online(nid)) {
+ pr_info("Initializing node %d as memoryless\n", nid);
+
+ /* Allocator not initialized yet */
+ pgdat = arch_alloc_nodedata(nid);
+ if (!pgdat) {
+ pr_err("Cannot allocate %zuB for node %d.\n",
+ sizeof(*pgdat), nid);
+ continue;
+ }
+ arch_refresh_nodedata(nid, pgdat);
+ free_area_init_memoryless_node(nid);
+
+ /*
+ * We do not want to confuse userspace by sysfs
+ * files/directories for node without any memory
+ * attached to it, so this node is not marked as
+ * N_MEMORY and not marked online so that no sysfs
+ * hierarchy will be created via register_one_node for
+ * it. The pgdat will get fully initialized by
+ * hotadd_init_pgdat() when memory is hotplugged into
+ * this node.
+ */
+ continue;
+ }
+
+ pgdat = NODE_DATA(nid);
free_area_init_node(nid);
/* Any memory on that node */