@@ -54,7 +54,7 @@ enum cpuhp_state {
CPUHP_MM_MEMCQ_DEAD,
CPUHP_PERCPU_CNT_DEAD,
CPUHP_RADIX_DEAD,
- CPUHP_PAGE_ALLOC_DEAD,
+ CPUHP_PAGE_ALLOC,
CPUHP_NET_DEV_DEAD,
CPUHP_PCI_XGENE_DEAD,
CPUHP_IOMMU_IOVA_DEAD,
@@ -206,7 +206,7 @@ extern int user_min_free_kbytes;
extern void free_unref_page(struct page *page);
extern void free_unref_page_list(struct list_head *list);
-extern void zone_pcp_update(struct zone *zone);
+extern void zone_pcp_update(struct zone *zone, int cpu_online);
extern void zone_pcp_reset(struct zone *zone);
extern void zone_pcp_disable(struct zone *zone);
extern void zone_pcp_enable(struct zone *zone);
@@ -6667,7 +6667,7 @@ static int zone_batchsize(struct zone *z
#endif
}
-static int zone_highsize(struct zone *zone, int batch)
+static int zone_highsize(struct zone *zone, int batch, int cpu_online)
{
#ifdef CONFIG_MMU
int high;
@@ -6678,9 +6678,10 @@ static int zone_highsize(struct zone *zo
* so that if they are full then background reclaim will not be
* started prematurely. The value is split across all online CPUs
* local to the zone. Note that early in boot that CPUs may not be
- * online yet.
+ * online yet and that during CPU hotplug that the cpumask is not
+ * yet updated when a CPU is being onlined.
*/
- nr_local_cpus = max(1U, cpumask_weight(cpumask_of_node(zone_to_nid(zone))));
+ nr_local_cpus = max(1U, cpumask_weight(cpumask_of_node(zone_to_nid(zone)))) + cpu_online;
high = low_wmark_pages(zone) / nr_local_cpus;
/*
@@ -6754,12 +6755,12 @@ static void __zone_set_pageset_high_and_
* Calculate and set new high and batch values for all per-cpu pagesets of a
* zone based on the zone's size.
*/
-static void zone_set_pageset_high_and_batch(struct zone *zone)
+static void zone_set_pageset_high_and_batch(struct zone *zone, int cpu_online)
{
int new_high, new_batch;
new_batch = max(1, zone_batchsize(zone));
- new_high = zone_highsize(zone, new_batch);
+ new_high = zone_highsize(zone, new_batch, cpu_online);
if (zone->pageset_high == new_high &&
zone->pageset_batch == new_batch)
@@ -6789,7 +6790,7 @@ void __meminit setup_zone_pageset(struct
per_cpu_pages_init(pcp, pzstats);
}
- zone_set_pageset_high_and_batch(zone);
+ zone_set_pageset_high_and_batch(zone, 0);
}
/*
@@ -8044,6 +8045,7 @@ void __init set_dma_reserve(unsigned lon
static int page_alloc_cpu_dead(unsigned int cpu)
{
+ struct zone *zone;
lru_add_drain_cpu(cpu);
drain_pages(cpu);
@@ -8064,6 +8066,19 @@ static int page_alloc_cpu_dead(unsigned
* race with what we are doing.
*/
cpu_vm_stats_fold(cpu);
+
+ for_each_populated_zone(zone)
+ zone_pcp_update(zone, 0);
+
+ return 0;
+}
+
+static int page_alloc_cpu_online(unsigned int cpu)
+{
+ struct zone *zone;
+
+ for_each_populated_zone(zone)
+ zone_pcp_update(zone, 1);
return 0;
}
@@ -8089,8 +8104,9 @@ void __init page_alloc_init(void)
hashdist = 0;
#endif
- ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC_DEAD,
- "mm/page_alloc:dead", NULL,
+ ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC,
+ "mm/page_alloc:pcp",
+ page_alloc_cpu_online,
page_alloc_cpu_dead);
WARN_ON(ret < 0);
}
@@ -8252,7 +8268,7 @@ void setup_per_zone_wmarks(void)
* and high limits or the limits may be inappropriate.
*/
for_each_zone(zone)
- zone_pcp_update(zone);
+ zone_pcp_update(zone, 0);
}
/*
@@ -9053,10 +9069,10 @@ EXPORT_SYMBOL(free_contig_range);
* The zone indicated has a new number of managed_pages; batch sizes and percpu
* page high values need to be recalculated.
*/
-void __meminit zone_pcp_update(struct zone *zone)
+void zone_pcp_update(struct zone *zone, int cpu_online)
{
mutex_lock(&pcp_batch_high_lock);
- zone_set_pageset_high_and_batch(zone);
+ zone_set_pageset_high_and_batch(zone, cpu_online);
mutex_unlock(&pcp_batch_high_lock);
}