@@ -961,7 +961,6 @@ int __ref online_pages(unsigned long pfn
node_states_set_node(nid, &arg);
if (need_zonelists_rebuild)
build_all_zonelists(NULL);
- zone_pcp_update(zone);
/* Basic onlining is complete, allow allocation of onlined pages. */
undo_isolate_page_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE);
@@ -974,6 +973,7 @@ int __ref online_pages(unsigned long pfn
*/
shuffle_zone(zone);
+ /* reinitialise watermarks and update pcp limits */
init_per_zone_wmark_min();
kswapd_run(nid);
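For context, zone_pcp_update() itself is untouched by this patch; it is a thin wrapper that recomputes and republishes the limits under pcp_batch_high_lock. That is why the explicit call removed above can be replaced by init_per_zone_wmark_min(), which now reaches the same helper via setup_per_zone_wmarks() (see the final hunk). A sketch of the wrapper as it stands in mm/page_alloc.c around this series:

void zone_pcp_update(struct zone *zone)
{
	/* serialise against other batch/high updates */
	mutex_lock(&pcp_batch_high_lock);
	zone_set_pageset_high_and_batch(zone);
	mutex_unlock(&pcp_batch_high_lock);
}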
@@ -1829,13 +1829,13 @@ int __ref offline_pages(unsigned long st
adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
adjust_present_page_count(zone, -nr_pages);
+ /* reinitialise watermarks and update pcp limits */
init_per_zone_wmark_min();
if (!populated_zone(zone)) {
zone_pcp_reset(zone);
build_all_zonelists(NULL);
- } else
- zone_pcp_update(zone);
+ }
node_states_clear_node(node, &arg);
if (arg.status_change_nid >= 0) {
@@ -2175,14 +2175,6 @@ void __init page_alloc_init_late(void)
wait_for_completion(&pgdat_init_all_done_comp);
/*
- * The number of managed pages has changed due to the initialisation
- * so the pcpu batch and high limits needs to be updated or the limits
- * will be artificially small.
- */
- for_each_populated_zone(zone)
- zone_pcp_update(zone);
-
- /*
* We initialized the rest of the deferred pages. Permanently disable
* on-demand struct page initialization.
*/
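The removed loop is covered by boot ordering rather than by a new call here: page_alloc_init_late() runs from kernel_init_freeable() before the initcalls, and init_per_zone_wmark_min() is an initcall, so the watermarks and hence the pcp limits are computed after the deferred pages are accounted. A simplified sketch of the relevant ordering in init/main.c (elided; only the two calls that matter are shown):

static noinline void __init kernel_init_freeable(void)
{
	/* ... */
	page_alloc_init_late();		/* waits for deferred struct page init */
	/* ... */
	do_basic_setup();		/* runs initcalls, including
					 * init_per_zone_wmark_min(), which
					 * now also updates the pcp limits */
}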
@@ -6633,13 +6625,12 @@ static int zone_batchsize(struct zone *z
int batch;
/*
- * The per-cpu-pages pools are set to around 1000th of the
- * size of the zone.
+ * The number of pages to batch allocate is either ~0.1%
+ * of the zone or 1MB, whichever is smaller. The batch
+ * size strikes a balance between allocation latency
+ * and zone lock contention.
*/
- batch = zone_managed_pages(zone) / 1024;
- /* But no more than a meg. */
- if (batch * PAGE_SIZE > 1024 * 1024)
- batch = (1024 * 1024) / PAGE_SIZE;
+ batch = min(zone_managed_pages(zone) >> 10, (1024 * 1024) / PAGE_SIZE);
batch /= 4; /* We effectively *= 4 below */
if (batch < 1)
batch = 1;
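As a worked example of the new formula (a standalone userspace sketch, not kernel code; a 4GiB zone with 4KiB pages is assumed): ~0.1% of the zone is 1024 pages, the 1MB cap is 256 pages, so the cap wins; dividing by 4 gives 64, and the 2^n - 1 rounding later in zone_batchsize() yields a final batch of 63.

#include <stdio.h>

#define PAGE_SIZE	4096UL

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	/* 4GiB of managed memory in 4KiB pages: 1048576 pages */
	unsigned long managed = (4UL << 30) / PAGE_SIZE;
	unsigned long batch;

	/* ~0.1% of the zone, capped at 1MB worth of pages */
	batch = min_ul(managed >> 10, (1024 * 1024) / PAGE_SIZE);
	batch /= 4;		/* zone_batchsize() scales back up later */
	if (batch < 1)
		batch = 1;

	printf("batch before 2^n - 1 rounding: %lu\n", batch);	/* 64 */
	return 0;
}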
@@ -6676,6 +6667,34 @@ static int zone_batchsize(struct zone *z
#endif
}
+static int zone_highsize(struct zone *zone, int batch)
+{
+#ifdef CONFIG_MMU
+ int high;
+ int nr_local_cpus;
+
+ /*
+ * The high value of the pcp is based on the zone low watermark
+ * so that if the per-cpu lists are full then background reclaim
+ * will not be started prematurely. The value is split across all
+ * online CPUs local to the zone. Note that early in boot CPUs may
+ * not be online yet.
+ */
+ nr_local_cpus = max(1U, cpumask_weight(cpumask_of_node(zone_to_nid(zone))));
+ high = low_wmark_pages(zone) / nr_local_cpus;
+
+ /*
+ * Ensure high is at least batch*4. The multiple is based on the
+ * historical relationship between high and batch.
+ */
+ high = max(high, batch << 2);
+
+ return high;
+#else
+ return 0;
+#endif
+}
+
/*
* pcp->high and pcp->batch values are related and generally batch is lower
* than high. They are also related to pcp->count such that count is lower
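To see what the new sizing does in practice (illustrative numbers only, not measurements): on a node with 8 online CPUs and a zone whose low watermark is 4000 pages, each CPU's pcp high becomes 500 pages, comfortably above the batch << 2 floor of 252 for a batch of 63. The old fixed high of 6 * batch would have been just 378 pages regardless of zone size. A standalone sketch:

#include <stdio.h>

static int max_int(int a, int b)
{
	return a > b ? a : b;
}

int main(void)
{
	int low_wmark = 4000;	/* zone low watermark, in pages */
	int nr_local_cpus = 8;	/* online CPUs on the zone's node */
	int batch = 63;		/* from the zone_batchsize() example */
	int high;

	high = low_wmark / nr_local_cpus;	/* 500 pages per CPU */
	high = max_int(high, batch << 2);	/* floor of 252 pages */

	printf("pcp high = %d (old 6 * batch = %d)\n", high, 6 * batch);
	return 0;
}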
@@ -6737,11 +6756,10 @@ static void __zone_set_pageset_high_and_
*/
static void zone_set_pageset_high_and_batch(struct zone *zone)
{
- unsigned long new_high, new_batch;
+ int new_high, new_batch;
- new_batch = zone_batchsize(zone);
- new_high = 6 * new_batch;
- new_batch = max(1UL, 1 * new_batch);
+ new_batch = max(1, zone_batchsize(zone));
+ new_high = zone_highsize(zone, new_batch);
if (zone->pageset_high == new_high &&
zone->pageset_batch == new_batch)
@@ -8222,11 +8240,19 @@ static void __setup_per_zone_wmarks(void
*/
void setup_per_zone_wmarks(void)
{
+ struct zone *zone;
static DEFINE_SPINLOCK(lock);
spin_lock(&lock);
__setup_per_zone_wmarks();
spin_unlock(&lock);
+
+ /*
+	 * The watermark sizes have changed so update the pcpu batch
+	 * and high limits, otherwise the limits may be inappropriate.
+ */
+ for_each_zone(zone)
+ zone_pcp_update(zone);
}
/*
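One consequence worth noting: every path that recalculates the watermarks now also resizes the pcp lists. For instance, writing vm.min_free_kbytes ends in setup_per_zone_wmarks() and therefore in the zone_pcp_update() loop added above. A sketch of that sysctl handler as it stands in mm/page_alloc.c (untouched by this patch):

int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
		void *buffer, size_t *length, loff_t *ppos)
{
	int rc;

	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
	if (rc)
		return rc;

	if (write) {
		user_min_free_kbytes = min_free_kbytes;
		setup_per_zone_wmarks();	/* -> zone_pcp_update() */
	}
	return 0;
}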