[RFC,10/12,v2] Intercept periodic nohz idle balancing

Message ID 1399832221-8314-11-git-send-email-yuyang.du@intel.com (mailing list archive)
State RFC, archived

Commit Message

Yuyang Du May 11, 2014, 6:16 p.m. UTC
We intercept load balancing so that both the load and the load balancing itself
are contained within the consolidated CPUs, according to our consolidation
mechanism.

In the periodic nohz idle balance, idle but non-consolidated CPUs are skipped:
the idle load balancer is selected only from the non-shielded idle CPUs, and
rebalancing is done only on behalf of those CPUs.

Signed-off-by: Yuyang Du <yuyang.du@intel.com>
---
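The caller-side change that builds and passes the new cpumask is not part of
the hunks quoted below. A rough sketch of how run_rebalance_domains() could
construct that mask, assuming the per-CPU local_cpu_mask scratch mask and the
workload_consolidation_nonshielded_mask() helper introduced earlier in this
series (illustrative only, not the actual hunk):

static void run_rebalance_domains(struct softirq_action *h)
{
	struct rq *this_rq = this_rq();
	enum cpu_idle_type idle = this_rq->idle_balance ?
						CPU_IDLE : CPU_NOT_IDLE;
	/* Illustrative: per-CPU scratch mask assumed from an earlier patch */
	struct cpumask *nonshielded = __get_cpu_var(local_cpu_mask);

	rebalance_domains(this_rq, idle);

	/* Start from all tickless idle CPUs ... */
	cpumask_copy(nonshielded, nohz.idle_cpus_mask);

	/* ... and drop the shielded (non-consolidated) ones. */
	rcu_read_lock();
	workload_consolidation_nonshielded_mask(this_rq->cpu, nonshielded);
	rcu_read_unlock();

	/* Rebalance only on behalf of the remaining idle CPUs. */
	nohz_idle_balance(this_rq, idle, nonshielded);
}

This mirrors what find_new_ilb() does below: both paths restrict the nohz idle
set to the non-shielded CPUs before acting on it.
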
 kernel/sched/fair.c |   50 +++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 43 insertions(+), 7 deletions(-)

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 94c7a6a..9bb1304 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6867,10 +6867,46 @@  static struct {
 
 static inline int find_new_ilb(void)
 {
+#ifdef CONFIG_WORKLOAD_CONSOLIDATION
+	struct cpumask *nonshielded = __get_cpu_var(local_cpu_mask);
+	int ilb, weight;
+	int this_cpu = smp_processor_id();
+
+	/*
+	 * Optimize for the case when we have no idle CPUs or only one
+	 * idle CPU. Don't walk the sched_domain hierarchy in such cases
+	 */
+	if (cpumask_weight(nohz.idle_cpus_mask) < 2)
+		return nr_cpu_ids;
+
+	ilb = cpumask_first(nohz.idle_cpus_mask);
+
+	if (ilb < nr_cpu_ids && idle_cpu(ilb)) {
+
+		cpumask_copy(nonshielded, nohz.idle_cpus_mask);
+
+		rcu_read_lock();
+		workload_consolidation_nonshielded_mask(this_cpu, nonshielded);
+		rcu_read_unlock();
+
+		weight = cpumask_weight(nonshielded);
+
+		if (weight < 2)
+			return nr_cpu_ids;
+
+		/*
+		 * get idle load balancer again
+		 */
+		ilb = cpumask_first(nonshielded);
+		if (ilb < nr_cpu_ids && idle_cpu(ilb))
+			return ilb;
+	}
+#else
 	int ilb = cpumask_first(nohz.idle_cpus_mask);
 
 	if (ilb < nr_cpu_ids && idle_cpu(ilb))
 		return ilb;
+#endif
 
 	return nr_cpu_ids;
 }
@@ -7107,7 +7143,7 @@  out:
  * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the
  * rebalancing for all the cpus for whom scheduler ticks are stopped.
  */
-static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
+static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle, struct cpumask *mask)
 {
 	int this_cpu = this_rq->cpu;
 	struct rq *rq;
@@ -7117,7 +7153,7 @@  static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
 	    !test_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu)))
 		goto end;
 
-	for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
+	for_each_cpu(balance_cpu, mask) {
 		if (balance_cpu == this_cpu || !idle_cpu(balance_cpu))
 			continue;
 
@@ -7165,10 +7201,10 @@  static inline int nohz_kick_needed(struct rq *rq)
 	if (unlikely(rq->idle_balance))
 		return 0;
 
-       /*
-	* We may be recently in ticked or tickless idle mode. At the first
-	* busy tick after returning from idle, we will update the busy stats.
-	*/
+	/*
+	 * We may be recently in ticked or tickless idle mode. At the first
+	 * busy tick after returning from idle, we will update the busy stats.
+	 */
 	set_cpu_sd_state_busy();
 	nohz_balance_exit_idle(cpu);
 
@@ -7211,7 +7247,7 @@  need_kick:
 	return 1;
 }
 #else
-static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { }
+static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle, struct cpumask *mask) { }
 #endif
 
 /*