From patchwork Tue May 12 19:38:58 2015
From: Morten Rasmussen <morten.rasmussen@arm.com>
X-Patchwork-Id: 6390851
To: peterz@infradead.org, mingo@redhat.com
Cc: vincent.guittot@linaro.org, Dietmar Eggemann, yuyang.du@intel.com,
    preeti@linux.vnet.ibm.com, mturquette@linaro.org, rjw@rjwysocki.net,
    Juri Lelli, sgurrappadi@nvidia.com, pang.xunlei@zte.com.cn,
    linux-kernel@vger.kernel.org, linux-pm@vger.kernel.org,
    morten.rasmussen@arm.com
Subject: [RFCv4 PATCH 23/34] sched: Extend sched_group_energy to test load-balancing decisions
Date: Tue, 12 May 2015 20:38:58 +0100
Message-Id: <1431459549-18343-24-git-send-email-morten.rasmussen@arm.com>
In-Reply-To: <1431459549-18343-1-git-send-email-morten.rasmussen@arm.com>
References: <1431459549-18343-1-git-send-email-morten.rasmussen@arm.com>

Extend sched_group_energy() to support energy prediction when usage
(tasks) is added to or removed from a specific cpu, or migrated between
a pair of cpus. This is useful for load-balancing decision making.

cc: Ingo Molnar
cc: Peter Zijlstra

Signed-off-by: Morten Rasmussen
---
 kernel/sched/fair.c | 88 ++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 61 insertions(+), 27 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2677ca6..52403e9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4810,15 +4810,17 @@ static unsigned long capacity_curr_of(int cpu)
  * capacity_curr (but not capacity_orig) as it useful for predicting the
  * capacity required after task migrations (scheduler-driven DVFS).
  */
-
-static unsigned long get_cpu_usage(int cpu)
+static unsigned long __get_cpu_usage(int cpu, int delta)
 {
 	int sum;
 	unsigned long usage = cpu_rq(cpu)->cfs.utilization_load_avg;
 	unsigned long blocked = cpu_rq(cpu)->cfs.utilization_blocked_avg;
 	unsigned long capacity_orig = capacity_orig_of(cpu);
 
-	sum = usage + blocked;
+	sum = usage + blocked + delta;
+
+	if (sum < 0)
+		return 0;
 
 	if (sum >= capacity_orig)
 		return capacity_orig;
@@ -4826,13 +4828,28 @@ static unsigned long get_cpu_usage(int cpu)
 	return sum;
 }
 
+static unsigned long get_cpu_usage(int cpu)
+{
+	return __get_cpu_usage(cpu, 0);
+}
+
 static inline bool energy_aware(void)
 {
 	return sched_feat(ENERGY_AWARE);
 }
 
+struct energy_env {
+	struct sched_group	*sg_top;
+	struct sched_group	*sg_cap;
+	int			cap_idx;
+	int			usage_delta;
+	int			src_cpu;
+	int			dst_cpu;
+	int			energy;
+};
+
 /*
- * cpu_norm_usage() returns the cpu usage relative to a specific capacity,
+ * __cpu_norm_usage() returns the cpu usage relative to a specific capacity,
  * i.e. it's busy ratio, in the range [0..SCHED_LOAD_SCALE] which is useful for
  * energy calculations. Using the scale-invariant usage returned by
  * get_cpu_usage() and approximating scale-invariant usage by:
@@ -4845,9 +4862,9 @@ static inline bool energy_aware(void)
  *
  *	norm_usage = running_time/time ~ usage/capacity
  */
-static unsigned long cpu_norm_usage(int cpu, unsigned long capacity)
+static unsigned long __cpu_norm_usage(int cpu, unsigned long capacity, int delta)
 {
-	int usage = __get_cpu_usage(cpu);
+	int usage = __get_cpu_usage(cpu, delta);
 
 	if (usage >= capacity)
 		return SCHED_CAPACITY_SCALE;
@@ -4855,13 +4872,25 @@ static unsigned long cpu_norm_usage(int cpu, unsigned long capacity)
 	return (usage << SCHED_CAPACITY_SHIFT)/capacity;
 }
 
-static unsigned long group_max_usage(struct sched_group *sg)
+static int calc_usage_delta(struct energy_env *eenv, int cpu)
 {
-	int i;
+	if (cpu == eenv->src_cpu)
+		return -eenv->usage_delta;
+	if (cpu == eenv->dst_cpu)
+		return eenv->usage_delta;
+	return 0;
+}
+
+static
+unsigned long group_max_usage(struct energy_env *eenv, struct sched_group *sg)
+{
+	int i, delta;
 	unsigned long max_usage = 0;
 
-	for_each_cpu(i, sched_group_cpus(sg))
-		max_usage = max(max_usage, get_cpu_usage(i));
+	for_each_cpu(i, sched_group_cpus(sg)) {
+		delta = calc_usage_delta(eenv, i);
+		max_usage = max(max_usage, __get_cpu_usage(i, delta));
+	}
 
 	return max_usage;
 }
@@ -4875,31 +4904,36 @@ static unsigned long group_max_usage(struct sched_group *sg)
  * latter is used as the estimate as it leads to a more pessimistic energy
  * estimate (more busy).
  */
-static unsigned long group_norm_usage(struct sched_group *sg, int cap_idx)
+static unsigned
+long group_norm_usage(struct energy_env *eenv, struct sched_group *sg)
 {
-	int i;
+	int i, delta;
 	unsigned long usage_sum = 0;
-	unsigned long capacity = sg->sge->cap_states[cap_idx].cap;
+	unsigned long capacity = sg->sge->cap_states[eenv->cap_idx].cap;
 
-	for_each_cpu(i, sched_group_cpus(sg))
-		usage_sum += cpu_norm_usage(i, capacity);
+	for_each_cpu(i, sched_group_cpus(sg)) {
+		delta = calc_usage_delta(eenv, i);
+		usage_sum += __cpu_norm_usage(i, capacity, delta);
+	}
 
 	if (usage_sum > SCHED_CAPACITY_SCALE)
 		return SCHED_CAPACITY_SCALE;
 
 	return usage_sum;
 }
 
-static int find_new_capacity(struct sched_group *sg,
+static int find_new_capacity(struct energy_env *eenv,
 					struct sched_group_energy *sge)
 {
 	int idx;
-	unsigned long util = group_max_usage(sg);
+	unsigned long util = group_max_usage(eenv, eenv->sg_cap);
 
 	for (idx = 0; idx < sge->nr_cap_states; idx++) {
 		if (sge->cap_states[idx].cap >= util)
-			return idx;
+			break;
 	}
 
+	eenv->cap_idx = idx;
+
 	return idx;
 }
@@ -4912,16 +4946,16 @@ static int find_new_capacity(struct sched_group *sg,
  * gather the same usage statistics multiple times. This can probably be done in
  * a faster but more complex way.
  */
-static unsigned int sched_group_energy(struct sched_group *sg_top)
+static unsigned int sched_group_energy(struct energy_env *eenv)
 {
 	struct sched_domain *sd;
 	int cpu, total_energy = 0;
 	struct cpumask visit_cpus;
 	struct sched_group *sg;
 
-	WARN_ON(!sg_top->sge);
+	WARN_ON(!eenv->sg_top->sge);
 
-	cpumask_copy(&visit_cpus, sched_group_cpus(sg_top));
+	cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top));
 
 	while (!cpumask_empty(&visit_cpus)) {
 		struct sched_group *sg_shared_cap = NULL;
@@ -4944,17 +4978,16 @@ static unsigned int sched_group_energy(struct sched_group *sg_top)
 			break;
 
 		do {
-			struct sched_group *sg_cap_util;
 			unsigned long group_util;
 			int sg_busy_energy, sg_idle_energy, cap_idx;
 
 			if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight)
-				sg_cap_util = sg_shared_cap;
+				eenv->sg_cap = sg_shared_cap;
 			else
-				sg_cap_util = sg;
+				eenv->sg_cap = sg;
 
-			cap_idx = find_new_capacity(sg_cap_util, sg->sge);
-			group_util = group_norm_usage(sg, cap_idx);
+			cap_idx = find_new_capacity(eenv, sg->sge);
+			group_util = group_norm_usage(eenv, sg);
 			sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power)
 								>> SCHED_CAPACITY_SHIFT;
 			sg_idle_energy = ((SCHED_LOAD_SCALE-group_util) * sg->sge->idle_states[0].power)
@@ -4965,7 +4998,7 @@ static unsigned int sched_group_energy(struct sched_group *sg_top)
 			if (!sd->child)
 				cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg));
 
-			if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(sg_top)))
+			if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top)))
 				goto next_cpu;
 
 		} while (sg = sg->next, sg != sd->groups);
@@ -4974,6 +5007,7 @@ static unsigned int sched_group_energy(struct sched_group *sg_top)
 			continue;
 	}
 
+	eenv->energy = total_energy;
 	return total_energy;
 }
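
The delta handling above is the core of the prediction: __get_cpu_usage()
applies a signed usage delta and clamps the result, and __cpu_norm_usage()
turns it into a busy ratio at a given capacity. Below is a minimal
user-space sketch of that arithmetic, assuming SCHED_CAPACITY_SHIFT == 10
as in mainline; norm_usage() and its example values are made up for
illustration and are not part of the patch:

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT)

/*
 * Mirrors __get_cpu_usage() + __cpu_norm_usage(): apply a signed usage
 * delta, clamp to [0..capacity], then scale to [0..SCHED_CAPACITY_SCALE].
 */
static long norm_usage(long usage, long delta, long capacity)
{
	long sum = usage + delta;

	if (sum < 0)		/* more usage removed than present */
		return 0;
	if (sum >= capacity)	/* saturated: fully busy at this state */
		return SCHED_CAPACITY_SCALE;

	return (sum << SCHED_CAPACITY_SHIFT) / capacity;
}

int main(void)
{
	/* cpu with usage 300 at a capacity state of 512: 600/1024 busy */
	printf("%ld\n", norm_usage(300, 0, 512));
	/* dst cpu after receiving 100 units of usage: 800/1024 busy */
	printf("%ld\n", norm_usage(300, 100, 512));
	/* src cpu after losing more usage than it had: clamps to 0 */
	printf("%ld\n", norm_usage(300, -400, 512));
	return 0;
}

The clamp at zero is what the new "if (sum < 0)" test provides: when
calc_usage_delta() subtracts eenv->usage_delta on the source cpu, the
remaining usage plus blocked utilization may be smaller than the delta.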
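
At the group level, sched_group_energy() then prices each group as busy
energy at the capacity state chosen by find_new_capacity() plus idle energy
for the remaining time. The sketch below replays that busy/idle split in
user space; the cap_states table, idle_power value and helper names are
invented for the example and do not come from the patch or any real
platform:

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)

struct cap_state { unsigned long cap, power; };

/* Hypothetical capacity/power table, lowest capacity state first. */
static const struct cap_state cap_states[] = {
	{ 430, 150 }, { 740, 300 }, { 1024, 600 },
};
#define NR_CAP_STATES	(sizeof(cap_states) / sizeof(cap_states[0]))

static const unsigned long idle_power = 10;	/* shallowest idle state */

/* As in find_new_capacity(): lowest state whose capacity covers the usage. */
static unsigned int find_cap_idx(unsigned long max_usage)
{
	unsigned int idx;

	for (idx = 0; idx < NR_CAP_STATES - 1; idx++)
		if (cap_states[idx].cap >= max_usage)
			break;
	return idx;	/* falls back to the highest state */
}

/* Busy + idle energy, mirroring the sg_busy_energy/sg_idle_energy sums. */
static unsigned long group_energy(unsigned long usage)
{
	unsigned int idx = find_cap_idx(usage);
	unsigned long util = (usage << SCHED_CAPACITY_SHIFT) / cap_states[idx].cap;

	if (util > SCHED_CAPACITY_SCALE)
		util = SCHED_CAPACITY_SCALE;

	return ((util * cap_states[idx].power) >> SCHED_CAPACITY_SHIFT) +
	       (((SCHED_CAPACITY_SCALE - util) * idle_power) >> SCHED_CAPACITY_SHIFT);
}

int main(void)
{
	/* Predicted group energy before/after migrating 100 units here. */
	printf("before: %lu after: %lu\n", group_energy(300), group_energy(400));
	return 0;
}

Because the idle term weights the remaining share of time, adding usage to
an already-busy group mostly trades cheap idle time for expensive busy
time, which is the signal the load balancer needs when comparing candidate
migrations.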