From patchwork Fri Oct 11 13:44:55 2019
From: Douglas RAILLARD <douglas.raillard@arm.com>
To: linux-kernel@vger.kernel.org
Cc: linux-pm@vger.kernel.org, mingo@redhat.com, peterz@infradead.org,
    rjw@rjwysocki.net, viresh.kumar@linaro.org, juri.lelli@redhat.com,
    vincent.guittot@linaro.org, douglas.raillard@arm.com,
    dietmar.eggemann@arm.com, qperret@qperret.net,
    patrick.bellasi@matbug.net, dh.han@samsung.com
Subject: [RFC PATCH v3 1/6] PM: Introduce em_pd_get_higher_freq()
Date: Fri, 11 Oct 2019 14:44:55 +0100
Message-Id: <20191011134500.235736-2-douglas.raillard@arm.com>
In-Reply-To: <20191011134500.235736-1-douglas.raillard@arm.com>
References: <20191011134500.235736-1-douglas.raillard@arm.com>

em_pd_get_higher_freq() returns a frequency greater than or equal to the
provided one, while taking into account a given cost margin. It also
skips inefficient OPPs, i.e. OPPs with a higher cost than another OPP
with a higher frequency.

The efficiency of an OPP is measured as efficiency = capacity / power.
OPPs with the same efficiency are assumed to be equivalent, since they
consume the same amount of energy for a given amount of work. Completing
that work may take more or less time depending on the frequency, but the
energy consumed is the same.

Signed-off-by: Douglas RAILLARD <douglas.raillard@arm.com>
---
 include/linux/energy_model.h | 53 ++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index d249b88a4d5a..dd6a35f099ea 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -159,6 +159,53 @@ static inline int em_pd_nr_cap_states(struct em_perf_domain *pd)
 	return pd->nr_cap_states;
 }
 
+#define EM_COST_MARGIN_SCALE 1024U
+
+/**
+ * em_pd_get_higher_freq() - Get the highest frequency that does not exceed the
+ * given cost margin compared to min_freq
+ * @pd		: performance domain for which this must be done
+ * @min_freq	: minimum frequency to return
+ * @cost_margin	: allowed margin compared to min_freq, on the
+ *		  EM_COST_MARGIN_SCALE scale.
+ *
+ * Return: the chosen frequency, guaranteed to be at least as high as min_freq.
+ */
+static inline unsigned long em_pd_get_higher_freq(struct em_perf_domain *pd,
+	unsigned long min_freq, unsigned long cost_margin)
+{
+	unsigned long max_cost = 0;
+	struct em_cap_state *cs;
+	int i;
+
+	if (!pd)
+		return min_freq;
+
+	/* Compute the maximum allowed cost */
+	for (i = 0; i < pd->nr_cap_states; i++) {
+		cs = &pd->table[i];
+		if (cs->frequency >= min_freq) {
+			max_cost = cs->cost +
+				(cs->cost * cost_margin) / EM_COST_MARGIN_SCALE;
+			break;
+		}
+	}
+
+	/* Find the highest frequency that will not exceed the cost margin */
+	for (i = pd->nr_cap_states-1; i >= 0; i--) {
+		cs = &pd->table[i];
+		if (cs->cost <= max_cost)
+			return cs->frequency;
+	}
+
+	/*
+	 * We should normally never reach here, unless min_freq was higher than
+	 * the highest available frequency, which is not expected to happen.
+	 */
+	return min_freq;
+}
+
+
 #else
 struct em_data_callback {};
 #define EM_DATA_CB(_active_power_cb) { }
@@ -181,6 +228,12 @@ static inline int em_pd_nr_cap_states(struct em_perf_domain *pd)
 {
 	return 0;
 }
+
+static inline unsigned long em_pd_get_higher_freq(struct em_perf_domain *pd,
+	unsigned long min_freq, unsigned long cost_margin)
+{
+	return min_freq;
+}
 #endif
 
 #endif
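As an illustration of the selection logic, here is a stand-alone userspace
transcription of the same two-pass scan. The OPP table, its frequencies and
its cost values are made up for the example and do not come from any real
energy model:

#include <stdio.h>

#define EM_COST_MARGIN_SCALE 1024U

struct opp { unsigned long frequency; unsigned long cost; };

/* Made-up table, sorted by ascending frequency, like struct em_perf_domain */
static const struct opp table[] = {
	{  500000, 120 },	/* inefficient: costs more than the 667 MHz OPP */
	{  667000, 110 },
	{ 1000000, 150 },
	{ 1500000, 300 },
};

static unsigned long get_higher_freq(unsigned long min_freq,
				     unsigned long cost_margin)
{
	const int nr = sizeof(table) / sizeof(table[0]);
	unsigned long max_cost = 0;
	int i;

	/* Cost of the lowest OPP at or above min_freq, plus the margin */
	for (i = 0; i < nr; i++) {
		if (table[i].frequency >= min_freq) {
			max_cost = table[i].cost +
				(table[i].cost * cost_margin) / EM_COST_MARGIN_SCALE;
			break;
		}
	}

	/* Highest frequency whose cost stays within the budget */
	for (i = nr - 1; i >= 0; i--) {
		if (table[i].cost <= max_cost)
			return table[i].frequency;
	}

	return min_freq;
}

int main(void)
{
	printf("%lu\n", get_higher_freq(500000, 0));	/* 667000 */
	printf("%lu\n", get_higher_freq(500000, 512));	/* 1000000: 120 + 60 >= 150 */
	return 0;
}

The first call shows the inefficient-OPP skipping: even with a zero cost
margin, the 500 MHz OPP is never returned, since the 667 MHz OPP delivers
more capacity for a lower cost.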
From patchwork Fri Oct 11 13:44:56 2019
From: Douglas RAILLARD <douglas.raillard@arm.com>
To: linux-kernel@vger.kernel.org
Cc: linux-pm@vger.kernel.org, mingo@redhat.com, peterz@infradead.org,
    rjw@rjwysocki.net, viresh.kumar@linaro.org, juri.lelli@redhat.com,
    vincent.guittot@linaro.org, douglas.raillard@arm.com,
    dietmar.eggemann@arm.com, qperret@qperret.net,
    patrick.bellasi@matbug.net, dh.han@samsung.com
Subject: [RFC PATCH v3 2/6] sched/cpufreq: Attach perf domain to sugov policy
Date: Fri, 11 Oct 2019 14:44:56 +0100
Message-Id: <20191011134500.235736-3-douglas.raillard@arm.com>
In-Reply-To: <20191011134500.235736-1-douglas.raillard@arm.com>
References: <20191011134500.235736-1-douglas.raillard@arm.com>

Attach an Energy Model perf_domain to each sugov_policy to prepare the
ground for energy-aware schedutil.

Signed-off-by: Douglas RAILLARD <douglas.raillard@arm.com>
---
 kernel/sched/cpufreq_schedutil.c | 39 ++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index ba9e8309eec7..9abda58827c0 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -42,6 +42,10 @@ struct sugov_policy {
 
 	bool			limits_changed;
 	bool			need_freq_update;
+
+#ifdef CONFIG_ENERGY_MODEL
+	struct em_perf_domain	*pd;
+#endif
 };
 
 struct sugov_cpu {
@@ -66,6 +70,38 @@ static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
 
 /************************ Governor internals ***********************/
 
+#ifdef CONFIG_ENERGY_MODEL
+static void sugov_policy_attach_pd(struct sugov_policy *sg_policy)
+{
+	struct em_perf_domain *pd;
+	struct cpufreq_policy *policy = sg_policy->policy;
+
+	sg_policy->pd = NULL;
+	pd = em_cpu_get(policy->cpu);
+	if (!pd)
+		return;
+
+	if (cpumask_equal(policy->related_cpus, to_cpumask(pd->cpus)))
+		sg_policy->pd = pd;
+	else
+		pr_warn("%s: Not all CPUs in schedutil policy %u share the same perf domain, no perf domain for that policy will be registered\n",
+			__func__, policy->cpu);
+}
+
+static struct em_perf_domain *sugov_policy_get_pd(
+						struct sugov_policy *sg_policy)
+{
+	return sg_policy->pd;
+}
+#else /* CONFIG_ENERGY_MODEL */
+static void sugov_policy_attach_pd(struct sugov_policy *sg_policy) {}
+static struct em_perf_domain *sugov_policy_get_pd(
+						struct sugov_policy *sg_policy)
+{
+	return NULL;
+}
+#endif /* CONFIG_ENERGY_MODEL */
+
 static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
 {
 	s64 delta_ns;
@@ -859,6 +895,9 @@ static int sugov_start(struct cpufreq_policy *policy)
 					sugov_update_shared :
 					sugov_update_single);
 	}
+
+	sugov_policy_attach_pd(sg_policy);
+
 	return 0;
 }
From patchwork Fri Oct 11 13:44:57 2019
From: Douglas RAILLARD <douglas.raillard@arm.com>
To: linux-kernel@vger.kernel.org
Cc: linux-pm@vger.kernel.org, mingo@redhat.com, peterz@infradead.org,
    rjw@rjwysocki.net, viresh.kumar@linaro.org, juri.lelli@redhat.com,
    vincent.guittot@linaro.org, douglas.raillard@arm.com,
    dietmar.eggemann@arm.com, qperret@qperret.net,
    patrick.bellasi@matbug.net, dh.han@samsung.com
Subject: [RFC PATCH v3 3/6] sched/cpufreq: Hook em_pd_get_higher_freq() into
 get_next_freq()
Date: Fri, 11 Oct 2019 14:44:57 +0100
Message-Id: <20191011134500.235736-4-douglas.raillard@arm.com>
In-Reply-To: <20191011134500.235736-1-douglas.raillard@arm.com>
References: <20191011134500.235736-1-douglas.raillard@arm.com>

Choose the highest OPP for a given energy cost, which allows skipping
lower frequencies that are not actually cheaper in terms of consumed
power. Such frequencies can still be worth keeping in the energy model,
since they give more freedom to thermal throttling, but they should not
be selected under normal circumstances.

This also prepares the ground for energy-aware frequency boosting.

Signed-off-by: Douglas RAILLARD <douglas.raillard@arm.com>
---
 kernel/sched/cpufreq_schedutil.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 9abda58827c0..aab8c0498dd1 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -10,6 +10,7 @@
 
 #include "sched.h"
 
+#include <linux/energy_model.h>
 #include <linux/sched/cpufreq.h>
 #include <trace/events/power.h>
 
@@ -208,9 +209,16 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
 	struct cpufreq_policy *policy = sg_policy->policy;
 	unsigned int freq = arch_scale_freq_invariant() ?
 				policy->cpuinfo.max_freq : policy->cur;
+	struct em_perf_domain *pd = sugov_policy_get_pd(sg_policy);
 
 	freq = map_util_freq(util, freq, max);
 
+	/*
+	 * Try to get a higher frequency if one is available, given the extra
+	 * power we are ready to spend.
+	 */
+	freq = em_pd_get_higher_freq(pd, freq, 0);
+
 	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
 		return sg_policy->next_freq;
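To make the effect concrete, reuse the made-up OPP table from the sketch
after patch 1/6 (500 MHz at cost 120, 667 MHz at cost 110, 1 GHz at
cost 150). If map_util_freq() requests 450 MHz, the lowest OPP
satisfying it is 500 MHz, so the allowed cost with a zero margin is 120.
The scan from the top then settles on 667 MHz, whose cost of 110 fits
the budget: the same work completes sooner for less energy than at
500 MHz. The 500 MHz OPP remains available to thermal throttling, which
is why it is worth keeping in the energy model.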
From patchwork Fri Oct 11 13:44:58 2019
From: Douglas RAILLARD <douglas.raillard@arm.com>
To: linux-kernel@vger.kernel.org
Cc: linux-pm@vger.kernel.org, mingo@redhat.com, peterz@infradead.org,
    rjw@rjwysocki.net, viresh.kumar@linaro.org, juri.lelli@redhat.com,
    vincent.guittot@linaro.org, douglas.raillard@arm.com,
    dietmar.eggemann@arm.com, qperret@qperret.net,
    patrick.bellasi@matbug.net, dh.han@samsung.com
Subject: [RFC PATCH v3 4/6] sched/cpufreq: Introduce sugov_cpu_ramp_boost
Date: Fri, 11 Oct 2019 14:44:58 +0100
Message-Id: <20191011134500.235736-5-douglas.raillard@arm.com>
In-Reply-To: <20191011134500.235736-1-douglas.raillard@arm.com>
References: <20191011134500.235736-1-douglas.raillard@arm.com>

Use the dynamics of the utilization signals to detect when the
utilization of a set of tasks starts increasing because of a change in
the tasks' behavior. This allows detecting when spending extra power for
a faster frequency ramp-up would benefit the reactivity of the system.

This ramp boost is computed as the difference
util_avg - util_est_enqueued. It roughly represents a lower bound on the
extra utilization the tasks are actually creating, compared to our best
current stable knowledge of it (which is util_est_enqueued). When the
set of runnable tasks changes, the boost is disabled, since the impact
of blocked utilization on util_avg makes the delta with
util_est_enqueued not very informative.

Signed-off-by: Douglas RAILLARD <douglas.raillard@arm.com>
---
 kernel/sched/cpufreq_schedutil.c | 44 ++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index aab8c0498dd1..c118f85d1f3d 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -61,6 +61,10 @@ struct sugov_cpu {
 	unsigned long		bw_dl;
 	unsigned long		max;
 
+	unsigned long		ramp_boost;
+	unsigned long		util_est_enqueued;
+	unsigned long		util_avg;
+
 	/* The field below is for single-CPU policies only: */
 #ifdef CONFIG_NO_HZ_COMMON
 	unsigned long		saved_idle_calls;
@@ -181,6 +185,42 @@ static void sugov_deferred_update(struct sugov_policy *sg_policy, u64 time,
 	}
 }
 
+static unsigned long sugov_cpu_ramp_boost(struct sugov_cpu *sg_cpu)
+{
+	return READ_ONCE(sg_cpu->ramp_boost);
+}
+
+static unsigned long sugov_cpu_ramp_boost_update(struct sugov_cpu *sg_cpu)
+{
+	struct rq *rq = cpu_rq(sg_cpu->cpu);
+	unsigned long util_est_enqueued;
+	unsigned long util_avg;
+	unsigned long boost = 0;
+
+	util_est_enqueued = READ_ONCE(rq->cfs.avg.util_est.enqueued);
+	util_avg = READ_ONCE(rq->cfs.avg.util_avg);
+
+	/*
+	 * Boost when util_avg becomes higher than the previous stable
+	 * knowledge of the enqueued tasks' set util, which is CPU's
+	 * util_est_enqueued.
+	 *
+	 * We try to spot changes in the workload itself, so we want to
+	 * avoid the noise of tasks being enqueued/dequeued. To do that,
+	 * we only trigger boosting when the "amount of work" enqueued
+	 * is stable.
+	 */
+	if (util_est_enqueued == sg_cpu->util_est_enqueued &&
+	    util_avg >= sg_cpu->util_avg &&
+	    util_avg > util_est_enqueued)
+		boost = util_avg - util_est_enqueued;
+
+	sg_cpu->util_est_enqueued = util_est_enqueued;
+	sg_cpu->util_avg = util_avg;
+	WRITE_ONCE(sg_cpu->ramp_boost, boost);
+	return boost;
+}
+
 /**
  * get_next_freq - Compute a new frequency for a given cpufreq policy.
  * @sg_policy: schedutil policy object to compute the new frequency for.
@@ -512,6 +552,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	busy = !sg_policy->need_freq_update && sugov_cpu_is_busy(sg_cpu);
 
 	util = sugov_get_util(sg_cpu);
+	sugov_cpu_ramp_boost_update(sg_cpu);
 	max = sg_cpu->max;
 	util = sugov_iowait_apply(sg_cpu, time, util, max);
 	next_f = get_next_freq(sg_policy, util, max);
@@ -552,6 +593,8 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 		unsigned long j_util, j_max;
 
 		j_util = sugov_get_util(j_sg_cpu);
+		if (j_sg_cpu == sg_cpu)
+			sugov_cpu_ramp_boost_update(sg_cpu);
 		j_max = j_sg_cpu->max;
 		j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
 
@@ -561,6 +604,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 		}
 	}
 
+
 	return get_next_freq(sg_policy, util, max);
 }
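The detection logic can be exercised with a small userspace rendition of
sugov_cpu_ramp_boost_update(), fed a made-up sequence of
(util_est_enqueued, util_avg) samples. All numbers are invented for the
illustration; a real util_avg follows the PELT geometric series rather
than such neat steps:

#include <stdio.h>

struct cpu_state {
	unsigned long util_est_enqueued;
	unsigned long util_avg;
};

/*
 * Same triple condition as sugov_cpu_ramp_boost_update(), without the
 * READ_ONCE()/WRITE_ONCE() accessors.
 */
static unsigned long ramp_boost_update(struct cpu_state *prev,
				       unsigned long util_est_enqueued,
				       unsigned long util_avg)
{
	unsigned long boost = 0;

	if (util_est_enqueued == prev->util_est_enqueued && /* stable task set */
	    util_avg >= prev->util_avg &&                   /* util still rising */
	    util_avg > util_est_enqueued)                   /* above the estimate */
		boost = util_avg - util_est_enqueued;

	prev->util_est_enqueued = util_est_enqueued;
	prev->util_avg = util_avg;
	return boost;
}

int main(void)
{
	struct cpu_state st = { 0, 0 };
	/* util_est stays at 300 while util_avg ramps, then the task set changes */
	unsigned long samples[][2] = {
		{ 300, 280 }, { 300, 310 }, { 300, 350 }, { 400, 420 },
	};
	int i;

	for (i = 0; i < 4; i++)
		printf("boost = %lu\n",
		       ramp_boost_update(&st, samples[i][0], samples[i][1]));
	/* prints 0, 10, 50, 0 */
	return 0;
}

The last sample yields no boost because util_est_enqueued changed, i.e.
the set of enqueued tasks is no longer the one the ramp was measured
against.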
From patchwork Fri Oct 11 13:44:59 2019
From: Douglas RAILLARD <douglas.raillard@arm.com>
To: linux-kernel@vger.kernel.org
Cc: linux-pm@vger.kernel.org, mingo@redhat.com, peterz@infradead.org,
    rjw@rjwysocki.net, viresh.kumar@linaro.org, juri.lelli@redhat.com,
    vincent.guittot@linaro.org, douglas.raillard@arm.com,
    dietmar.eggemann@arm.com, qperret@qperret.net,
    patrick.bellasi@matbug.net, dh.han@samsung.com
Subject: [RFC PATCH v3 5/6] sched/cpufreq: Boost schedutil frequency ramp up
Date: Fri, 11 Oct 2019 14:44:59 +0100
Message-Id: <20191011134500.235736-6-douglas.raillard@arm.com>
In-Reply-To: <20191011134500.235736-1-douglas.raillard@arm.com>
References: <20191011134500.235736-1-douglas.raillard@arm.com>

In some situations, it can be worthwhile to temporarily spend more power
if that provides a useful frequency boost. Use the new
sugov_cpu_ramp_boost() value to drive an energy-aware boost on top of
the minimal required frequency.

Since that boost number is not accurate (and cannot be, without a
crystal ball), it is only used in a way that allows direct control over
the power it is going to cost. This keeps a platform-independent level
of control over the average power, while allowing frequency bursts when
we know a (set of) tasks can make use of them.

In shared policies, the maximum of all CPUs' boosts is used. Since the
extra power expenditure is bounded, it cannot skyrocket even on
platforms with a large number of cores in the same frequency domain
and/or a very high ratio between the lowest and highest OPP costs.

Signed-off-by: Douglas RAILLARD <douglas.raillard@arm.com>
---
 kernel/sched/cpufreq_schedutil.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index c118f85d1f3d..7c1a749fb6ef 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -226,6 +226,9 @@ static unsigned long sugov_cpu_ramp_boost_update(struct sugov_cpu *sg_cpu)
  * @sg_policy: schedutil policy object to compute the new frequency for.
  * @util: Current CPU utilization.
  * @max: CPU capacity.
+ * @boost: Extra power that can be spent on top of the minimum amount of power
+ *	   required to meet capacity requirements, as a percentage between 0 and
+ *	   EM_COST_MARGIN_SCALE.
  *
  * If the utilization is frequency-invariant, choose the new frequency to be
  * proportional to it, that is
@@ -244,7 +247,8 @@ static unsigned long sugov_cpu_ramp_boost_update(struct sugov_cpu *sg_cpu)
  * cpufreq driver limitations.
  */
 static unsigned int get_next_freq(struct sugov_policy *sg_policy,
-				  unsigned long util, unsigned long max)
+				  unsigned long util, unsigned long max,
+				  unsigned long boost)
 {
 	struct cpufreq_policy *policy = sg_policy->policy;
 	unsigned int freq = arch_scale_freq_invariant() ?
@@ -257,7 +261,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
 	 * Try to get a higher frequency if one is available, given the extra
 	 * power we are ready to spend.
 	 */
-	freq = em_pd_get_higher_freq(pd, freq, 0);
+	freq = em_pd_get_higher_freq(pd, freq, boost);
 
 	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
 		return sg_policy->next_freq;
@@ -539,6 +543,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	unsigned long util, max;
 	unsigned int next_f;
 	bool busy;
+	unsigned long ramp_boost = 0;
 
 	sugov_iowait_boost(sg_cpu, time, flags);
 	sg_cpu->last_update = time;
@@ -552,10 +557,10 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	busy = !sg_policy->need_freq_update && sugov_cpu_is_busy(sg_cpu);
 
 	util = sugov_get_util(sg_cpu);
-	sugov_cpu_ramp_boost_update(sg_cpu);
+	ramp_boost = sugov_cpu_ramp_boost_update(sg_cpu);
 	max = sg_cpu->max;
 	util = sugov_iowait_apply(sg_cpu, time, util, max);
-	next_f = get_next_freq(sg_policy, util, max);
+	next_f = get_next_freq(sg_policy, util, max, ramp_boost);
 	/*
 	 * Do not reduce the frequency if the CPU has not been idle
 	 * recently, as the reduction is likely to be premature then.
@@ -587,6 +592,8 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 	struct cpufreq_policy *policy = sg_policy->policy;
 	unsigned long util = 0, max = 1;
 	unsigned int j;
+	unsigned long ramp_boost = 0;
+	unsigned long j_ramp_boost = 0;
 
 	for_each_cpu(j, policy->cpus) {
 		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
@@ -594,7 +601,11 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 
 		j_util = sugov_get_util(j_sg_cpu);
 		if (j_sg_cpu == sg_cpu)
-			sugov_cpu_ramp_boost_update(sg_cpu);
+			j_ramp_boost = sugov_cpu_ramp_boost_update(sg_cpu);
+		else
+			j_ramp_boost = sugov_cpu_ramp_boost(j_sg_cpu);
+		ramp_boost = max(ramp_boost, j_ramp_boost);
+
 		j_max = j_sg_cpu->max;
 		j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
 
@@ -605,7 +616,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 		}
 	}
 
-	return get_next_freq(sg_policy, util, max);
+	return get_next_freq(sg_policy, util, max, ramp_boost);
 }
 
 static void
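To make the bound concrete: since ramp_boost is fed directly to
em_pd_get_higher_freq() as a cost margin, the selected OPP always
satisfies

	cost(selected) <= cost(min_freq) * (1 + boost / EM_COST_MARGIN_SCALE)

and since boost = util_avg - util_est_enqueued cannot exceed
SCHED_CAPACITY_SCALE (1024, which happens to equal
EM_COST_MARGIN_SCALE), the allowed cost is at most about double the
minimal one, regardless of the platform. Reusing the made-up OPP table
from the sketch after patch 1/6, with a minimum of 667 MHz (cost 110): a
boost of 102 (util_avg about 10% of capacity above util_est_enqueued)
allows max_cost = 110 + 110 * 102 / 1024 = 120 and still selects
667 MHz, while a boost of 512 allows max_cost = 165 and bumps the
selection to 1 GHz (cost 150).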
From patchwork Fri Oct 11 13:45:00 2019
From: Douglas RAILLARD <douglas.raillard@arm.com>
To: linux-kernel@vger.kernel.org
Cc: linux-pm@vger.kernel.org, mingo@redhat.com, peterz@infradead.org,
    rjw@rjwysocki.net, viresh.kumar@linaro.org, juri.lelli@redhat.com,
    vincent.guittot@linaro.org, douglas.raillard@arm.com,
    dietmar.eggemann@arm.com, qperret@qperret.net,
    patrick.bellasi@matbug.net, dh.han@samsung.com
Subject: [RFC PATCH v3 6/6] sched/cpufreq: Add schedutil_em_tp tracepoint
Date: Fri, 11 Oct 2019 14:45:00 +0100
Message-Id: <20191011134500.235736-7-douglas.raillard@arm.com>
In-Reply-To: <20191011134500.235736-1-douglas.raillard@arm.com>
References: <20191011134500.235736-1-douglas.raillard@arm.com>

Introduce a new tracepoint reporting the effect of using the Energy
Model inside get_next_freq() in schedutil.

Signed-off-by: Douglas RAILLARD <douglas.raillard@arm.com>
---
 include/trace/events/power.h     |  9 +++++++++
 kernel/sched/cpufreq_schedutil.c | 20 ++++++++++++------
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index f7aece721aed..87a14f5208a7 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -529,6 +529,15 @@ DEFINE_EVENT(dev_pm_qos_request, dev_pm_qos_remove_request,
 
 	TP_ARGS(name, type, new_value)
 );
+
+DECLARE_TRACE(schedutil_em_tp,
+	TP_PROTO(unsigned int cpu, unsigned long util,
+		 unsigned int cost_margin, unsigned int policy_cost_margin,
+		 unsigned int base_freq, unsigned int boosted_freq),
+	TP_ARGS(cpu, util, cost_margin, policy_cost_margin, base_freq,
+		boosted_freq)
+);
+
 #endif /* _TRACE_POWER_H */
 
 /* This part must be outside protection */
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 7c1a749fb6ef..076bbb69ff42 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -14,6 +14,8 @@
 #include <linux/sched/cpufreq.h>
 #include <trace/events/power.h>
 
+EXPORT_TRACEPOINT_SYMBOL_GPL(schedutil_em_tp);
+
 #define IOWAIT_BOOST_MIN (SCHED_CAPACITY_SCALE / 8)
 
 struct sugov_tunables {
@@ -223,7 +225,7 @@ static unsigned long sugov_cpu_ramp_boost_update(struct sugov_cpu *sg_cpu)
 
 /**
  * get_next_freq - Compute a new frequency for a given cpufreq policy.
- * @sg_policy: schedutil policy object to compute the new frequency for.
+ * @sg_cpu: schedutil CPU object to compute the new frequency for.
  * @util: Current CPU utilization.
  * @max: CPU capacity.
  * @boost: Extra power that can be spent on top of the minimum amount of power
@@ -246,22 +248,28 @@ static unsigned long sugov_cpu_ramp_boost_update(struct sugov_cpu *sg_cpu)
  * next_freq (as calculated above) is returned, subject to policy min/max and
  * cpufreq driver limitations.
  */
-static unsigned int get_next_freq(struct sugov_policy *sg_policy,
+static unsigned int get_next_freq(struct sugov_cpu *sg_cpu,
 				  unsigned long util, unsigned long max,
 				  unsigned long boost)
 {
+	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
 	unsigned int freq = arch_scale_freq_invariant() ?
 				policy->cpuinfo.max_freq : policy->cur;
 	struct em_perf_domain *pd = sugov_policy_get_pd(sg_policy);
+	unsigned int base_freq;
 
-	freq = map_util_freq(util, freq, max);
+	base_freq = map_util_freq(util, freq, max);
 
 	/*
 	 * Try to get a higher frequency if one is available, given the extra
 	 * power we are ready to spend.
 	 */
-	freq = em_pd_get_higher_freq(pd, freq, boost);
+	freq = em_pd_get_higher_freq(pd, base_freq, boost);
+
+	trace_schedutil_em_tp(sg_cpu->cpu, util,
+			      sugov_cpu_ramp_boost(sg_cpu), boost,
+			      base_freq, freq);
 
 	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
 		return sg_policy->next_freq;
@@ -560,7 +568,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	ramp_boost = sugov_cpu_ramp_boost_update(sg_cpu);
 	max = sg_cpu->max;
 	util = sugov_iowait_apply(sg_cpu, time, util, max);
-	next_f = get_next_freq(sg_policy, util, max, ramp_boost);
+	next_f = get_next_freq(sg_cpu, util, max, ramp_boost);
 	/*
 	 * Do not reduce the frequency if the CPU has not been idle
 	 * recently, as the reduction is likely to be premature then.
@@ -616,7 +624,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 		}
 	}
 
-	return get_next_freq(sg_policy, util, max, ramp_boost);
+	return get_next_freq(sg_cpu, util, max, ramp_boost);
 }
 
 static void
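schedutil_em_tp is a bare tracepoint (DECLARE_TRACE without a trace
event), so it does not show up in tracefs; it is meant to be consumed by
a probe registered from kernel code. Below is a sketch of how an
out-of-tree module might hook it. The module, its name and its print
format are invented for illustration and are not part of this series:

// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <trace/events/power.h>

/* Tracepoint probes take a void *data context first, then the TP_PROTO args */
static void em_tp_probe(void *data, unsigned int cpu, unsigned long util,
			unsigned int cost_margin,
			unsigned int policy_cost_margin,
			unsigned int base_freq, unsigned int boosted_freq)
{
	if (boosted_freq != base_freq)
		pr_info("cpu%u: util=%lu margin=%u/%u freq %u -> %u kHz\n",
			cpu, util, cost_margin, policy_cost_margin,
			base_freq, boosted_freq);
}

static int __init em_tp_demo_init(void)
{
	return register_trace_schedutil_em_tp(em_tp_probe, NULL);
}

static void __exit em_tp_demo_exit(void)
{
	unregister_trace_schedutil_em_tp(em_tp_probe, NULL);
	tracepoint_synchronize_unregister();
}

module_init(em_tp_demo_init);
module_exit(em_tp_demo_exit);
MODULE_LICENSE("GPL");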