[05/14] sched: cpufreq: Move schedutil_cpu_util()

Message ID	20200507181012.29791-6-qperret@google.com (mailing list archive)
State	RFC, archived
Headers	show Return-Path: <SRS0=udDz=6V=vger.kernel.org=linux-pm-owner@kernel.org> Date: Thu, 7 May 2020 19:10:03 +0100 In-Reply-To: <20200507181012.29791-1-qperret@google.com> Message-Id: <20200507181012.29791-6-qperret@google.com> Mime-Version: 1.0 References: <20200507181012.29791-1-qperret@google.com> Subject: [PATCH 05/14] sched: cpufreq: Move schedutil_cpu_util() From: Quentin Perret <qperret@google.com> To: linux-kernel@vger.kernel.org, linux-pm@vger.kernel.org Cc: tglx@linutronix.de, mingo@redhat.com, bp@alien8.de, x86@kernel.org, hpa@zytor.com, sudeep.holla@arm.com, gregkh@linuxfoundation.org, rafael@kernel.org, viresh.kumar@linaro.org, peterz@infradead.org, juri.lelli@redhat.com, vincent.guittot@linaro.org, dietmar.eggemann@arm.com, rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de, mcgrof@kernel.org, keescook@chromium.org, yzaikin@google.com, fweisbec@gmail.com, tkjos@google.com, kernel-team@android.com, qperret@google.com Content-Type: text/plain; charset="UTF-8" Sender: linux-pm-owner@vger.kernel.org Precedence: bulk
Series	Modularize schedutil \| expand [00/14] Modularize schedutil [01/14] sched: Provide sched_set_deadline() [02/14] sched: cpufreq: Use sched_set_deadline() from sugov [03/14] sched: cpufreq: Introduce 'want_eas' governor flag [04/14] sched: cpufreq: Move sched_cpufreq_governor_change() [05/14] sched: cpufreq: Move schedutil_cpu_util() [06/14] arch_topology: Export cpu_scale per-cpu array [07/14] kthread: Export kthread_bind_mask() [08/14] sched/core: Export runqueues per-cpu array [09/14] sched/cpufreq: Export cpufreq_this_cpu_can_update() [10/14] sched/fair: Export cpu_util_freq() [11/14] tick/sched: Export tick_nohz_get_idle_calls_cpu [12/14] x86: Export arch_scale_freq_key [13/14] sched: cpufreq: Use IS_ENABLED() for schedutil [14/14] sched: cpufreq: Modularize schedutil

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 33e67c48f668..aeb04cc5b740 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -183,122 +183,15 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, return cpufreq_driver_resolve_freq(policy, freq); } -/* - * This function computes an effective utilization for the given CPU, to be - * used for frequency selection given the linear relation: f = u * f_max. - * - * The scheduler tracks the following metrics: - * - * cpu_util_{cfs,rt,dl,irq}() - * cpu_bw_dl() - * - * Where the cfs,rt and dl util numbers are tracked with the same metric and - * synchronized windows and are thus directly comparable. - * - * The cfs,rt,dl utilization are the running times measured with rq->clock_task - * which excludes things like IRQ and steal-time. These latter are then accrued - * in the irq utilization. - * - * The DL bandwidth number otoh is not a measured metric but a value computed - * based on the task model parameters and gives the minimal utilization - * required to meet deadlines. - */ -unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, - unsigned long max, enum schedutil_type type, - struct task_struct *p) -{ - unsigned long dl_util, util, irq; - struct rq *rq = cpu_rq(cpu); - - if (!IS_BUILTIN(CONFIG_UCLAMP_TASK) && - type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) { - return max; - } - - /* - * Early check to see if IRQ/steal time saturates the CPU, can be - * because of inaccuracies in how we track these -- see - * update_irq_load_avg(). - */ - irq = cpu_util_irq(rq); - if (unlikely(irq >= max)) - return max; - - /* - * Because the time spend on RT/DL tasks is visible as 'lost' time to - * CFS tasks and we use the same metric to track the effective - * utilization (PELT windows are synchronized) we can directly add them - * to obtain the CPU's actual utilization. - * - * CFS and RT utilization can be boosted or capped, depending on - * utilization clamp constraints requested by currently RUNNABLE - * tasks. - * When there are no CFS RUNNABLE tasks, clamps are released and - * frequency will be gracefully reduced with the utilization decay. - */ - util = util_cfs + cpu_util_rt(rq); - if (type == FREQUENCY_UTIL) - util = uclamp_rq_util_with(rq, util, p); - - dl_util = cpu_util_dl(rq); - - /* - * For frequency selection we do not make cpu_util_dl() a permanent part - * of this sum because we want to use cpu_bw_dl() later on, but we need - * to check if the CFS+RT+DL sum is saturated (ie. no idle time) such - * that we select f_max when there is no idle time. - * - * NOTE: numerical errors or stop class might cause us to not quite hit - * saturation when we should -- something for later. - */ - if (util + dl_util >= max) - return max; - - /* - * OTOH, for energy computation we need the estimated running time, so - * include util_dl and ignore dl_bw. - */ - if (type == ENERGY_UTIL) - util += dl_util; - - /* - * There is still idle time; further improve the number by using the - * irq metric. Because IRQ/steal time is hidden from the task clock we - * need to scale the task numbers: - * - * max - irq - * U' = irq + --------- * U - * max - */ - util = scale_irq_capacity(util, irq, max); - util += irq; - - /* - * Bandwidth required by DEADLINE must always be granted while, for - * FAIR and RT, we use blocked utilization of IDLE CPUs as a mechanism - * to gracefully reduce the frequency when no tasks show up for longer - * periods of time. - * - * Ideally we would like to set bw_dl as min/guaranteed freq and util + - * bw_dl as requested freq. However, cpufreq is not yet ready for such - * an interface. So, we only do the latter for now. - */ - if (type == FREQUENCY_UTIL) - util += cpu_bw_dl(rq); - - return min(max, util); -} - static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) { struct rq *rq = cpu_rq(sg_cpu->cpu); - unsigned long util = cpu_util_cfs(rq); unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu); sg_cpu->max = max; sg_cpu->bw_dl = cpu_bw_dl(rq); - return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL); + return cpu_util_freq(sg_cpu->cpu, max); } /** diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 02f323b85b6d..dadf0a060abe 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6377,6 +6377,118 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p) return min_t(unsigned long, util, capacity_orig_of(cpu)); } +/* + * This function computes an effective utilization for the given CPU, to be + * used for frequency selection given the linear relation: f = u * f_max. + * + * The scheduler tracks the following metrics: + * + * cpu_util_{cfs,rt,dl,irq}() + * cpu_bw_dl() + * + * Where the cfs,rt and dl util numbers are tracked with the same metric and + * synchronized windows and are thus directly comparable. + * + * The cfs,rt,dl utilization are the running times measured with rq->clock_task + * which excludes things like IRQ and steal-time. These latter are then accrued + * in the irq utilization. + * + * The DL bandwidth number otoh is not a measured metric but a value computed + * based on the task model parameters and gives the minimal utilization + * required to meet deadlines. + */ +static unsigned long aggregate_cpu_util(int cpu, unsigned long util_cfs, + unsigned long max, + enum cpu_util_type type, + struct task_struct *p) +{ + unsigned long dl_util, util, irq; + struct rq *rq = cpu_rq(cpu); + + if (!IS_BUILTIN(CONFIG_UCLAMP_TASK) && + type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) { + return max; + } + + /* + * Early check to see if IRQ/steal time saturates the CPU, can be + * because of inaccuracies in how we track these -- see + * update_irq_load_avg(). + */ + irq = cpu_util_irq(rq); + if (unlikely(irq >= max)) + return max; + + /* + * Because the time spend on RT/DL tasks is visible as 'lost' time to + * CFS tasks and we use the same metric to track the effective + * utilization (PELT windows are synchronized) we can directly add them + * to obtain the CPU's actual utilization. + * + * CFS and RT utilization can be boosted or capped, depending on + * utilization clamp constraints requested by currently RUNNABLE + * tasks. + * When there are no CFS RUNNABLE tasks, clamps are released and + * frequency will be gracefully reduced with the utilization decay. + */ + util = util_cfs + cpu_util_rt(rq); + if (type == FREQUENCY_UTIL) + util = uclamp_rq_util_with(rq, util, p); + + dl_util = cpu_util_dl(rq); + + /* + * For frequency selection we do not make cpu_util_dl() a permanent part + * of this sum because we want to use cpu_bw_dl() later on, but we need + * to check if the CFS+RT+DL sum is saturated (ie. no idle time) such + * that we select f_max when there is no idle time. + * + * NOTE: numerical errors or stop class might cause us to not quite hit + * saturation when we should -- something for later. + */ + if (util + dl_util >= max) + return max; + + /* + * OTOH, for energy computation we need the estimated running time, so + * include util_dl and ignore dl_bw. + */ + if (type == ENERGY_UTIL) + util += dl_util; + + /* + * There is still idle time; further improve the number by using the + * irq metric. Because IRQ/steal time is hidden from the task clock we + * need to scale the task numbers: + * + * max - irq + * U' = irq + --------- * U + * max + */ + util = scale_irq_capacity(util, irq, max); + util += irq; + + /* + * Bandwidth required by DEADLINE must always be granted while, for + * FAIR and RT, we use blocked utilization of IDLE CPUs as a mechanism + * to gracefully reduce the frequency when no tasks show up for longer + * periods of time. + * + * Ideally we would like to set bw_dl as min/guaranteed freq and util + + * bw_dl as requested freq. However, cpufreq is not yet ready for such + * an interface. So, we only do the latter for now. + */ + if (type == FREQUENCY_UTIL) + util += cpu_bw_dl(rq); + + return min(max, util); +} + +unsigned long cpu_util_freq(int cpu, unsigned long max) +{ + return aggregate_cpu_util(cpu, cpu_util(cpu), max, FREQUENCY_UTIL, NULL); +} + /* * Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued) * to @dst_cpu. @@ -6449,7 +6561,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd) * is already enough to scale the EM reported power * consumption at the (eventually clamped) cpu_capacity. */ - sum_util += schedutil_cpu_util(cpu, util_cfs, cpu_cap, + sum_util += aggregate_cpu_util(cpu, util_cfs, cpu_cap, ENERGY_UTIL, NULL); /* @@ -6459,7 +6571,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd) * NOTE: in case RT tasks are running, by default the * FREQUENCY_UTIL's utilization can be max OPP. */ - cpu_util = schedutil_cpu_util(cpu, util_cfs, cpu_cap, + cpu_util = aggregate_cpu_util(cpu, util_cfs, cpu_cap, FREQUENCY_UTIL, tsk); max_util = max(max_util, cpu_util); } @@ -6556,7 +6668,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) * IOW, placing the task there would make the CPU * overutilized. Take uclamp into account to see how * much capacity we can get out of the CPU; this is - * aligned with schedutil_cpu_util(). + * aligned with aggregate_cpu_util(). */ util = uclamp_rq_util_with(cpu_rq(cpu), util, p); if (!fits_capacity(util, cpu_cap)) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index db3a57675ccf..60592cde80e8 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2397,10 +2397,9 @@ static inline unsigned long capacity_orig_of(int cpu) { return cpu_rq(cpu)->cpu_capacity_orig; } -#endif /** - * enum schedutil_type - CPU utilization type + * enum cpu_util_type - CPU utilization type * @FREQUENCY_UTIL: Utilization used to select frequency * @ENERGY_UTIL: Utilization used during energy calculation * @@ -2409,16 +2408,12 @@ static inline unsigned long capacity_orig_of(int cpu) * enum is used within schedutil_freq_util() to differentiate the types of * utilization expected by the callers, and adjust the aggregation accordingly. */ -enum schedutil_type { +enum cpu_util_type { FREQUENCY_UTIL, ENERGY_UTIL, }; -#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL - -unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, - unsigned long max, enum schedutil_type type, - struct task_struct *p); +unsigned long cpu_util_freq(int cpu, unsigned long max); static inline unsigned long cpu_bw_dl(struct rq *rq) { @@ -2430,30 +2425,11 @@ static inline unsigned long cpu_util_dl(struct rq *rq) return READ_ONCE(rq->avg_dl.util_avg); } -static inline unsigned long cpu_util_cfs(struct rq *rq) -{ - unsigned long util = READ_ONCE(rq->cfs.avg.util_avg); - - if (sched_feat(UTIL_EST)) { - util = max_t(unsigned long, util, - READ_ONCE(rq->cfs.avg.util_est.enqueued)); - } - - return util; -} - static inline unsigned long cpu_util_rt(struct rq *rq) { return READ_ONCE(rq->avg_rt.util_avg); } -#else /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */ -static inline unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs, - unsigned long max, enum schedutil_type type, - struct task_struct *p) -{ - return 0; -} -#endif /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */ +#endif /* CONFIG_SMP */ #ifdef CONFIG_HAVE_SCHED_AVG_IRQ static inline unsigned long cpu_util_irq(struct rq *rq)

[05/14] sched: cpufreq: Move schedutil_cpu_util()

Commit Message

Patch