[RFC,2/6] sched: Introduce energy models of CPUs

Message ID 20180320094312.24081-3-dietmar.eggemann@arm.com (mailing list archive)
State RFC, archived

Commit Message

Dietmar Eggemann March 20, 2018, 9:43 a.m. UTC
From: Quentin Perret <quentin.perret@arm.com>

The energy consumption of each CPU in the system is modeled with a list
of values representing its dissipated power and compute capacity at each
available Operating Performance Point (OPP). These values are derived
from existing information in the kernel (currently used by the thermal
subsystem) and don't require the introduction of new platform-specific
tunables. The energy model is also provided with a simple representation
of all frequency domains as cpumasks, hence enabling the scheduler to be
aware of dependencies between CPUs. The data required to build the energy
model is provided by the OPP library which enables an abstract view of
the platform from the scheduler. The new data structures holding these
models and the routines to populate them are stored in
kernel/sched/energy.c.
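
For instance (illustrative numbers only), a CPU whose arch-provided
capacity is 1024 and which has OPPs at 500 MHz and 1 GHz would get two
<capacity, power> tuples, with capacities 512 and 1024 respectively,
each paired with the power dissipated at that OPP.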

For the sake of simplicity, it is assumed in the energy model that all
CPUs in a frequency domain share the same micro-architecture. As long as
this assumption is correct, the energy models of different CPUs belonging
to the same frequency domain are equal. Hence, this commit builds only one
energy model per frequency domain, and links all relevant CPUs to it in
order to save time and memory. If needed for future hardware platforms,
relaxing this assumption should imply relatively simple modifications in
the code but a significantly higher algorithmic complexity.

As it appears that energy-aware scheduling really makes a difference on
heterogeneous systems (e.g. big.LITTLE platforms), it is restricted to
systems having:

   1. the SD_ASYM_CPUCAPACITY flag set
   2. Dynamic Voltage and Frequency Scaling (DVFS) enabled
   3. power estimates available for the OPPs of all possible CPUs

Moreover, the scheduler is notified of the energy model availability
using a static key in order to minimize the overhead on non-energy-aware
systems.
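
For illustration, a scheduler code path could then be gated on that key
roughly as follows (hypothetical call site; the actual users of the key
appear in later patches of this series):

if (static_branch_unlikely(&sched_energy_present)) {
	/* Consult the energy model when placing tasks. */
}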

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Quentin Perret <quentin.perret@arm.com>
Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>

---
This patch depends on additional infrastructure being merged in the OPP
core. As this infrastructure can also be useful for other clients, the
related patches have been posted separately [1].

[1] https://marc.info/?l=linux-pm&m=151635516419249&w=2
---
 include/linux/sched/energy.h |  31 +++++++
 kernel/sched/Makefile        |   2 +-
 kernel/sched/energy.c        | 190 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 222 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/sched/energy.h
 create mode 100644 kernel/sched/energy.c

Comments

Greg Kroah-Hartman March 20, 2018, 9:52 a.m. UTC | #1
On Tue, Mar 20, 2018 at 09:43:08AM +0000, Dietmar Eggemann wrote:
> From: Quentin Perret <quentin.perret@arm.com>

[...]

> diff --git a/include/linux/sched/energy.h b/include/linux/sched/energy.h
> new file mode 100644
> index 000000000000..b4f43564ffe4
> --- /dev/null
> +++ b/include/linux/sched/energy.h
> @@ -0,0 +1,31 @@
> +#ifndef _LINUX_SCHED_ENERGY_H
> +#define _LINUX_SCHED_ENERGY_H

No copyright or license info?  Not good :(

> --- /dev/null
> +++ b/kernel/sched/energy.c
> @@ -0,0 +1,190 @@
> +/*
> + * Released under the GPLv2 only.
> + * SPDX-License-Identifier: GPL-2.0

Please read the documentation for the SPDX lines on how to do them
correctly.  Newer versions of checkpatch.pl will catch this, but that is
in linux-next for the moment.

And once you have the SPDX line, the "Released under..." line is not
needed.


> + *
> + * Energy-aware scheduling models
> + *
> + * Copyright (C) 2018, Arm Ltd.
> + * Written by: Quentin Perret, Arm Ltd.
> + *
> + * This file is subject to the terms and conditions of the GNU General Public
> + * License.  See the file "COPYING" in the main directory of this archive
> + * for more details.

This paragraph is not needed at all.

> + */
> +
> +#define pr_fmt(fmt) "sched-energy: " fmt
> +
> +#include <linux/sched/topology.h>
> +#include <linux/sched/energy.h>
> +#include <linux/pm_opp.h>
> +
> +#include "sched.h"
> +
> +DEFINE_STATIC_KEY_FALSE(sched_energy_present);
> +struct sched_energy_model ** __percpu energy_model;
> +
> +/*
> + * A copy of the cpumasks representing the frequency domains is kept private
> + * to the scheduler. They are stacked in a dynamically allocated linked list
> + * as we don't know how many frequency domains the system has.
> + */
> +LIST_HEAD(freq_domains);

global variable?  If so, please prefix it with something more unique
than "freq_".

> +#ifdef CONFIG_PM_OPP

#ifdefs go in .h files, not .c files, right?

thanks,

greg k-h
Quentin Perret March 21, 2018, 12:45 a.m. UTC | #2
On Tuesday 20 Mar 2018 at 10:52:15 (+0100), Greg Kroah-Hartman wrote:
> On Tue, Mar 20, 2018 at 09:43:08AM +0000, Dietmar Eggemann wrote:
> > From: Quentin Perret <quentin.perret@arm.com>

[...]

> > diff --git a/include/linux/sched/energy.h b/include/linux/sched/energy.h
> > new file mode 100644
> > index 000000000000..b4f43564ffe4
> > --- /dev/null
> > +++ b/include/linux/sched/energy.h
> > @@ -0,0 +1,31 @@
> > +#ifndef _LINUX_SCHED_ENERGY_H
> > +#define _LINUX_SCHED_ENERGY_H
> 
> No copyright or license info?  Not good :(
> 
> > --- /dev/null
> > +++ b/kernel/sched/energy.c
> > @@ -0,0 +1,190 @@
> > +/*
> > + * Released under the GPLv2 only.
> > + * SPDX-License-Identifier: GPL-2.0
> 
> Please read the documentation for the SPDX lines on how to do them
> correctly.  Newer versions of checkpatch.pl will catch this, but that is
> in linux-next for the moment.
> 
> And once you have the SPDX line, the "Released under..." line is not
> needed.
> 
> 
> > + *
> > + * Energy-aware scheduling models
> > + *
> > + * Copyright (C) 2018, Arm Ltd.
> > + * Written by: Quentin Perret, Arm Ltd.
> > + *
> > + * This file is subject to the terms and conditions of the GNU General Public
> > + * License.  See the file "COPYING" in the main directory of this archive
> > + * for more details.
> 
> This paragraph is not needed at all.

Right, I will fix all the licence issues and add one to the new header
file. I based these on existing files a while ago when I first wrote the
patches and forgot to update them later on. Sorry about that.

> 
> > + */
> > +
> > +#define pr_fmt(fmt) "sched-energy: " fmt
> > +
> > +#include <linux/sched/topology.h>
> > +#include <linux/sched/energy.h>
> > +#include <linux/pm_opp.h>
> > +
> > +#include "sched.h"
> > +
> > +DEFINE_STATIC_KEY_FALSE(sched_energy_present);
> > +struct sched_energy_model ** __percpu energy_model;
> > +
> > +/*
> > + * A copy of the cpumasks representing the frequency domains is kept private
> > + * to the scheduler. They are stacked in a dynamically allocated linked list
> > + * as we don't know how many frequency domains the system has.
> > + */
> > +LIST_HEAD(freq_domains);
> 
> global variable?  If so, please prefix it with something more unique
> than "freq_".

Will do.

> 
> > +#ifdef CONFIG_PM_OPP
> 
> #ifdefs go in .h files, not .c files, right?

Yes, good point. Actually, I might be able to tweak only kernel/sched/Makefile
to ensure we have CONFIG_PM_OPP. I will look into it.

> 
> thanks,
> 
> greg k-h

Thanks,
Quentin
Quentin Perret March 25, 2018, 1:48 p.m. UTC | #3
On Tuesday 20 Mar 2018 at 10:52:15 (+0100), Greg Kroah-Hartman wrote:
> On Tue, Mar 20, 2018 at 09:43:08AM +0000, Dietmar Eggemann wrote:
> > From: Quentin Perret <quentin.perret@arm.com>

[...]

> > +#ifdef CONFIG_PM_OPP
> 
> #ifdefs go in .h files, not .c files, right?
> 

So, after looking into this, my suggestion would be to: 1) remove the
#ifdef CONFIG_PM_OPP from energy.c entirely; 2) make sure
init_sched_energy() is stubbed properly for !CONFIG_SMP and
!CONFIG_PM_OPP in include/linux/sched/energy.h (see the sketch below);
3) relocate the global variables (energy_model, freq_domains, ...) to
fair.c; and 4) modify
kernel/sched/Makefile with something like:

ifeq ($(CONFIG_PM_OPP),y)
obj-$(CONFIG_SMP) += energy.o
endif

That way, energy.c is not compiled if not needed by the arch, and the
#ifdefs are kept within header files and Makefiles.
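
For point 2), the stubbing in include/linux/sched/energy.h might look
something like this (rough sketch, untested):

#if defined(CONFIG_SMP) && defined(CONFIG_PM_OPP)
void init_sched_energy(void);
#else
static inline void init_sched_energy(void) { }
#endif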

Would that work?

Thanks,
Quentin
Peter Zijlstra April 9, 2018, 12:01 p.m. UTC | #4
On Tue, Mar 20, 2018 at 09:43:08AM +0000, Dietmar Eggemann wrote:
> From: Quentin Perret <quentin.perret@arm.com>
> 
> The energy consumption of each CPU in the system is modeled with a list
> of values representing its dissipated power and compute capacity at each
> available Operating Performance Point (OPP). These values are derived
> from existing information in the kernel (currently used by the thermal
> subsystem) and don't require the introduction of new platform-specific
> tunables. The energy model is also provided with a simple representation
> of all frequency domains as cpumasks, hence enabling the scheduler to be
> aware of dependencies between CPUs. The data required to build the energy
> model is provided by the OPP library which enables an abstract view of
> the platform from the scheduler. The new data structures holding these
> models and the routines to populate them are stored in
> kernel/sched/energy.c.
> 
> For the sake of simplicity, it is assumed in the energy model that all
> CPUs in a frequency domain share the same micro-architecture. As long as
> this assumption is correct, the energy models of different CPUs belonging
> to the same frequency domain are equal. Hence, this commit builds only one
> energy model per frequency domain, and links all relevant CPUs to it in
> order to save time and memory. If needed for future hardware platforms,
> relaxing this assumption should imply relatively simple modifications in
> the code but a significantly higher algorithmic complexity.

What this doesn't mention is why this isn't part of the regular topology
bits. IIRC this is because the frequency domains don't necessarily need
to align with the existing topology, but this completely fails to state
any of that.

Also, since I'm not at all familiar with DT and the OPP library stuff,
this code is completely unreadable to me and there isn't a nice comment
to help me along.
Quentin Perret April 9, 2018, 1:45 p.m. UTC | #5
On Monday 09 Apr 2018 at 14:01:11 (+0200), Peter Zijlstra wrote:
> On Tue, Mar 20, 2018 at 09:43:08AM +0000, Dietmar Eggemann wrote:
> > From: Quentin Perret <quentin.perret@arm.com>

[...]

> What this doesn't mention is why this isn't part of the regular topology
> bits. IIRC this is because the frequency domains don't necessarily need
> to align with the existing topology, but this completely fails to state
> any of that.

Yes, that's the main reason. Frequency domains and scheduling domains
don't necessarily align. They used to on big.LITTLE platforms, but that's
no longer the case with DynamIQ ...

> 
> Also, since I'm not at all familiar with DT and the OPP library stuff,
> this code is completely unreadable to me and there isn't a nice comment
> to help me along.

Right, I can definitely fix that. Comments in the code and a better
commit message should hopefully help. It has also already been suggested
that a documentation file be added alongside the code for this patchset,
so I'll make sure we add that for the next version. In the meantime,
here is a (hopefully) better explanation.

In this specific patch, we are basically trying to figure out the
boundaries of frequency domains, and the power consumed by each CPU
at each OPP, to make them available to the scheduler. The important
thing here is that, in both cases, we rely on the OPP library to
keep the code as platform-agnostic as possible.

In the case of the frequency domains for example, the cpufreq driver is
in charge of specifying the CPUs that are sharing frequencies. That
information can come from DT, or SCPI, or SCMI, or whatever -- we
probably shouldn't have to care about that from the scheduler's
standpoint. That's why using dev_pm_opp_get_sharing_cpus() is handy,
the OPP library gives us the digested information we need.
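
Concretely, the lookup boils down to the following (abridged from the
patch below, error handling omitted):

cpu_dev = get_cpu_device(cpu);
/* Ask the OPP library which CPUs share their OPP table with 'cpu'. */
ret = dev_pm_opp_get_sharing_cpus(cpu_dev, &fdom->span);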

The power values (dev_pm_opp_get_power) we use right now are those
already used by the thermal subsystem (IPA), which means we don't have
to introduce any new DT binding whatsoever. In the near future, the power
values could also come from other sources (SCMI, for example), and again it's
probably not the scheduler's job to care about those things, so the OPP
library is helping us again. As mentioned in the notes, as of today, this
approach has dependencies on other patches relating to these things which
are already on the list [1].

The rest of the code in this patch is just about iterating over the
CPUs/freq. domains/OPPs. The algorithm is more or less the following:

 1. find a frequency domain which hasn't been visited yet;
 2. estimate the power and capacity of a CPU in this freq domain at each
    possible OPP;
 3. map all CPUs in the freq domain to this list of <capacity, power> tuples;
 4. go to 1.
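
In (heavily simplified) code form, with error handling and the
freq-domain bookkeeping omitted, init_sched_energy() in the patch below
does roughly:

for_each_possible_cpu(cpu) {
	if (*per_cpu_ptr(energy_model, cpu))
		continue;	/* Frequency domain already visited. */

	/* fdom->span: the CPUs sharing a frequency domain with 'cpu'. */
	em = build_energy_model(cpumask_first(&fdom->span));

	for_each_cpu(i, &fdom->span)	/* Link all CPUs of the domain. */
		*per_cpu_ptr(energy_model, i) = em;
}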

I hope that makes sense.

Thanks,
Quentin

[1] https://marc.info/?l=linux-pm&m=151635516419249&w=2
Peter Zijlstra April 9, 2018, 3:32 p.m. UTC | #6
On Mon, Apr 09, 2018 at 02:45:11PM +0100, Quentin Perret wrote:

> In this specific patch, we are basically trying to figure out the
> boundaries of frequency domains, and the power consumed by each CPU
> at each OPP, to make them available to the scheduler. The important
> thing here is that, in both cases, we rely on the OPP library to
> keep the code as platform-agnostic as possible.

AFAICT the only users of this PM_OPP stuff are a bunch of ARM platforms.
Granted, nobody else has built a big.LITTLE style system, so that might
all be fine I suppose.

It won't be until some !ARM chip comes along that we'll know how
generically usable any of this really is.

> In the case of the frequency domains for example, the cpufreq driver is
> in charge of specifying the CPUs that are sharing frequencies. That
> information can come from DT, or SCPI, or SCMI, or whatever -- we
> probably shouldn't have to care about that from the scheduler's
> standpoint. That's why using dev_pm_opp_get_sharing_cpus() is handy,
> the OPP library gives us the digested information we need.

So I kinda would've expected to just ask cpufreq, that after all already
knows these things. Why did we need to invent this pm_opp thing?

Cpufreq has tons of supported architectures, pm_opp not so much.

> The power values (dev_pm_opp_get_power) we use right now are those
> already used by the thermal subsystem (IPA), which means we don't have

I love an IPA style beer, but I'm thinking that's not the same IPA,
right :-)

> to introduce any new DT binding whatsoever. In the near future, the power
> values could also come from other sources (SCMI, for example), and again it's
> probably not the scheduler's job to care about those things, so the OPP
> library is helping us again. As mentioned in the notes, as of today, this
> approach has dependencies on other patches relating to these things which
> are already on the list [1].

Is there any !ARM thermal driver? (clearly I'm not up-to-date on things
thermal).
Quentin Perret April 9, 2018, 4:42 p.m. UTC | #7
On Monday 09 Apr 2018 at 17:32:33 (+0200), Peter Zijlstra wrote:
> On Mon, Apr 09, 2018 at 02:45:11PM +0100, Quentin Perret wrote:
> 
> > In this specific patch, we are basically trying to figure out the
> > boundaries of frequency domains, and the power consumed by each CPU
> > at each OPP, to make them available to the scheduler. The important
> > thing here is that, in both cases, we rely on the OPP library to
> > keep the code as platform-agnostic as possible.
> 
> AFAICT the only users of this PM_OPP stuff are a bunch of ARM platforms.

That's correct.

> Granted, nobody else has built a big.LITTLE style system, so that might
> all be fine I suppose.
> 
> It won't be until some !ARM chip comes along that we'll know how
> generically usable any of this really is.
> 

Right. There is already a lot of diversity in the Arm ecosystem that has
to be managed. That's what I meant by platform-agnostic. Now, I agree
that it should be discussed whether or not this is enough for other
archs ...

It might be reasonable to expect from the archs who want to use EAS that
they expose their OPPs in the OPP lib. That should be harmless, and EAS
needs to know about the OPPs, so they should be made visible, ideally
somewhere generic. Otherwise, that means the interface with the
EAS has to be defined only by the energy model data structures, and the
actual energy model loading procedure becomes free-form arch code.

I quite like the first idea from a pure design standpoint, but I could
also understand if maintainers of other archs were reluctant to
have new dependencies on PM_OPP ...

> > In the case of the frequency domains for example, the cpufreq driver is
> > in charge of specifying the CPUs that are sharing frequencies. That
> > information can come from DT, or SCPI, or SCMI, or whatever -- we
> > probably shouldn't have to care about that from the scheduler's
> > standpoint. That's why using dev_pm_opp_get_sharing_cpus() is handy,
> > the OPP library gives us the digested information we need.
> 
> So I kinda would've expected to just ask cpufreq, that after all already
> knows these things. Why did we need to invent this pm_opp thing?

Yes, we can definitely rely on cpufreq for this one. There is a "strong"
dependency on PM_OPP to get power values, so I decided to use PM_OPP for
the frequency domains as well, for consistency. But I can change that if
needed.
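
For reference, a cpufreq-based variant would presumably look roughly
like this (sketch, untested), using policy->related_cpus instead of
dev_pm_opp_get_sharing_cpus():

struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

if (policy) {
	cpumask_copy(&fdom->span, policy->related_cpus);
	cpufreq_cpu_put(policy);
}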

> 
> Cpufreq has tons of supported architectures, pm_opp not so much.
> 
> > The power values (dev_pm_opp_get_power) we use right now are those
> > already used by the thermal subsystem (IPA), which means we don't have
> 
> I love an IPA style beer, but I'm thinking that's not the same IPA,
> right :-)

Well, both can help to chill down in a way ... :-)

The IPA I'm talking about means Intelligent Power Allocator. It's a
thermal governor that uses a power model of the platform to allocate
power budgets to CPUs & GPUs using a control loop. The code is in
drivers/thermal/power_allocator.c if this is of interest.

> 
> > to introduce any new DT binding whatsoever. In the near future, the power
> > values could also come from other sources (SCMI, for example), and again it's
> > probably not the scheduler's job to care about those things, so the OPP
> > library is helping us again. As mentioned in the notes, as of today, this
> > approach has dependencies on other patches relating to these things which
> > are already on the list [1].
> 
> Is there any !ARM thermal driver? (clearly I'm not up-to-date on things
> thermal).

I don't think so.

Thanks,
Quentin
Rafael J. Wysocki April 10, 2018, 6:55 a.m. UTC | #8
On Mon, Apr 9, 2018 at 6:42 PM, Quentin Perret <quentin.perret@arm.com> wrote:
> On Monday 09 Apr 2018 at 17:32:33 (+0200), Peter Zijlstra wrote:
>> On Mon, Apr 09, 2018 at 02:45:11PM +0100, Quentin Perret wrote:
>>
>> > In this specific patch, we are basically trying to figure out the
>> > boundaries of frequency domains, and the power consumed by each CPU
>> > at each OPP, to make them available to the scheduler. The important
>> > thing here is that, in both cases, we rely on the OPP library to
>> > keep the code as platform-agnostic as possible.
>>
>> AFAICT the only users of this PM_OPP stuff are a bunch of ARM platforms.
>
> That's correct.
>
>> Granted, nobody else has built a big.LITTLE style system, so that might
>> all be fine I suppose.
>>
>> It won't be until some !ARM chip comes along that we'll know how
>> generically usable any of this really is.
>>
>
> Right. There is already a lot of diversity in the Arm ecosystem that has
> to be managed. That's what I meant by platform-agnostic. Now, I agree
> that it should be discussed whether or not this is enough for other
> archs ...

Even for ARM64 w/ ACPI, mind you.

> It might be reasonable to expect from the archs who want to use EAS that
> they expose their OPPs in the OPP lib. That should be harmless, and EAS
> needs to know about the OPPs, so they should be made visible, ideally
> somewhere generic. Otherwise, that means the interface with the
> EAS has to be defined only by the energy model data structures, and the
> actual energy model loading procedure becomes free-form arch code.
>
> I quite like the first idea from a pure design standpoint, but I could
> also understand if maintainers of other archs were reluctant to
> have new dependencies on PM_OPP ...

Not just reluctant I would think.

Depending on PM_OPP directly here is like depending on ACPI directly.
Would you agree with the latter?

>> > In the case of the frequency domains for example, the cpufreq driver is
>> > in charge of specifying the CPUs that are sharing frequencies. That
>> > information can come from DT, or SCPI, or SCMI, or whatever -- we
>> > probably shouldn't have to care about that from the scheduler's
>> > standpoint. That's why using dev_pm_opp_get_sharing_cpus() is handy,
>> > the OPP library gives us the digested information we need.
>>
>> So I kinda would've expected to just ask cpufreq, that after all already
>> knows these things. Why did we need to invent this pm_opp thing?
>
> Yes, we can definitely rely on cpufreq for this one. There is a "strong"
> dependency on PM_OPP to get power values, so I decided to use PM_OPP for
> the frequency domains as well, for consistency. But I can change that if
> needed.

Yes, please.

>>
>> Cpufreq has tons of supported architectures, pm_opp not so much.
>>
>> > The power values (dev_pm_opp_get_power) we use right now are those
>> > already used by the thermal subsystem (IPA), which means we don't have
>>
>> I love an IPA style beer, but I'm thinking that's not the same IPA,
>> right :-)
>
> Well, both can help to chill down in a way ... :-)
>
> The IPA I'm talking about means Intelligent Power Allocator. It's a
> thermal governor that uses a power model of the platform to allocate
> power budgets to CPUs & GPUs using a control loop. The code is in
> drivers/thermal/power_allocator.c if this is of interest.
>
>>
>> > to introduce any new DT binding whatsoever. In the near future, the power
>> > values could also come from other sources (SCMI, for example), and again it's
>> > probably not the scheduler's job to care about those things, so the OPP
>> > library is helping us again. As mentioned in the notes, as of today, this
>> > approach has dependencies on other patches relating to these things which
>> > are already on the list [1].
>>
>> Is there any !ARM thermal driver? (clearly I'm not up-to-date on things
>> thermal).
>
> I don't think so.

No, there isn't, AFAICS.

Thanks!
Quentin Perret April 10, 2018, 9:31 a.m. UTC | #9
On Tuesday 10 Apr 2018 at 08:55:14 (+0200), Rafael J. Wysocki wrote:
> On Mon, Apr 9, 2018 at 6:42 PM, Quentin Perret <quentin.perret@arm.com> wrote:
> > On Monday 09 Apr 2018 at 17:32:33 (+0200), Peter Zijlstra wrote:
> >> On Mon, Apr 09, 2018 at 02:45:11PM +0100, Quentin Perret wrote:

[...]

> > I quite like the first idea from a pure design standpoint, but I could
> > also understand if maintainers of other archs were reluctant to
> > have new dependencies on PM_OPP ...
> 
> Not just reluctant I would think.
> 
> Depending on PM_OPP directly here is like depending on ACPI directly.
> Would you agree with the latter?

Right, I see your point. I was suggesting to use PM_OPP only to make the
OPPs *visible*, nothing else. That doesn't mean all archs would have
to use dev_pm_opp_set_rate() or anything, they could just keep on doing
DVFS their own way. PM_OPP would just be a common way to make OPPs
visible outside of their subsystem, which should be harmless. The point
is to keep the energy model loading code common to all archs.

Another solution would be to let the archs populate the energy model
data-structures themselves, and turn the current energy.c file into
arm/arm64-specific code for ex.

Overall, I guess the question is whether or not PM_OPP is the right
interface for EAS of multiple archs ... That sounds like an interesting
discussion topic for OSPM next week, so thanks a lot for raising this
point!

Regards,
Quentin
Rafael J. Wysocki April 10, 2018, 10:20 a.m. UTC | #10
On Tue, Apr 10, 2018 at 11:31 AM, Quentin Perret <quentin.perret@arm.com> wrote:
> On Tuesday 10 Apr 2018 at 08:55:14 (+0200), Rafael J. Wysocki wrote:
>> On Mon, Apr 9, 2018 at 6:42 PM, Quentin Perret <quentin.perret@arm.com> wrote:
>> > On Monday 09 Apr 2018 at 17:32:33 (+0200), Peter Zijlstra wrote:
>> >> On Mon, Apr 09, 2018 at 02:45:11PM +0100, Quentin Perret wrote:
>
> [...]
>
>> > I quite like the first idea from a pure design standpoint, but I could
>> > also understand if maintainers of other archs were reluctant to
>> > have new dependencies on PM_OPP ...
>>
>> Not just reluctant I would think.
>>
>> Depending on PM_OPP directly here is like depending on ACPI directly.
>> Would you agree with the latter?
>
> Right, I see your point. I was suggesting to use PM_OPP only to make the
> OPPs *visible*, nothing else. That doesn't mean all archs would have
> to use dev_pm_opp_set_rate() or anything, they could just keep on doing
> DVFS their own way. PM_OPP would just be a common way to make OPPs
> visible outside of their subsystem, which should be harmless. The point
> is to keep the energy model loading code common to all archs.
>
> Another solution would be to let the archs populate the energy model
> data-structures themselves, and turn the current energy.c file into
> arm/arm64-specific code for ex.
>
> Overall, I guess the question is whether or not PM_OPP is the right
> interface for EAS of multiple archs ... That sounds like an interesting
> discussion topic for OSPM next week,

I agree.

> so thanks a lot for raising this point!

And moreover, we already have cpufreq and cpuidle that use their own
representations of the same information, generally coming from lower
layers.  They do that because they need to work with different
platforms that generally represent the low-level information
differently.  I don't see why that principle doesn't apply to EAS.

Maybe there should be a common data structure to be used by them all,
but I'm quite confident that PM_OPP is not suitable for this purpose
in general.

Patch

diff --git a/include/linux/sched/energy.h b/include/linux/sched/energy.h
new file mode 100644
index 000000000000..b4f43564ffe4
--- /dev/null
+++ b/include/linux/sched/energy.h
@@ -0,0 +1,31 @@ 
+#ifndef _LINUX_SCHED_ENERGY_H
+#define _LINUX_SCHED_ENERGY_H
+
+#ifdef CONFIG_SMP
+struct capacity_state {
+	unsigned long cap;	/* compute capacity */
+	unsigned long power;	/* power consumption at this compute capacity */
+};
+
+struct sched_energy_model {
+	int nr_cap_states;
+	struct capacity_state *cap_states;
+};
+
+struct freq_domain {
+	struct list_head next;
+	cpumask_t span;
+};
+
+extern struct sched_energy_model ** __percpu energy_model;
+extern struct static_key_false sched_energy_present;
+extern struct list_head freq_domains;
+#define for_each_freq_domain(fdom) \
+			list_for_each_entry(fdom, &freq_domains, next)
+
+void init_sched_energy(void);
+#else
+static inline void init_sched_energy(void) { }
+#endif
+
+#endif
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index d9a02b318108..912972ad4dbc 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -20,7 +20,7 @@  obj-y += core.o loadavg.o clock.o cputime.o
 obj-y += idle.o fair.o rt.o deadline.o
 obj-y += wait.o wait_bit.o swait.o completion.o
 
-obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o
+obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o energy.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
diff --git a/kernel/sched/energy.c b/kernel/sched/energy.c
new file mode 100644
index 000000000000..4662c993e096
--- /dev/null
+++ b/kernel/sched/energy.c
@@ -0,0 +1,190 @@ 
+/*
+ * Released under the GPLv2 only.
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ * Energy-aware scheduling models
+ *
+ * Copyright (C) 2018, Arm Ltd.
+ * Written by: Quentin Perret, Arm Ltd.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#define pr_fmt(fmt) "sched-energy: " fmt
+
+#include <linux/sched/topology.h>
+#include <linux/sched/energy.h>
+#include <linux/pm_opp.h>
+
+#include "sched.h"
+
+DEFINE_STATIC_KEY_FALSE(sched_energy_present);
+struct sched_energy_model ** __percpu energy_model;
+
+/*
+ * A copy of the cpumasks representing the frequency domains is kept private
+ * to the scheduler. They are stacked in a dynamically allocated linked list
+ * as we don't know how many frequency domains the system has.
+ */
+LIST_HEAD(freq_domains);
+
+#ifdef CONFIG_PM_OPP
+static struct sched_energy_model *build_energy_model(int cpu)
+{
+	unsigned long cap_scale = arch_scale_cpu_capacity(NULL, cpu);
+	unsigned long cap, freq, power, max_freq = ULONG_MAX;
+	unsigned long opp_eff, prev_opp_eff = ULONG_MAX;
+	struct sched_energy_model *em = NULL;
+	struct device *cpu_dev;
+	struct dev_pm_opp *opp;
+	int opp_cnt, i;
+
+	cpu_dev = get_cpu_device(cpu);
+	if (!cpu_dev) {
+		pr_err("CPU%d: Failed to get device\n", cpu);
+		return NULL;
+	}
+
+	opp_cnt = dev_pm_opp_get_opp_count(cpu_dev);
+	if (opp_cnt <= 0) {
+		pr_err("CPU%d: Failed to get # of available OPPs.\n", cpu);
+		return NULL;
+	}
+
+	opp = dev_pm_opp_find_freq_floor(cpu_dev, &max_freq);
+	if (IS_ERR(opp)) {
+		pr_err("CPU%d: Failed to get max frequency.\n", cpu);
+		return NULL;
+	}
+
+	dev_pm_opp_put(opp);
+	if (!max_freq) {
+		pr_err("CPU%d: Found null max frequency.\n", cpu);
+		return NULL;
+	}
+
+	em = kzalloc(sizeof(*em), GFP_KERNEL);
+	if (!em)
+		return NULL;
+
+	em->cap_states = kcalloc(opp_cnt, sizeof(*em->cap_states), GFP_KERNEL);
+	if (!em->cap_states)
+		goto free_em;
+
+	for (i = 0, freq = 0; i < opp_cnt; i++, freq++) {
+		opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq);
+		if (IS_ERR(opp)) {
+			pr_err("CPU%d: Failed to get OPP %d.\n", cpu, i+1);
+			goto free_cs;
+		}
+
+		power = dev_pm_opp_get_power(opp);
+		dev_pm_opp_put(opp);
+		if (!power || !freq)
+			goto free_cs;
+
+		cap = freq * cap_scale / max_freq;
+		em->cap_states[i].power = power;
+		em->cap_states[i].cap = cap;
+
+		/*
+		 * The capacity/watts efficiency ratio should decrease as the
+		 * frequency grows on sane platforms. If not, warn the user
+		 * that some high OPPs are more power efficient than some
+		 * of the lower ones.
+		 */
+		opp_eff = (cap << 20) / power;
+		if (opp_eff >= prev_opp_eff)
+			pr_warn("CPU%d: cap/pwr: OPP%d > OPP%d\n", cpu, i, i-1);
+		prev_opp_eff = opp_eff;
+	}
+
+	em->nr_cap_states = opp_cnt;
+	return em;
+
+free_cs:
+	kfree(em->cap_states);
+free_em:
+	kfree(em);
+	return NULL;
+}
+
+static void free_energy_model(void)
+{
+	struct sched_energy_model *em;
+	struct freq_domain *tmp, *pos;
+	int cpu;
+
+	list_for_each_entry_safe(pos, tmp, &freq_domains, next) {
+		cpu = cpumask_first(&(pos->span));
+		em = *per_cpu_ptr(energy_model, cpu);
+		if (em) {
+			kfree(em->cap_states);
+			kfree(em);
+		}
+
+		list_del(&(pos->next));
+		kfree(pos);
+	}
+
+	free_percpu(energy_model);
+}
+
+void init_sched_energy(void)
+{
+	struct freq_domain *fdom;
+	struct sched_energy_model *em;
+	struct device *cpu_dev;
+	int cpu, ret, fdom_cpu;
+
+	/* Energy Aware Scheduling is used for asymmetric systems only. */
+	if (!lowest_flag_domain(smp_processor_id(), SD_ASYM_CPUCAPACITY))
+		return;
+
+	energy_model = alloc_percpu(struct sched_energy_model *);
+	if (!energy_model)
+		goto exit_fail;
+
+	for_each_possible_cpu(cpu) {
+		if (*per_cpu_ptr(energy_model, cpu))
+			continue;
+
+		/* Keep a copy of the sharing_cpus mask */
+		fdom = kzalloc(sizeof(struct freq_domain), GFP_KERNEL);
+		if (!fdom)
+			goto free_em;
+
+		cpu_dev = get_cpu_device(cpu);
+		ret = dev_pm_opp_get_sharing_cpus(cpu_dev, &(fdom->span));
+		if (ret)
+			goto free_em;
+		list_add(&(fdom->next), &freq_domains);
+
+		/*
+		 * Build the energy model of one CPU, and link it to all CPUs
+		 * in its frequency domain. This should be correct as long as
+		 * they share the same micro-architecture.
+		 */
+		fdom_cpu = cpumask_first(&(fdom->span));
+		em = build_energy_model(fdom_cpu);
+		if (!em)
+			goto free_em;
+
+		for_each_cpu(fdom_cpu, &(fdom->span))
+			*per_cpu_ptr(energy_model, fdom_cpu) = em;
+	}
+
+	static_branch_enable(&sched_energy_present);
+
+	pr_info("Energy Aware Scheduling started.\n");
+	return;
+free_em:
+	free_energy_model();
+exit_fail:
+	pr_err("Energy Aware Scheduling initialization failed.\n");
+}
+#else
+void init_sched_energy(void) {}
+#endif