
[RFC,v3,07/10] sched/fair: Introduce an energy estimation helper function

Message ID 20180521142505.6522-8-quentin.perret@arm.com (mailing list archive)
State Superseded, archived

Commit Message

Quentin Perret May 21, 2018, 2:25 p.m. UTC
In preparation for the definition of an energy-aware wakeup path, a
helper function is provided to estimate the consequence on system energy
when a specific task wakes up on a specific CPU. compute_energy()
estimates the capacity state to be reached by all frequency domains and
the consumption of each online CPU according to its Energy Model and its
percentage of busy time.

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Quentin Perret <quentin.perret@arm.com>
---
 kernel/sched/fair.c  | 55 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h |  2 +-
 2 files changed, 56 insertions(+), 1 deletion(-)
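
As a rough illustration of the estimate described above (a sketch only;
em_fd_energy() is introduced earlier in this series, and the exact formula
below is an assumption based on the "percentage of busy time" wording, not
the actual implementation):

	/*
	 * Sketch, not from this series: assuming the Energy Model
	 * reports, for the capacity state selected from max_util, a
	 * power cost (cs_power) and a compute capacity (cs_cap), the
	 * per-domain estimate reduces to the active power weighted by
	 * the expected fraction of busy time (sum_util / cs_cap).
	 */
	static long domain_energy_estimate(long cs_power, long cs_cap,
					   long sum_util)
	{
		/* e.g. cs_power=500, cs_cap=800, sum_util=400 -> 250 */
		return cs_power * sum_util / cs_cap;
	}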

Comments

Juri Lelli June 8, 2018, 10:30 a.m. UTC | #1
On 21/05/18 15:25, Quentin Perret wrote:

[...]

> +static long compute_energy(struct task_struct *p, int dst_cpu)
> +{
> +	long util, max_util, sum_util, energy = 0;
> +	struct sched_energy_fd *sfd;
> +	int cpu;
> +
> +	for_each_freq_domain(sfd) {
> +		max_util = sum_util = 0;
> +		for_each_cpu_and(cpu, freq_domain_span(sfd), cpu_online_mask) {
> +			util = cpu_util_next(cpu, p, dst_cpu);
> +			util += cpu_util_dl(cpu_rq(cpu));
> +			/* XXX: add RT util_avg when available. */

em_fd_energy() below uses this to know which power to pick from the freq
table. So, if you have any RT task running on a CPU, freq will be at max
anyway. It seems to me that in this case max_util for the freq domain
must be max_freq (w/o considering rt.util_avg, as schedutil does). Then
you could probably still use rt.util_avg to get the percentage of busy
time with sum_util?

> +
> +			max_util = max(util, max_util);
> +			sum_util += util;
> +		}
> +
> +		energy += em_fd_energy(sfd->fd, max_util, sum_util);
> +	}
> +
> +	return energy;
> +}

Best,

- Juri
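
For illustration, Juri's suggestion for the inner loop could be sketched
roughly as below (hypothetical code; has_rt_tasks() and cpu_util_rt() are
assumed helpers that do not exist in the series at this point):

	for_each_cpu_and(cpu, freq_domain_span(sfd), cpu_online_mask) {
		util = cpu_util_next(cpu, p, dst_cpu);
		util += cpu_util_dl(cpu_rq(cpu));

		/*
		 * Hypothetical: still account the RT busy time in
		 * sum_util via rt.util_avg...
		 */
		sum_util += util + cpu_util_rt(cpu_rq(cpu));

		/*
		 * ...but, since a runnable RT task drives schedutil to
		 * max frequency, request the highest capacity state for
		 * the power lookup.
		 */
		if (has_rt_tasks(cpu_rq(cpu)))
			util = capacity_orig_of(cpu);
		max_util = max(util, max_util);
	}
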
Pavan Kondeti June 19, 2018, 9:51 a.m. UTC | #2
On Mon, May 21, 2018 at 03:25:02PM +0100, Quentin Perret wrote:

<snip>

>  
> +/*
> + * Returns the util of "cpu" if "p" wakes up on "dst_cpu".
> + */
> +static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
> +{
> +	unsigned long util, util_est;
> +	struct cfs_rq *cfs_rq;
> +
> +	/* Task is where it should be, or has no impact on cpu */
> +	if ((task_cpu(p) == dst_cpu) || (cpu != task_cpu(p) && cpu != dst_cpu))
> +		return cpu_util(cpu);
> +
> +	cfs_rq = &cpu_rq(cpu)->cfs;
> +	util = READ_ONCE(cfs_rq->avg.util_avg);
> +
> +	if (dst_cpu == cpu)
> +		util += task_util(p);
> +	else
> +		util = max_t(long, util - task_util(p), 0);
> +
> +	if (sched_feat(UTIL_EST)) {
> +		util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);
> +		if (dst_cpu == cpu)
> +			util_est += _task_util_est(p);
> +		else
> +			util_est = max_t(long, util_est - _task_util_est(p), 0);

For the UTIL_EST case, the waking task will not have any contribution to the
previous CPU's util_est, so we can just use the previous CPU's util_est as is.

> +		util = max(util, util_est);
> +	}
> +
> +	return min_t(unsigned long, util, capacity_orig_of(cpu));
> +}
> +
Quentin Perret June 19, 2018, 9:53 a.m. UTC | #3
On Tuesday 19 Jun 2018 at 15:21:40 (+0530), Pavan Kondeti wrote:
> On Mon, May 21, 2018 at 03:25:02PM +0100, Quentin Perret wrote:
> 
> <snip>
> 
> >  
> > +/*
> > + * Returns the util of "cpu" if "p" wakes up on "dst_cpu".
> > + */
> > +static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
> > +{
> > +	unsigned long util, util_est;
> > +	struct cfs_rq *cfs_rq;
> > +
> > +	/* Task is where it should be, or has no impact on cpu */
> > +	if ((task_cpu(p) == dst_cpu) || (cpu != task_cpu(p) && cpu != dst_cpu))
> > +		return cpu_util(cpu);
> > +
> > +	cfs_rq = &cpu_rq(cpu)->cfs;
> > +	util = READ_ONCE(cfs_rq->avg.util_avg);
> > +
> > +	if (dst_cpu == cpu)
> > +		util += task_util(p);
> > +	else
> > +		util = max_t(long, util - task_util(p), 0);
> > +
> > +	if (sched_feat(UTIL_EST)) {
> > +		util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);
> > +		if (dst_cpu == cpu)
> > +			util_est += _task_util_est(p);
> > +		else
> > +			util_est = max_t(long, util_est - _task_util_est(p), 0);
> 
> For the UTIL_EST case, the waking task will not have any contribution to the
> previous CPU's util_est, so we can just use the previous CPU's util_est as is.

Right, good catch. I actually spotted that one as well and already fixed it
for v4 ;-)
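
For reference, the v4 fix presumably boils down to only adjusting util_est
on the destination CPU, since a sleeping task no longer contributes to its
previous CPU's util_est (a sketch of the fix being discussed, not the
actual v4 code):

	if (sched_feat(UTIL_EST)) {
		util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);
		/*
		 * A sleeping task has already been removed from
		 * util_est, so no subtraction is needed when "cpu"
		 * is not the destination.
		 */
		if (dst_cpu == cpu)
			util_est += _task_util_est(p);
		util = max(util, util_est);
	}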

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ec797d7ede83..1f7029258df2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6628,6 +6628,61 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
 	return min_cap * 1024 < task_util(p) * capacity_margin;
 }
 
+/*
+ * Returns the util of "cpu" if "p" wakes up on "dst_cpu".
+ */
+static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
+{
+	unsigned long util, util_est;
+	struct cfs_rq *cfs_rq;
+
+	/* Task is where it should be, or has no impact on cpu */
+	if ((task_cpu(p) == dst_cpu) || (cpu != task_cpu(p) && cpu != dst_cpu))
+		return cpu_util(cpu);
+
+	cfs_rq = &cpu_rq(cpu)->cfs;
+	util = READ_ONCE(cfs_rq->avg.util_avg);
+
+	if (dst_cpu == cpu)
+		util += task_util(p);
+	else
+		util = max_t(long, util - task_util(p), 0);
+
+	if (sched_feat(UTIL_EST)) {
+		util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);
+		if (dst_cpu == cpu)
+			util_est += _task_util_est(p);
+		else
+			util_est = max_t(long, util_est - _task_util_est(p), 0);
+		util = max(util, util_est);
+	}
+
+	return min_t(unsigned long, util, capacity_orig_of(cpu));
+}
+
+static long compute_energy(struct task_struct *p, int dst_cpu)
+{
+	long util, max_util, sum_util, energy = 0;
+	struct sched_energy_fd *sfd;
+	int cpu;
+
+	for_each_freq_domain(sfd) {
+		max_util = sum_util = 0;
+		for_each_cpu_and(cpu, freq_domain_span(sfd), cpu_online_mask) {
+			util = cpu_util_next(cpu, p, dst_cpu);
+			util += cpu_util_dl(cpu_rq(cpu));
+			/* XXX: add RT util_avg when available. */
+
+			max_util = max(util, max_util);
+			sum_util += util;
+		}
+
+		energy += em_fd_energy(sfd->fd, max_util, sum_util);
+	}
+
+	return energy;
+}
+
 /*
  * select_task_rq_fair: Select target runqueue for the waking task in domains
  * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ef5d4ebc205e..0dd895554f78 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2148,7 +2148,7 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
 # define arch_scale_freq_invariant()	false
 #endif
 
-#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
+#ifdef CONFIG_SMP
 static inline unsigned long cpu_util_dl(struct rq *rq)
 {
 	return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;