
[3/6] sched: Simplify sched_info_on()

Message ID 20210505111525.121458839@infradead.org (mailing list archive)
State New, archived
Series sched,delayacct: Some cleanups

Commit Message

Peter Zijlstra May 5, 2021, 10:59 a.m. UTC
The situation around sched_info is somewhat complicated: it is used by
sched_stats and delayacct and, indirectly, by kvm.

If SCHEDSTATS=Y (but disabled by default), sched_info_on() is
unconditionally true -- this is the case for all distro kernel configs
I checked.

If for some reason SCHEDSTATS=N but TASK_DELAY_ACCT=Y, then
sched_info_on() can return false when delayacct is disabled,
presumably on the assumption that there are no other users left --
except that kvm still is one.

Instead of complicating matters further by accurately accounting for
the sched_stat and kvm state, simply enable sched_info unconditionally
when SCHED_INFO=Y, matching the common distro case.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/sched/stat.h |   10 ++--------
 kernel/delayacct.c         |    1 -
 kernel/sched/stats.h       |   37 ++++++++++---------------------------
 3 files changed, 12 insertions(+), 36 deletions(-)
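
In effect, the check collapses from a config-dependent chain (compile-time
true with SCHEDSTATS=Y, tracking delayacct_on with TASK_DELAY_ACCT=Y, false
otherwise) into a single compile-time constant. A minimal before/after
sketch, paraphrasing the include/linux/sched/stat.h hunk in the patch below
(IS_ENABLED() comes from <linux/kconfig.h>, which the patch adds):

/* before: depends on config options and, at runtime, on delayacct_on */
static inline int sched_info_on(void)
{
#ifdef CONFIG_SCHEDSTATS
	return 1;
#elif defined(CONFIG_TASK_DELAY_ACCT)
	extern int delayacct_on;
	return delayacct_on;
#else
	return 0;
#endif
}

/* after: constant whenever CONFIG_SCHED_INFO is selected */
static inline int sched_info_on(void)
{
	return IS_ENABLED(CONFIG_SCHED_INFO);
}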

Comments

Johannes Weiner May 6, 2021, 2:03 p.m. UTC | #1
On Wed, May 05, 2021 at 12:59:43PM +0200, Peter Zijlstra wrote:
> The situation around sched_info is somewhat complicated, it is used by
> sched_stats and delayacct and, indirectly, kvm.
> 
> If SCHEDSTATS=Y (but disabled by default) sched_info_on() is
> unconditionally true -- this is the case for all distro kernel configs
> I checked.
> 
> If for some reason SCHEDSTATS=N, but TASK_DELAY_ACCT=Y, then
> sched_info_on() can return false when delayacct is disabled,
> presumably because there would be no other users left; except kvm is.
> 
> Instead of complicating matters further by accurately accounting
> sched_stat and kvm state, simply unconditionally enable when
> SCHED_INFO=Y, matching the common distro case.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Mel Gorman May 12, 2021, 11:10 a.m. UTC | #2
On Wed, May 05, 2021 at 12:59:43PM +0200, Peter Zijlstra wrote:
> The situation around sched_info is somewhat complicated, it is used by
> sched_stats and delayacct and, indirectly, kvm.
> 
> If SCHEDSTATS=Y (but disabled by default) sched_info_on() is
> unconditionally true -- this is the case for all distro kernel configs
> I checked.
> 
> If for some reason SCHEDSTATS=N, but TASK_DELAY_ACCT=Y, then
> sched_info_on() can return false when delayacct is disabled,
> presumably because there would be no other users left; except kvm is.
> 
> Instead of complicating matters further by accurately accounting
> sched_stat and kvm state, simply unconditionally enable when
> SCHED_INFO=Y, matching the common distro case.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> @@ -163,13 +158,12 @@ static inline void sched_info_reset_dequ
>   */
>  static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
>  {
> -	unsigned long long now = rq_clock(rq), delta = 0;
> +	unsigned long long delta = 0;
>  
> -	if (sched_info_on()) {
> -		if (t->sched_info.last_queued)
> -			delta = now - t->sched_info.last_queued;
> +	if (t->sched_info.last_queued) {
> +		delta = rq_clock(rq) - t->sched_info.last_queued;
> +		t->sched_info.last_queued = 0;
>  	}
> -	sched_info_reset_dequeued(t);
>  	t->sched_info.run_delay += delta;
>  
>  	rq_sched_info_dequeue(rq, delta);

As delta is !0 iff t->sched_info.last_queued, why not this?

diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 33ffd41935ba..37e33c0eeb7c 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -158,15 +158,14 @@ static inline void psi_sched_switch(struct task_struct *prev,
  */
 static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
 {
-	unsigned long long delta = 0;
-
 	if (t->sched_info.last_queued) {
+		unsigned long long delta;
+
 		delta = rq_clock(rq) - t->sched_info.last_queued;
 		t->sched_info.last_queued = 0;
+		t->sched_info.run_delay += delta;
+		rq_sched_info_dequeue(rq, delta);
 	}
-	t->sched_info.run_delay += delta;
-
-	rq_sched_info_dequeue(rq, delta);
 }
 
 /*

> @@ -184,9 +178,10 @@ static void sched_info_arrive(struct rq
>  {
>  	unsigned long long now = rq_clock(rq), delta = 0;
>  
> -	if (t->sched_info.last_queued)
> +	if (t->sched_info.last_queued) {
>  		delta = now - t->sched_info.last_queued;
> -	sched_info_reset_dequeued(t);
> +		t->sched_info.last_queued = 0;
> +	}
>  	t->sched_info.run_delay += delta;
>  	t->sched_info.last_arrival = now;
>  	t->sched_info.pcount++;

Similarly

@@ -176,17 +175,18 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
  */
 static void sched_info_arrive(struct rq *rq, struct task_struct *t)
 {
-	unsigned long long now = rq_clock(rq), delta = 0;
+	unsigned long long now = rq_clock(rq);
 
 	if (t->sched_info.last_queued) {
+		unsigned long long delta;
+
 		delta = now - t->sched_info.last_queued;
 		t->sched_info.last_queued = 0;
+		t->sched_info.run_delay += delta;
+		rq_sched_info_arrive(rq, delta);
 	}
-	t->sched_info.run_delay += delta;
 	t->sched_info.last_arrival = now;
 	t->sched_info.pcount++;
-
-	rq_sched_info_arrive(rq, delta);
 }
 
 /*
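
(For the dequeue side this is purely structural: assuming the
CONFIG_SCHEDSTATS=y definition sketched below -- with SCHEDSTATS=N the
helper is an empty stub -- rq_sched_info_dequeue() only accumulates the
delta, so the old unconditional call with delta == 0 added nothing.)

/* kernel/sched/stats.h, CONFIG_SCHEDSTATS=y variant (paraphrased sketch) */
static inline void
rq_sched_info_dequeue(struct rq *rq, unsigned long long delta)
{
	if (rq)
		rq->rq_sched_info.run_delay += delta;	/* delta == 0: no effect */
}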
Peter Zijlstra May 12, 2021, 11:32 a.m. UTC | #3
On Wed, May 12, 2021 at 12:10:40PM +0100, Mel Gorman wrote:
> As delta is !0 iff t->sched_info.last_queued, why not this?
> 
> diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
> index 33ffd41935ba..37e33c0eeb7c 100644
> --- a/kernel/sched/stats.h
> +++ b/kernel/sched/stats.h
> @@ -158,15 +158,14 @@ static inline void psi_sched_switch(struct task_struct *prev,
>   */
>  static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
>  {
> -	unsigned long long delta = 0;
> -
>  	if (t->sched_info.last_queued) {
> +		unsigned long long delta;
> +
>  		delta = rq_clock(rq) - t->sched_info.last_queued;
>  		t->sched_info.last_queued = 0;
> +		t->sched_info.run_delay += delta;
> +		rq_sched_info_dequeue(rq, delta);
>  	}
> -	t->sched_info.run_delay += delta;
> -
> -	rq_sched_info_dequeue(rq, delta);
>  }

Right.. clearly I missed the obvious there.. Lemme go add another patch
on top of the lot.

Something like this I suppose.

---
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 33ffd41935ba..111072ee9663 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -160,10 +160,11 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
 {
 	unsigned long long delta = 0;
 
-	if (t->sched_info.last_queued) {
-		delta = rq_clock(rq) - t->sched_info.last_queued;
-		t->sched_info.last_queued = 0;
-	}
+	if (!t->sched_info.last_queued)
+		return;
+
+	delta = rq_clock(rq) - t->sched_info.last_queued;
+	t->sched_info.last_queued = 0;
 	t->sched_info.run_delay += delta;
 
 	rq_sched_info_dequeue(rq, delta);
@@ -176,12 +177,14 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
  */
 static void sched_info_arrive(struct rq *rq, struct task_struct *t)
 {
-	unsigned long long now = rq_clock(rq), delta = 0;
+	unsigned long long now, delta = 0;
 
-	if (t->sched_info.last_queued) {
-		delta = now - t->sched_info.last_queued;
-		t->sched_info.last_queued = 0;
-	}
+	if (!t->sched_info.last_queued)
+		return;
+
+	now = rq_clock(rq);
+	delta = now - t->sched_info.last_queued;
+	t->sched_info.last_queued = 0;
 	t->sched_info.run_delay += delta;
 	t->sched_info.last_arrival = now;
 	t->sched_info.pcount++;
Mel Gorman May 12, 2021, 12:51 p.m. UTC | #4
On Wed, May 12, 2021 at 01:32:03PM +0200, Peter Zijlstra wrote:
> On Wed, May 12, 2021 at 12:10:40PM +0100, Mel Gorman wrote:
> > As delta is !0 iff t->sched_info.last_queued, why not this?
> > 
> > diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
> > index 33ffd41935ba..37e33c0eeb7c 100644
> > --- a/kernel/sched/stats.h
> > +++ b/kernel/sched/stats.h
> > @@ -158,15 +158,14 @@ static inline void psi_sched_switch(struct task_struct *prev,
> >   */
> >  static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
> >  {
> > -	unsigned long long delta = 0;
> > -
> >  	if (t->sched_info.last_queued) {
> > +		unsigned long long delta;
> > +
> >  		delta = rq_clock(rq) - t->sched_info.last_queued;
> >  		t->sched_info.last_queued = 0;
> > +		t->sched_info.run_delay += delta;
> > +		rq_sched_info_dequeue(rq, delta);
> >  	}
> > -	t->sched_info.run_delay += delta;
> > -
> > -	rq_sched_info_dequeue(rq, delta);
> >  }
> 
> Right.. clearly I missed the obvious there.. Lemme go add another patch
> on top of the lot.
> 
> Something like this I suppose.
> 

That works for me.

Patch

--- a/include/linux/sched/stat.h
+++ b/include/linux/sched/stat.h
@@ -3,6 +3,7 @@ 
 #define _LINUX_SCHED_STAT_H
 
 #include <linux/percpu.h>
+#include <linux/kconfig.h>
 
 /*
  * Various counters maintained by the scheduler and fork(),
@@ -23,14 +24,7 @@  extern unsigned long nr_iowait_cpu(int c
 
 static inline int sched_info_on(void)
 {
-#ifdef CONFIG_SCHEDSTATS
-	return 1;
-#elif defined(CONFIG_TASK_DELAY_ACCT)
-	extern int delayacct_on;
-	return delayacct_on;
-#else
-	return 0;
-#endif
+	return IS_ENABLED(CONFIG_SCHED_INFO);
 }
 
 #ifdef CONFIG_SCHEDSTATS
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -15,7 +15,6 @@ 
 #include <linux/module.h>
 
 int delayacct_on __read_mostly = 1;	/* Delay accounting turned on/off */
-EXPORT_SYMBOL_GPL(delayacct_on);
 struct kmem_cache *delayacct_cache;
 
 static int __init delayacct_setup_disable(char *str)
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -150,11 +150,6 @@  static inline void psi_sched_switch(stru
 #endif /* CONFIG_PSI */
 
 #ifdef CONFIG_SCHED_INFO
-static inline void sched_info_reset_dequeued(struct task_struct *t)
-{
-	t->sched_info.last_queued = 0;
-}
-
 /*
  * We are interested in knowing how long it was from the *first* time a
  * task was queued to the time that it finally hit a CPU, we call this routine
@@ -163,13 +158,12 @@  static inline void sched_info_reset_dequ
  */
 static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
 {
-	unsigned long long now = rq_clock(rq), delta = 0;
+	unsigned long long delta = 0;
 
-	if (sched_info_on()) {
-		if (t->sched_info.last_queued)
-			delta = now - t->sched_info.last_queued;
+	if (t->sched_info.last_queued) {
+		delta = rq_clock(rq) - t->sched_info.last_queued;
+		t->sched_info.last_queued = 0;
 	}
-	sched_info_reset_dequeued(t);
 	t->sched_info.run_delay += delta;
 
 	rq_sched_info_dequeue(rq, delta);
@@ -184,9 +178,10 @@  static void sched_info_arrive(struct rq
 {
 	unsigned long long now = rq_clock(rq), delta = 0;
 
-	if (t->sched_info.last_queued)
+	if (t->sched_info.last_queued) {
 		delta = now - t->sched_info.last_queued;
-	sched_info_reset_dequeued(t);
+		t->sched_info.last_queued = 0;
+	}
 	t->sched_info.run_delay += delta;
 	t->sched_info.last_arrival = now;
 	t->sched_info.pcount++;
@@ -201,10 +196,8 @@  static void sched_info_arrive(struct rq
  */
 static inline void sched_info_enqueue(struct rq *rq, struct task_struct *t)
 {
-	if (sched_info_on()) {
-		if (!t->sched_info.last_queued)
-			t->sched_info.last_queued = rq_clock(rq);
-	}
+	if (!t->sched_info.last_queued)
+		t->sched_info.last_queued = rq_clock(rq);
 }
 
 /*
@@ -231,7 +224,7 @@  static inline void sched_info_depart(str
  * the idle task.)  We are only called when prev != next.
  */
 static inline void
-__sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
+sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
 {
 	/*
 	 * prev now departs the CPU.  It's not interesting to record
@@ -245,18 +238,8 @@  __sched_info_switch(struct rq *rq, struc
 		sched_info_arrive(rq, next);
 }
 
-static inline void
-sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
-{
-	if (sched_info_on())
-		__sched_info_switch(rq, prev, next);
-}
-
 #else /* !CONFIG_SCHED_INFO: */
 # define sched_info_enqueue(rq, t)	do { } while (0)
-# define sched_info_reset_dequeued(t)	do { } while (0)
 # define sched_info_dequeue(rq, t)	do { } while (0)
-# define sched_info_depart(rq, t)	do { } while (0)
-# define sched_info_arrive(rq, next)	do { } while (0)
 # define sched_info_switch(rq, t, next)	do { } while (0)
 #endif /* CONFIG_SCHED_INFO */