diff mbox series

[v1,1/1] sched/fair: do not preempt current task if it is going to call schedule()

Message ID 20200305081611.29323-2-cl@rock-chips.com (mailing list archive)
State New, archived
Headers show
Series wait_task_inactive() spend too much time on system startup | expand

Commit Message

陈亮 March 5, 2020, 8:16 a.m. UTC
From: Liang Chen <cl@rock-chips.com>

when we create a kthread with ktrhead_create_on_cpu(),the child thread
entry is ktread.c:ktrhead() which will be preempted by the parent after
call complete(done) while schedule() is not called yet,then the parent
will call wait_task_inactive(child) but the child is still on the runqueue,
so the parent will schedule_hrtimeout() for 1 jiffy,it will waste a lot of
time,especially on startup.

  parent                             child
ktrhead_create_on_cpu()
  wait_fo_completion(&done) -----> ktread.c:ktrhead()
                             |----- complete(done);--wakeup and preempted by parent
 kthread_bind() <------------|  |-> schedule();--dequeue here
  wait_task_inactive(child)     |
   schedule_hrtimeout(1 jiffy) -|

So we hope the child just wakeup parent but not preempted by parent, and the
child is going to call schedule() soon,then the parent will not call
schedule_hrtimeout(1 jiffy) as the child is already dequeue.

The same issue for ktrhead_park()&&kthread_parkme().
This patch can save 120ms on rk312x startup with CONFIG_HZ=300.

Signed-off-by: Liang Chen <cl@rock-chips.com>
---
 arch/arm/include/asm/thread_info.h   |  1 +
 arch/arm64/include/asm/thread_info.h |  1 +
 include/linux/sched.h                | 15 +++++++++++++++
 kernel/kthread.c                     |  4 ++++
 kernel/sched/fair.c                  |  4 ++++
 5 files changed, 25 insertions(+)

Comments

Kees Cook March 5, 2020, 8:43 a.m. UTC | #1
On Thu, Mar 05, 2020 at 04:16:11PM +0800, cl@rock-chips.com wrote:
> From: Liang Chen <cl@rock-chips.com>
> 
> when we create a kthread with ktrhead_create_on_cpu(),the child thread
> entry is ktread.c:ktrhead() which will be preempted by the parent after
> call complete(done) while schedule() is not called yet,then the parent
> will call wait_task_inactive(child) but the child is still on the runqueue,
> so the parent will schedule_hrtimeout() for 1 jiffy,it will waste a lot of
> time,especially on startup.
> 
>   parent                             child
> ktrhead_create_on_cpu()
>   wait_fo_completion(&done) -----> ktread.c:ktrhead()
>                              |----- complete(done);--wakeup and preempted by parent
>  kthread_bind() <------------|  |-> schedule();--dequeue here
>   wait_task_inactive(child)     |
>    schedule_hrtimeout(1 jiffy) -|
> 
> So we hope the child just wakeup parent but not preempted by parent, and the
> child is going to call schedule() soon,then the parent will not call
> schedule_hrtimeout(1 jiffy) as the child is already dequeue.
> 
> The same issue for ktrhead_park()&&kthread_parkme().
> This patch can save 120ms on rk312x startup with CONFIG_HZ=300.

Interesting improvement!

> 
> Signed-off-by: Liang Chen <cl@rock-chips.com>
> ---
>  arch/arm/include/asm/thread_info.h   |  1 +
>  arch/arm64/include/asm/thread_info.h |  1 +
>  include/linux/sched.h                | 15 +++++++++++++++
>  kernel/kthread.c                     |  4 ++++
>  kernel/sched/fair.c                  |  4 ++++
>  5 files changed, 25 insertions(+)
> 
> diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
> index 0d0d5178e2c3..51802991ba1f 100644
> --- a/arch/arm/include/asm/thread_info.h
> +++ b/arch/arm/include/asm/thread_info.h
> @@ -145,6 +145,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
>  #define TIF_USING_IWMMXT	17
>  #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
>  #define TIF_RESTORE_SIGMASK	20
> +#define TIF_GOING_TO_SCHED	27	/* task is going to call schedule() */
>  
>  #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
>  #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
> diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
> index f0cec4160136..332786f11dc3 100644
> --- a/arch/arm64/include/asm/thread_info.h
> +++ b/arch/arm64/include/asm/thread_info.h
> @@ -78,6 +78,7 @@ void arch_release_task_struct(struct task_struct *tsk);
>  #define TIF_SVE_VL_INHERIT	24	/* Inherit sve_vl_onexec across exec */
>  #define TIF_SSBD		25	/* Wants SSB mitigation */
>  #define TIF_TAGGED_ADDR		26	/* Allow tagged user addresses */
> +#define TIF_GOING_TO_SCHED	27	/* task is going to call schedule() */
>  
>  #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
>  #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)

I don't think you want a TIF flag for this (they're used in special
places, especially in entry code, etc). Since you're only changing
"normal" C code, I would suggest a atomic_flags addition instead:

#define PFA_GOING_TO_SCHED	8
...
TASK_PFA_TEST(GOING_TO_SCHED, going_to_sched)
TASK_PFA_SET(GOING_TO_SCHED, going_to_sched)
TASK_PFA_CLEAR(GOING_TO_SCHED, going_to_sched)

(Also if you used TIF, you'd need to add the TIF to every architecture
to use it in the common scheduler code, which too much work.)

> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 04278493bf15..cb9058d2cf0b 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1768,6 +1768,21 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
>  	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
>  }
>  
> +static inline void set_tsk_going_to_sched(struct task_struct *tsk)
> +{
> +	set_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
> +}
> +
> +static inline void clear_tsk_going_to_sched(struct task_struct *tsk)
> +{
> +	clear_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
> +}
> +
> +static inline int test_tsk_going_to_sched(struct task_struct *tsk)
> +{
> +	return unlikely(test_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED));
> +}

Then you can drop these wrappers since you'll have the test/set/clear
functions declared above (task_set/clear_going_to_sched(),
task_going_to_sched()) with the TASK_PFA... macros.

> +
>  /*
>   * cond_resched() and cond_resched_lock(): latency reduction via
>   * explicit rescheduling in places that are safe. The return
> diff --git a/kernel/kthread.c b/kernel/kthread.c
> index b262f47046ca..8a4e4c9cdc22 100644
> --- a/kernel/kthread.c
> +++ b/kernel/kthread.c
> @@ -199,8 +199,10 @@ static void __kthread_parkme(struct kthread *self)
>  		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
>  			break;
>  
> +		set_tsk_going_to_sched(current);

task_set_going_to_sched(current);

>  		complete(&self->parked);
>  		schedule();
> +		clear_tsk_going_to_sched(current);

task_clear_going_to_sched(current);

>  	}
>  	__set_current_state(TASK_RUNNING);
>  }
> @@ -245,8 +247,10 @@ static int kthread(void *_create)
>  	/* OK, tell user we're spawned, wait for stop or wakeup */
>  	__set_current_state(TASK_UNINTERRUPTIBLE);
>  	create->result = current;
> +	set_tsk_going_to_sched(current);
>  	complete(done);
>  	schedule();
> +	clear_tsk_going_to_sched(current);

etc.

>  
>  	ret = -EINTR;
>  	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 3c8a379c357e..28a308743bf8 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -4330,6 +4330,8 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
>  			hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
>  		return;
>  #endif
> +	if (test_tsk_going_to_sched(rq_of(cfs_rq)->curr))
> +		return;

if (unlikely(task_going_to_sched(rq_of(cfs_rq)->curr)))
	return;

>  
>  	if (cfs_rq->nr_running > 1)
>  		check_preempt_tick(cfs_rq, curr);
> @@ -6633,6 +6635,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
>  	 */
>  	if (test_tsk_need_resched(curr))
>  		return;
> +	if (test_tsk_going_to_sched(curr))
> +		return;

same.

>  
>  	/* Idle tasks are by definition preempted by non-idle tasks. */
>  	if (unlikely(task_has_idle_policy(curr)) &&
> -- 
> 2.17.1
> 
> 
> 

I'd add comments above each of the "return" cases to help people
understand why the test is important.
Peter Zijlstra March 5, 2020, 9:58 a.m. UTC | #2
On Thu, Mar 05, 2020 at 04:16:11PM +0800, cl@rock-chips.com wrote:
> From: Liang Chen <cl@rock-chips.com>
> 
> when we create a kthread with ktrhead_create_on_cpu(),the child thread
> entry is ktread.c:ktrhead() which will be preempted by the parent after
> call complete(done) while schedule() is not called yet,then the parent
> will call wait_task_inactive(child) but the child is still on the runqueue,
> so the parent will schedule_hrtimeout() for 1 jiffy,it will waste a lot of
> time,especially on startup.
> 
>   parent                             child
> ktrhead_create_on_cpu()
>   wait_fo_completion(&done) -----> ktread.c:ktrhead()
>                              |----- complete(done);--wakeup and preempted by parent
>  kthread_bind() <------------|  |-> schedule();--dequeue here
>   wait_task_inactive(child)     |
>    schedule_hrtimeout(1 jiffy) -|
> 
> So we hope the child just wakeup parent but not preempted by parent, and the
> child is going to call schedule() soon,then the parent will not call
> schedule_hrtimeout(1 jiffy) as the child is already dequeue.
> 
> The same issue for ktrhead_park()&&kthread_parkme().
> This patch can save 120ms on rk312x startup with CONFIG_HZ=300.

> diff --git a/kernel/kthread.c b/kernel/kthread.c
> index b262f47046ca..8a4e4c9cdc22 100644
> --- a/kernel/kthread.c
> +++ b/kernel/kthread.c
> @@ -199,8 +199,10 @@ static void __kthread_parkme(struct kthread *self)
>  		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
>  			break;
>  
> +		set_tsk_going_to_sched(current);
>  		complete(&self->parked);
>  		schedule();
> +		clear_tsk_going_to_sched(current);
>  	}
>  	__set_current_state(TASK_RUNNING);
>  }
> @@ -245,8 +247,10 @@ static int kthread(void *_create)
>  	/* OK, tell user we're spawned, wait for stop or wakeup */
>  	__set_current_state(TASK_UNINTERRUPTIBLE);
>  	create->result = current;
> +	set_tsk_going_to_sched(current);
>  	complete(done);
>  	schedule();
> +	clear_tsk_going_to_sched(current);
>  
>  	ret = -EINTR;
>  	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {

Were you looking for this? I think it does the same without having
fallen from the ugly tree...

diff --git a/kernel/kthread.c b/kernel/kthread.c
index b262f47046ca..62699ff414f4 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -199,8 +199,10 @@ static void __kthread_parkme(struct kthread *self)
 		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
 			break;
 
+		preempt_disable()
 		complete(&self->parked);
-		schedule();
+		schedule_preempt_disabled();
+		preempt_enable();
 	}
 	__set_current_state(TASK_RUNNING);
 }
@@ -245,8 +247,10 @@ static int kthread(void *_create)
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
 	create->result = current;
+	preempt_disable()
 	complete(done);
-	schedule();
+	schedule_preempt_disabled();
+	preempt_enable();
 
 	ret = -EINTR;
 	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
kernel test robot March 5, 2020, 1:22 p.m. UTC | #3
Hi,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on arm64/for-next/core tip/auto-latest linus/master v5.6-rc4 next-20200304]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/cl-rock-chips-com/wait_task_inactive-spend-too-much-time-on-system-startup/20200305-201639
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git a0f03b617c3b2644d3d47bf7d9e60aed01bd5b10
config: nds32-defconfig (attached as .config)
compiler: nds32le-linux-gcc (GCC) 9.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=9.2.0 make.cross ARCH=nds32 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from arch/nds32/kernel/asm-offsets.c:4:
   include/linux/sched.h: In function 'set_tsk_going_to_sched':
>> include/linux/sched.h:1776:27: error: 'TIF_GOING_TO_SCHED' undeclared (first use in this function)
    1776 |  set_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
         |                           ^~~~~~~~~~~~~~~~~~
   include/linux/sched.h:1776:27: note: each undeclared identifier is reported only once for each function it appears in
   include/linux/sched.h: In function 'clear_tsk_going_to_sched':
   include/linux/sched.h:1781:29: error: 'TIF_GOING_TO_SCHED' undeclared (first use in this function)
    1781 |  clear_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
         |                             ^~~~~~~~~~~~~~~~~~
   In file included from include/linux/kernel.h:11,
                    from include/linux/list.h:9,
                    from include/linux/rculist.h:10,
                    from include/linux/pid.h:5,
                    from include/linux/sched.h:14,
                    from arch/nds32/kernel/asm-offsets.c:4:
   include/linux/sched.h: In function 'test_tsk_going_to_sched':
   include/linux/sched.h:1786:44: error: 'TIF_GOING_TO_SCHED' undeclared (first use in this function)
    1786 |  return unlikely(test_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED));
         |                                            ^~~~~~~~~~~~~~~~~~
   include/linux/compiler.h:78:42: note: in definition of macro 'unlikely'
      78 | # define unlikely(x) __builtin_expect(!!(x), 0)
         |                                          ^
   make[2]: *** [scripts/Makefile.build:101: arch/nds32/kernel/asm-offsets.s] Error 1
   make[2]: Target '__build' not remade because of errors.
   make[1]: *** [Makefile:1112: prepare0] Error 2
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [Makefile:179: sub-make] Error 2
   22 real  7 user  11 sys  85.87% cpu 	make prepare

vim +/TIF_GOING_TO_SCHED +1776 include/linux/sched.h

  1773	
  1774	static inline void set_tsk_going_to_sched(struct task_struct *tsk)
  1775	{
> 1776		set_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
  1777	}
  1778	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot March 5, 2020, 1:33 p.m. UTC | #4
Hi,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on tip/sched/core]
[also build test WARNING on arm64/for-next/core tip/auto-latest linus/master v5.6-rc4 next-20200305]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/cl-rock-chips-com/wait_task_inactive-spend-too-much-time-on-system-startup/20200305-201639
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git a0f03b617c3b2644d3d47bf7d9e60aed01bd5b10
config: riscv-randconfig-a001-20200305 (attached as .config)
compiler: riscv64-linux-gcc (GCC) 7.5.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.5.0 make.cross ARCH=riscv 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from arch/riscv/kernel/asm-offsets.c:10:0:
   include/linux/sched.h: In function 'set_tsk_going_to_sched':
   include/linux/sched.h:1776:27: error: 'TIF_GOING_TO_SCHED' undeclared (first use in this function); did you mean 'TIF_NEED_RESCHED'?
     set_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
                              ^~~~~~~~~~~~~~~~~~
                              TIF_NEED_RESCHED
   include/linux/sched.h:1776:27: note: each undeclared identifier is reported only once for each function it appears in
   include/linux/sched.h: In function 'clear_tsk_going_to_sched':
   include/linux/sched.h:1781:29: error: 'TIF_GOING_TO_SCHED' undeclared (first use in this function); did you mean 'TIF_NEED_RESCHED'?
     clear_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
                                ^~~~~~~~~~~~~~~~~~
                                TIF_NEED_RESCHED
   In file included from arch/riscv/include/asm/bug.h:9:0,
                    from include/linux/bug.h:5,
                    from arch/riscv/include/asm/current.h:13,
                    from include/linux/sched.h:12,
                    from arch/riscv/kernel/asm-offsets.c:10:
   include/linux/sched.h: In function 'test_tsk_going_to_sched':
   include/linux/sched.h:1786:44: error: 'TIF_GOING_TO_SCHED' undeclared (first use in this function); did you mean 'TIF_NEED_RESCHED'?
     return unlikely(test_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED));
                                               ^
   include/linux/compiler.h:33:34: note: in definition of macro '__branch_check__'
       ______r = __builtin_expect(!!(x), expect); \
                                     ^
>> include/linux/sched.h:1786:9: note: in expansion of macro 'unlikely'
     return unlikely(test_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED));
            ^~~~~~~~
   make[2]: *** [scripts/Makefile.build:101: arch/riscv/kernel/asm-offsets.s] Error 1
   make[2]: Target '__build' not remade because of errors.
   make[1]: *** [Makefile:1112: prepare0] Error 2
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [Makefile:179: sub-make] Error 2
   21 real  5 user  9 sys  70.27% cpu 	make prepare

vim +/unlikely +1786 include/linux/sched.h

  1783	
  1784	static inline int test_tsk_going_to_sched(struct task_struct *tsk)
  1785	{
> 1786		return unlikely(test_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED));
  1787	}
  1788	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot March 5, 2020, 1:33 p.m. UTC | #5
Hi,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on arm64/for-next/core tip/auto-latest linus/master v5.6-rc4 next-20200305]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/cl-rock-chips-com/wait_task_inactive-spend-too-much-time-on-system-startup/20200305-201639
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git a0f03b617c3b2644d3d47bf7d9e60aed01bd5b10
config: m68k-allmodconfig (attached as .config)
compiler: m68k-linux-gcc (GCC) 7.5.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.5.0 make.cross ARCH=m68k 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   WARNING: unmet direct dependencies detected for NEED_MULTIPLE_NODES
   Depends on DISCONTIGMEM || NUMA
   Selected by
   - SINGLE_MEMORY_CHUNK && MMU
   In file included from arch/m68k/kernel/asm-offsets.c:15:0:
   include/linux/sched.h: In function 'set_tsk_going_to_sched':
>> include/linux/sched.h:1776:27: error: 'TIF_GOING_TO_SCHED' undeclared (first use in this function); did you mean
   set_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
   ^~~~~~~~~~~~~~~~~~
   TIF_NEED_RESCHED
   include/linux/sched.h:1776:27: note: each undeclared identifier is reported only once for each function it appears in
   include/linux/sched.h: In function 'clear_tsk_going_to_sched':
   include/linux/sched.h:1781:29: error: 'TIF_GOING_TO_SCHED' undeclared (first use in this function); did you mean
   clear_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
   ^~~~~~~~~~~~~~~~~~
   TIF_NEED_RESCHED
   In file included from include/linux/kernel.h:11:0,
   from include/linux/list.h:9,
   from include/linux/rculist.h:10,
   from include/linux/pid.h:5,
   from include/linux/sched.h:14,
   from arch/m68k/kernel/asm-offsets.c:15:
   include/linux/sched.h: In function 'test_tsk_going_to_sched':
   include/linux/sched.h:1786:44: error: 'TIF_GOING_TO_SCHED' undeclared (first use in this function); did you mean
   return unlikely(test_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED));
   ^
   include/linux/compiler.h:78:42: note: in definition of macro 'unlikely'
   # define unlikely(x) 0)
   ^
   Makefile Module.symvers System.map arch block certs crypto drivers fs include init ipc kernel lib mm modules.builtin modules.builtin.modinfo modules.order net scripts security sound source usr virt vmlinux vmlinux.gz vmlinux.o [scripts/Makefile.build:101: arch/m68k/kernel/asm-offsets.s] Error 1
   Target '__build' not remade because of errors.
   Makefile Module.symvers System.map arch block certs crypto drivers fs include init ipc kernel lib mm modules.builtin modules.builtin.modinfo modules.order net scripts security sound source usr virt vmlinux vmlinux.gz vmlinux.o [Makefile:1112: prepare0] Error 2
   Target 'prepare' not remade because of errors.
   make: Makefile Module.symvers System.map arch block certs crypto drivers fs include init ipc kernel lib mm modules.builtin modules.builtin.modinfo modules.order net scripts security sound source usr virt vmlinux vmlinux.gz vmlinux.o [Makefile:179: sub-make] Error 2
   20 real 6 user 7 sys 67.16% cpu make prepare

vim +/TIF_GOING_TO_SCHED +1776 include/linux/sched.h

  1773	
  1774	static inline void set_tsk_going_to_sched(struct task_struct *tsk)
  1775	{
> 1776		set_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
  1777	}
  1778	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot March 5, 2020, 1:59 p.m. UTC | #6
Hi,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on arm64/for-next/core tip/auto-latest linus/master v5.6-rc4 next-20200305]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/cl-rock-chips-com/wait_task_inactive-spend-too-much-time-on-system-startup/20200305-201639
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git a0f03b617c3b2644d3d47bf7d9e60aed01bd5b10
config: um-x86_64_defconfig (attached as .config)
compiler: gcc-7 (Debian 7.5.0-5) 7.5.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=um SUBARCH=x86_64

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from arch/x86/um/shared/sysdep/kernel-offsets.h:3:0,
                    from arch/um/kernel/asm-offsets.c:1:
   include/linux/sched.h: In function 'set_tsk_going_to_sched':
>> include/linux/sched.h:1776:27: error: 'TIF_GOING_TO_SCHED' undeclared (first use in this function); did you mean 'TIF_NEED_RESCHED'?
     set_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
                              ^~~~~~~~~~~~~~~~~~
                              TIF_NEED_RESCHED
   include/linux/sched.h:1776:27: note: each undeclared identifier is reported only once for each function it appears in
   include/linux/sched.h: In function 'clear_tsk_going_to_sched':
   include/linux/sched.h:1781:29: error: 'TIF_GOING_TO_SCHED' undeclared (first use in this function); did you mean 'TIF_NEED_RESCHED'?
     clear_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
                                ^~~~~~~~~~~~~~~~~~
                                TIF_NEED_RESCHED
   In file included from include/asm-generic/bug.h:5:0,
                    from ./arch/um/include/generated/asm/bug.h:1,
                    from include/linux/bug.h:5,
                    from include/linux/thread_info.h:12,
                    from include/asm-generic/current.h:5,
                    from ./arch/um/include/generated/asm/current.h:1,
                    from include/linux/sched.h:12,
                    from arch/x86/um/shared/sysdep/kernel-offsets.h:3,
                    from arch/um/kernel/asm-offsets.c:1:
   include/linux/sched.h: In function 'test_tsk_going_to_sched':
   include/linux/sched.h:1786:44: error: 'TIF_GOING_TO_SCHED' undeclared (first use in this function); did you mean 'TIF_NEED_RESCHED'?
     return unlikely(test_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED));
                                               ^
   include/linux/compiler.h:78:42: note: in definition of macro 'unlikely'
    # define unlikely(x) __builtin_expect(!!(x), 0)
                                             ^
   make[2]: *** [scripts/Makefile.build:101: arch/um/kernel/asm-offsets.s] Error 1
   make[2]: Target '__build' not remade because of errors.
   make[1]: *** [Makefile:1112: prepare0] Error 2
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [Makefile:179: sub-make] Error 2
   49 real  6 user  8 sys  31.64% cpu 	make prepare

vim +1776 include/linux/sched.h

  1773	
  1774	static inline void set_tsk_going_to_sched(struct task_struct *tsk)
  1775	{
> 1776		set_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
  1777	}
  1778	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot March 5, 2020, 2:38 p.m. UTC | #7
Hi,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on arm64/for-next/core tip/auto-latest linus/master v5.6-rc4 next-20200305]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/cl-rock-chips-com/wait_task_inactive-spend-too-much-time-on-system-startup/20200305-201639
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git a0f03b617c3b2644d3d47bf7d9e60aed01bd5b10
config: s390-zfcpdump_defconfig (attached as .config)
compiler: s390-linux-gcc (GCC) 7.5.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.5.0 make.cross ARCH=s390 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from include/linux/kvm_host.h:12:0,
                    from arch/s390/kernel/asm-offsets.c:11:
   include/linux/sched.h: In function 'set_tsk_going_to_sched':
>> include/linux/sched.h:1776:27: error: 'TIF_GOING_TO_SCHED' undeclared (first use in this function); did you mean 'TIF_SINGLE_STEP'?
     set_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
                              ^~~~~~~~~~~~~~~~~~
                              TIF_SINGLE_STEP
   include/linux/sched.h:1776:27: note: each undeclared identifier is reported only once for each function it appears in
   include/linux/sched.h: In function 'clear_tsk_going_to_sched':
   include/linux/sched.h:1781:29: error: 'TIF_GOING_TO_SCHED' undeclared (first use in this function); did you mean 'TIF_SINGLE_STEP'?
     clear_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
                                ^~~~~~~~~~~~~~~~~~
                                TIF_SINGLE_STEP
   In file included from include/linux/kernel.h:11:0,
                    from include/linux/list.h:9,
                    from include/linux/preempt.h:11,
                    from include/linux/hardirq.h:5,
                    from include/linux/kvm_host.h:7,
                    from arch/s390/kernel/asm-offsets.c:11:
   include/linux/sched.h: In function 'test_tsk_going_to_sched':
   include/linux/sched.h:1786:44: error: 'TIF_GOING_TO_SCHED' undeclared (first use in this function); did you mean 'TIF_SINGLE_STEP'?
     return unlikely(test_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED));
                                               ^
   include/linux/compiler.h:78:42: note: in definition of macro 'unlikely'
    # define unlikely(x) __builtin_expect(!!(x), 0)
                                             ^
   make[2]: *** [scripts/Makefile.build:101: arch/s390/kernel/asm-offsets.s] Error 1
   make[2]: Target '__build' not remade because of errors.
   make[1]: *** [Makefile:1112: prepare0] Error 2
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [Makefile:179: sub-make] Error 2
   142 real  47 user  83 sys  92.07% cpu 	make prepare

vim +1776 include/linux/sched.h

  1773	
  1774	static inline void set_tsk_going_to_sched(struct task_struct *tsk)
  1775	{
> 1776		set_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
  1777	}
  1778	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Philip Li March 5, 2020, 3:14 p.m. UTC | #8
> Subject: [kbuild-all] Re: [PATCH v1 1/1] sched/fair: do not preempt current task if
> it is going to call schedule()
> 
> Hi,
> 
> Thank you for the patch! Yet something to improve:
Sorry, there're a few duplicated reports around this patch, kindly
ignore similar reports, we will look for the possible issue.

Thanks

> 
> [auto build test ERROR on tip/sched/core]
> [also build test ERROR on arm64/for-next/core tip/auto-latest linus/master v5.6-
> rc4 next-20200305]
> [if your patch is applied to the wrong git tree, please drop us a note to help
> improve the system. BTW, we also suggest to use '--base' option to specify the
> base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
> 
> url:    https://github.com/0day-ci/linux/commits/cl-rock-chips-
> com/wait_task_inactive-spend-too-much-time-on-system-startup/20200305-
> 201639
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
> a0f03b617c3b2644d3d47bf7d9e60aed01bd5b10
> config: s390-zfcpdump_defconfig (attached as .config)
> compiler: s390-linux-gcc (GCC) 7.5.0
> reproduce:
>         wget https://raw.githubusercontent.com/intel/lkp-
> tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # save the attached .config to linux build tree
>         GCC_VERSION=7.5.0 make.cross ARCH=s390
> 
> If you fix the issue, kindly add following tag
> Reported-by: kbuild test robot <lkp@intel.com>
> 
> All errors (new ones prefixed by >>):
> 
>    In file included from include/linux/kvm_host.h:12:0,
>                     from arch/s390/kernel/asm-offsets.c:11:
>    include/linux/sched.h: In function 'set_tsk_going_to_sched':
> >> include/linux/sched.h:1776:27: error: 'TIF_GOING_TO_SCHED' undeclared
> (first use in this function); did you mean 'TIF_SINGLE_STEP'?
>      set_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
>                               ^~~~~~~~~~~~~~~~~~
>                               TIF_SINGLE_STEP
>    include/linux/sched.h:1776:27: note: each undeclared identifier is reported only
> once for each function it appears in
>    include/linux/sched.h: In function 'clear_tsk_going_to_sched':
>    include/linux/sched.h:1781:29: error: 'TIF_GOING_TO_SCHED' undeclared (first
> use in this function); did you mean 'TIF_SINGLE_STEP'?
>      clear_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
>                                 ^~~~~~~~~~~~~~~~~~
>                                 TIF_SINGLE_STEP
>    In file included from include/linux/kernel.h:11:0,
>                     from include/linux/list.h:9,
>                     from include/linux/preempt.h:11,
>                     from include/linux/hardirq.h:5,
>                     from include/linux/kvm_host.h:7,
>                     from arch/s390/kernel/asm-offsets.c:11:
>    include/linux/sched.h: In function 'test_tsk_going_to_sched':
>    include/linux/sched.h:1786:44: error: 'TIF_GOING_TO_SCHED' undeclared (first
> use in this function); did you mean 'TIF_SINGLE_STEP'?
>      return unlikely(test_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED));
>                                                ^
>    include/linux/compiler.h:78:42: note: in definition of macro 'unlikely'
>     # define unlikely(x) __builtin_expect(!!(x), 0)
>                                              ^
>    make[2]: *** [scripts/Makefile.build:101: arch/s390/kernel/asm-offsets.s] Error 1
>    make[2]: Target '__build' not remade because of errors.
>    make[1]: *** [Makefile:1112: prepare0] Error 2
>    make[1]: Target 'prepare' not remade because of errors.
>    make: *** [Makefile:179: sub-make] Error 2
>    142 real  47 user  83 sys  92.07% cpu 	make prepare
> 
> vim +1776 include/linux/sched.h
> 
>   1773
>   1774	static inline void set_tsk_going_to_sched(struct task_struct *tsk)
>   1775	{
> > 1776		set_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
>   1777	}
>   1778
> 
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Kees Cook March 5, 2020, 5:30 p.m. UTC | #9
On Thu, Mar 05, 2020 at 10:58:03AM +0100, Peter Zijlstra wrote:
> On Thu, Mar 05, 2020 at 04:16:11PM +0800, cl@rock-chips.com wrote:
> > From: Liang Chen <cl@rock-chips.com>
> > 
> > when we create a kthread with ktrhead_create_on_cpu(),the child thread
> > entry is ktread.c:ktrhead() which will be preempted by the parent after
> > call complete(done) while schedule() is not called yet,then the parent
> > will call wait_task_inactive(child) but the child is still on the runqueue,
> > so the parent will schedule_hrtimeout() for 1 jiffy,it will waste a lot of
> > time,especially on startup.
> > 
> >   parent                             child
> > ktrhead_create_on_cpu()
> >   wait_fo_completion(&done) -----> ktread.c:ktrhead()
> >                              |----- complete(done);--wakeup and preempted by parent
> >  kthread_bind() <------------|  |-> schedule();--dequeue here
> >   wait_task_inactive(child)     |
> >    schedule_hrtimeout(1 jiffy) -|
> > 
> > So we hope the child just wakeup parent but not preempted by parent, and the
> > child is going to call schedule() soon,then the parent will not call
> > schedule_hrtimeout(1 jiffy) as the child is already dequeue.
> > 
> > The same issue for ktrhead_park()&&kthread_parkme().
> > This patch can save 120ms on rk312x startup with CONFIG_HZ=300.
> 
> > diff --git a/kernel/kthread.c b/kernel/kthread.c
> > index b262f47046ca..8a4e4c9cdc22 100644
> > --- a/kernel/kthread.c
> > +++ b/kernel/kthread.c
> > @@ -199,8 +199,10 @@ static void __kthread_parkme(struct kthread *self)
> >  		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
> >  			break;
> >  
> > +		set_tsk_going_to_sched(current);
> >  		complete(&self->parked);
> >  		schedule();
> > +		clear_tsk_going_to_sched(current);
> >  	}
> >  	__set_current_state(TASK_RUNNING);
> >  }
> > @@ -245,8 +247,10 @@ static int kthread(void *_create)
> >  	/* OK, tell user we're spawned, wait for stop or wakeup */
> >  	__set_current_state(TASK_UNINTERRUPTIBLE);
> >  	create->result = current;
> > +	set_tsk_going_to_sched(current);
> >  	complete(done);
> >  	schedule();
> > +	clear_tsk_going_to_sched(current);
> >  
> >  	ret = -EINTR;
> >  	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
> 
> Were you looking for this? I think it does the same without having
> fallen from the ugly tree...
> 
> diff --git a/kernel/kthread.c b/kernel/kthread.c
> index b262f47046ca..62699ff414f4 100644
> --- a/kernel/kthread.c
> +++ b/kernel/kthread.c
> @@ -199,8 +199,10 @@ static void __kthread_parkme(struct kthread *self)
>  		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
>  			break;
>  
> +		preempt_disable()
>  		complete(&self->parked);
> -		schedule();
> +		schedule_preempt_disabled();
> +		preempt_enable();
>  	}
>  	__set_current_state(TASK_RUNNING);
>  }
> @@ -245,8 +247,10 @@ static int kthread(void *_create)
>  	/* OK, tell user we're spawned, wait for stop or wakeup */
>  	__set_current_state(TASK_UNINTERRUPTIBLE);
>  	create->result = current;
> +	preempt_disable()
>  	complete(done);
> -	schedule();
> +	schedule_preempt_disabled();
> +	preempt_enable();
>  
>  	ret = -EINTR;
>  	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {

That's much nicer, yes! :) As I said, I don't know much about the
scheduler. ;)
diff mbox series

Patch

diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 0d0d5178e2c3..51802991ba1f 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -145,6 +145,7 @@  extern int vfp_restore_user_hwstate(struct user_vfp *,
 #define TIF_USING_IWMMXT	17
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	20
+#define TIF_GOING_TO_SCHED	27	/* task is going to call schedule() */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index f0cec4160136..332786f11dc3 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -78,6 +78,7 @@  void arch_release_task_struct(struct task_struct *tsk);
 #define TIF_SVE_VL_INHERIT	24	/* Inherit sve_vl_onexec across exec */
 #define TIF_SSBD		25	/* Wants SSB mitigation */
 #define TIF_TAGGED_ADDR		26	/* Allow tagged user addresses */
+#define TIF_GOING_TO_SCHED	27	/* task is going to call schedule() */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 04278493bf15..cb9058d2cf0b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1768,6 +1768,21 @@  static inline int test_tsk_need_resched(struct task_struct *tsk)
 	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
 }
 
+static inline void set_tsk_going_to_sched(struct task_struct *tsk)
+{
+	set_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
+}
+
+static inline void clear_tsk_going_to_sched(struct task_struct *tsk)
+{
+	clear_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
+}
+
+static inline int test_tsk_going_to_sched(struct task_struct *tsk)
+{
+	return unlikely(test_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED));
+}
+
 /*
  * cond_resched() and cond_resched_lock(): latency reduction via
  * explicit rescheduling in places that are safe. The return
diff --git a/kernel/kthread.c b/kernel/kthread.c
index b262f47046ca..8a4e4c9cdc22 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -199,8 +199,10 @@  static void __kthread_parkme(struct kthread *self)
 		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
 			break;
 
+		set_tsk_going_to_sched(current);
 		complete(&self->parked);
 		schedule();
+		clear_tsk_going_to_sched(current);
 	}
 	__set_current_state(TASK_RUNNING);
 }
@@ -245,8 +247,10 @@  static int kthread(void *_create)
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
 	create->result = current;
+	set_tsk_going_to_sched(current);
 	complete(done);
 	schedule();
+	clear_tsk_going_to_sched(current);
 
 	ret = -EINTR;
 	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3c8a379c357e..28a308743bf8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4330,6 +4330,8 @@  entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 			hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
 		return;
 #endif
+	if (test_tsk_going_to_sched(rq_of(cfs_rq)->curr))
+		return;
 
 	if (cfs_rq->nr_running > 1)
 		check_preempt_tick(cfs_rq, curr);
@@ -6633,6 +6635,8 @@  static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	 */
 	if (test_tsk_need_resched(curr))
 		return;
+	if (test_tsk_going_to_sched(curr))
+		return;
 
 	/* Idle tasks are by definition preempted by non-idle tasks. */
 	if (unlikely(task_has_idle_policy(curr)) &&