diff mbox series

[linux-next] mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE

Message ID 20221219052128.18190-1-zhouzhouyi@gmail.com (mailing list archive)
State New, archived
Headers show
Series [linux-next] mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE | expand

Commit Message

Zhouyi Zhou Dec. 19, 2022, 5:21 a.m. UTC
mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE to fix concurrency bug
reported by KCSAN.

Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com>
---
During the rcutorture test on linux-next,
./tools/testing/selftests/rcutorture/bin/torture.sh --do-kcsan  --kcsan-kmake-arg "CC=clang-12"
following KCSAN BUG is reported:
[   35.397089] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event^M
[   35.400593] ^M
[   35.401377] write to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 3:^M
[   35.405325]  tick_nohz_idle_stop_tick+0x14c/0x3e0^M
[   35.407162]  do_idle+0xf3/0x2a0^M
[   35.408016]  cpu_startup_entry+0x15/0x20^M
[   35.409084]  start_secondary+0x8f/0x90^M
[   35.410207]  secondary_startup_64_no_verify+0xe1/0xeb^M
[   35.411607] ^M
[   35.412042] no locks held by swapper/3/0.^M
[   35.413172] irq event stamp: 53048^M
[   35.414175] hardirqs last  enabled at (53047): [<ffffffffb41f8404>] tick_nohz_idle_enter+0x104/0x140^M
[   35.416681] hardirqs last disabled at (53048): [<ffffffffb41229f1>] do_idle+0x91/0x2a0^M
[   35.418988] softirqs last  enabled at (53038): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M
[   35.421347] softirqs last disabled at (53029): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M
[   35.423685] ^M
[   35.424119] read to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 0:^M
[   35.425870]  tick_nohz_next_event+0x233/0x2b0^M
[   35.427119]  tick_nohz_idle_stop_tick+0x8f/0x3e0^M
[   35.428386]  do_idle+0xf3/0x2a0^M
[   35.429265]  cpu_startup_entry+0x15/0x20^M
[   35.430429]  rest_init+0x20c/0x210^M
[   35.431382]  arch_call_rest_init+0xe/0x10^M
[   35.432508]  start_kernel+0x544/0x600^M
[   35.433519]  secondary_startup_64_no_verify+0xe1/0xeb^M

fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE

Thanks
Zhouyi
--
 kernel/time/tick-common.c | 16 ++++++++--------
 kernel/time/tick-sched.c  | 28 +++++++++++++++-------------
 2 files changed, 23 insertions(+), 21 deletions(-)

Comments

Paul E. McKenney Jan. 6, 2023, 2:33 p.m. UTC | #1
On Mon, Dec 19, 2022 at 01:21:28PM +0800, Zhouyi Zhou wrote:
> mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE to fix concurrency bug
> reported by KCSAN.
> 
> Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com>

This does indeed get rid of some KCSAN complaints, thank you!!!

Tested-by: Paul E. McKenney <paulmck@kernel.org>

> ---
> During the rcutorture test on linux-next,
> ./tools/testing/selftests/rcutorture/bin/torture.sh --do-kcsan  --kcsan-kmake-arg "CC=clang-12"
> following KCSAN BUG is reported:
> [   35.397089] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event^M
> [   35.400593] ^M
> [   35.401377] write to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 3:^M
> [   35.405325]  tick_nohz_idle_stop_tick+0x14c/0x3e0^M
> [   35.407162]  do_idle+0xf3/0x2a0^M
> [   35.408016]  cpu_startup_entry+0x15/0x20^M
> [   35.409084]  start_secondary+0x8f/0x90^M
> [   35.410207]  secondary_startup_64_no_verify+0xe1/0xeb^M
> [   35.411607] ^M
> [   35.412042] no locks held by swapper/3/0.^M
> [   35.413172] irq event stamp: 53048^M
> [   35.414175] hardirqs last  enabled at (53047): [<ffffffffb41f8404>] tick_nohz_idle_enter+0x104/0x140^M
> [   35.416681] hardirqs last disabled at (53048): [<ffffffffb41229f1>] do_idle+0x91/0x2a0^M
> [   35.418988] softirqs last  enabled at (53038): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M
> [   35.421347] softirqs last disabled at (53029): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M
> [   35.423685] ^M
> [   35.424119] read to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 0:^M
> [   35.425870]  tick_nohz_next_event+0x233/0x2b0^M
> [   35.427119]  tick_nohz_idle_stop_tick+0x8f/0x3e0^M
> [   35.428386]  do_idle+0xf3/0x2a0^M
> [   35.429265]  cpu_startup_entry+0x15/0x20^M
> [   35.430429]  rest_init+0x20c/0x210^M
> [   35.431382]  arch_call_rest_init+0xe/0x10^M
> [   35.432508]  start_kernel+0x544/0x600^M
> [   35.433519]  secondary_startup_64_no_verify+0xe1/0xeb^M
> 
> fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE
> 
> Thanks
> Zhouyi
> --
>  kernel/time/tick-common.c | 16 ++++++++--------
>  kernel/time/tick-sched.c  | 28 +++++++++++++++-------------
>  2 files changed, 23 insertions(+), 21 deletions(-)
> 
> diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
> index 46789356f856..a7753465a0be 100644
> --- a/kernel/time/tick-common.c
> +++ b/kernel/time/tick-common.c
> @@ -84,7 +84,7 @@ int tick_is_oneshot_available(void)
>   */
>  static void tick_periodic(int cpu)
>  {
> -	if (tick_do_timer_cpu == cpu) {
> +	if (READ_ONCE(tick_do_timer_cpu) == cpu) {
>  		raw_spin_lock(&jiffies_lock);
>  		write_seqcount_begin(&jiffies_seq);
>  
> @@ -184,9 +184,9 @@ static void giveup_do_timer(void *info)
>  {
>  	int cpu = *(unsigned int *)info;
>  
> -	WARN_ON(tick_do_timer_cpu != smp_processor_id());
> +	WARN_ON(READ_ONCE(tick_do_timer_cpu) != smp_processor_id());
>  
> -	tick_do_timer_cpu = cpu;
> +	WRITE_ONCE(tick_do_timer_cpu, cpu);
>  }
>  
>  static void tick_take_do_timer_from_boot(void)
> @@ -217,8 +217,8 @@ static void tick_setup_device(struct tick_device *td,
>  		 * If no cpu took the do_timer update, assign it to
>  		 * this cpu:
>  		 */
> -		if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
> -			tick_do_timer_cpu = cpu;
> +		if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
> +			WRITE_ONCE(tick_do_timer_cpu, cpu);
>  
>  			tick_next_period = ktime_get();
>  #ifdef CONFIG_NO_HZ_FULL
> @@ -235,7 +235,7 @@ static void tick_setup_device(struct tick_device *td,
>  						!tick_nohz_full_cpu(cpu)) {
>  			tick_take_do_timer_from_boot();
>  			tick_do_timer_boot_cpu = -1;
> -			WARN_ON(tick_do_timer_cpu != cpu);
> +			WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu);
>  #endif
>  		}
>  
> @@ -407,8 +407,8 @@ EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);
>   */
>  void tick_handover_do_timer(void)
>  {
> -	if (tick_do_timer_cpu == smp_processor_id())
> -		tick_do_timer_cpu = cpumask_first(cpu_online_mask);
> +	if (READ_ONCE(tick_do_timer_cpu) == smp_processor_id())
> +		WRITE_ONCE(tick_do_timer_cpu, cpumask_first(cpu_online_mask));
>  }
>  
>  /*
> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> index b0e3c9205946..2dac94b3187f 100644
> --- a/kernel/time/tick-sched.c
> +++ b/kernel/time/tick-sched.c
> @@ -186,16 +186,16 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
>  	 * If nohz_full is enabled, this should not happen because the
>  	 * tick_do_timer_cpu never relinquishes.
>  	 */
> -	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
> +	if (unlikely(READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_NONE)) {
>  #ifdef CONFIG_NO_HZ_FULL
>  		WARN_ON_ONCE(tick_nohz_full_running);
>  #endif
> -		tick_do_timer_cpu = cpu;
> +		WRITE_ONCE(tick_do_timer_cpu, cpu);
>  	}
>  #endif
>  
>  	/* Check, if the jiffies need an update */
> -	if (tick_do_timer_cpu == cpu)
> +	if (READ_ONCE(tick_do_timer_cpu) == cpu)
>  		tick_do_update_jiffies64(now);
>  
>  	/*
> @@ -534,7 +534,7 @@ static int tick_nohz_cpu_down(unsigned int cpu)
>  	 * timers, workqueues, timekeeping, ...) on behalf of full dynticks
>  	 * CPUs. It must remain online when nohz full is enabled.
>  	 */
> -	if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
> +	if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu)
>  		return -EBUSY;
>  	return 0;
>  }
> @@ -849,8 +849,9 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
>  	 * Otherwise we can sleep as long as we want.
>  	 */
>  	delta = timekeeping_max_deferment();
> -	if (cpu != tick_do_timer_cpu &&
> -	    (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
> +	if (cpu != READ_ONCE(tick_do_timer_cpu) &&
> +	    (READ_ONCE(tick_do_timer_cpu) != TICK_DO_TIMER_NONE
> +	     || !ts->do_timer_last))
>  		delta = KTIME_MAX;
>  
>  	/* Calculate the next expiry time */
> @@ -883,10 +884,10 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
>  	 * do_timer() never invoked. Keep track of the fact that it
>  	 * was the one which had the do_timer() duty last.
>  	 */
> -	if (cpu == tick_do_timer_cpu) {
> -		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
> +	if (cpu == READ_ONCE(tick_do_timer_cpu)) {
> +		WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);
>  		ts->do_timer_last = 1;
> -	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
> +	} else if (READ_ONCE(tick_do_timer_cpu) != TICK_DO_TIMER_NONE) {
>  		ts->do_timer_last = 0;
>  	}
>  
> @@ -1049,8 +1050,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
>  	 * invoked.
>  	 */
>  	if (unlikely(!cpu_online(cpu))) {
> -		if (cpu == tick_do_timer_cpu)
> -			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
> +		if (cpu == READ_ONCE(tick_do_timer_cpu))
> +			WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);
>  		/*
>  		 * Make sure the CPU doesn't get fooled by obsolete tick
>  		 * deadline if it comes back online later.
> @@ -1073,11 +1074,12 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
>  		 * Keep the tick alive to guarantee timekeeping progression
>  		 * if there are full dynticks CPUs around
>  		 */
> -		if (tick_do_timer_cpu == cpu)
> +		if (READ_ONCE(tick_do_timer_cpu) == cpu)
>  			return false;
>  
>  		/* Should not happen for nohz-full */
> -		if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
> +		if (WARN_ON_ONCE(READ_ONCE(tick_do_timer_cpu)
> +				 == TICK_DO_TIMER_NONE))
>  			return false;
>  	}
>  
> -- 
> 2.34.1
>
Frederic Weisbecker Jan. 9, 2023, 12:51 p.m. UTC | #2
On Mon, Dec 19, 2022 at 01:21:28PM +0800, Zhouyi Zhou wrote:
> mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE to fix concurrency bug
> reported by KCSAN.
> 
> Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com>
> ---
> During the rcutorture test on linux-next,
> ./tools/testing/selftests/rcutorture/bin/torture.sh --do-kcsan  --kcsan-kmake-arg "CC=clang-12"
> following KCSAN BUG is reported:
> [   35.397089] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event^M
> [   35.400593] ^M
> [   35.401377] write to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 3:^M
> [   35.405325]  tick_nohz_idle_stop_tick+0x14c/0x3e0^M
> [   35.407162]  do_idle+0xf3/0x2a0^M
> [   35.408016]  cpu_startup_entry+0x15/0x20^M
> [   35.409084]  start_secondary+0x8f/0x90^M
> [   35.410207]  secondary_startup_64_no_verify+0xe1/0xeb^M
> [   35.411607] ^M
> [   35.412042] no locks held by swapper/3/0.^M
> [   35.413172] irq event stamp: 53048^M
> [   35.414175] hardirqs last  enabled at (53047): [<ffffffffb41f8404>] tick_nohz_idle_enter+0x104/0x140^M
> [   35.416681] hardirqs last disabled at (53048): [<ffffffffb41229f1>] do_idle+0x91/0x2a0^M
> [   35.418988] softirqs last  enabled at (53038): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M
> [   35.421347] softirqs last disabled at (53029): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M
> [   35.423685] ^M
> [   35.424119] read to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 0:^M
> [   35.425870]  tick_nohz_next_event+0x233/0x2b0^M
> [   35.427119]  tick_nohz_idle_stop_tick+0x8f/0x3e0^M
> [   35.428386]  do_idle+0xf3/0x2a0^M
> [   35.429265]  cpu_startup_entry+0x15/0x20^M
> [   35.430429]  rest_init+0x20c/0x210^M
> [   35.431382]  arch_call_rest_init+0xe/0x10^M
> [   35.432508]  start_kernel+0x544/0x600^M
> [   35.433519]  secondary_startup_64_no_verify+0xe1/0xeb^M
> 
> fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE

This has been discussed before with passion:

http://archive.lwn.net:8080/linux-kernel/1C65422C-FFA4-4651-893B-300FAF9C49DE@lca.pw/T/

To me data_race() would be more appropriate but that would need a changelog with
proper analysis of the tick_do_timer_cpu state machine.

One more thing on my TODO list, but feel free to beat me at it :-)

Thanks.
Zhouyi Zhou Jan. 9, 2023, 1:13 p.m. UTC | #3
On Mon, Jan 9, 2023 at 8:51 PM Frederic Weisbecker <frederic@kernel.org> wrote:
>
> On Mon, Dec 19, 2022 at 01:21:28PM +0800, Zhouyi Zhou wrote:
> > mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE to fix concurrency bug
> > reported by KCSAN.
> >
> > Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com>
> > ---
> > During the rcutorture test on linux-next,
> > ./tools/testing/selftests/rcutorture/bin/torture.sh --do-kcsan  --kcsan-kmake-arg "CC=clang-12"
> > following KCSAN BUG is reported:
> > [   35.397089] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event^M
> > [   35.400593] ^M
> > [   35.401377] write to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 3:^M
> > [   35.405325]  tick_nohz_idle_stop_tick+0x14c/0x3e0^M
> > [   35.407162]  do_idle+0xf3/0x2a0^M
> > [   35.408016]  cpu_startup_entry+0x15/0x20^M
> > [   35.409084]  start_secondary+0x8f/0x90^M
> > [   35.410207]  secondary_startup_64_no_verify+0xe1/0xeb^M
> > [   35.411607] ^M
> > [   35.412042] no locks held by swapper/3/0.^M
> > [   35.413172] irq event stamp: 53048^M
> > [   35.414175] hardirqs last  enabled at (53047): [<ffffffffb41f8404>] tick_nohz_idle_enter+0x104/0x140^M
> > [   35.416681] hardirqs last disabled at (53048): [<ffffffffb41229f1>] do_idle+0x91/0x2a0^M
> > [   35.418988] softirqs last  enabled at (53038): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M
> > [   35.421347] softirqs last disabled at (53029): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M
> > [   35.423685] ^M
> > [   35.424119] read to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 0:^M
> > [   35.425870]  tick_nohz_next_event+0x233/0x2b0^M
> > [   35.427119]  tick_nohz_idle_stop_tick+0x8f/0x3e0^M
> > [   35.428386]  do_idle+0xf3/0x2a0^M
> > [   35.429265]  cpu_startup_entry+0x15/0x20^M
> > [   35.430429]  rest_init+0x20c/0x210^M
> > [   35.431382]  arch_call_rest_init+0xe/0x10^M
> > [   35.432508]  start_kernel+0x544/0x600^M
> > [   35.433519]  secondary_startup_64_no_verify+0xe1/0xeb^M
> >
> > fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE
>
> This has been discussed before with passion:
>
> http://archive.lwn.net:8080/linux-kernel/1C65422C-FFA4-4651-893B-300FAF9C49DE@lca.pw/T/
>
Thank you for your guidance, yes, the discussion is very passionate
;-) and I learned a lot from the above thread ;-)
> To me data_race() would be more appropriate but that would need a changelog with
> proper analysis of the tick_do_timer_cpu state machine.
I Agree, and also I learned to do more rigorous technology research.
>
> One more thing on my TODO list, but feel free to beat me at it :-)
Thank you for your concern ;-)

Thanks
Zhouyi
>
> Thanks.
Paul E. McKenney Jan. 9, 2023, 3:25 p.m. UTC | #4
On Mon, Jan 09, 2023 at 01:51:29PM +0100, Frederic Weisbecker wrote:
> On Mon, Dec 19, 2022 at 01:21:28PM +0800, Zhouyi Zhou wrote:
> > mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE to fix concurrency bug
> > reported by KCSAN.
> > 
> > Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com>
> > ---
> > During the rcutorture test on linux-next,
> > ./tools/testing/selftests/rcutorture/bin/torture.sh --do-kcsan  --kcsan-kmake-arg "CC=clang-12"
> > following KCSAN BUG is reported:
> > [   35.397089] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event^M
> > [   35.400593] ^M
> > [   35.401377] write to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 3:^M
> > [   35.405325]  tick_nohz_idle_stop_tick+0x14c/0x3e0^M
> > [   35.407162]  do_idle+0xf3/0x2a0^M
> > [   35.408016]  cpu_startup_entry+0x15/0x20^M
> > [   35.409084]  start_secondary+0x8f/0x90^M
> > [   35.410207]  secondary_startup_64_no_verify+0xe1/0xeb^M
> > [   35.411607] ^M
> > [   35.412042] no locks held by swapper/3/0.^M
> > [   35.413172] irq event stamp: 53048^M
> > [   35.414175] hardirqs last  enabled at (53047): [<ffffffffb41f8404>] tick_nohz_idle_enter+0x104/0x140^M
> > [   35.416681] hardirqs last disabled at (53048): [<ffffffffb41229f1>] do_idle+0x91/0x2a0^M
> > [   35.418988] softirqs last  enabled at (53038): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M
> > [   35.421347] softirqs last disabled at (53029): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M
> > [   35.423685] ^M
> > [   35.424119] read to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 0:^M
> > [   35.425870]  tick_nohz_next_event+0x233/0x2b0^M
> > [   35.427119]  tick_nohz_idle_stop_tick+0x8f/0x3e0^M
> > [   35.428386]  do_idle+0xf3/0x2a0^M
> > [   35.429265]  cpu_startup_entry+0x15/0x20^M
> > [   35.430429]  rest_init+0x20c/0x210^M
> > [   35.431382]  arch_call_rest_init+0xe/0x10^M
> > [   35.432508]  start_kernel+0x544/0x600^M
> > [   35.433519]  secondary_startup_64_no_verify+0xe1/0xeb^M
> > 
> > fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE
> 
> This has been discussed before with passion:
> 
> http://archive.lwn.net:8080/linux-kernel/1C65422C-FFA4-4651-893B-300FAF9C49DE@lca.pw/T/
> 
> To me data_race() would be more appropriate but that would need a changelog with
> proper analysis of the tick_do_timer_cpu state machine.

Please also an analysis of why the compiler cannot do any destructive
optimizations in this case.  Maybe also comments.

> One more thing on my TODO list, but feel free to beat me at it :-)

I know that feeling!  ;-)

							Thanx, Paul
Zhouyi Zhou Jan. 9, 2023, 3:59 p.m. UTC | #5
On Mon, Jan 9, 2023 at 11:25 PM Paul E. McKenney <paulmck@kernel.org> wrote:
>
> On Mon, Jan 09, 2023 at 01:51:29PM +0100, Frederic Weisbecker wrote:
> > On Mon, Dec 19, 2022 at 01:21:28PM +0800, Zhouyi Zhou wrote:
> > > mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE to fix concurrency bug
> > > reported by KCSAN.
> > >
> > > Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com>
> > > ---
> > > During the rcutorture test on linux-next,
> > > ./tools/testing/selftests/rcutorture/bin/torture.sh --do-kcsan  --kcsan-kmake-arg "CC=clang-12"
> > > following KCSAN BUG is reported:
> > > [   35.397089] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event^M
> > > [   35.400593] ^M
> > > [   35.401377] write to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 3:^M
> > > [   35.405325]  tick_nohz_idle_stop_tick+0x14c/0x3e0^M
> > > [   35.407162]  do_idle+0xf3/0x2a0^M
> > > [   35.408016]  cpu_startup_entry+0x15/0x20^M
> > > [   35.409084]  start_secondary+0x8f/0x90^M
> > > [   35.410207]  secondary_startup_64_no_verify+0xe1/0xeb^M
> > > [   35.411607] ^M
> > > [   35.412042] no locks held by swapper/3/0.^M
> > > [   35.413172] irq event stamp: 53048^M
> > > [   35.414175] hardirqs last  enabled at (53047): [<ffffffffb41f8404>] tick_nohz_idle_enter+0x104/0x140^M
> > > [   35.416681] hardirqs last disabled at (53048): [<ffffffffb41229f1>] do_idle+0x91/0x2a0^M
> > > [   35.418988] softirqs last  enabled at (53038): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M
> > > [   35.421347] softirqs last disabled at (53029): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M
> > > [   35.423685] ^M
> > > [   35.424119] read to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 0:^M
> > > [   35.425870]  tick_nohz_next_event+0x233/0x2b0^M
> > > [   35.427119]  tick_nohz_idle_stop_tick+0x8f/0x3e0^M
> > > [   35.428386]  do_idle+0xf3/0x2a0^M
> > > [   35.429265]  cpu_startup_entry+0x15/0x20^M
> > > [   35.430429]  rest_init+0x20c/0x210^M
> > > [   35.431382]  arch_call_rest_init+0xe/0x10^M
> > > [   35.432508]  start_kernel+0x544/0x600^M
> > > [   35.433519]  secondary_startup_64_no_verify+0xe1/0xeb^M
> > >
> > > fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE
> >
> > This has been discussed before with passion:
> >
> > http://archive.lwn.net:8080/linux-kernel/1C65422C-FFA4-4651-893B-300FAF9C49DE@lca.pw/T/
> >
> > To me data_race() would be more appropriate but that would need a changelog with
> > proper analysis of the tick_do_timer_cpu state machine.
>
> Please also an analysis of why the compiler cannot do any destructive
> optimizations in this case.  Maybe also comments.
I want to try the analysis above, as a newbie I have taught myself
LLVM for 3 years in my spare time ;-)
>
> > One more thing on my TODO list, but feel free to beat me at it :-)
Please take your time ;-)   Please don't look my next possible email
as a reminder ;-)
>
> I know that feeling!  ;-)
>
Thanx, Zhouyi
>                                                         Thanx, Paul
Thomas Gleixner Jan. 11, 2023, 9:02 a.m. UTC | #6
On Mon, Jan 09 2023 at 13:51, Frederic Weisbecker wrote:
> On Mon, Dec 19, 2022 at 01:21:28PM +0800, Zhouyi Zhou wrote:
>> fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE
>
> This has been discussed before with passion:
>
> http://archive.lwn.net:8080/linux-kernel/1C65422C-FFA4-4651-893B-300FAF9C49DE@lca.pw/T/
>
> To me data_race() would be more appropriate but that would need a changelog with
> proper analysis of the tick_do_timer_cpu state machine.

Proper analysis and comments in the code are required independent of
READ/WRITE_ONCE() or data_race().

Thanks,

        tglx
Zhouyi Zhou Jan. 23, 2023, 12:44 a.m. UTC | #7
On Mon, Jan 9, 2023 at 11:59 PM Zhouyi Zhou <zhouzhouyi@gmail.com> wrote:
>
> On Mon, Jan 9, 2023 at 11:25 PM Paul E. McKenney <paulmck@kernel.org> wrote:
> >
> > On Mon, Jan 09, 2023 at 01:51:29PM +0100, Frederic Weisbecker wrote:
> > > On Mon, Dec 19, 2022 at 01:21:28PM +0800, Zhouyi Zhou wrote:
> > > > mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE to fix concurrency bug
> > > > reported by KCSAN.
> > > >
> > > > Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com>
> > > > ---
> > > > During the rcutorture test on linux-next,
> > > > ./tools/testing/selftests/rcutorture/bin/torture.sh --do-kcsan  --kcsan-kmake-arg "CC=clang-12"
> > > > following KCSAN BUG is reported:
> > > > [   35.397089] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event^M
> > > > [   35.400593] ^M
> > > > [   35.401377] write to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 3:^M
> > > > [   35.405325]  tick_nohz_idle_stop_tick+0x14c/0x3e0^M
> > > > [   35.407162]  do_idle+0xf3/0x2a0^M
> > > > [   35.408016]  cpu_startup_entry+0x15/0x20^M
> > > > [   35.409084]  start_secondary+0x8f/0x90^M
> > > > [   35.410207]  secondary_startup_64_no_verify+0xe1/0xeb^M
> > > > [   35.411607] ^M
> > > > [   35.412042] no locks held by swapper/3/0.^M
> > > > [   35.413172] irq event stamp: 53048^M
> > > > [   35.414175] hardirqs last  enabled at (53047): [<ffffffffb41f8404>] tick_nohz_idle_enter+0x104/0x140^M
> > > > [   35.416681] hardirqs last disabled at (53048): [<ffffffffb41229f1>] do_idle+0x91/0x2a0^M
> > > > [   35.418988] softirqs last  enabled at (53038): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M
> > > > [   35.421347] softirqs last disabled at (53029): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M
> > > > [   35.423685] ^M
> > > > [   35.424119] read to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 0:^M
> > > > [   35.425870]  tick_nohz_next_event+0x233/0x2b0^M
> > > > [   35.427119]  tick_nohz_idle_stop_tick+0x8f/0x3e0^M
> > > > [   35.428386]  do_idle+0xf3/0x2a0^M
> > > > [   35.429265]  cpu_startup_entry+0x15/0x20^M
> > > > [   35.430429]  rest_init+0x20c/0x210^M
> > > > [   35.431382]  arch_call_rest_init+0xe/0x10^M
> > > > [   35.432508]  start_kernel+0x544/0x600^M
> > > > [   35.433519]  secondary_startup_64_no_verify+0xe1/0xeb^M
> > > >
> > > > fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE
> > >
> > > This has been discussed before with passion:
> > >
> > > http://archive.lwn.net:8080/linux-kernel/1C65422C-FFA4-4651-893B-300FAF9C49DE@lca.pw/T/
> > >
> > > To me data_race() would be more appropriate but that would need a changelog with
> > > proper analysis of the tick_do_timer_cpu state machine.
> >
> > Please also an analysis of why the compiler cannot do any destructive
> > optimizations in this case.  Maybe also comments.
> I want to try the analysis above, as a newbie I have taught myself
> LLVM for 3 years in my spare time ;-)
I am continuing to study LLVM/Clang's optimization behaviour on
tick_do_timer_cpu, it is very interesting and fascinating, but as a
newbie, I still need 3-4 week to go, could you wait for me a little
while? ;-)

Many Thanks
Zhouyi
> >
> > > One more thing on my TODO list, but feel free to beat me at it :-)
> Please take your time ;-)   Please don't look my next possible email
> as a reminder ;-)
> >
> > I know that feeling!  ;-)
> >
> Thanx, Zhouyi
> >                                                         Thanx, Paul
Paul E. McKenney Jan. 23, 2023, 4:54 p.m. UTC | #8
On Mon, Jan 23, 2023 at 08:44:25AM +0800, Zhouyi Zhou wrote:
> On Mon, Jan 9, 2023 at 11:59 PM Zhouyi Zhou <zhouzhouyi@gmail.com> wrote:
> >
> > On Mon, Jan 9, 2023 at 11:25 PM Paul E. McKenney <paulmck@kernel.org> wrote:
> > >
> > > On Mon, Jan 09, 2023 at 01:51:29PM +0100, Frederic Weisbecker wrote:
> > > > On Mon, Dec 19, 2022 at 01:21:28PM +0800, Zhouyi Zhou wrote:
> > > > > mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE to fix concurrency bug
> > > > > reported by KCSAN.
> > > > >
> > > > > Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com>
> > > > > ---
> > > > > During the rcutorture test on linux-next,
> > > > > ./tools/testing/selftests/rcutorture/bin/torture.sh --do-kcsan  --kcsan-kmake-arg "CC=clang-12"
> > > > > following KCSAN BUG is reported:
> > > > > [   35.397089] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event^M
> > > > > [   35.400593] ^M
> > > > > [   35.401377] write to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 3:^M
> > > > > [   35.405325]  tick_nohz_idle_stop_tick+0x14c/0x3e0^M
> > > > > [   35.407162]  do_idle+0xf3/0x2a0^M
> > > > > [   35.408016]  cpu_startup_entry+0x15/0x20^M
> > > > > [   35.409084]  start_secondary+0x8f/0x90^M
> > > > > [   35.410207]  secondary_startup_64_no_verify+0xe1/0xeb^M
> > > > > [   35.411607] ^M
> > > > > [   35.412042] no locks held by swapper/3/0.^M
> > > > > [   35.413172] irq event stamp: 53048^M
> > > > > [   35.414175] hardirqs last  enabled at (53047): [<ffffffffb41f8404>] tick_nohz_idle_enter+0x104/0x140^M
> > > > > [   35.416681] hardirqs last disabled at (53048): [<ffffffffb41229f1>] do_idle+0x91/0x2a0^M
> > > > > [   35.418988] softirqs last  enabled at (53038): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M
> > > > > [   35.421347] softirqs last disabled at (53029): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M
> > > > > [   35.423685] ^M
> > > > > [   35.424119] read to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 0:^M
> > > > > [   35.425870]  tick_nohz_next_event+0x233/0x2b0^M
> > > > > [   35.427119]  tick_nohz_idle_stop_tick+0x8f/0x3e0^M
> > > > > [   35.428386]  do_idle+0xf3/0x2a0^M
> > > > > [   35.429265]  cpu_startup_entry+0x15/0x20^M
> > > > > [   35.430429]  rest_init+0x20c/0x210^M
> > > > > [   35.431382]  arch_call_rest_init+0xe/0x10^M
> > > > > [   35.432508]  start_kernel+0x544/0x600^M
> > > > > [   35.433519]  secondary_startup_64_no_verify+0xe1/0xeb^M
> > > > >
> > > > > fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE
> > > >
> > > > This has been discussed before with passion:
> > > >
> > > > http://archive.lwn.net:8080/linux-kernel/1C65422C-FFA4-4651-893B-300FAF9C49DE@lca.pw/T/
> > > >
> > > > To me data_race() would be more appropriate but that would need a changelog with
> > > > proper analysis of the tick_do_timer_cpu state machine.
> > >
> > > Please also an analysis of why the compiler cannot do any destructive
> > > optimizations in this case.  Maybe also comments.
> > I want to try the analysis above, as a newbie I have taught myself
> > LLVM for 3 years in my spare time ;-)
> 
> I am continuing to study LLVM/Clang's optimization behaviour on
> tick_do_timer_cpu, it is very interesting and fascinating, but as a
> newbie, I still need 3-4 week to go, could you wait for me a little
> while? ;-)

Please do take the time to get it right and to get it correctly
documented.  ;-)

							Thanx, Paul

> Many Thanks
> Zhouyi
> > >
> > > > One more thing on my TODO list, but feel free to beat me at it :-)
> > Please take your time ;-)   Please don't look my next possible email
> > as a reminder ;-)
> > >
> > > I know that feeling!  ;-)
> > >
> > Thanx, Zhouyi
> > >                                                         Thanx, Paul
diff mbox series

Patch

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 46789356f856..a7753465a0be 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -84,7 +84,7 @@  int tick_is_oneshot_available(void)
  */
 static void tick_periodic(int cpu)
 {
-	if (tick_do_timer_cpu == cpu) {
+	if (READ_ONCE(tick_do_timer_cpu) == cpu) {
 		raw_spin_lock(&jiffies_lock);
 		write_seqcount_begin(&jiffies_seq);
 
@@ -184,9 +184,9 @@  static void giveup_do_timer(void *info)
 {
 	int cpu = *(unsigned int *)info;
 
-	WARN_ON(tick_do_timer_cpu != smp_processor_id());
+	WARN_ON(READ_ONCE(tick_do_timer_cpu) != smp_processor_id());
 
-	tick_do_timer_cpu = cpu;
+	WRITE_ONCE(tick_do_timer_cpu, cpu);
 }
 
 static void tick_take_do_timer_from_boot(void)
@@ -217,8 +217,8 @@  static void tick_setup_device(struct tick_device *td,
 		 * If no cpu took the do_timer update, assign it to
 		 * this cpu:
 		 */
-		if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
-			tick_do_timer_cpu = cpu;
+		if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
+			WRITE_ONCE(tick_do_timer_cpu, cpu);
 
 			tick_next_period = ktime_get();
 #ifdef CONFIG_NO_HZ_FULL
@@ -235,7 +235,7 @@  static void tick_setup_device(struct tick_device *td,
 						!tick_nohz_full_cpu(cpu)) {
 			tick_take_do_timer_from_boot();
 			tick_do_timer_boot_cpu = -1;
-			WARN_ON(tick_do_timer_cpu != cpu);
+			WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu);
 #endif
 		}
 
@@ -407,8 +407,8 @@  EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);
  */
 void tick_handover_do_timer(void)
 {
-	if (tick_do_timer_cpu == smp_processor_id())
-		tick_do_timer_cpu = cpumask_first(cpu_online_mask);
+	if (READ_ONCE(tick_do_timer_cpu) == smp_processor_id())
+		WRITE_ONCE(tick_do_timer_cpu, cpumask_first(cpu_online_mask));
 }
 
 /*
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b0e3c9205946..2dac94b3187f 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -186,16 +186,16 @@  static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
 	 * If nohz_full is enabled, this should not happen because the
 	 * tick_do_timer_cpu never relinquishes.
 	 */
-	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+	if (unlikely(READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_NONE)) {
 #ifdef CONFIG_NO_HZ_FULL
 		WARN_ON_ONCE(tick_nohz_full_running);
 #endif
-		tick_do_timer_cpu = cpu;
+		WRITE_ONCE(tick_do_timer_cpu, cpu);
 	}
 #endif
 
 	/* Check, if the jiffies need an update */
-	if (tick_do_timer_cpu == cpu)
+	if (READ_ONCE(tick_do_timer_cpu) == cpu)
 		tick_do_update_jiffies64(now);
 
 	/*
@@ -534,7 +534,7 @@  static int tick_nohz_cpu_down(unsigned int cpu)
 	 * timers, workqueues, timekeeping, ...) on behalf of full dynticks
 	 * CPUs. It must remain online when nohz full is enabled.
 	 */
-	if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
+	if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu)
 		return -EBUSY;
 	return 0;
 }
@@ -849,8 +849,9 @@  static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 	 * Otherwise we can sleep as long as we want.
 	 */
 	delta = timekeeping_max_deferment();
-	if (cpu != tick_do_timer_cpu &&
-	    (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
+	if (cpu != READ_ONCE(tick_do_timer_cpu) &&
+	    (READ_ONCE(tick_do_timer_cpu) != TICK_DO_TIMER_NONE
+	     || !ts->do_timer_last))
 		delta = KTIME_MAX;
 
 	/* Calculate the next expiry time */
@@ -883,10 +884,10 @@  static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
 	 * do_timer() never invoked. Keep track of the fact that it
 	 * was the one which had the do_timer() duty last.
 	 */
-	if (cpu == tick_do_timer_cpu) {
-		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+	if (cpu == READ_ONCE(tick_do_timer_cpu)) {
+		WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);
 		ts->do_timer_last = 1;
-	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+	} else if (READ_ONCE(tick_do_timer_cpu) != TICK_DO_TIMER_NONE) {
 		ts->do_timer_last = 0;
 	}
 
@@ -1049,8 +1050,8 @@  static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 	 * invoked.
 	 */
 	if (unlikely(!cpu_online(cpu))) {
-		if (cpu == tick_do_timer_cpu)
-			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+		if (cpu == READ_ONCE(tick_do_timer_cpu))
+			WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);
 		/*
 		 * Make sure the CPU doesn't get fooled by obsolete tick
 		 * deadline if it comes back online later.
@@ -1073,11 +1074,12 @@  static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 		 * Keep the tick alive to guarantee timekeeping progression
 		 * if there are full dynticks CPUs around
 		 */
-		if (tick_do_timer_cpu == cpu)
+		if (READ_ONCE(tick_do_timer_cpu) == cpu)
 			return false;
 
 		/* Should not happen for nohz-full */
-		if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
+		if (WARN_ON_ONCE(READ_ONCE(tick_do_timer_cpu)
+				 == TICK_DO_TIMER_NONE))
 			return false;
 	}